pylearn: comparison gradient_learner.py @ 20:266c68cb6136
Minor edits, plus adding an untested ApplyFunctionDataset for GradientLearner, still in the works.
| author   | bengioy@bengiomac.local         |
|----------|---------------------------------|
| date     | Mon, 07 Apr 2008 09:48:39 -0400 |
| parents  | 5ede27026e05                    |
| children | 526e192b0699                    |
```diff
--- gradient_learner.py@19:57f4015e2e09
+++ gradient_learner.py@20:266c68cb6136
@@ -1,50 +1,60 @@
 
 from learner import *
 from tensor import *
 import gradient
 from compile import Function
-from gradient_based_optimizer import *
 
 class GradientLearner(Learner):
     """
     Base class for gradient-based optimization of a training criterion
     that can consist of two parts: an additive part over examples, and
     an example-independent part (usually called the regularizer).
     The user provides a Theano formula that maps the fields of a training example
     and parameters to output fields (for the use function), one of which must be a cost
     that is the training criterion to be minimized. Subclasses implement
-    a training strategy that uses the function to compute gradients and
+    a training strategy that uses the Theano formula to compute gradients and
     to compute outputs in the update method.
     The inputs, parameters, and outputs are lists of Theano tensors,
     while the example_wise_cost and regularization_term are Theano tensors.
     The user can specify a regularization coefficient that multiplies the regularization term.
     The training algorithm looks for parameters that minimize
-    regularization_coefficienet * regularization_term(parameters) +
+    regularization_coefficient * regularization_term(parameters) +
     sum_{inputs in training_set} example_wise_cost(inputs,parameters)
     i.e. the regularization_term should not depend on the inputs, only on the parameters.
     The learned function can map a subset of inputs to a subset of outputs (as long as the inputs subset
     includes all the inputs required in the Theano expression for the selected outputs).
-    It is assumed that all the inputs are provided in the training set, but
-    not necessarily when using the learned function.
+    It is assumed that all the inputs are provided in the training set (as dataset fields
+    with the corresponding name), but not necessarily when using the learned function.
     """
     def __init__(self, inputs, parameters, outputs, example_wise_cost, regularization_term,
-                 gradient_based_optimizer=StochasticGradientDescent(), regularization_coefficient=astensor(1.0)):
+                 regularization_coefficient=astensor(1.0)):
         self.inputs = inputs
         self.outputs = outputs
         self.parameters = parameters
         self.example_wise_cost = example_wise_cost
         self.regularization_term = regularization_term
-        self.gradient_based_optimizer = gradient_based_optimizer
         self.regularization_coefficient = regularization_coefficient
         self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters)
-        self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization, parameters)
+        self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization_term, parameters)
         if example_wise_cost not in outputs:
             outputs.append(example_wise_cost)
         if regularization_term not in outputs:
             outputs.append(regularization_term)
         self.example_wise_gradient_fn = Function(inputs + parameters,
                                                  [self.parameters_example_wise_gradient + self.parameters_regularization_gradient])
-        self.use_functions = {frozenset([input.name for input in inputs]): Function(inputs, outputs)}
+        self.use_functions = {frozenset([input.name for input in inputs] + [output.name for output in outputs]):
+                              Function(inputs, outputs)}
 
-    def update(self, training_set):
+    def use(self, input_dataset, output_fields=None, copy_inputs=True):
+        # obtain the function that maps the desired inputs to desired outputs
+        input_fields = input_dataset.fieldNames()
+        if output_fields is None: output_fields = [output.name for output in self.outputs]
+        # handle special case of inputs that are directly copied into outputs
+
+        use_function_key = frozenset(input_fields + output_fields)
+        if use_function_key not in self.use_functions:
+            self.use_functions[use_function_key] = Function(input_fields, output_fields)
+        use_function = self.use_functions[use_function_key]
+        # return a virtual dataset that computes the outputs on demand
+        return input_dataset.apply_function(use_function, input_fields, output_fields, copy_inputs, accept_minibatches=???)
 
```
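The docstring's training criterion separates an example-independent regularizer from a sum of per-example costs. A minimal sketch of that decomposition, independent of the (historical) Theano API shown above; `example_wise_cost` and `regularization_term` below are hypothetical concrete stand-ins for the symbolic expressions the user would supply:

```python
import numpy as np

def example_wise_cost(x, y, w):
    # hypothetical per-example cost: squared error of a linear predictor
    return (np.dot(x, w) - y) ** 2

def regularization_term(w):
    # hypothetical regularizer (L2 penalty); depends only on the
    # parameters, as the docstring requires
    return np.dot(w, w)

def training_criterion(X, Y, w, regularization_coefficient=1.0):
    # regularization_coefficient * regularization_term(parameters)
    #   + sum_{inputs in training_set} example_wise_cost(inputs, parameters)
    # e.g. training_criterion(np.eye(2), np.ones(2), np.zeros(2)) == 2.0
    return (regularization_coefficient * regularization_term(w) +
            sum(example_wise_cost(x, y, w) for x, y in zip(X, Y)))
```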
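The new `use` method compiles one function per distinct combination of input and output field names and caches it in `self.use_functions`. The caching pattern itself, a dictionary keyed by an order-independent, hashable set of names, can be sketched in plain Python; `compile_fn` here is a hypothetical stand-in for the `Function` constructor:

```python
class FunctionCache(object):
    """Cache compiled functions by the set of field names they involve."""

    def __init__(self, compile_fn):
        self.compile_fn = compile_fn
        self.functions = {}

    def get(self, input_fields, output_fields):
        # frozenset is hashable and order-independent, so ('x', 'y') and
        # ('y', 'x') map to the same compiled function, mirroring the
        # frozenset keys built in GradientLearner.__init__; note that, as
        # in the original code, a name appearing as both input and output
        # is folded into a single key element
        key = frozenset(input_fields) | frozenset(output_fields)
        if key not in self.functions:
            self.functions[key] = self.compile_fn(input_fields, output_fields)
        return self.functions[key]
```

A cache like this avoids recompiling when the same field combination is requested repeatedly, e.g. once per minibatch of a dataset.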