pylearn: diff sandbox/gradient_learner.py @ 426:d7611a3811f2
Moved incomplete stuff to sandbox
author | Yoshua Bengio <bengioy@iro.umontreal.ca>
---|---
date | Tue, 22 Jul 2008 15:20:25 -0400
parents | gradient_learner.py@46c5c90019c2
children |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sandbox/gradient_learner.py	Tue Jul 22 15:20:25 2008 -0400
@@ -0,0 +1,71 @@
+
+from learner import *
+from tensor import *
+import gradient
+from compile import Function
+
+class GradientLearner(Learner):
+    """
+    Base class for gradient-based optimization of a training criterion
+    that can consist of two parts: an additive part over examples, and
+    an example-independent part (usually called the regularizer).
+    The user provides a Theano formula that maps the fields of a minibatch (each a tensor with the
+    same number of rows = minibatch size) and the parameters to output fields (for the use function),
+    one of which must be a cost that is the training criterion to be minimized. Subclasses implement
+    a training strategy that uses the Theano formula to compute gradients and
+    to compute outputs in the update method.
+    The inputs, parameters, and outputs are lists of Theano tensors,
+    while the example_wise_cost and regularization_term are Theano tensors.
+    The user can specify a regularization coefficient that multiplies the regularization term.
+    The training algorithm looks for parameters that minimize
+        regularization_coefficient * regularization_term(parameters)
+        + sum_{inputs in training_set} example_wise_cost(inputs, parameters)
+    i.e. the regularization_term should not depend on the inputs, only on the parameters.
+    The learned function can map a subset of inputs to a subset of outputs (as long as the input subset
+    includes all the inputs required in the Theano expression for the selected outputs).
+    It is assumed that all the inputs are provided in the training set (as dataset fields
+    with the corresponding names), but not necessarily when using the learned function.
+    """
+    def __init__(self, inputs, parameters, outputs, example_wise_cost,
+                 regularization_term=astensor(0.0),
+                 regularization_coefficient=astensor(1.0)):
+        self.inputs = inputs
+        self.outputs = outputs
+        self.parameters = parameters
+        self.example_wise_cost = example_wise_cost
+        self.regularization_term = regularization_term
+        self.regularization_coefficient = regularization_coefficient
+        self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters)
+        self.parameters_regularization_gradient = gradient.grad(
+            self.regularization_coefficient * regularization_term, parameters)
+        if example_wise_cost not in outputs:
+            outputs.append(example_wise_cost)
+        if regularization_term not in outputs:
+            outputs.append(regularization_term)
+        self.example_wise_gradient_fn = Function(
+            inputs + parameters,
+            [self.parameters_example_wise_gradient + self.parameters_regularization_gradient])
+        self.use_functions = {
+            frozenset([input.name for input in inputs] + [output.name for output in outputs]):
+            Function(inputs, outputs)}
+
+    def use(self, input_dataset, output_fields=None, copy_inputs=True):
+        # obtain the function that maps the desired inputs to desired outputs
+        input_fields = input_dataset.fieldNames()
+        # map names of input fields to Theano tensors in self.inputs
+        input_variables = ???
+        if output_fields is None:
+            output_fields = [output.name for output in self.outputs]
+        # handle special case of inputs that are directly copied into outputs
+        # map names of output fields to Theano tensors in self.outputs
+        output_variables = ???
+        # the cache key must be hashable: use a frozenset of field names, as in __init__
+        use_function_key = frozenset(input_fields + output_fields)
+        if not self.use_functions.has_key(use_function_key):
+            self.use_functions[use_function_key] = Function(input_variables, output_variables)
+        use_function = self.use_functions[use_function_key]
+        # return a dataset that computes the outputs
+        return input_dataset.apply_function(use_function, input_fields, output_fields,
+                                            copy_inputs, compute_now=True)
+
+
+class StochasticGradientDescent(object):
+    def update_parameters(self):
+        raise NotImplementedError  # stub: left unfinished in this changeset
+
+class StochasticGradientLearner(GradientLearner, StochasticGradientDescent):
+    def __init__(self, inputs, parameters, outputs, example_wise_cost,
+                 regularization_term=astensor(0.0),
+                 regularization_coefficient=astensor(1.0)):
+        raise NotImplementedError  # stub: left unfinished in this changeset
+
+    def update(self):
+        raise NotImplementedError  # stub: left unfinished in this changeset
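
The two ??? placeholders in use() are the part left unfinished here: turning dataset field names back into the matching Theano variables. Below is a minimal sketch of one way they could be filled in, assuming (as the docstring and the frozenset cache key suggest) that every tensor in self.inputs and self.outputs carries a .name equal to its dataset field name; the helper name map_fields_to_variables is hypothetical and not part of this changeset.

    # Hypothetical helper (not in the changeset): resolve dataset field names to
    # the Theano variables with the same .name, preserving the field order.
    def map_fields_to_variables(field_names, variables):
        by_name = dict((v.name, v) for v in variables)  # assumes unique, non-None names
        return [by_name[name] for name in field_names]

    # Inside GradientLearner.use(), the two placeholders could then read:
    #   input_variables  = map_fields_to_variables(input_fields,  self.inputs)
    #   output_variables = map_fields_to_variables(output_fields, self.outputs)

A field with no matching variable would raise a KeyError here; whether such fields should instead be copied straight through to the outputs (the "directly copied into outputs" case the comment mentions) is a design choice the changeset leaves open.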
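
StochasticGradientDescent and StochasticGradientLearner are empty stubs in this changeset. The sketch below shows one plausible way they could be completed with plain minibatch SGD, assuming the code lives in the same module (so astensor and GradientLearner are in scope), that example_wise_gradient_fn returns one gradient array per parameter, and that minibatches arrive as lists of numeric field values ordered like self.inputs; the learning_rate argument and the minibatch protocol are assumptions, not part of the commit.

    class StochasticGradientDescent(object):
        def update_parameters(self, parameter_values, gradients, learning_rate):
            # one plain SGD step: theta <- theta - learning_rate * gradient
            return [p - learning_rate * g
                    for p, g in zip(parameter_values, gradients)]

    class StochasticGradientLearner(GradientLearner, StochasticGradientDescent):
        def __init__(self, inputs, parameters, outputs, example_wise_cost,
                     regularization_term=astensor(0.0),
                     regularization_coefficient=astensor(1.0),
                     learning_rate=0.01):
            GradientLearner.__init__(self, inputs, parameters, outputs, example_wise_cost,
                                     regularization_term, regularization_coefficient)
            self.learning_rate = learning_rate

        def update(self, minibatches, parameter_values):
            # One SGD step per minibatch. `minibatches` is any iterable of lists of
            # field values ordered like self.inputs; the dataset-side iteration API
            # is not settled in this changeset.
            for fields in minibatches:
                gradients = self.example_wise_gradient_fn(*(list(fields) + list(parameter_values)))
                parameter_values = self.update_parameters(parameter_values, gradients,
                                                          self.learning_rate)
            return parameter_values

How gradients and parameter values are actually stored (the return type of the compiled Function, in-place versus functional updates) is left open by the changeset, so this is a shape sketch rather than a drop-in implementation.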