diff sandbox/gradient_learner.py @ 426:d7611a3811f2

Moved incomplete stuff to sandbox
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Tue, 22 Jul 2008 15:20:25 -0400
parents gradient_learner.py@46c5c90019c2
children
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sandbox/gradient_learner.py	Tue Jul 22 15:20:25 2008 -0400
@@ -0,0 +1,71 @@
+
+from learner import *
+from tensor import *
+import gradient
+from compile import Function
+
+class GradientLearner(Learner):
+    """
+    Base class for gradient-based optimization of a training criterion
+    that can consist of two parts: an additive part over examples and
+    an example-independent part (usually called the regularizer).
+    The user provides a Theano formula that maps the fields of a minibatch (each a tensor
+    whose number of rows equals the minibatch size) and the parameters to output fields
+    (for the use function), one of which must be a cost that is the training criterion
+    to be minimized. Subclasses implement a training strategy that uses the Theano formula
+    to compute gradients and to compute outputs in the update method.
+    The inputs, parameters, and outputs are lists of Theano tensors,
+    while example_wise_cost and regularization_term are Theano tensors.
+    The user can specify a regularization coefficient that multiplies the regularization term.
+    The training algorithm looks for parameters that minimize
+       regularization_coefficient * regularization_term(parameters) +
+       sum_{inputs in training_set} example_wise_cost(inputs,parameters)
+    i.e. the regularization_term should not depend on the inputs, only on the parameters.
+    The learned function can map a subset of the inputs to a subset of the outputs (as long
+    as the input subset includes all the inputs required in the Theano expressions for the
+    selected outputs).
+    It is assumed that all the inputs are provided in the training set (as dataset fields
+    with the corresponding names), but not necessarily when using the learned function.
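+
+    Illustrative sketch of a hypothetical linear-regression setup (the field names and
+    the matrix/dot/sqr/sum constructors below are illustrative assumptions, not part of
+    this module):
+
+        x, y = matrix('x'), matrix('y')   # minibatch input and target fields
+        W = matrix('W')                   # parameter
+        regression = dot(x, W)            # output field
+        cost = sum(sqr(y - regression))   # example-wise part of the training criterion
+        learner = GradientLearner(inputs=[x, y], parameters=[W],
+                                  outputs=[regression], example_wise_cost=cost)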
+    """
+    def __init__(self, inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
+                 regularization_coefficient = astensor(1.0)):
+        self.inputs = inputs
+        self.outputs = outputs
+        self.parameters = parameters
+        self.example_wise_cost = example_wise_cost
+        self.regularization_term = regularization_term
+        self.regularization_coefficient = regularization_coefficient
+        # symbolic gradients of the two parts of the training criterion w.r.t. the parameters
+        self.parameters_example_wise_gradient = gradient.grad(example_wise_cost, parameters)
+        self.parameters_regularization_gradient = gradient.grad(self.regularization_coefficient * regularization_term, parameters)
+        # make the cost and the regularization term available as outputs as well
+        if example_wise_cost not in outputs:
+            outputs.append(example_wise_cost)
+        if regularization_term not in outputs:
+            outputs.append(regularization_term)
+        # compiled function computing the gradient contributions of the example-wise cost
+        # and of the regularization term w.r.t. the parameters
+        self.example_wise_gradient_fn = Function(inputs + parameters, 
+                                       [self.parameters_example_wise_gradient + self.parameters_regularization_gradient])
+        # cache of compiled use functions, keyed by the set of input and output field names
+        self.use_functions = {frozenset([input.name for input in inputs]+[output.name for output in outputs])
+                                        : Function(inputs, outputs)}
+
+    def use(self,input_dataset,output_fields=None,copy_inputs=True):
+        # obtain the function that maps the desired inputs to desired outputs
+        input_fields = input_dataset.fieldNames()
+        # map names of input fields to Theano tensors in self.inputs
+        # (assumption: select the tensors whose names match the dataset's field names)
+        input_variables = [input for input in self.inputs if input.name in input_fields]
+        if output_fields is None: output_fields = [output.name for output in self.outputs]
+        # handle special case of inputs that are directly copied into outputs
+        # map names of output fields to Theano tensors in self.outputs
+        # (assumption: select the tensors whose names match the requested output fields)
+        output_variables = [output for output in self.outputs if output.name in output_fields]
+        use_function_key = frozenset(input_fields+output_fields)
+        if use_function_key not in self.use_functions:
+            self.use_functions[use_function_key]=Function(input_variables,output_variables)
+        use_function = self.use_functions[use_function_key]
+        # return a dataset that computes the outputs
+        return input_dataset.apply_function(use_function,input_fields,output_fields,copy_inputs,compute_now=True)
+    
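+    # Illustrative (hypothetical) call, assuming a dataset whose field names match
+    # the names given to the input tensors:
+    #   predictions = learner.use(test_set, output_fields=['regression'])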
+
+class StochasticGradientDescent(object):
+    def update_parameters(self):
+        # parameter update step, left unimplemented here
+        raise NotImplementedError
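+
+# A minimal sketch of the kind of update rule this class is meant to provide (an
+# assumption based on the class name, not part of the original code): move each
+# parameter value against its gradient by a step proportional to a learning rate.
+def _sgd_step_sketch(parameter_values, gradient_values, learning_rate=0.01):
+    # one stochastic gradient descent step on plain numeric parameter values
+    return [p - learning_rate * g for p, g in zip(parameter_values, gradient_values)]
+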
+class StochasticGradientLearner(GradientLearner,StochasticGradientDescent):
+    def __init__(self,inputs, parameters, outputs, example_wise_cost, regularization_term=astensor(0.0),
+                 regularization_coefficient = astensor(1.0)):
+        GradientLearner.__init__(self, inputs, parameters, outputs, example_wise_cost,
+                                 regularization_term, regularization_coefficient)
+    def update(self):
+        raise NotImplementedError