changeset 118:d0a1bd0378c6

Finished draft of OneHiddenLayerNNetClassifier to debut learner.py
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Wed, 07 May 2008 15:07:56 -0400
parents 88257dfedf8c
children 7ffecde9dadc
files learner.py linear_regression.py mlp.py
diffstat 3 files changed, 80 insertions(+), 56 deletions(-)
--- a/learner.py	Wed May 07 09:16:04 2008 -0400
+++ b/learner.py	Wed May 07 15:07:56 2008 -0400
@@ -1,6 +1,6 @@
 
-from dataset import *
-from compile import Function
+from dataset import AttributesHolder
+import compile
     
 class Learner(AttributesHolder):
     """Base class for learning algorithms, provides an interface
@@ -173,8 +173,8 @@
         if key not in self.use_functions_dictionary:
             use_input_attributes = self.useInputAttributes()
             use_output_attributes = self.useOutputAttributes()
-            complete_f = Function(self.names2OpResults(input_fields+use_input_attributes),
-                                  self.names2OpResults(output_fields+use_output_attributes))
+            complete_f = compile.function(self.names2OpResults(input_fields+use_input_attributes),
+                                          self.names2OpResults(output_fields+use_output_attributes))
             def f(*input_field_values):
                 input_attribute_values = self.names2attributes(use_input_attributes)
                 results = complete_f(*(input_field_values + input_attribute_values))
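
Note: compile.function, as used in this hunk, takes a list of input variables and a
list of output variables and returns an ordinary Python callable. A minimal sketch of
that call pattern, assuming compile is theano's compile module (imported at the top of
learner.py) and t is theano.tensor; the value names are hypothetical:

    # minimal sketch of the inputs-list / outputs-list call pattern
    a = t.matrix('a')
    b = t.matrix('b')
    f = compile.function([a, b], [a + b])
    # f maps numeric arrays to a list holding one value per declared output
    [s] = f(a_value, b_value)
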
@@ -273,12 +273,13 @@
 
     def __init__(self):
         TLearner.__init__(self)
-        self.update_minibatch_function =
-        Function(self.names2OpResults(self.updateMinibatchOutputAttributes()+
-                                      self.updateMinibatchInputFields()),
+        self.update_minibatch_function = compile.function(
+            self.names2OpResults(self.updateMinibatchInputAttributes()+
+                                 self.updateMinibatchInputFields()),
                  self.names2OpResults(self.updateMinibatchOutputAttributes()))
-        self.update_end_function = Function(self.names2OpResults(self.updateEndInputAttributes()),
-                                            self.names2OpResults(self.updateEndOutputAttributes()))
+        self.update_end_function = compile.function(
+            self.names2OpResults(self.updateEndInputAttributes()),
+            self.names2OpResults(self.updateEndOutputAttributes()))
 
     def updateMinibatchInputFields(self):
         raise AbstractFunction()
@@ -310,7 +311,9 @@
         # make sure all required fields are allocated and initialized
         self.allocate(minibatch)
         self.setAttributes(self.updateMinibatchOutputAttributes(),
-                           self.update_minibatch_function(*(self.names2attributes(self.updateMinibatchInputAttributes()))
+                           # concatenate the attribute values and the field values, then apply the update function
+                           self.update_minibatch_function(
+                               *(self.names2attributes(self.updateMinibatchInputAttributes())
                                                           + minibatch(self.updateMinibatchInputFields())))
         
     def isLastEpoch(self):
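
Note: the update above concatenates the current attribute values with the minibatch
field values and unpacks the result into the compiled function. A small illustration
of that calling pattern, with hypothetical values:

    # f is a compiled function over (attribute values..., field values...)
    attrs = [W_value, b_value]              # as returned by names2attributes(...)
    fields = [input_array, target_array]    # as returned by minibatch(...)
    results = f(*(attrs + fields))          # i.e. f(W_value, b_value, input_array, target_array)
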
@@ -347,17 +350,40 @@
     Specialization of MinibatchUpdatesTLearner in which the minibatch updates
     are obtained by performing an online (minibatch-based) gradient step.
 
-    Sub-classes must define the following methods:
-    
+    Sub-classes must define the following:
+
+      self._learning_rate (may be changed by the sub-class between epochs or minibatches)
+
+      self.lossAttribute() = the name of the attribute that holds the loss
+
     """
     def __init__(self,truly_online=False):
         """
         If truly_online then only one pass is made through the training set passed to update().
-        
+
+        SUBCLASSES SHOULD CALL THIS CONSTRUCTOR ONLY AFTER HAVING DEFINED ALL THEIR THEANO FORMULAS
         """
         self.truly_online=truly_online
 
+        # create the formulas for the gradient descent update (new_param = param - learning_rate * dloss/dparam)
+        old_params = [getattr(self, "_"+name) for name in self.parameterAttributes()]
+        new_params_names = ["_new_"+name for name in self.parameterAttributes()]
+        loss = getattr(self, self.lossAttribute())
+        self.setAttributes(new_params_names,
+                           [t.add_inplace(param,
+                                          - self._learning_rate*t.grad(loss,param))
+                            for param in old_params])
+
     def isLastEpoch(self):
         return self.truly_online
 
+    def updateMinibatchInputAttributes(self):
+        return self.parameterAttributes()
+    
+    def updateMinibatchOutputAttributes(self):
+        return ["_new"+name for name in self.parameterAttributes()]
+    
+    def updateEndInputAttributes(self):
+        return self.parameterAttributes()
 
+
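
Note: the constructor added to GradientBasedLearner above assembles, for each
parameter, the usual minibatch gradient descent step
new_param = param - learning_rate * d(loss)/d(param). Written out for a single weight
matrix with the same ops the hunk uses (a sketch; W and loss stand for formulas the
subclass must define before calling this constructor):

    # sketch of the per-parameter update formula built in the constructor
    g = t.grad(loss, W)                            # symbolic gradient of the loss wrt W
    new_W = t.add_inplace(W, - learning_rate * g)  # one in-place descent step
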
--- a/linear_regression.py	Wed May 07 09:16:04 2008 -0400
+++ b/linear_regression.py	Wed May 07 15:07:56 2008 -0400
@@ -114,7 +114,7 @@
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
 
-        OneShotTLearner.__init__(self)
+        MinibatchUpdatesTLearner.__init__(self)
             
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
--- a/mlp.py	Wed May 07 09:16:04 2008 -0400
+++ b/mlp.py	Wed May 07 15:07:56 2008 -0400
@@ -2,6 +2,7 @@
 from learner import *
 from theano import tensor as t
 from theano.scalar import as_scalar
+from nnet_ops import *
 
 # this is one of the simplest example of learner, and illustrates
 # the use of theano
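
Note: the nnet_ops import supplies crossentropy_softmax_1hot, which the classifier
below relies on. Its contract is inferred here from its use in this changeset rather
than from documentation: it fuses the softmax with the cross-entropy loss,

    # inferred contract (an assumption, not a documented signature):
    # per-example negative log-likelihood and the softmax output from one op
    nll, softmax_output = crossentropy_softmax_1hot(activations, target)
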
@@ -82,64 +83,61 @@
     def updateMinibatchInputFields(self):
         return ["input","target"]
     
-    def updateMinibatchInputAttributes(self):
-        return self.parameterAttributes()
-    
-    def updateMinibatchOutputAttributes(self):
-        return self.parameterAttributes()
-    
-    def updateEndInputAttributes(self):
-        return self.parameterAttributes()
-
     def updateEndOutputAttributes(self):
         return ["regularization_term"]
 
+    def lossAttribute(self):
+        return "minibatch_criterion"
+    
     def defaultOutputFields(self, input_fields):
         output_fields = ["output", "output_class",]
         if "target" in input_fields:
             output_fields += ["class_error", "nll"]
         return output_fields
         
-    def __init__(self):
+    def __init__(self,n_hidden,n_classes,learning_rate,init_range=1.):
+        self._n_outputs = n_classes
+        self._n_hidden = n_hidden
+        self._init_range = init_range
+        self.learning_rate = learning_rate # this is the float
+        self._learning_rate = t.scalar('learning_rate') # this is the symbol
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
-        self._theta = t.matrix('theta')
-        self._W = self._theta[:,1:] 
-        self._b = self._theta[:,0]
-        self._XtX = t.matrix('XtX')
-        self._XtY = t.matrix('XtY')
-        self._extended_input = t.prepend_one_to_each_row(self._input)
-        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
-        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
-        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
-        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
-        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
-
-        OneShotTLearner.__init__(self)
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
+        self._W1 = t.matrix('W1')
+        self._W2 = t.matrix('W2')
+        self._b1 = t.row('b1')
+        self._b2 = t.row('b2')
+        self._regularizer = self._L2_regularizer * (t.sum(t.sqr(self._W1)) + t.sum(t.sqr(self._W2)))
+        self._output_activations = self._b2 + t.dot(t.tanh(self._b1 + t.dot(self._input, self._W1.T)), self._W2.T)
+        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target)
+        self._output_class = t.argmax(self._output,1)
+        # 1 for each example whose predicted class differs from its target, 0 otherwise
+        self._class_error = t.neq(self._output_class,self._target)
+        self._minibatch_criterion = self._nll + self._regularizer / t.shape(self._input)[0]
+        MinibatchUpdatesTLearner.__init__(self)
             
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
-        minibatch_n_outputs = minibatch["target"].shape[1]
         if not self._n_inputs:
-            self._n_inputs = minibatch_n_inputs 
-            self._n_outputs = minibatch_n_outputs
-            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
-            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
-            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self._n_inputs = minibatch_n_inputs
+            self.b1 = numpy.zeros(self._n_hidden)
+            self.b2 = numpy.zeros(self._n_outputs)
             self.forget()
-        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we resize and forget everything
+        elif self._n_inputs!=minibatch_n_inputs:
+            # if the input changes dimension on the fly, we resize and forget everything
             self.forget()
             
     def forget(self):
-        if self._n_inputs and self._n_outputs:
-            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
-            self.XtY.resize((1+self.n_inputs,self.n_outputs))
-            self.XtX.data[:,:]=0
-            self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+        if self._n_inputs:
+            r = self._init_range/math.sqrt(self._n_inputs)
+            self.W1 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_hidden,self._n_inputs))
+            r = self._init_range/math.sqrt(self._n_hidden)
+            self.W2 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_outputs,self._n_hidden))
+            self.b1[:]=0
+            self.b2[:]=0
 
 
 class MLP(MinibatchUpdatesTLearner):
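
Note: numerically, the symbolic formulas in the constructor above describe a standard
one-hidden-layer classifier, a tanh hidden layer followed by a softmax output. A plain
numpy sketch of the same forward pass (illustration only; forget() also relies on
math.sqrt, which assumes math is imported near the top of mlp.py, outside the lines
shown in this hunk):

    # numpy sketch of the forward pass defined symbolically in __init__
    h = numpy.tanh(b1 + numpy.dot(x, W1.T))        # (n_examples, n_hidden)
    act = b2 + numpy.dot(h, W2.T)                  # (n_examples, n_classes)
    e = numpy.exp(act - act.max(axis=1)[:, None])  # numerically stable softmax
    p = e / e.sum(axis=1)[:, None]                 # class probabilities
    y = p.argmax(axis=1)                           # predicted class per example
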
@@ -236,7 +234,7 @@
     def __init__(self):
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
         self._theta = t.matrix('theta')
         self._W = self._theta[:,1:] 
         self._b = self._theta[:,0]
@@ -245,7 +243,7 @@
         self._extended_input = t.prepend_one_to_each_row(self._input)
         self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
         self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._regularizer = self._L2_regularizer * t.sum(t.sqr(self._W))
         self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
@@ -272,5 +270,5 @@
             self.XtY.resize((1+self.n_inputs,self.n_outputs))
             self.XtX.data[:,:]=0
             self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+            numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
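
Note: taken together, the changes leave the new classifier usable roughly as follows.
The constructor signature comes from the hunk above; the dataset plumbing is
hypothetical, since this changeset does not show the dataset module:

    # hypothetical usage; minibatches must carry "input" and "target" fields
    learner = OneHiddenLayerNNetClassifier(n_hidden=50, n_classes=10,
                                           learning_rate=0.01)
    learner.update(training_set)  # minibatch gradient steps over the training set
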