changeset 134:3f4e5c9bdc5e

Fixes to ApplyFunctionDataSet and other things to make learner and mlp work
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Fri, 09 May 2008 17:38:57 -0400
parents b4657441dd65
children 0d8e721cc63c ad144fa72bf5
files dataset.py learner.py lookup_list.py mlp.py
diffstat 4 files changed, 65 insertions(+), 60 deletions(-)
--- a/dataset.py	Fri May 09 13:38:54 2008 -0400
+++ b/dataset.py	Fri May 09 17:38:57 2008 -0400
@@ -16,6 +16,11 @@
         raise AbstractFunction()
 
     def setAttributes(self,attribute_names,attribute_values,make_copies=False):
+        """
+        Allow attribute_values to be a single value (rather than a list) when attribute_names has length 1.
+        """
+        if len(attribute_names)==1 and not isinstance(attribute_values,(list,tuple)):
+            attribute_values = [attribute_values]
         if make_copies:
             for name,value in zip(attribute_names,attribute_values):
                 self.__setattr__(name,copy.deepcopy(value))
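
To illustrate the new convenience, here is a minimal standalone sketch (not code from the repository; the Holder class is hypothetical): a lone value is wrapped in a list when exactly one attribute name is given.

    class Holder(object):
        def setAttributes(self, attribute_names, attribute_values):
            # same wrapping rule as the patched setAttributes above
            if len(attribute_names) == 1 and not isinstance(attribute_values, (list, tuple)):
                attribute_values = [attribute_values]
            for name, value in zip(attribute_names, attribute_values):
                setattr(self, name, value)

    h = Holder()
    h.setAttributes(["learning_rate"], 0.01)  # no longer needs [0.01]
    assert h.learning_rate == 0.01
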
@@ -1113,14 +1118,14 @@
       self.function=function
       self.output_names=output_names
       self.minibatch_mode=minibatch_mode
-      DataSet.__init__(description,fieldtypes)
+      DataSet.__init__(self,description,fieldtypes)
       self.valuesHStack = values_hstack if values_hstack else input_dataset.valuesHStack
       self.valuesVStack = values_vstack if values_vstack else input_dataset.valuesVStack
 
   def __len__(self):
       return len(self.input_dataset)
 
-  def fieldnames(self):
+  def fieldNames(self):
       return self.output_names
 
   def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset):
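
The one-word fix above (adding self) matters because DataSet.__init__ is called as an unbound method: without self, the description argument would have to stand in for the instance. A toy reproduction, with hypothetical class names:

    class Base(object):
        def __init__(self, description, fieldtypes=None):
            self.description = description

    class Derived(Base):
        def __init__(self, description):
            # Base.__init__(description) raises a TypeError here: the unbound
            # method needs the instance, and 'description' cannot stand in for self
            Base.__init__(self, description)  # the corrected call

    d = Derived("toy dataset")
    assert d.description == "toy dataset"
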
@@ -1128,8 +1133,8 @@
           def __init__(self,output_dataset):
               self.input_dataset=output_dataset.input_dataset
               self.output_dataset=output_dataset
-              self.input_iterator=input_dataset.minibatches(minibatch_size=minibatch_size,
-                                                            n_batches=n_batches,offset=offset).__iter__()
+              self.input_iterator=self.input_dataset.minibatches(minibatch_size=minibatch_size,
+                                                                 n_batches=n_batches,offset=offset).__iter__()
 
           def __iter__(self): return self
 
@@ -1137,7 +1142,7 @@
               function_inputs = self.input_iterator.next()
               all_output_names = self.output_dataset.output_names
               if self.output_dataset.minibatch_mode:
-                  function_outputs = self.output_dataset.function(function_inputs)
+                  function_outputs = self.output_dataset.function(*function_inputs)
               else:
                   input_examples = zip(*function_inputs)
                   output_examples = [self.output_dataset.function(input_example)
@@ -1150,7 +1155,7 @@
                   return all_outputs
               return Example(fieldnames,[all_outputs[name] for name in fieldnames])
 
-      return ApplyFunctionIterator(self.input_dataset,self)
+      return ApplyFunctionIterator(self)
 
   def __iter__(self): # only implemented for increased efficiency
       class ApplyFunctionSingleExampleIterator(object):
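
For the minibatch_mode fix above: the user-supplied function expects one argument per input field, so the iterator must unpack the field values rather than pass them as a single sequence. A hedged sketch of the calling convention (f and the field values are stand-ins, not repository code):

    def f(inputs, targets):                   # hypothetical two-field minibatch function
        return [inputs, targets]

    function_inputs = ([1, 2, 3], [0, 1, 0])  # stand-in for the Example of field values
    outputs = f(*function_inputs)             # the fixed call: one argument per field
    # f(function_inputs) would pass a single 2-tuple and fail with a TypeError
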
--- a/learner.py	Fri May 09 13:38:54 2008 -0400
+++ b/learner.py	Fri May 09 17:38:57 2008 -0400
@@ -1,6 +1,7 @@
 
-from dataset import AttributesHolder,AbstractFunction
-import compile
+from dataset import AttributesHolder,AbstractFunction,ApplyFunctionDataSet,DataSet,CachedDataSet
+import theano
+from theano import compile
 from theano import tensor as t
     
 class Learner(AttributesHolder):
@@ -132,22 +133,16 @@
         """
        return self.names2attributes(self.attributeNames())
 
-    def names2attributes(self,names,return_copy=False):
+    def names2attributes(self,names):
         """
        Private helper function that maps a list of attribute names to the
        list of corresponding attribute values.
         """
-        if return_copy:
-            return [copy.deepcopy(self.__getattribute__(name).data) for name in names]
-        else:
-            return [self.__getattribute__(name).data for name in names]
-
-    def updateInputAttributes(self):
-        """
-        A subset of self.attributeNames() which are the names of attributes needed by update() in order
-        to do its work.
-        """
-        raise AbstractFunction()
+        res=[]
+        for name in names:
+            assert name in self.attributeNames()  # guard against unknown attribute names
+            res.append(self.__getattribute__(name))
+        return res
 
     def useInputAttributes(self):
         """
@@ -156,15 +151,6 @@
         """
         raise AbstractFunction()
 
-    def updateOutputAttributes(self):
-        """
-        A subset of self.attributeNames() which are the names of attributes modified/created by update() in order
-        to do its work.
-
-        By default these are inferred from the various update output attributes:
-        """
-        return ["parameters"] + self.updateMinibatchOutputAttributes() + self.updateEndOutputAttributes()
-
     def useOutputAttributes(self):
         """
         A subset of self.attributeNames() which are the names of attributes modified/created by use() in order
@@ -210,6 +196,7 @@
 
     def __init__(self):
         Learner.__init__(self)
+        self.use_functions_dictionary={}
 
     def defaultOutputFields(self, input_fields):
         """
@@ -232,7 +219,7 @@
             for attribute in stats_collector_inputs:
                 if attribute not in input_fields:
                     output_fields.append(attribute)
-        key = (input_fields,output_fields)
+        key = (tuple(input_fields),tuple(output_fields))
         if key not in self.use_functions_dictionary:
             use_input_attributes = self.useInputAttributes()
             use_output_attributes = self.useOutputAttributes()
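
The key change above is needed because lists are unhashable in Python, so a (list, list) pair cannot index a dict; building the key from tuples makes memoization of compiled functions possible. A minimal illustration (the cache dict is hypothetical):

    cache = {}
    input_fields, output_fields = ["input"], ["output", "nll"]
    try:
        cache[(input_fields, output_fields)] = "compiled"  # TypeError: unhashable type: 'list'
    except TypeError:
        pass
    cache[(tuple(input_fields), tuple(output_fields))] = "compiled"  # tuples hash fine
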
@@ -240,7 +227,7 @@
                                           self.names2OpResults(output_fields+use_output_attributes))
             def f(*input_field_values):
                 input_attribute_values = self.names2attributes(use_input_attributes)
-                results = complete_f(*(input_field_values + input_attribute_values))
+                results = complete_f(*(list(input_field_values) + input_attribute_values))
                 output_field_values = results[0:len(output_fields)]
                 output_attribute_values = results[len(output_fields):len(results)]
                 if use_output_attributes:
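
The list(...) conversion above avoids a TypeError: *args arrive as a tuple, and Python refuses to concatenate a tuple with a list. A two-line illustration:

    input_field_values = (1, 2)   # what f(*input_field_values) receives
    input_attribute_values = [3, 4]
    # input_field_values + input_attribute_values would raise TypeError
    combined = list(input_field_values) + input_attribute_values  # [1, 2, 3, 4]
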
@@ -276,13 +263,11 @@
 
     def __init__(self):
         TLearner.__init__(self)
-        self.update_minibatch_function = compile.function
-        (self.names2OpResults(self.updateMinibatchOutputAttributes()+
-                              self.updateMinibatchInputFields()),
-                 self.names2OpResults(self.updateMinibatchOutputAttributes()))
-        self.update_end_function = compile.function
-        (self.names2OpResults(self.updateEndInputAttributes()),
-         self.names2OpResults(self.updateEndOutputAttributes()))
+        self.update_minibatch_function = compile.function(self.names2OpResults(self.updateMinibatchOutputAttributes()+
+                                                                               self.updateMinibatchInputFields()),
+                                                          self.names2OpResults(self.updateMinibatchOutputAttributes()))
+        self.update_end_function = compile.function(self.names2OpResults(self.updateEndInputAttributes()),
+                                                    self.names2OpResults(self.updateEndOutputAttributes()))
 
     def allocate(self, minibatch):
         """
@@ -316,18 +301,17 @@
 
     def updateEnd(self):
         self.setAttributes(self.updateEndOutputAttributes(),
-                           self.update_end_function
-                           (self.names2attributes(self.updateEndInputAttributes())))
+                           self.update_end_function(*self.names2attributes(self.updateEndInputAttributes())))
         self.parameters = self.names2attributes(self.parameterAttributes())
         
     def updateMinibatch(self,minibatch):
         # make sure all required fields are allocated and initialized
         self.allocate(minibatch)
+        input_attributes = self.names2attributes(self.updateMinibatchInputAttributes())
+        input_fields = minibatch(*self.updateMinibatchInputFields())
         self.setAttributes(self.updateMinibatchOutputAttributes(),
                            # concatenate the attribute values and field values and then apply update fn
-                           self.update_minibatch_function(*(self.names2attributes
-                                                            (self.updateMinibatchInputAttributes()))
-                                                          + minibatch(self.updateMinibatchInputFields())))
+                           self.update_minibatch_function(*(input_attributes+input_fields)))
         
     def isLastEpoch(self):
         """
@@ -387,6 +371,17 @@
                             for param in old_params])
         MinibatchUpdatesTLearner.__init__(self)
         
+
+    def namesOfAttributesToComputeOutputs(self,output_names):
+        """
+        The output_names are attribute names (not the corresponding Result names, which have leading _).
+        Return the names of the input attributes needed to compute those outputs.
+        """
+        all_inputs = t.gof.graph.inputs(self.names2OpResults(output_names))
+        # keep only named input Results, excluding constants and values
+        return [r.name for r in all_inputs if isinstance(r,theano.Result) and \
+                not isinstance(r,theano.Constant) and not isinstance(r,theano.Value)]
+
     def isLastEpoch(self):
         return self.truly_online
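
For the graph-walking helper added above: in classic Theano, theano.gof.graph.inputs returns the leaves of a symbolic graph, and filtering out constants leaves the named free variables. A hedged sketch assuming a later classic-Theano API (the class names in the 2008 version, theano.Result and theano.Value, differ):

    import theano
    from theano import tensor as t

    x = t.scalar('x')
    y = x * 2 + 1
    free = [v.name for v in theano.gof.graph.inputs([y])
            if not isinstance(v, theano.gof.Constant)]
    # free == ['x']: the names needed to compute y
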
 
@@ -397,6 +399,11 @@
         return ["new_"+name for name in self.parameterAttributes()]
     
     def updateEndInputAttributes(self):
+        return self.namesOfAttributesToComputeOutputs(self.updateEndOutputAttributes())
+
+    def useInputAttributes(self):
         return self.parameterAttributes()
 
+    def useOutputAttributes(self):
+        return []
 
--- a/lookup_list.py	Fri May 09 13:38:54 2008 -0400
+++ b/lookup_list.py	Fri May 09 17:38:57 2008 -0400
@@ -49,7 +49,7 @@
         The key in example[key] can either be an integer to index the fields
         or the name of the field.
         """
-        if isinstance(key,int) or isinstance(key,slice) or isinstance(key,list):
+        if isinstance(key,int) or isinstance(key,slice) or (isinstance(key,list) and all(isinstance(i,int) for i in key)):
             return self._values[key]
         else: # if not an int, key must be a name
             # expecting key to be a valid field name
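
The stricter check above keeps positional indexing for ints, slices, and lists of ints, while anything else falls through to name lookup. A standalone sketch of the intended semantics (a simplified stand-in, not the real LookupList class):

    class Example(object):
        def __init__(self, names, values):
            self._names, self._values = list(names), list(values)
        def __getitem__(self, key):
            if isinstance(key, int) or isinstance(key, slice):
                return self._values[key]
            if isinstance(key, list) and all(isinstance(i, int) for i in key):
                return [self._values[i] for i in key]
            return self._values[self._names.index(key)]  # treat key as a field name

    e = Example(["input", "target"], [10, 20])
    assert e[0] == 10 and e["target"] == 20 and e[[0, 1]] == [10, 20]
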
--- a/mlp.py	Fri May 09 13:38:54 2008 -0400
+++ b/mlp.py	Fri May 09 17:38:57 2008 -0400
@@ -68,7 +68,7 @@
 
     """
 
-    def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,init_range=1.,n_inputs=None,minibatch_size=None):
+    def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None):
         self._n_inputs = n_inputs
         self._n_outputs = n_classes
         self._n_hidden = n_hidden
@@ -76,9 +76,11 @@
         self._max_n_epochs = max_n_epochs
         self._minibatch_size = minibatch_size
         self.learning_rate = learning_rate # this is the float
+        self.L2_regularizer = L2_regularizer
         self._learning_rate = t.scalar('learning_rate') # this is the symbol
         self._input = t.matrix('input') # n_examples x n_inputs
-        self._target = t.ivector('target') # n_examples x n_outputs
+        self._target = t.imatrix('target') # n_examples x 1
+        self._target_vector = self._target[:,0]
         self._L2_regularizer = t.scalar('L2_regularizer')
         self._W1 = t.matrix('W1')
         self._W2 = t.matrix('W2')
@@ -86,9 +88,9 @@
         self._b2 = t.row('b2')
         self._regularization_term = self._L2_regularizer * (t.sum(self._W1*self._W1) + t.sum(self._W2*self._W2))
         self._output_activations =self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T)
-        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target)
-        self._output_class = t.argmax(self._output,1)
-        self._class_error = self._output_class != self._target
+        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target_vector)
+        self._output_class, self._max_output = t.argmax(self._output,1)
+        self._class_error = t.neq(self._output_class,self._target_vector)
         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
         OnlineGradientTLearner.__init__(self)
             
@@ -98,15 +100,6 @@
     def parameterAttributes(self):
         return ["b1","W1", "b2", "W2"]
     
-    def useInputAttributes(self):
-        return self.parameterAttributes()
-
-    def useOutputAttributes(self):
-        return []
-
-    def updateInputAttributes(self):
-        return self.parameterAttributes() + ["L2_regularizer"]
-
     def updateMinibatchInputFields(self):
         return ["input","target"]
     
@@ -126,8 +119,8 @@
         minibatch_n_inputs  = minibatch["input"].shape[1]
         if not self._n_inputs:
             self._n_inputs = minibatch_n_inputs
-            self.b1 = numpy.zeros(self._n_hidden)
-            self.b2 = numpy.zeros(self._n_outputs)
+            self.b1 = numpy.zeros((1,self._n_hidden))
+            self.b2 = numpy.zeros((1,self._n_outputs))
             self.forget()
         elif self._n_inputs!=minibatch_n_inputs:
             # if the input changes dimension on the fly, we resize and forget everything
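
The (1, n) bias shapes above match the t.row symbols declared in __init__ and broadcast across the minibatch dimension. A numpy illustration of the intended broadcast (sizes are arbitrary):

    import numpy

    n_hidden, minibatch_size = 4, 3
    b1 = numpy.zeros((1, n_hidden))                  # row bias, as allocated above
    hidden = numpy.ones((minibatch_size, n_hidden))  # one row per example
    out = hidden + b1                                # shape (3, 4): the row broadcasts
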