changeset 182:4afb41e61fcf

strange bug in the linker, triggered by running 'python test_mlp.py'
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Tue, 13 May 2008 17:00:53 -0400
parents 1b06bc2c3ca9
children 25d0a0c713da 62c7527c9ec1
files mlp.py
diffstat 1 files changed, 4 insertions(+), 132 deletions(-)
--- a/mlp.py	Tue May 13 15:49:39 2008 -0400
+++ b/mlp.py	Tue May 13 17:00:53 2008 -0400
@@ -117,6 +117,10 @@
             output_fields += ["class_error", "nll"]
         return output_fields
         
+    def updateMinibatch(self,minibatch):
+        MinibatchUpdatesTLearner.updateMinibatch(self,minibatch)
+        print self.nll
+
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
         if not self._n_inputs:
@@ -163,135 +167,3 @@
             print 'n2', self.names2OpResults(self.updateEndInputAttributes())
             print 'n3', self.names2OpResults(self.updateEndOutputAttributes())
 
-class MLP(MinibatchUpdatesTLearner):
-    """
-    Implement a feedforward multi-layer perceptron, with or without L1 and/or L2 regularization.
-
-    The predictor parameters are obtained by minibatch/online gradient descent.
-    Training can proceed sequentially (with multiple calls to update with
-    different disjoint subsets of the training sets).
-
-    Hyper-parameters:
-      - L1_regularizer
-      - L2_regularizer
-      - neuron_sparsity_regularizer
-      - initial_learning_rate
-      - learning_rate_decrease_rate
-      - n_hidden_per_layer (a list of integers)
-      - activation_function ("sigmoid","tanh", or "ratio")
-
-    The output/task type (classification, regression, etc.) is obtained by specializing MLP.
-
-    For each (input[t],output[t]) pair in a minibatch,::
-
-       activation[0] = input_t
-       for k=1 to n_hidden_layers:
-          activation[k]=activation_function(b[k]+ W[k]*activation[k-1])
-       output_t = output_activation_function(b[n_hidden_layers+1]+W[n_hidden_layers+1]*activation[n_hidden_layers])
-
-    and the b and W are obtained by minimizing the following by stochastic minibatch gradient descent::
-
-       L2_regularizer sum_{ijk} W_{kij}^2  + L1_regularizer sum_{kij} |W_{kij}|
-       + neuron_sparsity_regularizer sum_{ki} |b_{ki} + infinity|
-       - sum_t log P_{output_model}(target_t | output_t)
-
-    The fields and attributes expected and produced by use and update are the following:
-
-     - Input and output fields (example-wise quantities):
-
-       - 'input' (always expected by use and update)
-       - 'target' (optionally expected by use and always by update)
-       - 'output' (optionally produced by use)
-       - error fields produced by sub-class of MLP
-
-     - optional attributes (optionally expected as input_dataset attributes)
-       (warning, this may be dangerous, the 'use' method will use those provided in the 
-       input_dataset rather than those learned during 'update'; currently no support
-       for providing these to update):
-       
-       - 'L1_regularizer'
-       - 'L2_regularizer'
-       - 'b' 
-       - 'W'
-       - 'parameters' = [b[1], W[1], b[2], W[2], ...] 
-       - 'regularization_term'
-
-    """
-
-    def attributeNames(self):
-        return ["parameters","b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer","regularization_term"]
-
-    def useInputAttributes(self):
-        return ["b","W"]
-
-    def useOutputAttributes(self):
-        return []
-
-    def updateInputAttributes(self):
-        return ["b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer"]
-
-    def updateMinibatchInputFields(self):
-        return ["input","target"]
-    
-    def updateMinibatchInputAttributes(self):
-        return ["b","W"]
-    
-    def updateMinibatchOutputAttributes(self):
-        return ["new_XtX","new_XtY"]
-    
-    def updateEndInputAttributes(self):
-        return ["theta","XtX","XtY"]
-
-    def updateEndOutputAttributes(self):
-        return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
-
-    def parameterAttributes(self):
-        return ["b","W"]
-    
-    def defaultOutputFields(self, input_fields):
-        output_fields = ["output"]
-        if "target" in input_fields:
-            output_fields.append("squared_error")
-        return output_fields
-        
-    def __init__(self):
-        self._input = t.matrix('input') # n_examples x n_inputs
-        self._target = t.matrix('target') # n_examples x n_outputs
-        self._L2_regularizer = t.scalar('L2_regularizer')
-        self._theta = t.matrix('theta')
-        self._W = self._theta[:,1:] 
-        self._b = self._theta[:,0]
-        self._XtX = t.matrix('XtX')
-        self._XtY = t.matrix('XtY')
-        self._extended_input = t.prepend_one_to_each_row(self._input)
-        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
-        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
-        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
-        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
-        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
-
-        OneShotTLearner.__init__(self)
-            
-    def allocate(self,minibatch):
-        minibatch_n_inputs  = minibatch["input"].shape[1]
-        minibatch_n_outputs = minibatch["target"].shape[1]
-        if not self._n_inputs:
-            self._n_inputs = minibatch_n_inputs 
-            self._n_outputs = minibatch_n_outputs
-            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
-            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
-            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
-            self.forget()
-        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we resize and forget everything
-            self.forget()
-            
-    def forget(self):
-        if self._n_inputs and self._n_outputs:
-            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
-            self.XtY.resize((1+self.n_inputs,self.n_outputs))
-            self.XtX.data[:,:]=0
-            self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
-
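
The removed MLP docstring describes a forward pass (activation[k] = activation_function(b[k] + W[k]*activation[k-1])) and an objective combining the negative log-likelihood with L1/L2 weight penalties. The following is a minimal numpy sketch of that computation, not the class's theano graph; shapes, the choice of tanh, integer class targets, and all function names here are assumptions for illustration only (the neuron-sparsity term is omitted).

    import numpy as np

    def mlp_forward(x, weights, biases, activation=np.tanh):
        # Hypothetical sketch: weights[k] has shape (n_out_k, n_in_k), biases[k] shape (n_out_k,).
        # activation[k] = activation_function(b[k] + W[k] * activation[k-1]), then a linear output layer.
        a = x
        for W, b in zip(weights[:-1], biases[:-1]):
            a = activation(b + a @ W.T)
        W_out, b_out = weights[-1], biases[-1]
        return b_out + a @ W_out.T  # linear outputs, one row per example

    def regularized_nll(output, target, weights, l1=0.0, l2=0.0):
        # Softmax negative log-likelihood plus the L1/L2 penalties from the docstring.
        logits = output - output.max(axis=1, keepdims=True)
        log_p = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
        nll = -log_p[np.arange(len(target)), target].sum()
        reg = sum(l2 * (W ** 2).sum() + l1 * np.abs(W).sum() for W in weights)
        return nll + reg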
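The removed __init__/allocate/forget code was accumulating XtX and XtY over minibatches on inputs extended with a leading 1 (for the bias), placing the L2 penalty on the diagonal of XtX, and solving for theta = [b | W]. The sketch below shows that normal-equations update in plain numpy, under the same conventions (theta[:,0] is b, theta[:,1:] is W); the class and method names are illustrative, not part of the original API.

    import numpy as np

    class RidgeAccumulator:
        def __init__(self, n_inputs, n_outputs, l2=0.0):
            self.XtX = np.zeros((1 + n_inputs, 1 + n_inputs))
            self.XtY = np.zeros((1 + n_inputs, n_outputs))
            # Regularize every weight except the bias (skip row/column 0),
            # as in numpy.diag(XtX)[1:] = L2_regularizer in the removed forget().
            idx = np.arange(1, 1 + n_inputs)
            self.XtX[idx, idx] = l2

        def update(self, x, y):
            # Prepend a column of ones so the bias is estimated with the weights.
            x1 = np.hstack([np.ones((x.shape[0], 1)), x])
            self.XtX += x1.T @ x1
            self.XtY += x1.T @ y

        def solve(self):
            # theta has shape (n_outputs, 1 + n_inputs): column 0 is b, the rest is W.
            return np.linalg.solve(self.XtX, self.XtY).T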