diff mlp.py @ 118:d0a1bd0378c6

Finished draft of OneHiddenLayerNNetClassifier to debut learner.py
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Wed, 07 May 2008 15:07:56 -0400
parents 88257dfedf8c
children 2ca8dccba270
--- a/mlp.py	Wed May 07 09:16:04 2008 -0400
+++ b/mlp.py	Wed May 07 15:07:56 2008 -0400
@@ -2,6 +2,8 @@
 from learner import *
 from theano import tensor as t
 from theano.scalar import as_scalar
+from nnet_ops import *
+import math # math.sqrt is used by forget() below
 
 # this is one of the simplest examples of a learner, and illustrates
 # the use of theano
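
For readers unfamiliar with nnet_ops, the newly imported crossentropy_softmax_1hot fuses the softmax with the negative log-likelihood and returns both, which is what the rewritten __init__ below relies on. A minimal NumPy sketch of that computation, assuming targets arrive as a vector of integer class indices (the _np name is illustrative, not part of nnet_ops):

    import numpy

    def crossentropy_softmax_1hot_np(activations, y_idx):
        # activations: (n_examples, n_classes); y_idx: (n_examples,) int labels
        # subtract the per-row max before exponentiating, for stability
        a = activations - activations.max(axis=1)[:, None]
        softmax = numpy.exp(a) / numpy.exp(a).sum(axis=1)[:, None]
        # negative log-likelihood of the correct class, one value per example
        nll = -numpy.log(softmax[numpy.arange(len(y_idx)), y_idx])
        return nll, softmax
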
@@ -82,64 +83,62 @@
     def updateMinibatchInputFields(self):
         return ["input","target"]
     
-    def updateMinibatchInputAttributes(self):
-        return self.parameterAttributes()
-    
-    def updateMinibatchOutputAttributes(self):
-        return self.parameterAttributes()
-    
-    def updateEndInputAttributes(self):
-        return self.parameterAttributes()
-
     def updateEndOutputAttributes(self):
         return ["regularization_term"]
 
+    def lossAttribute(self):
+        return "minibatch_criterion"
+    
     def defaultOutputFields(self, input_fields):
         output_fields = ["output", "output_class",]
         if "target" in input_fields:
             output_fields += ["class_error", "nll"]
         return output_fields
         
-    def __init__(self):
+    def __init__(self,n_hidden,n_classes,learning_rate,init_range=1.):
+        self._n_inputs = None # set from the data on the first call to allocate()
+        self._n_outputs = n_classes
+        self._n_hidden = n_hidden
+        self._init_range = init_range
+        self.learning_rate = learning_rate # this is the float
+        self._learning_rate = t.scalar('learning_rate') # this is the symbol
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
-        self._theta = t.matrix('theta')
-        self._W = self._theta[:,1:] 
-        self._b = self._theta[:,0]
-        self._XtX = t.matrix('XtX')
-        self._XtY = t.matrix('XtY')
-        self._extended_input = t.prepend_one_to_each_row(self._input)
-        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
-        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
-        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
-        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
-        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
-
-        OneShotTLearner.__init__(self)
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
+        self._W1 = t.matrix('W1')
+        self._W2 = t.matrix('W2')
+        self._b1 = t.row('b1')
+        self._b2 = t.row('b2')
+        self._regularizer = self._L2_regularizer * (t.sum(t.sqr(self._W1)) + t.sum(t.sqr(self._W2))) # sum of squared weights
+        self._output_activations = self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T)
+        # the fused op returns both the per-example nll and the softmax output
+        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target)
+        self._output_class = t.argmax(self._output,1)
+        self._class_error = self._output_class != self._target # target must hold integer class indices here
+        self._minibatch_criterion = self._nll + self._regularizer / t.shape(self._input)[0]
+        MinibatchUpdatesTLearner.__init__(self)
             
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
-        minibatch_n_outputs = minibatch["target"].shape[1]
         if not self._n_inputs:
-            self._n_inputs = minibatch_n_inputs 
-            self._n_outputs = minibatch_n_outputs
-            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
-            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
-            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self._n_inputs = minibatch_n_inputs
+            self.b1 = numpy.zeros(self._n_hidden)
+            self.b2 = numpy.zeros(self._n_outputs)
             self.forget()
-        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we resize and forget everything
+        elif self._n_inputs!=minibatch_n_inputs:
+            # if the input changes dimension on the fly, we resize and forget everything
             self.forget()
             
     def forget(self):
-        if self._n_inputs and self._n_outputs:
-            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
-            self.XtY.resize((1+self.n_inputs,self.n_outputs))
-            self.XtX.data[:,:]=0
-            self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+        if self._n_inputs:
+            r = self._init_range/math.sqrt(self._n_inputs)
+            self.W1 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_hidden,self._n_inputs))
+            r = self._init_range/math.sqrt(self._n_hidden)
+            self.W2 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_outputs,self._n_hidden))
+            self.b1[:]=0
+            self.b2[:]=0
 
 
 class MLP(MinibatchUpdatesTLearner):
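
Taken together, forget() and the graph built in __init__ implement a standard one-hidden-layer net: fan-in-scaled uniform weight initialization, a tanh hidden layer, and linear output activations fed to the softmax. A plain NumPy sketch of the same computation (the names are illustrative and the Learner plumbing is elided):

    import math
    import numpy

    def init_params(n_inputs, n_hidden, n_classes, init_range=1.):
        # each weight matrix is drawn uniformly from (-r, r), with r shrinking
        # as the square root of the layer's fan-in, as in forget() above
        r1 = init_range / math.sqrt(n_inputs)
        r2 = init_range / math.sqrt(n_hidden)
        W1 = numpy.random.uniform(low=-r1, high=r1, size=(n_hidden, n_inputs))
        W2 = numpy.random.uniform(low=-r2, high=r2, size=(n_classes, n_hidden))
        return W1, numpy.zeros(n_hidden), W2, numpy.zeros(n_classes)

    def output_activations(input, W1, b1, W2, b2):
        # tanh hidden layer followed by linear output activations, as in __init__
        hidden = numpy.tanh(b1 + numpy.dot(input, W1.T))
        return b2 + numpy.dot(hidden, W2.T)
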
@@ -236,7 +234,7 @@
     def __init__(self):
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
         self._theta = t.matrix('theta')
         self._W = self._theta[:,1:] 
         self._b = self._theta[:,0]
@@ -245,7 +243,7 @@
         self._extended_input = t.prepend_one_to_each_row(self._input)
         self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
         self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._regularizer = self._L2_regularizer * t.sum(t.sqr(self._W)) # sum of squared weights
         self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
@@ -272,5 +270,5 @@
             self.XtY.resize((1+self.n_inputs,self.n_outputs))
             self.XtX.data[:,:]=0
             self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+            idx = numpy.arange(1,1+self.n_inputs); self.XtX.data[idx,idx] = self.L2_regularizer # numpy.diag returns a copy, so assigning into it would be lost
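
For context, the unchanged code around this last hunk implements a one-shot ridge regression: it accumulates X^T X and X^T Y over minibatches and solves the regularized normal equations, with L2_regularizer seeded on every diagonal entry of X^T X except the one belonging to the bias. A rough NumPy sketch of one accumulate-and-solve cycle (solve_inplace and the Learner machinery are elided; names are illustrative):

    import numpy

    def ridge_solve(XtX, XtY, X, Y):
        # prepend a constant 1 to each row so the bias folds into theta,
        # mirroring prepend_one_to_each_row above
        Xe = numpy.hstack([numpy.ones((len(X), 1)), X])
        XtX += numpy.dot(Xe.T, Xe)
        XtY += numpy.dot(Xe.T, Y)
        # XtX is assumed to have been seeded in forget() with L2_regularizer
        # on its diagonal, except entry [0,0] which belongs to the bias
        theta = numpy.linalg.solve(XtX, XtY)
        return theta.T  # (n_outputs, 1+n_inputs), matching theta's layout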