changeset 124:9c4f522526bf

Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Wed, 07 May 2008 16:20:40 -0400
parents 2ca8dccba270 8b520423d4ee
children 7d8b3d6dd4e9
files
diffstat 6 files changed, 150 insertions(+), 118 deletions(-)
--- a/_nnet_ops.py	Wed May 07 16:17:25 2008 -0400
+++ b/_nnet_ops.py	Wed May 07 16:20:40 2008 -0400
@@ -9,29 +9,31 @@
     def setUp(self):
         numpy.random.seed(9999)
     def test_elemwise(self):
-        TT.verify_grad(self, Sigmoid, [numpy.random.rand(3,4)])
+        TT.verify_grad(self, sigmoid, [numpy.random.rand(3,4)])
 
 class T_softplus(unittest.TestCase):
     def setUp(self):
         numpy.random.seed(9999)
     def test_elemwise(self):
-        TT.verify_grad(self, Softplus, [numpy.random.rand(3,4)])
+        TT.verify_grad(self, softplus, [numpy.random.rand(3,4)])
 
 class T_CrossentropySoftmax1Hot(unittest.TestCase):
     def setUp(self):
         numpy.random.seed(9999)
     def test0(self):
         y_idx = [0,1,3]
-        def output1(a,b):
-            return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1]
-        TT.verify_grad(self, output1, [numpy.random.rand(3,4),
+        class Dummy(object):
+            def make_node(self, a,b):
+                return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1]
+        TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4),
             numpy.random.rand(4)])
 
     def test1(self):
         y_idx = [0,1,3]
-        def output1(a):
-            return crossentropy_softmax_1hot(a, y_idx)[0:1]
-        TT.verify_grad(self, output1, [numpy.random.rand(3,4)])
+        class Dummy(object):
+            def make_node(self, a):
+                return crossentropy_softmax_1hot(a, y_idx)[0:1]
+        TT.verify_grad(self, Dummy(), [numpy.random.rand(3,4)])
 
 
 
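The test changes above pass callables (and, in the later hunks, Dummy objects exposing make_node) to TT.verify_grad, which checks an op's symbolic gradient against a finite-difference estimate. A minimal numpy-only sketch of that kind of check for the sigmoid tested here, independent of Theano and with illustrative helper names, is:

    import numpy

    def sigmoid(x):
        # logistic function, applied elementwise
        return 1.0 / (1.0 + numpy.exp(-x))

    def sigmoid_grad(x):
        y = sigmoid(x)
        return y * (1.0 - y)          # analytic gradient, matches ScalarSigmoid.grad

    def check_grad(f, g, x, eps=1e-6, tol=1e-4):
        """Compare the analytic gradient g(x) of sum(f(x)) with central differences."""
        analytic = g(x)
        numeric = numpy.zeros_like(x)
        it = numpy.nditer(x, flags=['multi_index'])
        while not it.finished:
            i = it.multi_index
            xp, xm = x.copy(), x.copy()
            xp[i] += eps
            xm[i] -= eps
            numeric[i] = (numpy.sum(f(xp)) - numpy.sum(f(xm))) / (2 * eps)
            it.iternext()
        return numpy.max(numpy.abs(analytic - numeric)) < tol

    numpy.random.seed(9999)
    assert check_grad(sigmoid, sigmoid_grad, numpy.random.rand(3, 4))
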
--- a/learner.py	Wed May 07 16:17:25 2008 -0400
+++ b/learner.py	Wed May 07 16:20:40 2008 -0400
@@ -1,6 +1,6 @@
 
-from dataset import *
-from compile import Function
+from dataset import AttributesHolder
+import compile
     
 class Learner(AttributesHolder):
     """Base class for learning algorithms, provides an interface
@@ -173,8 +173,8 @@
         if key not in self.use_functions_dictionary:
             use_input_attributes = self.useInputAttributes()
             use_output_attributes = self.useOutputAttributes()
-            complete_f = Function(self.names2OpResults(input_fields+use_input_attributes),
-                                  self.names2OpResults(output_fields+use_output_attributes))
+            complete_f = compile.function(self.names2OpResults(input_fields+use_input_attributes),
+                                          self.names2OpResults(output_fields+use_output_attributes))
             def f(*input_field_values):
                 input_attribute_values = self.names2attributes(use_input_attributes)
                 results = complete_f(*(input_field_values + input_attribute_values))
@@ -273,12 +273,13 @@
 
     def __init__(self):
         TLearner.__init__(self)
-        self.update_minibatch_function =
-        Function(self.names2OpResults(self.updateMinibatchOutputAttributes()+
-                                      self.updateMinibatchInputFields()),
+        self.update_minibatch_function = compile.function
+        (self.names2OpResults(self.updateMinibatchOutputAttributes()+
+                              self.updateMinibatchInputFields()),
                  self.names2OpResults(self.updateMinibatchOutputAttributes()))
-        self.update_end_function = Function(self.names2OpResults(self.updateEndInputAttributes()),
-                                            self.names2OpResults(self.updateEndOutputAttributes()))
+        self.update_end_function = compile.function
+        (self.names2OpResults(self.updateEndInputAttributes()),
+         self.names2OpResults(self.updateEndOutputAttributes()))
 
     def updateMinibatchInputFields(self):
         raise AbstractFunction()
@@ -310,7 +311,9 @@
         # make sure all required fields are allocated and initialized
         self.allocate(minibatch)
         self.setAttributes(self.updateMinibatchOutputAttributes(),
-                           self.update_minibatch_function(*(self.names2attributes(self.updateMinibatchInputAttributes()))
+                           # concatenate the attribute values and field values and then apply update fn
+                           self.update_minibatch_function(*(self.names2attributes
+                                                            (self.updateMinibatchInputAttributes()))
                                                           + minibatch(self.updateMinibatchInputFields())))
         
     def isLastEpoch(self):
@@ -347,17 +350,40 @@
     Specialization of MinibatchUpdatesTLearner in which the minibatch updates
     are obtained by performing an online (minibatch-based) gradient step.
 
-    Sub-classes must define the following methods:
-    
+    Sub-classes must define the following:
+
+      self._learning_rate (may be changed by the sub-class between epochs or minibatches)
+     
+      self.lossAttribute()  = name of the loss field 
+      
     """
     def __init__(self,truly_online=False):
         """
         If truly_online then only one pass is made through the training set passed to update().
-        
+
+        SUBCLASSES SHOULD CALL THIS CONSTRUCTOR ONLY AFTER HAVING DEFINED ALL THEIR THEANO FORMULAS
         """
         self.truly_online=truly_online
 
+        # create the formulas for the gradient update
+        old_params = [self.__getattr__("_"+name) for name in self.parameterAttributes()]
+        new_params_names = ["_new_"+name for name in self.parameterAttributes()]
+        loss = self.__getattr__(self.lossAttribute())
+        self.setAttributes(new_params_names,
+                           [t.add_inplace(self.param,
+                                          self._learning_rate*t.grad(loss,param))
+                            for param in old_params])
+
     def isLastEpoch(self):
         return self.truly_online
 
+    def updateMinibatchInputAttributes(self):
+        return self.parameterAttributes()
+    
+    def updateMinibatchOutputAttributes(self):
+        return ["_new"+name for name in self.parameterAttributes()]
+    
+    def updateEndInputAttributes(self):
+        return self.parameterAttributes()
 
+
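The gradient-step formulas added to the constructor above build, for each parameter, an in-place update of the form param + learning_rate * grad(loss, param). A plain numpy sketch of the corresponding minibatch update, written as a descent step (i.e. subtracting the scaled gradient); the helper name is illustrative, not part of the Learner API:

    import numpy

    def sgd_minibatch_step(params, grads, learning_rate):
        """Update each parameter in place with one gradient-descent step."""
        for param, grad in zip(params, grads):
            param -= learning_rate * grad   # in-place, like add_inplace with a negated step
        return params

    # toy usage: two parameters and their (already computed) minibatch gradients
    W = numpy.random.rand(3, 4)
    b = numpy.zeros(4)
    gW = numpy.ones_like(W)
    gb = numpy.ones_like(b)
    sgd_minibatch_step([W, b], [gW, gb], learning_rate=0.1)
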
--- a/linear_regression.py	Wed May 07 16:17:25 2008 -0400
+++ b/linear_regression.py	Wed May 07 16:20:40 2008 -0400
@@ -114,7 +114,7 @@
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
 
-        OneShotTLearner.__init__(self)
+        MinibatchUpdatesTLearner.__init__(self)
             
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
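The one-line change above only swaps the base class; the surrounding formulas accumulate the sufficient statistics XtX and XtY across minibatches and then solve the regularized normal equations for theta. A numpy sketch of that computation, assuming an explicit bias column and an L2 penalty lambda_ placed on the non-bias diagonal as in forget():

    import numpy

    def extend(X):
        """Prepend a column of ones to each row (bias feature)."""
        return numpy.hstack([numpy.ones((X.shape[0], 1)), X])

    n_inputs, n_outputs, lambda_ = 5, 2, 0.1
    XtX = numpy.zeros((1 + n_inputs, 1 + n_inputs))
    XtY = numpy.zeros((1 + n_inputs, n_outputs))
    idx = numpy.arange(1, 1 + n_inputs)
    XtX[idx, idx] = lambda_                             # ridge term, bias not penalized

    for _ in range(3):                                  # three toy minibatches
        X = numpy.random.rand(10, n_inputs)
        Y = numpy.random.rand(10, n_outputs)
        Xe = extend(X)
        XtX += numpy.dot(Xe.T, Xe)                      # accumulate sufficient statistics
        XtY += numpy.dot(Xe.T, Y)

    theta = numpy.linalg.solve(XtX, XtY).T              # (n_outputs, 1 + n_inputs), rows are [b, W]
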
--- a/mlp.py	Wed May 07 16:17:25 2008 -0400
+++ b/mlp.py	Wed May 07 16:20:40 2008 -0400
@@ -1,7 +1,7 @@
 
 from learner import *
 from theano import tensor as t
-from theano.scalar import as_scalar
+from nnet_ops import *
 
 # this is one of the simplest example of learner, and illustrates
 # the use of theano
@@ -64,6 +64,27 @@
 
     """
 
+    def __init__(self,n_hidden,n_classes,learning_rate,init_range=1.):
+        self._n_outputs = n_classes
+        self._n_hidden = n_hidden
+        self._init_range = init_range
+        self.learning_rate = learning_rate # this is the float
+        self._learning_rate = t.scalar('learning_rate') # this is the symbol
+        self._input = t.matrix('input') # n_examples x n_inputs
+        self._target = t.matrix('target','int32') # n_examples x n_outputs
+        self._L2_regularizer = t.scalar('L2_regularizer')
+        self._W1 = t.matrix('W1')
+        self._W2 = t.matrix('W2')
+        self._b1 = t.row('b1')
+        self._b2 = t.row('b2')
+        self._regularization_term = self._L2_regularizer * (t.dot(self._W1,self._W1) + t.dot(self._W2,self._W2))
+        self._output_activations =self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T)
+        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target)
+        self._output_class = t.argmax(self._output,1)
+        self._class_error = self._output_class != self._target
+        self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
+        MinibatchUpdatesTLearner.__init__(self)
+            
     def attributeNames(self):
         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
 
@@ -82,64 +103,39 @@
     def updateMinibatchInputFields(self):
         return ["input","target"]
     
-    def updateMinibatchInputAttributes(self):
-        return self.parameterAttributes()
-    
-    def updateMinibatchOutputAttributes(self):
-        return self.parameterAttributes()
-    
-    def updateEndInputAttributes(self):
-        return self.parameterAttributes()
-
     def updateEndOutputAttributes(self):
         return ["regularization_term"]
 
+    def lossAttribute(self):
+        return "minibatch_criterion"
+    
     def defaultOutputFields(self, input_fields):
         output_fields = ["output", "output_class",]
         if "target" in input_fields:
             output_fields += ["class_error", "nll"]
         return output_fields
         
-    def __init__(self):
-        self._input = t.matrix('input') # n_examples x n_inputs
-        self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
-        self._theta = t.matrix('theta')
-        self._W = self._theta[:,1:] 
-        self._b = self._theta[:,0]
-        self._XtX = t.matrix('XtX')
-        self._XtY = t.matrix('XtY')
-        self._extended_input = t.prepend_one_to_each_row(self._input)
-        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
-        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
-        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
-        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
-        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
-
-        OneShotTLearner.__init__(self)
-            
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
-        minibatch_n_outputs = minibatch["target"].shape[1]
         if not self._n_inputs:
-            self._n_inputs = minibatch_n_inputs 
-            self._n_outputs = minibatch_n_outputs
-            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
-            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
-            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self._n_inputs = minibatch_n_inputs
+            self.b1 = numpy.zeros(self._n_hidden)
+            self.b2 = numpy.zeros(self._n_outputs)
             self.forget()
-        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we resize and forget everything
+        elif self._n_inputs!=minibatch_n_inputs:
+            # if the input changes dimension on the fly, we resize and forget everything
             self.forget()
             
     def forget(self):
-        if self._n_inputs and self._n_outputs:
-            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
-            self.XtY.resize((1+self.n_inputs,self.n_outputs))
-            self.XtX.data[:,:]=0
-            self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+        if self._n_inputs:
+            r = self._init_range/math.sqrt(self._n_inputs)
+            self.W1 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_hidden,self._n_inputs))
+            r = self._init_range/math.sqrt(self._n_hidden)
+            self.W2 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_outputs,self._n_hidden))
+            self.b1[:]=0
+            self.b2[:]=0
 
 
 class MLP(MinibatchUpdatesTLearner):
@@ -236,7 +232,7 @@
     def __init__(self):
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
+        self._L2_regularizer = t.scalar('L2_regularizer')
         self._theta = t.matrix('theta')
         self._W = self._theta[:,1:] 
         self._b = self._theta[:,0]
@@ -245,7 +241,7 @@
         self._extended_input = t.prepend_one_to_each_row(self._input)
         self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
         self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
         self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
@@ -272,5 +268,5 @@
             self.XtY.resize((1+self.n_inputs,self.n_outputs))
             self.XtX.data[:,:]=0
             self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+            numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
 
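The new OneHiddenLayerNNetClassifier constructor defines the model symbolically: a tanh hidden layer, a softmax output with the one-hot negative log-likelihood, and an L2 term on W1 and W2 scaled by 1/n_examples. A numpy sketch of the same forward pass (shapes follow the symbols in the constructor; the L2 term is written here as a sum of squared weights, and the helper names are illustrative):

    import numpy

    def softmax(a):
        e = numpy.exp(a - a.max(axis=1)[:, None])        # max-shifted for stability
        return e / e.sum(axis=1)[:, None]

    def forward(x, W1, b1, W2, b2, y_idx, L2_regularizer):
        """One-hidden-layer classifier: tanh hidden layer, softmax output, NLL + L2."""
        h = numpy.tanh(b1 + numpy.dot(x, W1.T))          # (n_examples, n_hidden)
        activations = b2 + numpy.dot(h, W2.T)            # (n_examples, n_classes)
        output = softmax(activations)
        nll = -numpy.log(output[numpy.arange(len(y_idx)), y_idx])
        regularization = L2_regularizer * ((W1 ** 2).sum() + (W2 ** 2).sum())
        criterion = nll + regularization / x.shape[0]    # per-example minibatch criterion
        output_class = output.argmax(axis=1)
        return output, output_class, nll, criterion
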
--- a/nnet_ops.py	Wed May 07 16:17:25 2008 -0400
+++ b/nnet_ops.py	Wed May 07 16:20:40 2008 -0400
@@ -1,5 +1,5 @@
 import theano
-from theano import tensor, gof, scalar
+from theano import tensor, scalar
 import numpy
 
 ############
@@ -7,7 +7,7 @@
 # SCALAR OPS
 #
 
-class ScalarSigmoid(scalar.FloatUnaryScalarOp):
+class ScalarSigmoid(scalar.UnaryScalarOp):
     @staticmethod
     def st_impl(x):
         if x < -30.0:
@@ -20,7 +20,7 @@
     def grad(self, (x,), (gz,)):
         y = scalar_sigmoid(x)
         return [gz * y * (1.0 - y)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
                 %(x)s < -30.0 
@@ -28,12 +28,11 @@
                 : %(x)s > 30.0 
                    ? 1.0
                    : 1.0 /(1.0+exp(-%(x)s));""" % locals()
-        raise NotImplementedError('only floatingpoint is implemented')
-scalar_sigmoid = gof.op.constructor(ScalarSigmoid)
-Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace =\
-        tensor.broadcast(ScalarSigmoid, 'Sigmoid')
+        return NotImplemented#Error('only floatingpoint is implemented')
+scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
+sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid')
 
-class ScalarSoftplus(scalar.FloatUnaryScalarOp):
+class ScalarSoftplus(scalar.UnaryScalarOp):
     @staticmethod
     def static_impl(x):
         if x < -30.0:
@@ -45,7 +44,7 @@
         return ScalarSoftplus.static_impl(x)
     def grad(self, (x,), (gz,)):
         return [gz * scalar_sigmoid(x)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
                 %(x)s < -30.0 
@@ -53,10 +52,9 @@
                 : %(x)s > 30.0 
                    ? %(x)s
                    : log1p(exp(%(x)s));""" % locals()
-        raise NotImplementedError('only floating point x is implemented')
-scalar_softplus = gof.op.constructor(ScalarSoftplus)
-Softplus, softplus, SoftplusInplace, softplus_inplace =\
-        tensor.broadcast(ScalarSoftplus, 'Softplus')
+        return NotImplemented#Error('only floating point x is implemented')
+scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
+softplus = tensor.Elemwise(scalar_softplus, name='softplus')
 
 
 ############
@@ -64,8 +62,7 @@
 # TENSOR OPS
 #
 
-
-class CrossentropySoftmax1HotWithBias(gof.op.Op):
+class CrossentropySoftmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
 
     @type x: is a matrix of floats (32 or 64)
@@ -90,28 +87,31 @@
     """
     nin=3
     nout=2
-    def __init__(self, x, b, y_idx, **kwargs):
-        x = tensor._as_tensor(x)
-        b = tensor._as_tensor(b)
-        y_idx = tensor._as_tensor(y_idx)
-        if len(x.broadcastable) != 2 \
-                or x.dtype not in ['float32', 'float64']:
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, x, b, y_idx):
+        x = tensor.as_tensor(x)
+        b = tensor.as_tensor(b)
+        y_idx = tensor.as_tensor(y_idx)
+        if x.type.ndim != 2 \
+                or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('x must be 2-d tensor of floats')
-        if len(b.broadcastable) != 1 \
-                or x.dtype not in ['float32', 'float64']:
-            raise ValueError('x must be 1-d tensor of floats')
-        if len(y_idx.broadcastable) != 1 \
-                or y_idx.dtype not in ['int32', 'int64']:
-            raise ValueError('x must be 1-d tensor of ints')
+        if b.type.ndim != 1 \
+                or x.type.dtype not in ['float32', 'float64']:
+            raise ValueError('b must be 1-d tensor of floats')
+        if y_idx.type.ndim != 1 \
+                or y_idx.type.dtype not in ['int32', 'int64']:
+            raise ValueError('y_idx must be 1-d tensor of ints')
 
 #       TODO: Is this correct? It used to be y, not y_idx
-        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
+        nll = tensor.Tensor(x.type.dtype, 
+                y_idx.type.broadcastable).make_result()
 #        nll = Tensor(x.dtype, y.broadcastable)
-        sm = tensor.Tensor(x.dtype, x.broadcastable)
-        self.inputs = [x, b, y_idx]
-        self.outputs = [nll, sm]
-    def perform(self):
-        x, b, y_idx = [i.data for i in self.inputs]
+        sm = x.type.make_result()
+        return theano.Apply(self, [x, b, y_idx],[nll, sm])
+    def perform(self, node, input_storage, output_storage):
+        x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
@@ -124,14 +124,14 @@
             sm[i] = numpy.exp(row - numpy.max(row)) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
             nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
-        self.outputs[0].data = nll
-        self.outputs[1].data = sm
+        output_storage[0][0] = nll
+        output_storage[1][0] = sm
     def grad(self, (x, b, y_idx), (g_nll, g_sm)):
         if g_sm is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
-        dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0]
-        db = tensor.Sum(dx, axis = [0]).outputs[0]
+        dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
+        db = tensor.sum(dx, axis = [0])
         return dx, db, None
 
     def c_headers(self): return ['<iostream>']
@@ -280,27 +280,26 @@
               //mat_at(y,i,0) =  - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum);
         }
         """ % dict(locals(), **sub)
+crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
 
-crossentropy_softmax_1hot_with_bias = \
-        gof.op.constructor(CrossentropySoftmax1HotWithBias)
-
-class CrossentropySoftmax1HotWithBiasDx (gof.op.Op):
+class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
-    def __init__(self, dy, sm, y_idx,**kwargs):
-        dy = tensor._as_tensor(dy)
-        sm = tensor._as_tensor(sm)
-        y_idx = tensor._as_tensor(y_idx)
-        self.inputs = [dy, sm, y_idx]
-        self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)]
-    def perform(self):
-        dy,sm,y_idx = [i.data for i in self.inputs]
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self,**kwargs)
+    def make_node(self, dy, sm, y_idx,**kwargs):
+        dy = tensor.as_tensor(dy)
+        sm = tensor.as_tensor(sm)
+        y_idx = tensor.as_tensor(y_idx)
+        return theano.Apply(self, [dy, sm, y_idx],[sm.type.make_result()])
+    def perform(self, node, input_storage, output_storage):
+        dy,sm,y_idx = input_storage
         dx = numpy.zeros_like(sm)
         for i in xrange(sm.shape[0]):
             dx[i] = dy[i] * sm[i] #vector scale
             dx[i, y_idx[i]] -= dy[i] #scalar decrement
-        self.outputs[0].data = dx
+        output_storage[0][0] = dx
     def grad(self, *args):
         raise NotImplementedError()
     def c_code(self,  (dnll, sm, y_idx), (dx,), sub):
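The ported perform methods above compute, per row, a max-shifted softmax of x[i] + b, the cross-entropy of the target index, and, in the Dx op, the gradient dx[i] = dy[i] * sm[i] with dy[i] subtracted at the target column. A numpy-only sketch of both computations (same names as the ops, but plain functions here, for illustration):

    import numpy

    def crossentropy_softmax_1hot_with_bias(x, b, y_idx):
        """Row-wise softmax of x + b and the negative log-likelihood of y_idx."""
        sm = numpy.zeros_like(x)
        nll = numpy.zeros(x.shape[0])
        for i in range(x.shape[0]):
            row = x[i] + b
            sm[i] = numpy.exp(row - numpy.max(row))      # softmax, max-shifted for stability
            sm[i] *= 1.0 / numpy.sum(sm[i])              # vector scale
            nll[i] = -numpy.log(sm[i, y_idx[i]])         # cross-entropy
        return nll, sm

    def crossentropy_softmax_1hot_with_bias_dx(dy, sm, y_idx):
        """Gradient of nll wrt x: dy[i] * (sm[i] - onehot(y_idx[i]))."""
        dx = numpy.zeros_like(sm)
        for i in range(sm.shape[0]):
            dx[i] = dy[i] * sm[i]                        # vector scale
            dx[i, y_idx[i]] -= dy[i]                     # scalar decrement
        return dx
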
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_mlp.py	Wed May 07 16:20:40 2008 -0400
@@ -0,0 +1,9 @@
+
+from mlp import *
+
+def test0():
+    nnet = OneHiddenLayerNNetClassifier(10,3,.1)
+
+
+test0()
+