changeset 205:d1359de1ea13

Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Wed, 14 May 2008 14:06:52 -0400
parents ebbb0e749565 (diff) 62c7527c9ec1 (current diff)
children f2ddc795ec49
files mlp.py
diffstat 4 files changed, 338 insertions(+), 13 deletions(-)
--- a/mlp.py	Wed May 14 14:06:15 2008 -0400
+++ b/mlp.py	Wed May 14 14:06:52 2008 -0400
@@ -11,6 +11,81 @@
 import math
 from misc import *
 
+def function(inputs, outputs, linker='c&py'):
+    return theano.function(inputs, outputs, unpack_single=False, linker=linker)
+
+def randshape(*shape): return (numpy.random.rand(*shape) - 0.5) * 0.001
+
+class ManualNNet(object):
+    def __init__(self, ninputs, nhid, nclass, lr, nepochs, 
+            linker='c&py', 
+            hidden_layer=None):
+        class Vars:
+            def __init__(self, lr, l2coef=0.0):
+                lr = t.constant(lr)
+                l2coef = t.constant(l2coef)
+                input = t.matrix('input') # n_examples x n_inputs
+                target = t.ivector('target') # n_examples
+                W2 = t.matrix('W2')
+                b2 = t.vector('b2')
+
+                if hidden_layer:
+                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
+                else:
+                    W1 = t.matrix('W1')
+                    b1 = t.vector('b1')
+                    hid = t.tanh(b1 + t.dot(input, W1))
+                    hid_params = [W1, b1]
+                    hid_regularization = l2coef * t.sum(W1*W1)
+                    hid_ivals = [randshape(ninputs, nhid), randshape(nhid)]
+
+                params = [W2, b2] + hid_params
+                ivals = [randshape(nhid, nclass), randshape(nclass)]\
+                        + hid_ivals
+                nll, predictions = crossentropy_softmax_1hot(b2 + t.dot(hid, W2), target)
+                regularization = l2coef * t.sum(W2*W2) + hid_regularization
+                output_class = t.argmax(predictions,1)
+                loss_01 = t.neq(output_class, target)
+                g_params = t.grad(nll + regularization, params)
+                new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
+                self.__dict__.update(locals()); del self.self
+        self.nhid = nhid
+        self.nclass = nclass
+        self.nepochs = nepochs
+        self.v = Vars(lr)
+        self.params = None
+
+    def update(self, trainset):
+        params = self.v.ivals
+        update_fn = function(
+                [self.v.input, self.v.target] + self.v.params,
+                [self.v.nll] + self.v.new_params)
+        for i in xrange(self.nepochs):
+            for input, target in trainset.minibatches(['input', 'target'],
+                    minibatch_size=min(32, len(trainset))):
+                dummy = update_fn(input, target[:,0], *params)
+                if 0: print dummy[0] #the nll
+        return self.use
+    __call__ = update
+
+    def use(self, dset,
+            output_fieldnames=['output_class'],
+            test_stats_collector=None,
+            copy_inputs=False,
+            put_stats_in_output_dataset=True,
+            output_attributes=[]):
+        inputs = [self.v.input, self.v.target] + self.v.params
+        fn = function(inputs, [getattr(self.v, name) for name in output_fieldnames])
+        if 'target' in dset.fields():
+            return ApplyFunctionDataSet(dset,
+                lambda input, target: fn(input, target[:,0], *self.v.ivals),
+                output_fieldnames)
+        else:
+            return ApplyFunctionDataSet(dset,
+                lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.v.ivals),
+                output_fieldnames)
+
+
 class OneHiddenLayerNNetClassifier(OnlineGradientTLearner):
     """
     Implement a straightforward classical feedforward
@@ -67,7 +138,7 @@
        - 'regularization_term'
 
     """
-    def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None):
+    def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None,linker='c|py'):
         self._n_inputs = n_inputs
         self._n_outputs = n_classes
         self._n_hidden = n_hidden
@@ -78,7 +149,7 @@
         self.L2_regularizer = L2_regularizer
         self._learning_rate = t.scalar('learning_rate') # this is the symbol
         self._input = t.matrix('input') # n_examples x n_inputs
-        self._target = t.imatrix('target') # n_examples x 1
+        self._target = t.lmatrix('target') # n_examples x 1
         self._target_vector = self._target[:,0]
         self._L2_regularizer = t.scalar('L2_regularizer')
         self._W1 = t.matrix('W1')
@@ -91,7 +162,7 @@
         self._output_class = t.argmax(self._output,1)
         self._class_error = t.neq(self._output_class,self._target_vector)
         self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0]
-        OnlineGradientTLearner.__init__(self)
+        OnlineGradientTLearner.__init__(self, linker=linker)
             
     def attributeNames(self):
         return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
@@ -119,7 +190,7 @@
         
     def updateMinibatch(self,minibatch):
         MinibatchUpdatesTLearner.updateMinibatch(self,minibatch)
-        print "NLL=",self.nll
+        #print self.nll
 
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
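
Note: the intended two-step interface for ManualNNet above (mirrored by test1 in test_mlp.py below) is to call the learner on a trainset, which runs nepochs of minibatch SGD and returns its use method, then apply that function to a dataset. A minimal sketch, assuming a training_set like the ArrayDataSet built in test_mlp.py:

    nnet = ManualNNet(2, 10, 3, .1, 1000)   # ninputs, nhid, nclass, lr, nepochs
    fprop = nnet(training_set)              # __call__ is update; returns self.use
    output_ds = fprop(training_set)         # yields an ApplyFunctionDataSet
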
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mlp_factory_approach.py	Wed May 14 14:06:52 2008 -0400
@@ -0,0 +1,127 @@
+import dataset
+import theano
+import theano.tensor as t
+import numpy
+import nnet_ops
+
+def _randshape(*shape): 
+    return (numpy.random.rand(*shape) - 0.5) * 0.001
+def _function(inputs, outputs, linker='c&py'):
+    return theano.function(inputs, outputs, unpack_single=False, linker=linker)
+
+class NeuralNet(object):
+
+    class Model(object):
+        def __init__(self, nnet, params):
+            self.nnet = nnet
+            self.params = params
+
+        def update(self, trainset, stopper=None):
+            """Update this model from more training data."""
+            v = self.nnet.v
+            params = self.params
+            update_fn = _function([v.input, v.target] + v.params, [v.nll] + v.new_params)
+            if stopper is not None: 
+                raise NotImplementedError()
+            else:
+                for i in xrange(100):
+                    for input, target in trainset.minibatches(['input', 'target'],
+                            minibatch_size=min(32, len(trainset))):
+                        dummy = update_fn(input, target[:,0], *params)
+                        if 0: print dummy[0] #the nll
+
+        def __call__(self, testset,
+                output_fieldnames=['output_class'],
+                test_stats_collector=None,
+                copy_inputs=False,
+                put_stats_in_output_dataset=True,
+                output_attributes=[]):
+            """Apply this model (as a function) to new data"""
+            inputs = [self.nnet.v.input, self.nnet.v.target] + self.nnet.v.params
+            fn = _function(inputs, [getattr(self.nnet.v, name) for name in output_fieldnames])
+            if 'target' in testset.fields():
+                return dataset.ApplyFunctionDataSet(testset, 
+                    lambda input, target: fn(input, target[:,0], *self.params),
+                    output_fieldnames)
+            else:
+                return dataset.ApplyFunctionDataSet(testset, 
+                    lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.params),
+                    output_fieldnames)
+
+    def __init__(self, ninputs, nhid, nclass, lr, nepochs, 
+            l2coef=0.0,
+            linker='c&py', 
+            hidden_layer=None):
+        class Vars:
+            def __init__(self, lr, l2coef):
+                lr = t.constant(lr)
+                l2coef = t.constant(l2coef)
+                input = t.matrix('input') # n_examples x n_inputs
+                target = t.ivector('target') # n_examples
+                W2 = t.matrix('W2')
+                b2 = t.vector('b2')
+
+                if hidden_layer:
+                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
+                else:
+                    W1 = t.matrix('W1')
+                    b1 = t.vector('b1')
+                    hid = t.tanh(b1 + t.dot(input, W1))
+                    hid_params = [W1, b1]
+                    hid_regularization = l2coef * t.sum(W1*W1)
+                    hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)]
+
+                params = [W2, b2] + hid_params
+                nll, predictions = nnet_ops.crossentropy_softmax_1hot(b2 + t.dot(hid, W2), target)
+                regularization = l2coef * t.sum(W2*W2) + hid_regularization
+                output_class = t.argmax(predictions,1)
+                loss_01 = t.neq(output_class, target)
+                g_params = t.grad(nll + regularization, params)
+                new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
+                self.__dict__.update(locals()); del self.self
+        self.nhid = nhid
+        self.nclass = nclass
+        self.nepochs = nepochs
+        self.v = Vars(lr, l2coef)
+        self.params = None
+
+    def __call__(self, trainset=None, iparams=None):
+        if iparams is None:
+            iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\
+                    + self.v.hid_ivals()
+        rval = NeuralNet.Model(self, iparams)
+        if trainset:
+            rval.update(trainset)
+        return rval
+
+
+if __name__ == '__main__':
+    training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 1],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 0],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 0],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2)})
+
+    learn_algo = NeuralNet(2, 10, 3, .1, 1000)
+
+    model1 = learn_algo(training_set1)
+
+    model2 = learn_algo(training_set2)
+
+    n_match = 0
+    for o1, o2 in zip(model1(test_data), model2(test_data)):
+        n_match += (o1 == o2) 
+
+    print n_match, numpy.sum(training_set1.fields()['target'] ==
+            training_set2.fields()['target'])
+
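
Note: the hidden_layer hook consumed in Vars.__init__ above must return a 4-tuple (hidden-layer expression, parameter list, initial-value thunk, regularization expression); NeuralNet.__call__ invokes the thunk via self.v.hid_ivals(). A minimal sketch, assuming ninputs and nhid are in scope, and noting that the hook's regularization term is added as-is (the default branch scales its own term by l2coef):

    def tanh_layer(input):
        W1 = t.matrix('W1')
        b1 = t.vector('b1')
        hid = t.tanh(b1 + t.dot(input, W1))
        ivals = lambda: [_randshape(ninputs, nhid), _randshape(nhid)]
        return hid, [W1, b1], ivals, t.sum(W1*W1)

    learn_algo = NeuralNet(2, 10, 3, .1, 1000, hidden_layer=tanh_layer)
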
--- a/nnet_ops.py	Wed May 14 14:06:15 2008 -0400
+++ b/nnet_ops.py	Wed May 14 14:06:52 2008 -0400
@@ -101,7 +101,7 @@
                 or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('b must be 1-d tensor of floats')
         if y_idx.type.ndim != 1 \
-                or y_idx.type.dtype not in ['int32', 'int64']:
+                or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']:
             raise ValueError('y_idx must be 1-d tensor of ints')
 
 #       TODO: Is this correct? It used to be y, not y_idx
@@ -109,7 +109,7 @@
                 y_idx.type.broadcastable).make_result()
 #        nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
-        return theano.Apply(self, [x, b, y_idx],[nll, sm])
+        return theano.Apply(self, [x, b, y_idx], [nll, sm])
     def perform(self, node, input_storage, output_storage):
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
@@ -144,6 +144,9 @@
 
         #TODO: set error messages for failures in this code
 
+        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
+        y_idx_type = node.inputs[2].type.dtype_specs()[1]
+
         return """
         npy_intp* Nx = %(x)s->dimensions;
 
@@ -172,9 +175,12 @@
             PyErr_SetString(PyExc_TypeError, "b not float64");
             %(fail)s;
         }
-        if (%(y_idx)s->descr->type_num != PyArray_INT64)
+        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
+            && (%(y_idx)s->descr->type_num != PyArray_INT32)
+            && (%(y_idx)s->descr->type_num != PyArray_INT16)
+            && (%(y_idx)s->descr->type_num != PyArray_INT8))
         {
-            PyErr_SetString(PyExc_TypeError, "y_idx not int64");
+            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
             %(fail)s;
         }
         if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])
@@ -217,7 +223,7 @@
 
             const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
             const double* __restrict__ b_i = (double*)(%(b)s->data);
-            const long int y_i = ((long int*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
+            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
             double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
             double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
 
@@ -303,15 +309,24 @@
     def grad(self, *args):
         raise NotImplementedError()
     def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
+        y_idx_type = node.inputs[2].type.dtype_specs()[1]
         return """
 
         if ((%(dnll)s->descr->type_num != PyArray_DOUBLE)
             || (%(sm)s->descr->type_num != PyArray_DOUBLE)
-            || (%(y_idx)s->descr->type_num != PyArray_INT64))
+            )
         {
             PyErr_SetString(PyExc_TypeError, "types should be float64, float64, int64");
             %(fail)s;
         }
+        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
+            && (%(y_idx)s->descr->type_num != PyArray_INT32)
+            && (%(y_idx)s->descr->type_num != PyArray_INT16)
+            && (%(y_idx)s->descr->type_num != PyArray_INT8))
+        {
+            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
+            %(fail)s;
+        }
         if ((%(dnll)s->nd != 1)
             || (%(sm)s->nd != 2)
             || (%(y_idx)s->nd != 1))
@@ -341,7 +356,7 @@
         {
             const double dnll_i = ((double*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
 
-            const long int y_i = ((long int*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
+            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
 
             const double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
             npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
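
Note: the net effect of the nnet_ops.py changes above is that crossentropy_softmax_1hot now accepts int8/int16/int32 target vectors in addition to int64, the generated C code reading targets through the C type returned by dtype_specs(). An illustrative sketch against the API as used in this changeset (not a tested snippet):

    import numpy
    import theano
    import theano.tensor as t
    import nnet_ops

    x = t.matrix('x')    # n_examples x n_classes activations
    y = t.ivector('y')   # int32 targets, rejected before this change
    nll, sm = nnet_ops.crossentropy_softmax_1hot(x, y)
    f = theano.function([x, y], [nll], unpack_single=False, linker='c|py')
    print f(numpy.random.rand(4, 3), numpy.zeros(4, dtype='int32'))
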
--- a/test_mlp.py	Wed May 14 14:06:15 2008 -0400
+++ b/test_mlp.py	Wed May 14 14:06:52 2008 -0400
@@ -1,9 +1,58 @@
 
 from mlp import *
 import dataset
+import nnet_ops
+
+
+from functools import partial
+def separator(debugger, i, node, *ths):
+    print "==================="
+
+def what(debugger, i, node, *ths):
+    print "#%i" % i, node
+
+def parents(debugger, i, node, *ths):
+    print [input.step for input in node.inputs]
+
+def input_shapes(debugger, i, node, *ths):
+    print "input shapes: ",
+    for r in node.inputs:
+        if hasattr(r.value, 'shape'):
+            print r.value.shape,
+        else:
+            print "no_shape",
+    print
+
+def input_types(debugger, i, node, *ths):
+    print "input types: ",
+    for r in node.inputs:
+        print r.type,
+    print
+
+def output_shapes(debugger, i, node, *ths):
+    print "output shapes:",
+    for r in node.outputs:
+        if hasattr(r.value, 'shape'):
+            print r.value.shape,
+        else:
+            print "no_shape",
+    print
+
+def output_types(debugger, i, node, *ths):
+    print "output types:",
+    for r in node.outputs:
+        print r.type,
+    print
+
 
 def test0():
-    nnet = OneHiddenLayerNNetClassifier(10,2,.001,1000)
+    linker = 'c|py'
+    #linker = partial(theano.gof.DebugLinker, linkers = [theano.gof.OpWiseCLinker],
+    #                 debug_pre = [separator, what, parents, input_types, input_shapes],
+    #                 debug_post = [output_shapes, output_types],
+    #                 compare_fn = lambda x, y: numpy.all(x == y))
+    
+    nnet = OneHiddenLayerNNetClassifier(10, 2, .001, 1000, linker=linker)
     training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                      [0, 1, 1],
                                                      [1, 0, 1],
@@ -16,5 +16,73 @@
     for fieldname in output_ds.fieldNames():
         print fieldname+"=",output_ds[fieldname]
 
-test0()
+def test1():
+    nnet = ManualNNet(2, 10, 3, .1, 1000)
+    training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 1],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    fprop=nnet(training_set)
+
+    output_ds = fprop(training_set)
+
+    for fieldname in output_ds.fieldNames():
+        print fieldname+"=",output_ds[fieldname]
+
+def test2():
+    training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 1],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    nin, nhid=2, 10
+    def sigm_layer(input):
+        W1 = t.matrix('W1')
+        b1 = t.vector('b1')
+        return (nnet_ops.sigmoid(b1 + t.dot(input, W1)),
+                [W1, b1],
+                [(numpy.random.rand(nin, nhid) - 0.5) * 0.001, numpy.zeros(nhid)],
+                0.0) # ManualNNet unpacks a 4-tuple; no regularization for this layer
+    nnet = ManualNNet(nin, nhid, 3, .1, 1000, hidden_layer=sigm_layer)
+    fprop=nnet(training_set)
+
+    output_ds = fprop(training_set)
 
+    for fieldname in output_ds.fieldNames():
+        print fieldname+"=",output_ds[fieldname]
+
+def test_interface_0():
+    # interface sketch only (not called below): training_set and
+    # additional_data are placeholders that this file does not define
+    learner = ManualNNet(2, 10, 3, .1, 1000)
+
+    model = learner(training_set)
+
+    model2 = learner(training_set)    # trains model a second time
+
+    learner.update(additional_data)   # modifies nnet and model by side-effect
+
+
+def test_interface2_1():
+    # interface sketch for the factory approach (not called below);
+    # training_set1/2, additional_data and test_data are placeholders
+    learn_algo = ManualNNet(2, 10, 3, .1, 1000)
+
+    prior = learn_algo()
+
+    model1 = learn_algo(training_set1)
+
+    model2 = learn_algo(training_set2)
+
+    model2.update(additional_data)
+
+    n_match = 0
+    for o1, o2 in zip(model1.use(test_data), model2.use(test_data)):
+        n_match += (o1 == o2) 
+
+    print n_match
+
+test1()
+test2()
+