changeset 207:c5a7105fa40b

trying to merge
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Fri, 16 May 2008 16:38:15 -0400
parents f2ddc795ec49 (diff) 80731832c62b (current diff)
children bf320808919f
files mlp_factory_approach.py
diffstat 1 files changed, 112 insertions(+), 143 deletions(-)
--- a/mlp_factory_approach.py	Thu May 15 15:21:00 2008 -0400
+++ b/mlp_factory_approach.py	Fri May 16 16:38:15 2008 -0400
@@ -1,172 +1,141 @@
-import copy, sys
-import numpy
-
+import dataset
 import theano
-from theano import tensor as t
-
-from tlearn import dataset, nnet_ops, stopper
+import theano.tensor as t
+import numpy
+import nnet_ops
 
 def _randshape(*shape): 
     return (numpy.random.rand(*shape) -0.5) * 0.001
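+# Thin wrapper around theano.function.  unpack_single=False (old Theano API)
+# presumably keeps the compiled function returning a list of outputs even when
+# a single output is requested, which is how Model.update and Model.__call__
+# below use it.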
+def _function(inputs, outputs, linker='c&py'):
+    return theano.function(inputs, outputs, unpack_single=False,linker=linker)
 
-def _cache(d, key, valfn):
-    #valfn() is only evaluated if key isn't in dictionary d
-    if key not in d:
-        d[key] = valfn()
-    return d[key]
+class NeuralNet(object):
+
+    class Model(object):
+        def __init__(self, nnet, params):
+            self.nnet = nnet
+            self.params = params
 
-class _Model(object):
-    def __init__(self, algo, params):
-        self.algo = algo
-        self.params = params
-        v = algo.v
-        self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params)
-        self._fn_cache = {}
-
-    def __copy__(self):
-        return _Model(self.algo, [copy.copy(p) for p in params])
-
-    def update(self, input_target):
-        """Update this model from more training data."""
-        params = self.params
-        #TODO: why should we have to unpack target like this?
-        for input, target in input_target:
-            self.update_fn(input, target[:,0], *params)
+        def update(self, trainset, stopper=None):
+            """Update this model from more training data."""
+            v = self.nnet.v
+            params = self.params
+            update_fn = _function([v.input, v.target] + v.params, [v.nll] + v.new_params)
+            if stopper is not None: 
+                raise NotImplementedError()
+            else:
+                for i in xrange(self.nnet.nepochs):
+                    for input, target in trainset.minibatches(['input', 'target'],
+                            minibatch_size=min(32, len(trainset))):
+                        results = update_fn(input, target[:,0], *params)
+                        if 0: print results[0]
+                        # print params['b']
 
-    def __call__(self, testset, fieldnames=['output_class']):
-        """Apply this model (as a function) to new data"""
-        #TODO: cache fn between calls
-        assert 'input' == testset.fieldNames()[0]
-        assert len(testset.fieldNames()) <= 2
-        v = self.algo.v
-        outputs = [getattr(v, name) for name in fieldnames]
-        inputs = [v.input] + ([v.target] if 'target' in testset else [])
-        inputs.extend(v.params)
-        theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)),
-                lambda: self.algo._fn(inputs, outputs))
-        lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
-        return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)
+        def __call__(self, testset,
+                output_fieldnames=['output_class'],
+                test_stats_collector=None,
+                copy_inputs=False,
+                put_stats_in_output_dataset=True,
+                output_attributes=[]):
+            """Apply this model (as a function) to new data"""
+            inputs = [self.nnet.v.input, self.nnet.v.target] + self.nnet.v.params
+            fn = _function(inputs, [getattr(self.nnet.v, name) for name in output_fieldnames])
+            if 'target' in testset.fieldNames():
+                return dataset.ApplyFunctionDataSet(testset, 
+                    lambda input, target: fn(input, target[:,0], *self.params),
+                    output_fieldnames)
+            else:
+                return dataset.ApplyFunctionDataSet(testset, 
+                    lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.params),
+                    output_fieldnames)
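+        # Usage sketch (hypothetical variable names; iterating the returned
+        # dataset follows dataset.ApplyFunctionDataSet, as in the __main__
+        # block below):
+        #
+        #     model = NeuralNet(ninputs=2, nhid=10, nclass=2, lr=.1, nepochs=1000)(trainset)
+        #     for out in model(testset, output_fieldnames=['output_class']):
+        #         print out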
 
-class AutonameVars(object):
-    def __init__(self, dct):
-        for key, val in dct.items():
-            if type(key) is str and hasattr(val, 'name'):
-                val.name = key
-        self.__dict__.update(dct)
-
-class MultiLayerPerceptron(object):
-
-    def __init__(self, ninputs, nhid, nclass, lr,
-            l2coef=0.0,
-            linker='c&py', 
-            hidden_layer=None,
-            early_stopper=None,
-            validation_portion=0.2,
-            V_extern=None):
-        class V_intern(AutonameVars):
-            def __init__(v_self, lr, l2coef, **kwargs):
+    def __init__(self, ninputs, nhid, nclass, lr, nepochs, 
+                 l2coef=0.0,
+                 linker='c&py',
+                 hidden_layer=None):
+        if not hidden_layer:
+            hidden_layer = AffineSigmoidLayer("hidden",ninputs,nhid,l2coef)
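+        # NOTE: AffineSigmoidLayer is not defined or imported in this module;
+        # it is assumed to provide __call__(input), params(), params_ivals()
+        # and regularization(), which is how Vars uses the hidden layer below.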
+        class Vars:
+            def __init__(self, lr, l2coef):
                 lr = t.constant(lr)
                 l2coef = t.constant(l2coef)
-                input = t.matrix() # n_examples x n_inputs
-                target = t.ivector() # len: n_examples
-                W2, b2 = t.matrix(), t.vector()
+                input = t.matrix('input') # n_examples x n_inputs
+                target = t.ivector('target') # len: n_examples
+                W2 = t.matrix('W2')
+                b2 = t.vector('b2')
 
-                if hidden_layer:
-                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
-                else:
-                    W1, b1 = t.matrix(), t.vector()
-                    hid = t.tanh(b1 + t.dot(input, W1))
-                    hid_params = [W1, b1]
-                    hid_regularization = l2coef * t.sum(W1*W1)
-                    hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)]
-
+                hid = hidden_layer(input)
+                hid_params = hidden_layer.params()
+                hid_params_init_vals = hidden_layer.params_ivals()
+                hid_regularization = hidden_layer.regularization()
+                    
                 params = [W2, b2] + hid_params
-                activations = b2 + t.dot(hid, W2)
-                nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
+                nll, predictions = nnet_ops.crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target)
                 regularization = l2coef * t.sum(W2*W2) + hid_regularization
-                output_class = t.argmax(activations,1)
+                output_class = t.argmax(predictions,1)
                 loss_01 = t.neq(output_class, target)
                 g_params = t.grad(nll + regularization, params)
                 new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
-                self.__dict__.update(locals()); del self.self
-                AutonameVars.__init__(v_self, locals())
+                setattr_and_name(self, locals())
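+                # After setattr_and_name, the symbolic results built above
+                # (input, target, params, nll, regularization, output_class,
+                # loss_01, new_params, ...) are available as attributes of
+                # this Vars instance.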
         self.nhid = nhid
         self.nclass = nclass
-        self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals())
-        self.linker = linker
-        self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1)
-        self.validation_portion = validation_portion
-
-    def _fn(self, inputs, outputs):
-        # Caching here would hamper multi-threaded apps
-        # prefer caching in _Model.__call__
-        return theano.function(inputs, outputs, unpack_single=False, linker=self.linker)
+        self.nepochs = nepochs
+        self.v = Vars(lr, l2coef)
+        self.params = None
 
     def __call__(self, trainset=None, iparams=None):
-        """Allocate and optionally train a model"""
         if iparams is None:
-            iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\
-                    + self.v.hid_ivals()
-        rval = _Model(self, iparams)
+            # NOTE: LookupList is assumed to be available from the dataset
+            # framework; it is not imported in this module.
+            iparams = LookupList(["W","b"], [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]) \
+                    + self.v.hid_params_init_vals
+        rval = NeuralNet.Model(self, iparams)
         if trainset:
-            if len(trainset) == sys.maxint:
-                raise NotImplementedError('Learning from infinite streams is not supported')
-            nval = int(self.validation_portion * len(trainset))
-            nmin = len(trainset) - nval
-            assert nmin >= 0
-            minset = trainset[:nmin] #real training set for minimizing loss
-            valset = trainset[nmin:] #validation set for early stopping
-            best = rval
-            for stp in self.early_stopper():
-                rval.update(
-                    trainset.minibatches(['input', 'target'], minibatch_size=min(32,
-                        len(trainset))))
-                if stp.set_score:
-                    stp.score = rval(valset, ['loss_01'])
-                    if (stp.score < stp.best_score):
-                        best = copy.copy(rval)
-            rval = best
+            rval.update(trainset)
         return rval
 
 
-import unittest
-
-class TestMLP(unittest.TestCase):
-    def test0(self):
+def setattr_and_name(self, dict):
+    """Do a self.__setattr__ for every element in the dict (except for the
+    element self). In addition, make sure that each element's .name attribute
+    (if it exists and is unset) is set to the element's key in the dictionary.
+    Typical usage:  setattr_and_name(self, locals())  """
+    for varname, var in dict.items():
+        if var is not self:
+            if hasattr(var, "name") and not var.name:
+                var.name = varname
+            self.__setattr__(varname, var)
 
-        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                         [0, 1, 1],
-                                                         [1, 0, 1],
-                                                         [1, 1, 1]]),
-                                            {'input':slice(2),'target':2})
-        training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                         [0, 1, 1],
-                                                         [1, 0, 0],
-                                                         [1, 1, 1]]),
-                                            {'input':slice(2),'target':2})
-        test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
-                                                         [0, 1, 1],
-                                                         [1, 0, 0],
-                                                         [1, 1, 1]]),
-                                            {'input':slice(2)})
-
-        learn_algo = MultiLayerPerceptron(2, 10, 2, .1
-                , linker='c&py'
-                , early_stopper = lambda:stopper.NStages(100,1))
-
-        model1 = learn_algo(training_set1)
-
-        model2 = learn_algo(training_set2)
-
-        n_match = 0
-        for o1, o2 in zip(model1(test_data), model2(test_data)):
-            #print o1
-            #print o2
-            n_match += (o1 == o2) 
-
-        assert n_match ==  (numpy.sum(training_set1.fields()['target'] ==
-                training_set2.fields()['target']))
 
 if __name__ == '__main__':
-    unittest.main()
+    training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 1],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 0],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2),'target':2})
+    test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                     [0, 1, 1],
+                                                     [1, 0, 0],
+                                                     [1, 1, 1]]),
+                                        {'input':slice(2)})
 
+
+    learn_algo = NeuralNet(2, 10, 3, .1, 1000)
+
+    model = learn_algo()
+
+    model1 = learn_algo(training_set1)
+
+    model2 = learn_algo(training_set2)
+
+    n_match = 0
+    for o1, o2 in zip(model1(test_data), model2(test_data)):
+        n_match += (o1 == o2) 
+
+    print n_match, numpy.sum(training_set1.fields()['target'] ==
+            training_set2.fields()['target'])
+