diff mlp_factory_approach.py @ 244:3156a9976183

mlp_factory_approach.py, updated and un-deprecated by popular demand
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 02 Jun 2008 17:08:17 -0400
parents c047238e5b3f
children a1793a5e9523
--- a/mlp_factory_approach.py	Wed May 28 14:09:19 2008 -0400
+++ b/mlp_factory_approach.py	Mon Jun 02 17:08:17 2008 -0400
@@ -1,156 +1,225 @@
-"""
-
-
-
-This file is deprecated. I'm continuing development in hpu/models.py.
-
-Get that project like this: hg clone ssh://user@lgcm/../bergstrj/hpu
-
-
-
-
-
-"""
 import copy, sys
 import numpy
 
 import theano
-from theano import tensor as t
-
-from pylearn import dataset, nnet_ops, stopper
+from theano import tensor as T
 
+from pylearn import dataset, nnet_ops, stopper, LookupList
 
-def _randshape(*shape): 
-    return (numpy.random.rand(*shape) -0.5) * 0.001
+class AbstractFunction (Exception): pass
 
-def _cache(d, key, valfn):
-    #valfn() is only evaluated if key isn't in dictionary d
-    if key not in d:
-        d[key] = valfn()
-    return d[key]
-
-class _Model(object):
-    def __init__(self, algo, params):
-        self.algo = algo
-        self.params = params
-        v = algo.v
-        self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params)
-        self._fn_cache = {}
+class AutoName(object):
+    """
+    By inheriting from this class, class variables which have a name attribute
+    will have that name attribute set to the class variable name.
+    """
+    class __metaclass__(type):
+         def __init__(cls, name, bases, dct):
+             type.__init__(name, bases, dct)
+             for key, val in dct.items():
+                 assert type(key) is str
+                 if hasattr(val, 'name'): 
+                     val.name = key
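+
+# A rough illustration: in a subclass body such as
+#
+#     class G(AutoName):
+#         input = T.matrix()
+#
+# the metaclass sets input.name to 'input', so Theano variables end up named
+# after the class attributes they are bound to (see graphMLP's G below).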
 
-    def __copy__(self):
-        return _Model(self.algo, [copy.copy(p) for p in params])
+class GraphLearner(object):
+    class Model(object):
+        def __init__(self, algo, params):
+            self.algo = algo
+            self.params = params
+            graph = self.algo.graph
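+            # update_fn maps (input, target, *params) -> (nll, *new_params)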
+            self.update_fn = algo._fn([graph.input, graph.target] + graph.params,
+                    [graph.nll] + graph.new_params)
+            self._fn_cache = {}
 
-    def update(self, input_target):
-        """Update this model from more training data."""
-        params = self.params
-        #TODO: why should we have to unpack target like this?
-        # tbm : creates problem...
-        for input, target in input_target:
-            rval= self.update_fn(input, target, *params)
-            #print rval[0]
+        def __copy__(self):
+            raise Exception('why not called?') 
+            return GraphLearner.Model(self.algo, [copy.copy(p) for p in self.params])
 
-    def __call__(self, testset, fieldnames=['output_class'],input='input',target='target'):
-        """Apply this model (as a function) to new data"""
-        #TODO: cache fn between calls
-        assert input == testset.fieldNames()[0] # why first one???
-        assert len(testset.fieldNames()) <= 2
-        v = self.algo.v
-        outputs = [getattr(v, name) for name in fieldnames]
-        inputs = [v.input] + ([v.target] if target in testset else [])
-        inputs.extend(v.params)
-        theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)),
-                lambda: self.algo._fn(inputs, outputs))
-        lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
-        return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)
+        def _cache(self, key, valfn):
+            d = self._fn_cache
+            if key not in d:
+                d[key] = valfn()
+            return d[key]
+
+        def update_minibatch(self, minibatch):
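+            # minibatch is a LookupList with 'input' and 'target' fields.
+            # update_fn takes one training step; in graphMLP below new_params
+            # uses sub_inplace, so the arrays in self.params are updated in
+            # place rather than returned.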
+            assert isinstance(minibatch, LookupList)
+            self.update_fn(minibatch['input'], minibatch['target'], *self.params)
+
+        def update(self, dataset, 
+                default_minibatch_size=32):
+            """Update this model from more training data."""
+            params = self.params
+            minibatch_size = min(default_minibatch_size, len(dataset))
+            for mb in dataset.minibatches(['input', 'target'], minibatch_size=minibatch_size):
+                self.update_minibatch(mb)
+
+        def __call__(self, testset, fieldnames=['output_class']):
+            """Apply this model (as a function) to new data.
 
-class AutonameVars(object):
-    def __init__(self, dct):
-        for key, val in dct.items():
-            if type(key) is str and hasattr(val, 'name'):
-                val.name = key
-        self.__dict__.update(dct)
+            @param testset: DataSet whose fields feed Result terms in self.algo.graph
+            @type testset: DataSet
 
-class MultiLayerPerceptron(object):
+            @param fieldnames: names of results in self.algo.graph to compute.
+            @type fieldnames: list of strings
 
-    def __init__(self, ninputs, nhid, nclass, lr,
-            l2coef=0.0,
-            linker='c&py', 
-            hidden_layer=None,
-            early_stopper=None,
-            validation_portion=0.2,
-            V_extern=None):
-        class V_intern(AutonameVars):
-            def __init__(v_self, lr, l2coef, **kwargs):
-                lr = t.constant(lr)
-                l2coef = t.constant(l2coef)
-                input = t.matrix() # n_examples x n_inputs
-                target = t.ivector() # len: n_examples
-                W2, b2 = t.matrix(), t.vector()
+            @return: DataSet with fields from fieldnames, computed from testset by
+            this model.  
+            @rtype: ApplyFunctionDataSet instance
+            
+            """
+            graph = self.algo.graph
+            def getresult(name):
+                r = getattr(graph, name)
+                if not isinstance(r, theano.Result):
+                    raise TypeError('string does not name a theano.Result', (name, r))
+                return r
+
+            provided = [getresult(name) for name in testset.fieldNames()]
+            wanted = [getresult(name) for name in fieldnames]
+            inputs = provided + graph.params
+
+            theano_fn = self._cache((tuple(inputs), tuple(wanted)),
+                    lambda: self.algo._fn(inputs, wanted))
+            lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
+            return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)
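+
+        # A rough usage sketch (cf. the tests at the bottom of this file):
+        #     out = model(test_data, ['output_class'])
+        # where test_data has an 'input' field naming graph.input, and out is
+        # an ApplyFunctionDataSet with a single 'output_class' field.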
 
-                if hidden_layer:
-                    hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input)
-                else:
-                    W1, b1 = t.matrix(), t.vector()
-                    hid = t.tanh(b1 + t.dot(input, W1))
-                    hid_params = [W1, b1]
-                    hid_regularization = l2coef * t.sum(W1*W1)
-                    hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)]
+    class Graph(object):
+        class Opt(object):
+            merge = theano.gof.MergeOptimizer()
+            gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)
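+            # the following pattern rewrite replaces x*x with sqr(x)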
+            sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub(
+                    (T.mul,'x', 'x'),
+                    (T.sqr, 'x')))
+
+            def __init__(self, do_sqr=True):
+                self.do_sqr = do_sqr
 
-                params = [W2, b2] + hid_params
-                activations = b2 + t.dot(hid, W2)
-                nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
-                regularization = l2coef * t.sum(W2*W2) + hid_regularization
-                output_class = t.argmax(activations,1)
-                loss_01 = t.neq(output_class, target)
-                g_params = t.grad(nll + regularization, params)
-                new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
-                self.__dict__.update(locals()); del self.self
-                AutonameVars.__init__(v_self, locals())
-        self.nhid = nhid
-        self.nclass = nclass
-        self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals())
-        self.linker = linker
-        self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1)
-        self.validation_portion = validation_portion
+            def __call__(self, env):
+                self.merge(env)
+                self.gemm_opt_1(env)
+                if self.do_sqr:
+                    self.sqr_opt_0(env)
+                self.merge(env)
+
+        def linker(self): 
+            return theano.gof.PerformLinker()
+
+        def early_stopper(self):
+            return stopper.NStages(10,1)
+        
+        def train_iter(self, trainset):
+            raise AbstractFunction
+        optimizer = Opt()
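+        # A concrete Graph (e.g. graphMLP's G below) is expected to provide
+        # the Results used by Model and GraphLearner -- input, target, nll,
+        # params, new_params -- plus iparams() and train_iter(); linker(),
+        # early_stopper() and optimizer may be overridden.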
+
+    def __init__(self, graph):
+        self.graph = graph
 
     def _fn(self, inputs, outputs):
         # Caching here would hamper multi-threaded apps
-        # prefer caching in _Model.__call__
-        return theano.function(inputs, outputs, unpack_single=False, linker=self.linker)
+        # prefer caching in Model.__call__
+        return theano.function(inputs, outputs, 
+                unpack_single=False,
+                optimizer=self.graph.optimizer,
+                linker=self.graph.linker() if hasattr(self.graph, 'linker')
+                else 'c&py')
+
+    def __call__(self,
+            trainset=None,
+            validset=None,
+            iparams=None):
+        """Allocate and optionally train a model
+
+        @param trainset: Data for minimizing the cost function
+        @type trainset: None or Dataset
+
+        @param validset: Data for early stopping
+        @type validset: None or Dataset
+
+        @param iparams: initial values for the graph's parameters
+            (if None, self.graph.iparams() is used)
+        @type iparams: None or list
 
-    def __call__(self, trainset=None, iparams=None, input='input', target='target'):
-        """Allocate and optionally train a model"""
-        if iparams is None:
-            iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\
-                    + self.v.hid_ivals()
-        rval = _Model(self, iparams)
-        if trainset:
-            if len(trainset) == sys.maxint:
-                raise NotImplementedError('Learning from infinite streams is not supported')
-            nval = int(self.validation_portion * len(trainset))
-            nmin = len(trainset) - nval
-            assert nmin >= 0
-            minset = trainset[:nmin] #real training set for minimizing loss
-            valset = trainset[nmin:] #validation set for early stopping
-            best = rval
-            for stp in self.early_stopper():
-                rval.update(
-                    minset.minibatches([input, target], minibatch_size=min(32,
-                        len(minset))))
-                #print 'mlp.__call__(), we did an update'
+        @return: model
+        @rtype: GraphLearner.Model instance
+        
+        """
+        iparams = self.graph.iparams() if iparams is None else iparams
+        curmodel = GraphLearner.Model(self, iparams)
+        best = curmodel
+        
+        if trainset is not None: 
+            #do some training by calling Model.update_minibatch()
+            stp = self.graph.early_stopper()
+            for mb in self.graph.train_iter(trainset):
+                curmodel.update_minibatch(mb)
                 if stp.set_score:
-                    stp.score = rval(valset, ['loss_01'])
-                    if (stp.score < stp.best_score):
-                        best = copy.copy(rval)
-            rval = best
-        return rval
+                    if validset:
+                        stp.score = curmodel(validset, ['validset_score'])
+                        if (stp.score < stp.best_score):
+                            best = copy.copy(curmodel)
+                    else:
+                        stp.score = 0.0
+                stp.next()
+            if validset:
+                curmodel = best
+        return curmodel
+
+def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0):
+    def wrapper(i, node, thunk):
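+        # debugging hook: optionally print each node before computing it
+        # (the prints are disabled by the "if 0" below)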
+        if 0:
+            print i, node
+            print thunk.inputs
+            print thunk.outputs
+            if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias:
+                print 'here is the nll op'
+        thunk() #actually compute this piece of the graph
+
+    class G(GraphLearner.Graph, AutoName):
+            
+        lr = T.constant(lr_val)
+        assert l2coef_val == 0.0
+        l2coef = T.constant(l2coef_val)
+        input = T.matrix() # n_examples x n_inputs
+        target = T.ivector() # len: n_examples
+        W2, b2 = T.matrix(), T.vector()
+
+        W1, b1 = T.matrix(), T.vector()
+        hid = T.tanh(b1 + T.dot(input, W1))
+        hid_regularization = l2coef * T.sum(W1*W1)
+
+        params = [W1, b1, W2, b2] 
+        activations = b2 + T.dot(hid, W2)
+        nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
+        regularization = l2coef * T.sum(W2*W2) + hid_regularization
+        output_class = T.argmax(activations,1)
+        loss_01 = T.neq(output_class, target)
+        #g_params = T.grad(nll + regularization, params)
+        g_params = T.grad(nll, params)
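+        # in-place gradient step: each parameter p becomes p - lr * d(nll)/dp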
+        new_params = [T.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]
+
+        def iparams(self):
+            def randsmall(*shape): 
+                return (numpy.random.rand(*shape) -0.5) * 0.001
+            return [randsmall(ninputs, nhid)
+                    , randsmall(nhid)
+                    , randsmall(nhid, nclass)
+                    , randsmall(nclass)]
+
+        def train_iter(self, trainset):
+            return trainset.minibatches(['input', 'target'],
+                    minibatch_size=min(len(trainset), 32), n_batches=300)
+        def early_stopper(self): 
+            return stopper.NStages(300,1)
+
+    return G()
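+
+# Rough end-to-end use (see TestMLP below):
+#
+#     learn_algo = GraphLearner(graphMLP(2, 10, 2, .1))
+#     model = learn_algo(training_set)              # allocate and train
+#     outputs = model(test_data, ['output_class'])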
 
 
 import unittest
 
 class TestMLP(unittest.TestCase):
-    def test0(self):
-
+    def blah(self, g):
         training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                          [0, 1, 1],
                                                          [1, 0, 1],
@@ -167,23 +236,46 @@
                                                          [1, 1, 1]]),
                                             {'input':slice(2)})
 
-        learn_algo = MultiLayerPerceptron(2, 10, 2, .1
-                , linker='c&py'
-                , early_stopper = lambda:stopper.NStages(100,1))
+        learn_algo = GraphLearner(g)
 
         model1 = learn_algo(training_set1)
 
         model2 = learn_algo(training_set2)
 
-        n_match = 0
-        for o1, o2 in zip(model1(test_data), model2(test_data)):
-            #print o1
-            #print o2
-            n_match += (o1 == o2) 
+        omatch = [o1 == o2 for o1, o2 in zip(model1(test_data),
+            model2(test_data))]
+
+        n_match = sum(omatch)
+
+        self.failUnless(n_match ==  (numpy.sum(training_set1.fields()['target'] ==
+                training_set2.fields()['target'])), omatch)
 
-        assert n_match ==  (numpy.sum(training_set1.fields()['target'] ==
-                training_set2.fields()['target']))
+    def equiv(self, g0, g1):
+        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
+                                                         [0, 1, 1],
+                                                         [1, 0, 1],
+                                                         [1, 1, 1]]),
+                                            {'input':slice(2),'target':2})
+        learn_algo_0 = GraphLearner(g0)
+        learn_algo_1 = GraphLearner(g1)
+
+        model_0 = learn_algo_0(training_set1)
+        model_1 = learn_algo_1(training_set1)
+
+        print '----'
+        for p in zip(model_0.params, model_1.params):
+            abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1])
+            max_abs_rel_err = numpy.max(abs_rel_err)
+            if max_abs_rel_err > 1.0e-7:
+                print 'p0', p[0]
+                print 'p1', p[1]
+            #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err)
+
+
+    def test0(self): self.blah(graphMLP(2, 10, 2, .1))
+    def test1(self): self.blah(graphMLP(2, 3, 2, .1))
 
 if __name__ == '__main__':
     unittest.main()
 
+