view mlp_factory_approach.py @ 275:323909110d1c

added test_lookup_list
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 05 Jun 2008 14:14:51 -0400
parents ae0a8345869b
children eded3cb54930
line wrap: on
line source

import copy, sys, os
import numpy

import theano
from theano import tensor as T

from pylearn import dataset, nnet_ops, stopper, LookupList, filetensor


class AbstractFunction(Exception):
    """Raised by placeholder methods that a subclass is expected to override."""

class AutoName(object):
    """
    By inheriting from this class, class variables which have a name attribute
    will have that name attribute set to the class variable name.

    The renaming happens once, when the subclass body is turned into a class,
    through the Python 2 ``__metaclass__`` class-attribute hook below.
    """
    class __metaclass__(type):
        def __init__(cls, name, bases, dct):
            """Initialise the new class, then name its class variables.

            @param name: name of the class being created
            @param bases: tuple of base classes
            @param dct: the class namespace (attribute name -> value)
            """
            # The class being initialised must be passed explicitly; leaving
            # it out hands the class *name* to type.__init__ as `self`.
            type.__init__(cls, name, bases, dct)
            for key, val in dct.items():
                assert type(key) is str
                # Only objects that already expose a `name` attribute are
                # renamed; everything else (methods, lists, ...) is skipped.
                if hasattr(val, 'name'):
                    val.name = key

class GraphLearner(object):
    """Learning algorithm that builds and trains models from a graph object.

    The graph object supplied to the constructor is read for the following
    attributes: `input`, `target`, `params`, `new_params`, `nll`,
    `optimizer`, optionally `linker()`, and the methods `iparams()`,
    `train_iter()`, `early_stopper()`, `load()` and `save()`.
    """

    class Model(object):
        """Parameter values bound to the GraphLearner that produced them."""

        def __init__(self, algo, params):
            """
            @param algo: the GraphLearner whose graph this model evaluates
            @param params: list of parameter values, in the order of
            algo.graph.params
            """
            self.algo = algo
            self.params = params
            graph = self.algo.graph
            # A single compiled function yields the nll followed by the
            # updated parameters for one minibatch.
            self.update_fn = algo._fn([graph.input, graph.target] + graph.params,
                    [graph.nll] + graph.new_params)
            self._fn_cache = {}

        def __copy__(self):
            """Return a new Model on the same algo with copied parameters.

            Each parameter is copied individually so that further training of
            this model does not affect the copy (GraphLearner.__call__ keeps
            such a copy as the best model seen so far).
            """
            return GraphLearner.Model(self.algo,
                    [copy.copy(p) for p in self.params])

        def __eq__(self,other,tolerance=0.) :
            """ Only compares weights of matrices and bias vector.

            Two models are equal when they hold the same number of
            parameters, with matching shapes, and every element differs by at
            most `tolerance`.
            """
            if not isinstance(other,GraphLearner.Model) :
                return False
            if len(self.params) != len(other.params) :
                return False
            for mine, theirs in zip(self.params, other.params) :
                if mine.shape != theirs.shape :
                    return False
                if not numpy.all( numpy.abs(mine - theirs) <= tolerance ) :
                    return False
            return True

        def __ne__(self, other):
            """Negation of __eq__ (Python 2 does not derive it automatically)."""
            return not self.__eq__(other)

        def _cache(self, key, valfn):
            """Return the value cached under `key`, calling valfn() to create
            it on first use."""
            d = self._fn_cache
            if key not in d:
                d[key] = valfn()
            return d[key]

        def update_minibatch(self, minibatch):
            """Run one training step on `minibatch`.

            The outputs of update_fn are discarded.
            NOTE(review): this presumably relies on the graph's new_params
            updating the parameter arrays in place -- confirm for new graphs.
            """
            #assert isinstance(minibatch, LookupList) # why false???
            self.update_fn(minibatch['input'], minibatch['target'], *self.params)

        def update(self, dataset,
                default_minibatch_size=32):
            """Update this model from more training data.

            @param dataset: data providing 'input' and 'target' fields
            @param default_minibatch_size: upper bound on the minibatch size;
            the dataset length is used when it is smaller
            """
            minibatch_size = min(default_minibatch_size, len(dataset))
            for mb in dataset.minibatches(['input', 'target'], minibatch_size=minibatch_size):
                self.update_minibatch(mb)

        def save(self, f):
            """Save this model through the graph's save(f, model) method."""
            self.algo.graph.save(f, self)

        def __call__(self, testset, fieldnames=None):
            """Apply this model (as a function) to new data.

            @param testset: DataSet, whose fields feed Result terms in
            self.algo.graph
            @type testset: DataSet

            @param fieldnames: names of results in self.algo.graph to compute
            (defaults to ['output_class'])
            @type fieldnames: list of strings

            @return: DataSet with fields from fieldnames, computed from testset by
            this model.
            @rtype: ApplyFunctionDataSet instance

            @raise TypeError: when a field name does not name a theano.Result
            attribute of the graph
            """
            if fieldnames is None:
                # Fresh list per call, so the default is never shared.
                fieldnames = ['output_class']
            graph = self.algo.graph
            def getresult(name):
                r = getattr(graph, name)
                if not isinstance(r, theano.Result):
                    raise TypeError('string does not name a theano.Result', (name, r))
                return r

            provided = [getresult(name) for name in testset.fieldNames()]
            wanted = [getresult(name) for name in fieldnames]
            inputs = provided + graph.params

            # Compile once per distinct (inputs, outputs) combination.
            theano_fn = self._cache((tuple(inputs), tuple(wanted)),
                    lambda: self.algo._fn(inputs, wanted))
            # The current parameter values are appended to the dataset fields
            # at call time.
            lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
            return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)

    class Graph(object):
        """Base class for graph descriptions; subclasses override the
        methods that raise AbstractFunction."""

        class Opt(object):
            """Graph optimizer: merge, gemm pattern, optional x*x -> sqr(x),
            then merge again."""
            merge = theano.gof.MergeOptimizer()
            gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)
            sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub(
                    (T.mul,'x', 'x'),
                    (T.sqr, 'x')))

            def __init__(self, do_sqr=True):
                self.do_sqr = do_sqr

            def __call__(self, env):
                self.merge(env)
                self.gemm_opt_1(env)
                if self.do_sqr:
                    self.sqr_opt_0(env)
                self.merge(env)

        def linker(self):
            """Return the linker handed to theano.function by GraphLearner._fn."""
            return theano.gof.PerformLinker()

        def early_stopper(self):
            """Return the default stopper object for the training loop."""
            # The stopper must be returned: GraphLearner.__call__ reads
            # attributes of the returned object.
            return stopper.NStages(10,1)

        def train_iter(self, trainset):
            """Return an iterable of training minibatches (abstract)."""
            raise AbstractFunction
        optimizer = Opt()

        # NOTE(review): GraphLearner.__call__ invokes graph.load(algo, f)
        # with two arguments, which this base signature does not accept;
        # subclasses used for loading must define load(self, algo, f).
        def load(self,f) :
            """Load a model from f (abstract)."""
            raise AbstractFunction

        def save(self,f,model) :
            """Save model to f (abstract)."""
            raise AbstractFunction


    def __init__(self, graph):
        """
        @param graph: graph description object (see the class docstring for
        the attributes that are read from it)
        """
        self.graph = graph

    def _fn(self, inputs, outputs):
        """Compile a theano function using the graph's optimizer and linker."""
        # Caching here would hamper multi-threaded apps
        # prefer caching in Model.__call__
        return theano.function(inputs, outputs,
                unpack_single=False,
                optimizer=self.graph.optimizer,
                linker=self.graph.linker() if hasattr(self.graph, 'linker')
                else 'c&py')

    def __call__(self,
            trainset=None,
            validset=None,
            iparams=None):
        """Allocate and optionally train a model

        @param trainset: Data for minimizing the cost function.  When a
        string or file object is given instead, it is passed to
        self.graph.load and the loaded model is returned without training.
        @type trainset: None, Dataset, str or file

        @param validset: Data for early stopping
        @type validset: None or Dataset

        @param iparams: initial parameter values; self.graph.iparams() is
        used when None
        @type iparams: None or list

        @return: model
        @rtype: GraphLearner.Model instance

        """

        iparams = self.graph.iparams() if iparams is None else iparams

        # if we load, type(trainset) == 'str'
        if isinstance(trainset,str) or isinstance(trainset,file):
            #loadmodel = GraphLearner.Model(self, iparams)
            loadmodel = self.graph.load(self,trainset)
            return loadmodel

        curmodel = GraphLearner.Model(self, iparams)
        best = curmodel

        if trainset is not None:
            #do some training by calling Model.update_minibatch()
            stp = self.graph.early_stopper()
            for mb in self.graph.train_iter(trainset):
                curmodel.update_minibatch(mb)
                if stp.set_score:
                    if validset:
                        stp.score = curmodel(validset, ['validset_score'])
                        if (stp.score < stp.best_score):
                            # Snapshot the model: curmodel keeps training.
                            best = copy.copy(curmodel)
                    else:
                        stp.score = 0.0
                stp.next()
            if validset:
                curmodel = best
        return curmodel


def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0):


    def wrapper(i, node, thunk):
        if 0:
            print i, node
            print thunk.inputs
            print thunk.outputs
            if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias:
                print 'here is the nll op'
        thunk() #actually compute this piece of the graph

    class G(GraphLearner.Graph, AutoName):
            
        lr = T.constant(lr_val)
        assert l2coef_val == 0.0
        l2coef = T.constant(l2coef_val)
        input = T.matrix() # n_examples x n_inputs
        target = T.ivector() # len: n_examples
        W2, b2 = T.matrix(), T.vector()

        W1, b1 = T.matrix(), T.vector()
        hid = T.tanh(b1 + T.dot(input, W1))
        hid_regularization = l2coef * T.sum(W1*W1)

        params = [W1, b1, W2, b2] 
        activations = b2 + T.dot(hid, W2)
        nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
        regularization = l2coef * T.sum(W2*W2) + hid_regularization
        output_class = T.argmax(activations,1)
        loss_01 = T.neq(output_class, target)
        #g_params = T.grad(nll + regularization, params)
        g_params = T.grad(nll, params)
        new_params = [T.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)]

            
        def __eq__(self,other) :
            print 'G.__eq__ from graphMLP(), not implemented yet'
            return NotImplemented


        def load(self, algo, f):
            """ Load from file the 2 matrices and bias vectors """
            cloase_at_end = False
            if isinstance(f,str) :
                f = open(f,'r')
                close_at_end = True
            params = []
            for i in xrange(4):
                params.append(filetensor.read(f))
            if close_at_end :
                f.close()
            return GraphLearner.Model(algo, params)

        def save(self, f, model):
            """ Save params to file, so 2 matrices and 2 bias vectors. Same order as iparams. """
            cloase_at_end = False
            if isinstance(f,str) :
                f = open(f,'w')
                close_at_end = True
            for p in model.params:
                filetensor.write(f,p)
            if close_at_end :
                f.close()


        def iparams(self):
            """ init params. """
            def randsmall(*shape): 
                return (numpy.random.rand(*shape) -0.5) * 0.001
            return [randsmall(ninputs, nhid)
                    , randsmall(nhid)
                    , randsmall(nhid, nclass)
                    , randsmall(nclass)]

        def train_iter(self, trainset):
            return trainset.minibatches(['input', 'target'],
                    minibatch_size=min(len(trainset), 32), n_batches=300)
        def early_stopper(self): 
            return stopper.NStages(300,1)

    return G()


import unittest

class TestMLP(unittest.TestCase):
    def blah(self, g):
        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                         [0, 1, 1],
                                                         [1, 0, 1],
                                                         [1, 1, 1]]),
                                            {'input':slice(2),'target':2})
        training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                         [0, 1, 1],
                                                         [1, 0, 0],
                                                         [1, 1, 1]]),
                                            {'input':slice(2),'target':2})
        test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                         [0, 1, 1],
                                                         [1, 0, 0],
                                                         [1, 1, 1]]),
                                            {'input':slice(2)})

        learn_algo = GraphLearner(g)

        model1 = learn_algo(training_set1)

        model2 = learn_algo(training_set2)

        omatch = [o1 == o2 for o1, o2 in zip(model1(test_data),
            model2(test_data))]

        n_match = sum(omatch)

        self.failUnless(n_match ==  (numpy.sum(training_set1.fields()['target'] ==
                training_set2.fields()['target'])), omatch)

        model1.save('/tmp/model1')
        
        #denoising_aa = GraphLearner(denoising_g)
        #model1 = denoising_aa(trainset)
        #hidset = model(trainset, fieldnames=['hidden'])
        #model2 = denoising_aa(hidset)
        
        #f = open('blah', 'w')
        #for m in model:
        #    m.save(f)
        #filetensor.write(f, initial_classification_weights)
        #f.flush()

        #deep_sigmoid_net = GraphLearner(deepnetwork_g)
        #deep_model = deep_sigmoid_net.load('blah')
        #deep_model.update(trainset)  #do some fine tuning

        model1_dup = learn_algo('/tmp/model1')


    def equiv(self, g0, g1):
        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                         [0, 1, 1],
                                                         [1, 0, 1],
                                                         [1, 1, 1]]),
                                            {'input':slice(2),'target':2})
        learn_algo_0 = GraphLearner(g0)
        learn_algo_1 = GraphLearner(g1)

        model_0 = learn_algo_0(training_set1)
        model_1 = learn_algo_1(training_set1)

        print '----'
        for p in zip(model_0.params, model_1.params):
            abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1])
            max_abs_rel_err = numpy.max(abs_rel_err)
            if max_abs_rel_err > 1.0e-7:
                print 'p0', p[0]
                print 'p1', p[1]
            #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err)


    def test0(self): self.blah(graphMLP(2, 10, 2, .1))
    def test1(self): self.blah(graphMLP(2, 3, 2, .1))

# Run the unit tests above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()