Mercurial > pylearn
view mlp_factory_approach.py @ 507:b8e6de17eaa6
modifs to smallNorb
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 29 Oct 2008 18:06:49 -0400 |
parents | 93280a0c151a |
children |
line wrap: on
line source
"""Factory-style learner that trains a model described by a theano graph.

A GraphLearner wraps a "graph" object (see GraphLearner.Graph and graphMLP)
which exposes symbolic theano variables as attributes, and produces
GraphLearner.Model instances holding the numeric parameters.
"""
import copy
import sys
import os
import tempfile
import unittest

import numpy
import theano
from theano import tensor as T

import dataset
import nnet_ops
import stopper
import filetensor
from pylearn.lookup_list import LookupList


class AbstractFunction(Exception):
    """Raised by placeholder methods that a subclass is expected to override."""
    pass


class AutoName(object):
    """
    By inheriting from this class, class variables which have a name attribute
    will have that name attribute set to the class variable name.
    """
    class __metaclass__(type):
        def __init__(cls, name, bases, dct):
            # `cls` must be passed explicitly to the unbound type.__init__;
            # without it `name` would be taken as the instance being initialised.
            type.__init__(cls, name, bases, dct)
            for key, val in dct.items():
                assert type(key) is str
                if hasattr(val, 'name'):
                    val.name = key


class GraphLearner(object):
    """Callable that allocates, optionally trains, or loads a Model for a graph."""

    class Model(object):
        """Parameter values for a graph, plus compiled functions that use them."""

        def __init__(self, algo, params):
            """
            @param algo: the learner that owns this model; its `graph` and `_fn`
                         are used to compile functions
            @type algo: GraphLearner
            @param params: parameter values, in the same order as algo.graph.params
            @type params: list
            """
            self.algo = algo
            self.params = params
            graph = self.algo.graph
            # One call computes the nll and the updated parameters.
            self.update_fn = algo._fn([graph.input, graph.target] + graph.params,
                    [graph.nll] + graph.new_params)
            # Compiled functions built by __call__, keyed by (inputs, outputs).
            self._fn_cache = {}

        def __copy__(self):
            """Return a new Model for the same algo with shallow-copied params.

            The parameters are copied one by one so that later updates of this
            model do not alter the snapshot (GraphLearner.__call__ uses
            copy.copy to remember the best model seen so far).
            """
            return GraphLearner.Model(self.algo,
                    [copy.copy(p) for p in self.params])

        def __eq__(self, other, tolerance=0.):
            """ Only compares the parameter values (weight matrices and bias
            vectors).

            Two models are equal when they have the same number of parameters,
            every pair has the same shape, and every element-wise absolute
            difference is at most `tolerance`.
            """
            if not isinstance(other, GraphLearner.Model):
                return False
            if len(self.params) != len(other.params):
                return False
            for mine, theirs in zip(self.params, other.params):
                if mine.shape != theirs.shape:
                    return False
                if not numpy.all(numpy.abs(mine - theirs) <= tolerance):
                    return False
            return True

        def _cache(self, key, valfn):
            """Return the cached value for `key`, calling valfn() on first use."""
            d = self._fn_cache
            if key not in d:
                d[key] = valfn()
            return d[key]

        def update_minibatch(self, minibatch):
            """Run one call of update_fn on the 'input' and 'target' fields.

            @param minibatch: must be a LookupList (asserted)
            """
            if not isinstance(minibatch, LookupList):
                # Show the offending type before the assertion fails.
                print(type(minibatch))
            assert isinstance(minibatch, LookupList)
            self.update_fn(minibatch['input'], minibatch['target'], *self.params)

        def update(self, dataset, default_minibatch_size=32):
            """
            Update this model from more training data. Uses all the data once,
            cut into minibatches. No early stopper here.

            @param dataset: object providing len() and
                            minibatches(fieldnames, minibatch_size=...)
            @param default_minibatch_size: upper bound on the minibatch size;
                                           the size used is capped by len(dataset)
            """
            minibatch_size = min(default_minibatch_size, len(dataset))
            for mb in dataset.minibatches(['input', 'target'],
                    minibatch_size=minibatch_size):
                self.update_minibatch(mb)

        def save(self, f):
            """Delegate saving of this model to the graph: graph.save(f, self)."""
            self.algo.graph.save(f, self)

        def __call__(self, testset, fieldnames=None):
            """Apply this model (as a function) to new data.

            @param testset: DataSet, whose fields feed Result terms in self.algo.g
            @type testset: DataSet

            @param fieldnames: names of results in self.algo.g to compute.
                               Defaults to ['output_class'] when None.
            @type fieldnames: list of strings

            @return: DataSet with fields from fieldnames, computed from testset by
            this model.
            @rtype: ApplyFunctionDataSet instance

            @raise TypeError: when a field name does not name a theano.Result
                              attribute of the graph
            """
            if fieldnames is None:
                fieldnames = ['output_class']
            graph = self.algo.graph

            def getresult(name):
                r = getattr(graph, name)
                if not isinstance(r, theano.Result):
                    raise TypeError('string does not name a theano.Result', (name, r))
                return r

            provided = [getresult(name) for name in testset.fieldNames()]
            wanted = [getresult(name) for name in fieldnames]
            inputs = provided + graph.params

            theano_fn = self._cache((tuple(inputs), tuple(wanted)),
                    lambda: self.algo._fn(inputs, wanted))
            # The dataset supplies the field values; the parameters are appended
            # at call time so the current values of self.params are always used.
            lambda_fn = lambda *args: theano_fn(*(list(args) + self.params))
            return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames)

    class Graph(object):
        """Base class for graph descriptions consumed by GraphLearner.

        Subclasses override train_iter, load and save (which raise
        AbstractFunction here); see the class G built by graphMLP.
        """

        class Opt(object):
            """Graph optimizer: merge, gemm pattern, optional mul(x, x) -> sqr(x),
            then merge again."""
            merge = theano.gof.MergeOptimizer()
            gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1)
            sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub(
                    (T.mul, 'x', 'x'),
                    (T.sqr, 'x')))

            def __init__(self, do_sqr=True):
                """@param do_sqr: whether __call__ applies the sqr substitution"""
                self.do_sqr = do_sqr

            def __call__(self, env):
                """Apply the optimizations, in order, to `env`."""
                self.merge(env)
                self.gemm_opt_1(env)
                if self.do_sqr:
                    self.sqr_opt_0(env)
                self.merge(env)

        def linker(self):
            """Return the linker passed to theano.function by GraphLearner._fn."""
            return theano.gof.PerformLinker()

        def early_stopper(self):
            """Return the default early stopper, used when GraphLearner.__call__
            is given none."""
            return stopper.NStages(300, 1)

        def train_iter(self, trainset):
            """Return an iterable of training minibatches. Must be overridden."""
            raise AbstractFunction

        optimizer = Opt()

        def load(self, algo=None, f=None):
            """Load a model. Must be overridden.

            GraphLearner.__call__ invokes graph.load(algo, f); the placeholder
            accepts that form (and the older one-argument form) so that either
            call raises AbstractFunction rather than a TypeError.
            """
            raise AbstractFunction

        def save(self, f, model):
            """Save `model` to `f`. Must be overridden."""
            raise AbstractFunction

    def __init__(self, graph):
        """@param graph: graph description, e.g. the object returned by graphMLP"""
        self.graph = graph

    def _fn(self, inputs, outputs):
        """Compile a theano function using the graph's optimizer and linker.

        Falls back to the 'c|py' linker when the graph has no `linker` attribute.
        """
        # Caching here would hamper multi-threaded apps
        # prefer caching in Model.__call__
        return theano.function(inputs, outputs,
                unpack_single=False,
                optimizer=self.graph.optimizer,
                linker=self.graph.linker() if hasattr(self.graph, 'linker') else 'c|py')

    def __call__(self,
            trainset=None,
            validset=None,
            iparams=None,
            stp=None):
        """Allocate and optionally train a model, or load one from a file

        @param trainset: Data for minimizing the cost function. When it is a
                         string or a file, the model is loaded from it with
                         self.graph.load and returned without any training.
        @type trainset: None or Dataset or string or file

        @param validset: Data for early stopping
        @type validset: None or Dataset

        @param iparams: initial parameter values; if None use
                        self.graph.iparams()
        @type iparams: None or list

        @param stp: early stopper, if None use default in graphMLP.G
        @type stp: None or early stopper

        @return: model
        @rtype: GraphLearner.Model instance
        """
        iparams = self.graph.iparams() if iparams is None else iparams

        # if we load, trainset is a file name or an open file
        if isinstance(trainset, (str, file)):
            return self.graph.load(self, trainset)

        curmodel = GraphLearner.Model(self, iparams)
        best = curmodel

        if trainset is not None:
            #do some training by calling Model.update_minibatch()
            if stp is None:
                stp = self.graph.early_stopper()
            countiter = 0
            try:
                for mb in self.graph.train_iter(trainset):
                    curmodel.update_minibatch(mb)
                    if stp.set_score:
                        if validset:
                            # NOTE(review): requires the graph to expose a
                            # 'validset_score' result; the graph built by
                            # graphMLP in this file does not define one.
                            stp.score = curmodel(validset, ['validset_score'])
                            if (stp.score < stp.best_score):
                                # Parameters are updated in place, so snapshot.
                                best = copy.copy(curmodel)
                        else:
                            stp.score = 0.0
                    countiter += 1
                    # The stopper ends training by raising StopIteration.
                    stp.next()
            except StopIteration:
                print('Iterations stopped after  %s  iterations' % (countiter,))
            if validset:
                curmodel = best
        return curmodel


def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0):
    """Build the graph description of a one-hidden-layer (tanh) classifier.

    @param ninputs: number of input features (rows of W1 in iparams)
    @param nhid: number of hidden units
    @param nclass: number of output classes
    @param lr_val: learning rate used in the parameter updates
    @param l2coef_val: l2 coefficient; must be 0.0 because the regularization
                       term is not part of the differentiated cost

    @return: an instance of a GraphLearner.Graph subclass, suitable for
             GraphLearner(...)
    """

    # Debugging hook; not referenced elsewhere in this function.
    def wrapper(i, node, thunk):
        if 0:
            print('%s %s' % (i, node))
            print(thunk.inputs)
            print(thunk.outputs)
            if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias:
                print('here is the nll op')
        thunk() #actually compute this piece of the graph

    def sgd_updates(params, grads, lr):
        # Built in a function rather than with a comprehension in the class
        # body: in Python 2 the comprehension variables would leak into the
        # class namespace, where the AutoName metaclass would use them to
        # rename the last parameter and its gradient.
        return [T.sub_inplace(p, lr * gp) for p, gp in zip(params, grads)]

    class G(GraphLearner.Graph, AutoName):

        lr = T.constant(lr_val)
        assert l2coef_val == 0.0, 'the regularization term is not differentiated'
        l2coef = T.constant(l2coef_val)
        input = T.matrix() # n_examples x n_inputs
        target = T.ivector() # len: n_examples
        #target = T.matrix()
        W2, b2 = T.matrix(), T.vector()

        W1, b1 = T.matrix(), T.vector()
        hid = T.tanh(b1 + T.dot(input, W1))
        hid_regularization = l2coef * T.sum(W1*W1)

        params = [W1, b1, W2, b2]
        activations = b2 + T.dot(hid, W2)
        nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target)
        regularization = l2coef * T.sum(W2*W2) + hid_regularization
        output_class = T.argmax(activations, 1)
        loss_01 = T.neq(output_class, target)
        #g_params = T.grad(nll + regularization, params)
        g_params = T.grad(nll, params)
        new_params = sgd_updates(params, g_params, lr)

        def __eq__(self, other):
            print('G.__eq__ from graphMLP(), not implemented yet')
            return NotImplemented

        def load(self, algo, f):
            """ Load from file the 2 matrices and bias vectors

            @param algo: learner that will own the returned model
            @param f: file name, or an already open file (left open)
            @return: GraphLearner.Model holding the 4 tensors read from f
            """
            close_at_end = False
            if isinstance(f, str):
                # Binary mode: identical to text mode on POSIX, and avoids
                # newline translation elsewhere.
                f = open(f, 'rb')
                close_at_end = True
            try:
                params = [filetensor.read(f) for _ in xrange(4)]
            finally:
                # Only close what was opened here.
                if close_at_end:
                    f.close()
            return GraphLearner.Model(algo, params)

        def save(self, f, model):
            """ Save params to file, so 2 matrices and 2 bias vectors. Same
            order as iparams.

            @param f: file name, or an already open file (left open)
            @param model: model whose params are written
            """
            close_at_end = False
            if isinstance(f, str):
                f = open(f, 'wb')
                close_at_end = True
            try:
                for p in model.params:
                    filetensor.write(f, p)
            finally:
                if close_at_end:
                    f.close()

        def iparams(self):
            """ init params: uniform values in [-0.0005, 0.0005) for W1, b1,
            W2, b2. """
            def randsmall(*shape):
                return (numpy.random.rand(*shape) - 0.5) * 0.001
            return [randsmall(ninputs, nhid),
                    randsmall(nhid),
                    randsmall(nhid, nclass),
                    randsmall(nclass)]

        def train_iter(self, trainset):
            """Return 2000 minibatches of at most 32 examples from trainset."""
            return trainset.minibatches(['input', 'target'],
                    minibatch_size=min(len(trainset), 32), n_batches=2000)

        def early_stopper(self):
            """ overwrites GraphLearner.graph function """
            return stopper.NStages(300, 1)

    return G()


class TestMLP(unittest.TestCase):
    def blah(self, g):
        """Train two models on datasets differing in one target, compare their
        outputs, then save and reload the first model."""
        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                          [0, 1, 1],
                                                          [1, 0, 1],
                                                          [1, 1, 1]]),
                                             {'input':slice(2),'target':2})
        training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                          [0, 1, 1],
                                                          [1, 0, 0],
                                                          [1, 1, 1]]),
                                             {'input':slice(2),'target':2})
        test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                      [0, 1, 1],
                                                      [1, 0, 0],
                                                      [1, 1, 1]]),
                                         {'input':slice(2)})

        learn_algo = GraphLearner(g)

        model1 = learn_algo(training_set1)

        model2 = learn_algo(training_set2)

        omatch = [o1 == o2 for o1, o2 in zip(model1(test_data),
            model2(test_data))]

        n_match = sum(omatch)

        self.failUnless(n_match == (numpy.sum(training_set1.fields()['target'] ==
                training_set2.fields()['target'])), omatch)

        # Use a private temporary file rather than a fixed path in /tmp.
        fd, model_path = tempfile.mkstemp()
        os.close(fd)
        try:
            model1.save(model_path)

            #denoising_aa = GraphLearner(denoising_g)
            #model1 = denoising_aa(trainset)
            #hidset = model(trainset, fieldnames=['hidden'])
            #model2 = denoising_aa(hidset)

            #f = open('blah', 'w')
            #for m in model:
            #    m.save(f)
            #filetensor.write(f, initial_classification_weights)
            #f.flush()

            #deep_sigmoid_net = GraphLearner(deepnetwork_g)
            #deep_model = deep_sigmoid_net.load('blah')
            #deep_model.update(trainset)  #do some fine tuning

            model1_dup = learn_algo(model_path)
        finally:
            os.remove(model_path)

    def equiv(self, g0, g1):
        """Train one model per graph on the same data and print the parameter
        pairs whose relative error exceeds 1.0e-7."""
        training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0],
                                                          [0, 1, 1],
                                                          [1, 0, 1],
                                                          [1, 1, 1]]),
                                             {'input':slice(2),'target':2})
        learn_algo_0 = GraphLearner(g0)
        learn_algo_1 = GraphLearner(g1)

        model_0 = learn_algo_0(training_set1)
        model_1 = learn_algo_1(training_set1)

        print('----')
        for p in zip(model_0.params, model_1.params):
            abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1])
            max_abs_rel_err = numpy.max(abs_rel_err)
            if max_abs_rel_err > 1.0e-7:
                print('p0 %s' % (p[0],))
                print('p1 %s' % (p[1],))
            #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err)

    def test0(self): self.blah(graphMLP(2, 10, 2, .1))
    def test1(self): self.blah(graphMLP(2, 3, 2, .1))


if __name__ == '__main__':
    unittest.main()