# HG changeset patch # User James Bergstra # Date 1212440998 14400 # Node ID c702abb7f87557ae97a5576f46d6850623a57d72 # Parent 3156a9976183b20c5faa6e4a092fdbc3a573193f# Parent c8f19a9eb10fac6eec3cf7c77f4e2476a1c25c24 merged diff -r c8f19a9eb10f -r c702abb7f875 mlp_factory_approach.py --- a/mlp_factory_approach.py Mon Jun 02 11:59:41 2008 -0400 +++ b/mlp_factory_approach.py Mon Jun 02 17:09:58 2008 -0400 @@ -1,156 +1,225 @@ -""" - - - -This file is deprecated. I'm continuing development in hpu/models.py. - -Get that project like this: hg clone ssh://user@lgcm/../bergstrj/hpu - - - - - -""" import copy, sys import numpy import theano -from theano import tensor as t - -from pylearn import dataset, nnet_ops, stopper +from theano import tensor as T +from pylearn import dataset, nnet_ops, stopper, LookupList -def _randshape(*shape): - return (numpy.random.rand(*shape) -0.5) * 0.001 +class AbstractFunction (Exception): pass -def _cache(d, key, valfn): - #valfn() is only evaluated if key isn't in dictionary d - if key not in d: - d[key] = valfn() - return d[key] - -class _Model(object): - def __init__(self, algo, params): - self.algo = algo - self.params = params - v = algo.v - self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) - self._fn_cache = {} +class AutoName(object): + """ + By inheriting from this class, class variables which have a name attribute + will have that name attribute set to the class variable name. + """ + class __metaclass__(type): + def __init__(cls, name, bases, dct): + type.__init__(name, bases, dct) + for key, val in dct.items(): + assert type(key) is str + if hasattr(val, 'name'): + val.name = key - def __copy__(self): - return _Model(self.algo, [copy.copy(p) for p in params]) +class GraphLearner(object): + class Model(object): + def __init__(self, algo, params): + self.algo = algo + self.params = params + graph = self.algo.graph + self.update_fn = algo._fn([graph.input, graph.target] + graph.params, + [graph.nll] + graph.new_params) + self._fn_cache = {} - def update(self, input_target): - """Update this model from more training data.""" - params = self.params - #TODO: why should we have to unpack target like this? - # tbm : creates problem... - for input, target in input_target: - rval= self.update_fn(input, target, *params) - #print rval[0] + def __copy__(self): + raise Exception('why not called?') + return GraphLearner.Model(self.algo, [copy.copy(p) for p in params]) - def __call__(self, testset, fieldnames=['output_class'],input='input',target='target'): - """Apply this model (as a function) to new data""" - #TODO: cache fn between calls - assert input == testset.fieldNames()[0] # why first one??? - assert len(testset.fieldNames()) <= 2 - v = self.algo.v - outputs = [getattr(v, name) for name in fieldnames] - inputs = [v.input] + ([v.target] if target in testset else []) - inputs.extend(v.params) - theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), - lambda: self.algo._fn(inputs, outputs)) - lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) - return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) + def _cache(self, key, valfn): + d = self._fn_cache + if key not in d: + d[key] = valfn() + return d[key] + + def update_minibatch(self, minibatch): + assert isinstance(minibatch, LookupList) + self.update_fn(minibatch['input'], minibatch['target'], *self.params) + + def update(self, dataset, + default_minibatch_size=32): + """Update this model from more training data.""" + params = self.params + minibatch_size = min(default_minibatch_size, len(dataset)) + for mb in dataset.minibatches(['input', 'target'], minibatch_size=minibatch_size): + self.update_minibatch(mb) + + def __call__(self, testset, fieldnames=['output_class']): + """Apply this model (as a function) to new data. -class AutonameVars(object): - def __init__(self, dct): - for key, val in dct.items(): - if type(key) is str and hasattr(val, 'name'): - val.name = key - self.__dict__.update(dct) + @param testset: DataSet, whose fields feed Result terms in self.algo.g + @type testset: DataSet -class MultiLayerPerceptron(object): + @param fieldnames: names of results in self.algo.g to compute. + @type fieldnames: list of strings - def __init__(self, ninputs, nhid, nclass, lr, - l2coef=0.0, - linker='c&py', - hidden_layer=None, - early_stopper=None, - validation_portion=0.2, - V_extern=None): - class V_intern(AutonameVars): - def __init__(v_self, lr, l2coef, **kwargs): - lr = t.constant(lr) - l2coef = t.constant(l2coef) - input = t.matrix() # n_examples x n_inputs - target = t.ivector() # len: n_examples - W2, b2 = t.matrix(), t.vector() + @return: DataSet with fields from fieldnames, computed from testset by + this model. + @rtype: ApplyFunctionDataSet instance + + """ + graph = self.algo.graph + def getresult(name): + r = getattr(graph, name) + if not isinstance(r, theano.Result): + raise TypeError('string does not name a theano.Result', (name, r)) + return r + + provided = [getresult(name) for name in testset.fieldNames()] + wanted = [getresult(name) for name in fieldnames] + inputs = provided + graph.params + + theano_fn = self._cache((tuple(inputs), tuple(wanted)), + lambda: self.algo._fn(inputs, wanted)) + lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) + return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) - if hidden_layer: - hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) - else: - W1, b1 = t.matrix(), t.vector() - hid = t.tanh(b1 + t.dot(input, W1)) - hid_params = [W1, b1] - hid_regularization = l2coef * t.sum(W1*W1) - hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] + class Graph(object): + class Opt(object): + merge = theano.gof.MergeOptimizer() + gemm_opt_1 = theano.gof.TopoOptimizer(theano.tensor_opt.gemm_pattern_1) + sqr_opt_0 = theano.gof.TopoOptimizer(theano.gof.PatternSub( + (T.mul,'x', 'x'), + (T.sqr, 'x'))) + + def __init__(self, do_sqr=True): + self.do_sqr = do_sqr - params = [W2, b2] + hid_params - activations = b2 + t.dot(hid, W2) - nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) - regularization = l2coef * t.sum(W2*W2) + hid_regularization - output_class = t.argmax(activations,1) - loss_01 = t.neq(output_class, target) - g_params = t.grad(nll + regularization, params) - new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] - self.__dict__.update(locals()); del self.self - AutonameVars.__init__(v_self, locals()) - self.nhid = nhid - self.nclass = nclass - self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) - self.linker = linker - self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) - self.validation_portion = validation_portion + def __call__(self, env): + self.merge(env) + self.gemm_opt_1(env) + if self.do_sqr: + self.sqr_opt_0(env) + self.merge(env) + + def linker(self): + return theano.gof.PerformLinker() + + def early_stopper(self): + stopper.NStages(10,1) + + def train_iter(self, trainset): + raise AbstractFunction + optimizer = Opt() + + def __init__(self, graph): + self.graph = graph def _fn(self, inputs, outputs): # Caching here would hamper multi-threaded apps - # prefer caching in _Model.__call__ - return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) + # prefer caching in Model.__call__ + return theano.function(inputs, outputs, + unpack_single=False, + optimizer=self.graph.optimizer, + linker=self.graph.linker() if hasattr(self.graph, 'linker') + else 'c&py') + + def __call__(self, + trainset=None, + validset=None, + iparams=None): + """Allocate and optionally train a model + + @param trainset: Data for minimizing the cost function + @type trainset: None or Dataset + + @param validset: Data for early stopping + @type validset: None or Dataset + + @param input: name of field to use as input + @type input: string + + @param target: name of field to use as target + @type target: string - def __call__(self, trainset=None, iparams=None, input='input', target='target'): - """Allocate and optionally train a model""" - if iparams is None: - iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ - + self.v.hid_ivals() - rval = _Model(self, iparams) - if trainset: - if len(trainset) == sys.maxint: - raise NotImplementedError('Learning from infinite streams is not supported') - nval = int(self.validation_portion * len(trainset)) - nmin = len(trainset) - nval - assert nmin >= 0 - minset = trainset[:nmin] #real training set for minimizing loss - valset = trainset[nmin:] #validation set for early stopping - best = rval - for stp in self.early_stopper(): - rval.update( - minset.minibatches([input, target], minibatch_size=min(32, - len(minset)))) - #print 'mlp.__call__(), we did an update' + @return: model + @rtype: GraphLearner.Model instance + + """ + iparams = self.graph.iparams() if iparams is None else iparams + curmodel = GraphLearner.Model(self, iparams) + best = curmodel + + if trainset is not None: + #do some training by calling Model.update_minibatch() + stp = self.graph.early_stopper() + for mb in self.graph.train_iter(trainset): + curmodel.update_minibatch(mb) if stp.set_score: - stp.score = rval(valset, ['loss_01']) - if (stp.score < stp.best_score): - best = copy.copy(rval) - rval = best - return rval + if validset: + stp.score = curmodel(validset, ['validset_score']) + if (stp.score < stp.best_score): + best = copy.copy(curmodel) + else: + stp.score = 0.0 + stp.next() + if validset: + curmodel = best + return curmodel + +def graphMLP(ninputs, nhid, nclass, lr_val, l2coef_val=0.0): + def wrapper(i, node, thunk): + if 0: + print i, node + print thunk.inputs + print thunk.outputs + if node.op == nnet_ops.crossentropy_softmax_1hot_with_bias: + print 'here is the nll op' + thunk() #actually compute this piece of the graph + + class G(GraphLearner.Graph, AutoName): + + lr = T.constant(lr_val) + assert l2coef_val == 0.0 + l2coef = T.constant(l2coef_val) + input = T.matrix() # n_examples x n_inputs + target = T.ivector() # len: n_examples + W2, b2 = T.matrix(), T.vector() + + W1, b1 = T.matrix(), T.vector() + hid = T.tanh(b1 + T.dot(input, W1)) + hid_regularization = l2coef * T.sum(W1*W1) + + params = [W1, b1, W2, b2] + activations = b2 + T.dot(hid, W2) + nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) + regularization = l2coef * T.sum(W2*W2) + hid_regularization + output_class = T.argmax(activations,1) + loss_01 = T.neq(output_class, target) + #g_params = T.grad(nll + regularization, params) + g_params = T.grad(nll, params) + new_params = [T.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] + + def iparams(self): + def randsmall(*shape): + return (numpy.random.rand(*shape) -0.5) * 0.001 + return [randsmall(ninputs, nhid) + , randsmall(nhid) + , randsmall(nhid, nclass) + , randsmall(nclass)] + + def train_iter(self, trainset): + return trainset.minibatches(['input', 'target'], + minibatch_size=min(len(trainset), 32), n_batches=300) + def early_stopper(self): + return stopper.NStages(300,1) + + return G() import unittest class TestMLP(unittest.TestCase): - def test0(self): - + def blah(self, g): training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], @@ -167,23 +236,46 @@ [1, 1, 1]]), {'input':slice(2)}) - learn_algo = MultiLayerPerceptron(2, 10, 2, .1 - , linker='c&py' - , early_stopper = lambda:stopper.NStages(100,1)) + learn_algo = GraphLearner(g) model1 = learn_algo(training_set1) model2 = learn_algo(training_set2) - n_match = 0 - for o1, o2 in zip(model1(test_data), model2(test_data)): - #print o1 - #print o2 - n_match += (o1 == o2) + omatch = [o1 == o2 for o1, o2 in zip(model1(test_data), + model2(test_data))] + + n_match = sum(omatch) + + self.failUnless(n_match == (numpy.sum(training_set1.fields()['target'] == + training_set2.fields()['target'])), omatch) - assert n_match == (numpy.sum(training_set1.fields()['target'] == - training_set2.fields()['target'])) + def equiv(self, g0, g1): + training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + learn_algo_0 = GraphLearner(g0) + learn_algo_1 = GraphLearner(g1) + + model_0 = learn_algo_0(training_set1) + model_1 = learn_algo_1(training_set1) + + print '----' + for p in zip(model_0.params, model_1.params): + abs_rel_err = theano.gradient.numeric_grad.abs_rel_err(p[0], p[1]) + max_abs_rel_err = numpy.max(abs_rel_err) + if max_abs_rel_err > 1.0e-7: + print 'p0', p[0] + print 'p1', p[1] + #self.failUnless(max_abs_rel_err < 1.0e-7, max_abs_rel_err) + + + def test0(self): self.blah(graphMLP(2, 10, 2, .1)) + def test1(self): self.blah(graphMLP(2, 3, 2, .1)) if __name__ == '__main__': unittest.main() +