Mercurial > pylearn
diff mlp_factory_approach.py @ 191:e816821c1e50
added early stopping to mlp.__call__
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 14 May 2008 20:04:44 -0400 |
parents | aa7a3ecbcc90 |
children | c5a7105fa40b |
line wrap: on
line diff
--- a/mlp_factory_approach.py Wed May 14 16:24:10 2008 -0400 +++ b/mlp_factory_approach.py Wed May 14 20:04:44 2008 -0400 @@ -1,84 +1,80 @@ -import copy +import copy, sys import numpy import theano -import theano.tensor as t +from theano import tensor as t -import dataset -import nnet_ops +from tlearn import dataset, nnet_ops, stopper def _randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001 -class NeuralNet(object): - - class _Model(object): - def __init__(self, nnet, params): - self.nnet = nnet - self.params = params - - def __copy__(self): - return _Model(self.nnet, [copy.copy(p) for p in params]) +def _cache(d, key, valfn): + #valfn() is only evaluated if key isn't in dictionary d + if key not in d: + d[key] = valfn() + return d[key] - def update(self, trainset, stopper=None): - """Update this model from more training data.""" - v = self.nnet.v - params = self.params - update_fn = self.nnet._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) - if stopper is not None: - raise NotImplementedError() - else: - for i in xrange(100): - for input, target in trainset.minibatches(['input', 'target'], - minibatch_size=min(32, len(trainset))): - dummy = update_fn(input, target[:,0], *params) - if 0: print dummy[0] #the nll +class _Model(object): + def __init__(self, algo, params): + self.algo = algo + self.params = params + v = algo.v + self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) + self._fn_cache = {} + + def __copy__(self): + return _Model(self.algo, [copy.copy(p) for p in params]) + + def update(self, input_target): + """Update this model from more training data.""" + params = self.params + #TODO: why should we have to unpack target like this? + for input, target in input_target: + self.update_fn(input, target[:,0], *params) - def __call__(self, testset, - output_fieldnames=['output_class'], - test_stats_collector=None, - copy_inputs=False, - put_stats_in_output_dataset=True, - output_attributes=[]): - """Apply this model (as a function) to new data""" - v = self.nnet.v - outputs = [getattr(self.nnet.v, name) for name in output_fieldnames] - if 'target' in testset: - fn = self.nnet._fn([v.input, v.target] + v.params, outputs) - return dataset.ApplyFunctionDataSet(testset, - lambda input, target: fn(input, target[:,0], *self.params), - output_fieldnames) - else: - fn = self.nnet._fn([v.input] + v.params, outputs) - return dataset.ApplyFunctionDataSet(testset, - lambda input: fn(input, *self.params), - output_fieldnames) - def _fn(self, inputs, outputs): - #it is possible for this function to implement function caching - #... but not necessarily desirable. - #- caching ruins the possibility of multi-threaded learning - #- caching demands more efficiency in the face of resizing inputs - #- caching makes it really hard to borrow references to function outputs - return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) + def __call__(self, testset, fieldnames=['output_class']): + """Apply this model (as a function) to new data""" + #TODO: cache fn between calls + assert 'input' == testset.fieldNames()[0] + assert len(testset.fieldNames()) <= 2 + v = self.algo.v + outputs = [getattr(v, name) for name in fieldnames] + inputs = [v.input] + ([v.target] if 'target' in testset else []) + inputs.extend(v.params) + theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), + lambda: self.algo._fn(inputs, outputs)) + lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) + return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) - def __init__(self, ninputs, nhid, nclass, lr, nepochs, +class AutonameVars(object): + def __init__(self, dct): + for key, val in dct.items(): + if type(key) is str and hasattr(val, 'name'): + val.name = key + self.__dict__.update(dct) + +class MultiLayerPerceptron(object): + + def __init__(self, ninputs, nhid, nclass, lr, l2coef=0.0, linker='c&py', - hidden_layer=None): - class Vars: - def __init__(self, lr, l2coef): + hidden_layer=None, + early_stopper=None, + validation_portion=0.2, + V_extern=None): + class V_intern(AutonameVars): + def __init__(v_self, lr, l2coef, **kwargs): lr = t.constant(lr) l2coef = t.constant(l2coef) - input = t.matrix('input') # n_examples x n_inputs - target = t.ivector('target') # n_examples x 1 - W2 = t.matrix('W2') - b2 = t.vector('b2') + input = t.matrix() # n_examples x n_inputs + target = t.ivector() # len: n_examples + W2, b2 = t.matrix(), t.vector() if hidden_layer: hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) else: - W1 = t.matrix('W1') - b1 = t.vector('b1') + W1, b1 = t.matrix(), t.vector() hid = t.tanh(b1 + t.dot(input, W1)) hid_params = [W1, b1] hid_regularization = l2coef * t.sum(W1*W1) @@ -93,50 +89,84 @@ g_params = t.grad(nll + regularization, params) new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] self.__dict__.update(locals()); del self.self + AutonameVars.__init__(v_self, locals()) self.nhid = nhid self.nclass = nclass - self.nepochs = nepochs - self.v = Vars(lr, l2coef) - self.params = None + self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) self.linker = linker + self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) + self.validation_portion = validation_portion + + def _fn(self, inputs, outputs): + # Caching here would hamper multi-threaded apps + # prefer caching in _Model.__call__ + return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) def __call__(self, trainset=None, iparams=None): + """Allocate and optionally train a model""" if iparams is None: iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ + self.v.hid_ivals() - rval = NeuralNet._Model(self, iparams) + rval = _Model(self, iparams) if trainset: - rval.update(trainset) + if len(trainset) == sys.maxint: + raise NotImplementedError('Learning from infinite streams is not supported') + nval = int(self.validation_portion * len(trainset)) + nmin = len(trainset) - nval + assert nmin >= 0 + minset = trainset[:nmin] #real training set for minimizing loss + valset = trainset[nmin:] #validation set for early stopping + best = rval + for stp in self.early_stopper(): + rval.update( + trainset.minibatches(['input', 'target'], minibatch_size=min(32, + len(trainset)))) + if stp.set_score: + stp.score = rval(valset, ['loss_01']) + if (stp.score < stp.best_score): + best = copy.copy(rval) + rval = best return rval +import unittest + +class TestMLP(unittest.TestCase): + def test0(self): + + training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2)}) + + learn_algo = MultiLayerPerceptron(2, 10, 2, .1 + , linker='c&py' + , early_stopper = lambda:stopper.NStages(100,1)) + + model1 = learn_algo(training_set1) + + model2 = learn_algo(training_set2) + + n_match = 0 + for o1, o2 in zip(model1(test_data), model2(test_data)): + #print o1 + #print o2 + n_match += (o1 == o2) + + assert n_match == (numpy.sum(training_set1.fields()['target'] == + training_set2.fields()['target'])) + if __name__ == '__main__': - training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2)}) + unittest.main() - learn_algo = NeuralNet(2, 10, 3, .1, 1000) - - model1 = learn_algo(training_set1) - - model2 = learn_algo(training_set2) - - n_match = 0 - for o1, o2 in zip(model1(test_data), model2(test_data)): - n_match += (o1 == o2) - - print n_match, numpy.sum(training_set1.fields()['target'] == - training_set2.fields()['target']) -