Mercurial > pylearn
changeset 208:bf320808919f
back to James' version
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Fri, 16 May 2008 16:39:01 -0400 |
parents | c5a7105fa40b |
children | 50a8302addaf |
files | mlp_factory_approach.py |
diffstat | 1 files changed, 143 insertions(+), 112 deletions(-) [+] |
line wrap: on
line diff
--- a/mlp_factory_approach.py Fri May 16 16:38:15 2008 -0400 +++ b/mlp_factory_approach.py Fri May 16 16:39:01 2008 -0400 @@ -1,141 +1,172 @@ -import dataset +import copy, sys +import numpy + import theano -import theano.tensor as t -import numpy -import nnet_ops +from theano import tensor as t + +from tlearn import dataset, nnet_ops, stopper def _randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001 -def _function(inputs, outputs, linker='c&py'): - return theano.function(inputs, outputs, unpack_single=False,linker=linker) -class NeuralNet(object): - - class Model(object): - def __init__(self, nnet, params): - self.nnet = nnet - self.params = params +def _cache(d, key, valfn): + #valfn() is only evaluated if key isn't in dictionary d + if key not in d: + d[key] = valfn() + return d[key] - def update(self, trainset, stopper=None): - """Update this model from more training data.""" - v = self.nnet.v - params = self.params - update_fn = _function([v.input, v.target] + v.params, [v.nll] + v.new_params) - if stopper is not None: - raise NotImplementedError() - else: - for i in xrange(100): - for input, target in trainset.minibatches(['input', 'target'], - minibatch_size=min(32, len(trainset))): - results = update_fn(input, target[:,0], *params) - if 0: print results[0] - # print params['b'] +class _Model(object): + def __init__(self, algo, params): + self.algo = algo + self.params = params + v = algo.v + self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) + self._fn_cache = {} + + def __copy__(self): + return _Model(self.algo, [copy.copy(p) for p in params]) + + def update(self, input_target): + """Update this model from more training data.""" + params = self.params + #TODO: why should we have to unpack target like this? + for input, target in input_target: + self.update_fn(input, target[:,0], *params) - def __call__(self, testset, - output_fieldnames=['output_class'], - test_stats_collector=None, - copy_inputs=False, - put_stats_in_output_dataset=True, - output_attributes=[]): - """Apply this model (as a function) to new data""" - inputs = [self.nnet.v.input, self.nnet.v.target] + self.nnet.v.params - fn = _function(inputs, [getattr(self.nnet.v, name) for name in output_fieldnames]) - if 'target' in testset.fieldNames(): - return dataset.ApplyFunctionDataSet(testset, - lambda input, target: fn(input, target[:,0], *self.params), - output_fieldnames) - else: - return dataset.ApplyFunctionDataSet(testset, - lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.params), - output_fieldnames) + def __call__(self, testset, fieldnames=['output_class']): + """Apply this model (as a function) to new data""" + #TODO: cache fn between calls + assert 'input' == testset.fieldNames()[0] + assert len(testset.fieldNames()) <= 2 + v = self.algo.v + outputs = [getattr(v, name) for name in fieldnames] + inputs = [v.input] + ([v.target] if 'target' in testset else []) + inputs.extend(v.params) + theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), + lambda: self.algo._fn(inputs, outputs)) + lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) + return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) - def __init__(self, ninputs, nhid, nclass, lr, nepochs, - l2coef=0.0, - linker='c&yp', - hidden_layer=None): - if not hidden_layer: - hidden_layer = AffineSigmoidLayer("hidden",ninputs,nhid,l2coef) - class Vars: - def __init__(self, lr, l2coef): +class AutonameVars(object): + def __init__(self, dct): + for key, val in dct.items(): + if type(key) is str and hasattr(val, 'name'): + val.name = key + self.__dict__.update(dct) + +class MultiLayerPerceptron(object): + + def __init__(self, ninputs, nhid, nclass, lr, + l2coef=0.0, + linker='c&py', + hidden_layer=None, + early_stopper=None, + validation_portion=0.2, + V_extern=None): + class V_intern(AutonameVars): + def __init__(v_self, lr, l2coef, **kwargs): lr = t.constant(lr) l2coef = t.constant(l2coef) - input = t.matrix('input') # n_examples x n_inputs - target = t.ivector('target') # n_examples x 1 - W2 = t.matrix('W2') - b2 = t.vector('b2') + input = t.matrix() # n_examples x n_inputs + target = t.ivector() # len: n_examples + W2, b2 = t.matrix(), t.vector() - hid = hidden_layer(input) - hid_params = hidden_layer.params() - hid_params_init_vals = hidden_layer.params_ivals() - hid_regularization = hidden_layer.regularization() - + if hidden_layer: + hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) + else: + W1, b1 = t.matrix(), t.vector() + hid = t.tanh(b1 + t.dot(input, W1)) + hid_params = [W1, b1] + hid_regularization = l2coef * t.sum(W1*W1) + hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] + params = [W2, b2] + hid_params - nll, predictions = nnet_ops.crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target) + activations = b2 + t.dot(hid, W2) + nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) regularization = l2coef * t.sum(W2*W2) + hid_regularization - output_class = t.argmax(predictions,1) + output_class = t.argmax(activations,1) loss_01 = t.neq(output_class, target) g_params = t.grad(nll + regularization, params) new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] - setattr_and_name(self, locals()) + self.__dict__.update(locals()); del self.self + AutonameVars.__init__(v_self, locals()) self.nhid = nhid self.nclass = nclass - self.nepochs = nepochs - self.v = Vars(lr, l2coef) - self.params = None + self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) + self.linker = linker + self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) + self.validation_portion = validation_portion + + def _fn(self, inputs, outputs): + # Caching here would hamper multi-threaded apps + # prefer caching in _Model.__call__ + return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) def __call__(self, trainset=None, iparams=None): + """Allocate and optionally train a model""" if iparams is None: - iparams = LookupList(["W","b"],[_randshape(self.nhid, self.nclass), _randshape(self.nclass)]) - + self.v.hid_params_init_vals() - rval = NeuralNet.Model(self, iparams) + iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ + + self.v.hid_ivals() + rval = _Model(self, iparams) if trainset: - rval.update(trainset) + if len(trainset) == sys.maxint: + raise NotImplementedError('Learning from infinite streams is not supported') + nval = int(self.validation_portion * len(trainset)) + nmin = len(trainset) - nval + assert nmin >= 0 + minset = trainset[:nmin] #real training set for minimizing loss + valset = trainset[nmin:] #validation set for early stopping + best = rval + for stp in self.early_stopper(): + rval.update( + trainset.minibatches(['input', 'target'], minibatch_size=min(32, + len(trainset)))) + if stp.set_score: + stp.score = rval(valset, ['loss_01']) + if (stp.score < stp.best_score): + best = copy.copy(rval) + rval = best return rval -def setattr_and_name(self, dict): - """This will do a self.__setattr__ for all elements in the dict - (except for element self). In addition it will make sure that - each element's .name (if it exists) is set to the element's key - in the dicitonary. - Typical usage: setattr_and_name(self, locals()) """ - for varname,var in locals.items(): - if var is not self: - if hasattr(var,"name") and not var.name: - var.name=varname - self.__setattr__(varname,var) +import unittest + +class TestMLP(unittest.TestCase): + def test0(self): + training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2)}) + + learn_algo = MultiLayerPerceptron(2, 10, 2, .1 + , linker='c&py' + , early_stopper = lambda:stopper.NStages(100,1)) + + model1 = learn_algo(training_set1) + + model2 = learn_algo(training_set2) + + n_match = 0 + for o1, o2 in zip(model1(test_data), model2(test_data)): + #print o1 + #print o2 + n_match += (o1 == o2) + + assert n_match == (numpy.sum(training_set1.fields()['target'] == + training_set2.fields()['target'])) if __name__ == '__main__': - training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2)}) + unittest.main() - - learn_algo = NeuralNet(2, 10, 3, .1, 1000) - - model = learn_algo() - - model1 = learn_algo(training_set1) - - model2 = learn_algo(training_set2) - - n_match = 0 - for o1, o2 in zip(model1(test_data), model2(test_data)): - n_match += (o1 == o2) - - print n_match, numpy.sum(training_set1.fields()['target'] == - training_set2.fields()['target']) -