# HG changeset patch # User Yoshua Bengio # Date 1210970295 14400 # Node ID c5a7105fa40b7fd2c3c02b568ed1a72b6f241934 # Parent f2ddc795ec49c4038e2ee8a1e8c807b444e99d27# Parent 80731832c62bdb8772b5d2368ba8a196c5295df3 trying to merge diff -r 80731832c62b -r c5a7105fa40b mlp.py diff -r 80731832c62b -r c5a7105fa40b mlp_factory_approach.py --- a/mlp_factory_approach.py Thu May 15 15:21:00 2008 -0400 +++ b/mlp_factory_approach.py Fri May 16 16:38:15 2008 -0400 @@ -1,172 +1,141 @@ -import copy, sys -import numpy - +import dataset import theano -from theano import tensor as t - -from tlearn import dataset, nnet_ops, stopper +import theano.tensor as t +import numpy +import nnet_ops def _randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001 +def _function(inputs, outputs, linker='c&py'): + return theano.function(inputs, outputs, unpack_single=False,linker=linker) -def _cache(d, key, valfn): - #valfn() is only evaluated if key isn't in dictionary d - if key not in d: - d[key] = valfn() - return d[key] +class NeuralNet(object): + + class Model(object): + def __init__(self, nnet, params): + self.nnet = nnet + self.params = params -class _Model(object): - def __init__(self, algo, params): - self.algo = algo - self.params = params - v = algo.v - self.update_fn = algo._fn([v.input, v.target] + v.params, [v.nll] + v.new_params) - self._fn_cache = {} - - def __copy__(self): - return _Model(self.algo, [copy.copy(p) for p in params]) - - def update(self, input_target): - """Update this model from more training data.""" - params = self.params - #TODO: why should we have to unpack target like this? - for input, target in input_target: - self.update_fn(input, target[:,0], *params) + def update(self, trainset, stopper=None): + """Update this model from more training data.""" + v = self.nnet.v + params = self.params + update_fn = _function([v.input, v.target] + v.params, [v.nll] + v.new_params) + if stopper is not None: + raise NotImplementedError() + else: + for i in xrange(100): + for input, target in trainset.minibatches(['input', 'target'], + minibatch_size=min(32, len(trainset))): + results = update_fn(input, target[:,0], *params) + if 0: print results[0] + # print params['b'] - def __call__(self, testset, fieldnames=['output_class']): - """Apply this model (as a function) to new data""" - #TODO: cache fn between calls - assert 'input' == testset.fieldNames()[0] - assert len(testset.fieldNames()) <= 2 - v = self.algo.v - outputs = [getattr(v, name) for name in fieldnames] - inputs = [v.input] + ([v.target] if 'target' in testset else []) - inputs.extend(v.params) - theano_fn = _cache(self._fn_cache, (tuple(inputs), tuple(outputs)), - lambda: self.algo._fn(inputs, outputs)) - lambda_fn = lambda *args: theano_fn(*(list(args) + self.params)) - return dataset.ApplyFunctionDataSet(testset, lambda_fn, fieldnames) + def __call__(self, testset, + output_fieldnames=['output_class'], + test_stats_collector=None, + copy_inputs=False, + put_stats_in_output_dataset=True, + output_attributes=[]): + """Apply this model (as a function) to new data""" + inputs = [self.nnet.v.input, self.nnet.v.target] + self.nnet.v.params + fn = _function(inputs, [getattr(self.nnet.v, name) for name in output_fieldnames]) + if 'target' in testset.fieldNames(): + return dataset.ApplyFunctionDataSet(testset, + lambda input, target: fn(input, target[:,0], *self.params), + output_fieldnames) + else: + return dataset.ApplyFunctionDataSet(testset, + lambda input: fn(input, numpy.zeros(1,dtype='int64'), *self.params), + output_fieldnames) -class AutonameVars(object): - def __init__(self, dct): - for key, val in dct.items(): - if type(key) is str and hasattr(val, 'name'): - val.name = key - self.__dict__.update(dct) - -class MultiLayerPerceptron(object): - - def __init__(self, ninputs, nhid, nclass, lr, - l2coef=0.0, - linker='c&py', - hidden_layer=None, - early_stopper=None, - validation_portion=0.2, - V_extern=None): - class V_intern(AutonameVars): - def __init__(v_self, lr, l2coef, **kwargs): + def __init__(self, ninputs, nhid, nclass, lr, nepochs, + l2coef=0.0, + linker='c&yp', + hidden_layer=None): + if not hidden_layer: + hidden_layer = AffineSigmoidLayer("hidden",ninputs,nhid,l2coef) + class Vars: + def __init__(self, lr, l2coef): lr = t.constant(lr) l2coef = t.constant(l2coef) - input = t.matrix() # n_examples x n_inputs - target = t.ivector() # len: n_examples - W2, b2 = t.matrix(), t.vector() + input = t.matrix('input') # n_examples x n_inputs + target = t.ivector('target') # n_examples x 1 + W2 = t.matrix('W2') + b2 = t.vector('b2') - if hidden_layer: - hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) - else: - W1, b1 = t.matrix(), t.vector() - hid = t.tanh(b1 + t.dot(input, W1)) - hid_params = [W1, b1] - hid_regularization = l2coef * t.sum(W1*W1) - hid_ivals = lambda : [_randshape(ninputs, nhid), _randshape(nhid)] - + hid = hidden_layer(input) + hid_params = hidden_layer.params() + hid_params_init_vals = hidden_layer.params_ivals() + hid_regularization = hidden_layer.regularization() + params = [W2, b2] + hid_params - activations = b2 + t.dot(hid, W2) - nll, predictions = nnet_ops.crossentropy_softmax_1hot(activations, target) + nll, predictions = nnet_ops.crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target) regularization = l2coef * t.sum(W2*W2) + hid_regularization - output_class = t.argmax(activations,1) + output_class = t.argmax(predictions,1) loss_01 = t.neq(output_class, target) g_params = t.grad(nll + regularization, params) new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] - self.__dict__.update(locals()); del self.self - AutonameVars.__init__(v_self, locals()) + setattr_and_name(self, locals()) self.nhid = nhid self.nclass = nclass - self.v = V_intern(**locals()) if V_extern is None else V_extern(**locals()) - self.linker = linker - self.early_stopper = early_stopper if early_stopper is not None else lambda: stopper.NStages(10,1) - self.validation_portion = validation_portion - - def _fn(self, inputs, outputs): - # Caching here would hamper multi-threaded apps - # prefer caching in _Model.__call__ - return theano.function(inputs, outputs, unpack_single=False, linker=self.linker) + self.nepochs = nepochs + self.v = Vars(lr, l2coef) + self.params = None def __call__(self, trainset=None, iparams=None): - """Allocate and optionally train a model""" if iparams is None: - iparams = [_randshape(self.nhid, self.nclass), _randshape(self.nclass)]\ - + self.v.hid_ivals() - rval = _Model(self, iparams) + iparams = LookupList(["W","b"],[_randshape(self.nhid, self.nclass), _randshape(self.nclass)]) + + self.v.hid_params_init_vals() + rval = NeuralNet.Model(self, iparams) if trainset: - if len(trainset) == sys.maxint: - raise NotImplementedError('Learning from infinite streams is not supported') - nval = int(self.validation_portion * len(trainset)) - nmin = len(trainset) - nval - assert nmin >= 0 - minset = trainset[:nmin] #real training set for minimizing loss - valset = trainset[nmin:] #validation set for early stopping - best = rval - for stp in self.early_stopper(): - rval.update( - trainset.minibatches(['input', 'target'], minibatch_size=min(32, - len(trainset)))) - if stp.set_score: - stp.score = rval(valset, ['loss_01']) - if (stp.score < stp.best_score): - best = copy.copy(rval) - rval = best + rval.update(trainset) return rval -import unittest - -class TestMLP(unittest.TestCase): - def test0(self): +def setattr_and_name(self, dict): + """This will do a self.__setattr__ for all elements in the dict + (except for element self). In addition it will make sure that + each element's .name (if it exists) is set to the element's key + in the dicitonary. + Typical usage: setattr_and_name(self, locals()) """ + for varname,var in locals.items(): + if var is not self: + if hasattr(var,"name") and not var.name: + var.name=varname + self.__setattr__(varname,var) - training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 0], - [1, 1, 1]]), - {'input':slice(2)}) - - learn_algo = MultiLayerPerceptron(2, 10, 2, .1 - , linker='c&py' - , early_stopper = lambda:stopper.NStages(100,1)) - - model1 = learn_algo(training_set1) - - model2 = learn_algo(training_set2) - - n_match = 0 - for o1, o2 in zip(model1(test_data), model2(test_data)): - #print o1 - #print o2 - n_match += (o1 == o2) - - assert n_match == (numpy.sum(training_set1.fields()['target'] == - training_set2.fields()['target'])) if __name__ == '__main__': - unittest.main() + training_set1 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + training_set2 = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2),'target':2}) + test_data = dataset.ArrayDataSet(numpy.array([[0, 0, 0], + [0, 1, 1], + [1, 0, 0], + [1, 1, 1]]), + {'input':slice(2)}) + + learn_algo = NeuralNet(2, 10, 3, .1, 1000) + + model = learn_algo() + + model1 = learn_algo(training_set1) + + model2 = learn_algo(training_set2) + + n_match = 0 + for o1, o2 in zip(model1(test_data), model2(test_data)): + n_match += (o1 == o2) + + print n_match, numpy.sum(training_set1.fields()['target'] == + training_set2.fields()['target']) +