Mercurial > pylearn
changeset 288:2d08f46d17d8
removed old stuff
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Fri, 06 Jun 2008 14:08:28 -0400 |
parents | 78cc8fe3bbe9 |
children | 3af204aa71e5 174374d59405 |
files | mlp.py test_mlp.py |
diffstat | 2 files changed, 0 insertions(+), 372 deletions(-) [+] |
line wrap: on
line diff
--- a/mlp.py Fri Jun 06 14:01:36 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,240 +0,0 @@ -""" -A straightforward classicial feedforward -one-hidden-layer neural net, with L2 regularization. -This is one of the simplest example of L{Learner}, and illustrates -the use of theano. -""" - -from learner import * -from theano import tensor as t -from nnet_ops import * -import math -from misc import * - -def function(inputs, outputs, linker='c&py'): - return theano.function(inputs, outputs, unpack_single=False,linker=linker) - -def randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001 - -class ManualNNet(object): - def __init__(self, ninputs, nhid, nclass, lr, nepochs, - linker='c&yp', - hidden_layer=None): - class Vars: - def __init__(self, lr, l2coef=0.0): - lr = t.constant(lr) - l2coef = t.constant(l2coef) - input = t.matrix('input') # n_examples x n_inputs - target = t.ivector('target') # n_examples x 1 - W2 = t.matrix('W2') - b2 = t.vector('b2') - - if hidden_layer: - hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) - else: - W1 = t.matrix('W1') - b1 = t.vector('b1') - hid = t.tanh(b1 + t.dot(input, W1)) - hid_params = [W1, b1] - hid_regularization = l2coef * t.sum(W1*W1) - hid_ivals = [randshape(ninputs, nhid), randshape(nhid)] - - params = [W2, b2] + hid_params - ivals = [randshape(nhid, nclass), randshape(nclass)]\ - + hid_ivals - nll, predictions = crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target) - regularization = l2coef * t.sum(W2*W2) + hid_regularization - output_class = t.argmax(predictions,1) - loss_01 = t.neq(output_class, target) - g_params = t.grad(nll + regularization, params) - new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] - self.__dict__.update(locals()); del self.self - self.nhid = nhid - self.nclass = nclass - self.nepochs = nepochs - self.v = Vars(lr) - self.params = None - - def update(self, trainset): - params = self.v.ivals - update_fn = function( - [self.v.input, self.v.target] + self.v.params, - [self.v.nll] + self.v.new_params) - for i in xrange(self.nepochs): - for input, target in trainset.minibatches(['input', 'target'], - minibatch_size=min(32, len(trainset))): - dummy = update_fn(input, target[:,0], *params) - if 0: print dummy[0] #the nll - return self.use - __call__ = update - - def use(self, dset, - output_fieldnames=['output_class'], - test_stats_collector=None, - copy_inputs=False, - put_stats_in_output_dataset=True, - output_attributes=[]): - inputs = [self.v.input, self.v.target] + self.v.params - fn = function(inputs, [getattr(self.v, name) for name in output_fieldnames]) - target = dset.fields()['target'] if ('target' in dset.fields()) else numpy.zeros((1,1),dtype='int64') - return ApplyFunctionDataSet(dset, - lambda input, target: fn(input, target[:,0], *self.v.ivals), - output_fieldnames) - - -class OneHiddenLayerNNetClassifier(OnlineGradientTLearner): - """ - Implement a straightforward classicial feedforward - one-hidden-layer neural net, with L2 regularization. - - The predictor parameters are obtained by minibatch/online gradient descent. - Training can proceed sequentially (with multiple calls to update with - different disjoint subsets of the training sets). - - Hyper-parameters: - - L2_regularizer - - learning_rate - - n_hidden - - For each (input_t,output_t) pair in a minibatch,:: - - output_activations_t = b2+W2*tanh(b1+W1*input_t) - output_t = softmax(output_activations_t) - output_class_t = argmax(output_activations_t) - class_error_t = 1_{output_class_t != target_t} - nll_t = -log(output_t[target_t]) - - and the training criterion is:: - - loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t - - The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by - stochastic minibatch gradient descent:: - - parameters[i] -= learning_rate * dloss/dparameters[i] - - The fields and attributes expected and produced by use and update are the following: - - - Input and output fields (example-wise quantities): - - - 'input' (always expected by use and update) - - 'target' (optionally expected by use and always by update) - - 'output' (optionally produced by use) - - 'output_class' (optionally produced by use) - - 'class_error' (optionally produced by use) - - 'nll' (optionally produced by use) - - - optional attributes (optionally expected as input_dataset attributes) - (warning, this may be dangerous, the 'use' method will use those provided in the - input_dataset rather than those learned during 'update'; currently no support - for providing these to update): - - - 'L2_regularizer' - - 'b1' - - 'W1' - - 'b2' - - 'W2' - - 'parameters' = [b1, W1, b2, W2] - - 'regularization_term' - - """ - def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None,linker='c|py'): - self._n_inputs = n_inputs - self._n_outputs = n_classes - self._n_hidden = n_hidden - self._init_range = init_range - self._max_n_epochs = max_n_epochs - self._minibatch_size = minibatch_size - self.learning_rate = learning_rate # this is the float - self.L2_regularizer = L2_regularizer - self._learning_rate = t.scalar('learning_rate') # this is the symbol - self._input = t.matrix('input') # n_examples x n_inputs - self._target = t.lmatrix('target') # n_examples x 1 - self._target_vector = self._target[:,0] - self._L2_regularizer = t.scalar('L2_regularizer') - self._W1 = t.matrix('W1') - self._W2 = t.matrix('W2') - self._b1 = t.row('b1') - self._b2 = t.row('b2') - self._regularization_term = self._L2_regularizer * (t.sum(self._W1*self._W1) + t.sum(self._W2*self._W2)) - self._output_activations =self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T) - self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target_vector) - self._output_class = t.argmax(self._output,1) - self._class_error = t.neq(self._output_class,self._target_vector) - self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0] - OnlineGradientTLearner.__init__(self, linker = linker) - - def attributeNames(self): - return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"] - - def parameterAttributes(self): - return ["b1","W1", "b2", "W2"] - - def updateMinibatchInputFields(self): - return ["input","target"] - - def updateMinibatchInputAttributes(self): - return OnlineGradientTLearner.updateMinibatchInputAttributes(self)+["L2_regularizer"] - - def updateEndOutputAttributes(self): - return ["regularization_term"] - - def lossAttribute(self): - return "minibatch_criterion" - - def defaultOutputFields(self, input_fields): - output_fields = ["output", "output_class",] - if "target" in input_fields: - output_fields += ["class_error", "nll"] - return output_fields - - def updateMinibatch(self,minibatch): - MinibatchUpdatesTLearner.updateMinibatch(self,minibatch) - #print self.nll - - def allocate(self,minibatch): - minibatch_n_inputs = minibatch["input"].shape[1] - if not self._n_inputs: - self._n_inputs = minibatch_n_inputs - self.b1 = numpy.zeros((1,self._n_hidden)) - self.b2 = numpy.zeros((1,self._n_outputs)) - self.forget() - elif self._n_inputs!=minibatch_n_inputs: - # if the input changes dimension on the fly, we resize and forget everything - self.forget() - - def forget(self): - if self._n_inputs: - r = self._init_range/math.sqrt(self._n_inputs) - self.W1 = numpy.random.uniform(low=-r,high=r, - size=(self._n_hidden,self._n_inputs)) - r = self._init_range/math.sqrt(self._n_hidden) - self.W2 = numpy.random.uniform(low=-r,high=r, - size=(self._n_outputs,self._n_hidden)) - self.b1[:]=0 - self.b2[:]=0 - self._n_epochs=0 - - def isLastEpoch(self): - self._n_epochs +=1 - return self._n_epochs>=self._max_n_epochs - - def debug_updateMinibatch(self,minibatch): - # make sure all required fields are allocated and initialized - self.allocate(minibatch) - input_attributes = self.names2attributes(self.updateMinibatchInputAttributes()) - input_fields = minibatch(*self.updateMinibatchInputFields()) - print 'input attributes', input_attributes - print 'input fields', input_fields - results = self.update_minibatch_function(*(input_attributes+input_fields)) - print 'output attributes', self.updateMinibatchOutputAttributes() - print 'results', results - self.setAttributes(self.updateMinibatchOutputAttributes(), - results) - - if 0: - print 'n0', self.names2OpResults(self.updateMinibatchOutputAttributes()+ self.updateMinibatchInputFields()) - print 'n1', self.names2OpResults(self.updateMinibatchOutputAttributes()) - print 'n2', self.names2OpResults(self.updateEndInputAttributes()) - print 'n3', self.names2OpResults(self.updateEndOutputAttributes()) -
--- a/test_mlp.py Fri Jun 06 14:01:36 2008 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,132 +0,0 @@ - -from mlp import * -import dataset -import nnet_ops - - -from functools import partial -def separator(debugger, i, node, *ths): - print "===================" - -def what(debugger, i, node, *ths): - print "#%i" % i, node - -def parents(debugger, i, node, *ths): - print [input.step for input in node.inputs] - -def input_shapes(debugger, i, node, *ths): - print "input shapes: ", - for r in node.inputs: - if hasattr(r.value, 'shape'): - print r.value.shape, - else: - print "no_shape", - print - -def input_types(debugger, i, node, *ths): - print "input types: ", - for r in node.inputs: - print r.type, - print - -def output_shapes(debugger, i, node, *ths): - print "output shapes:", - for r in node.outputs: - if hasattr(r.value, 'shape'): - print r.value.shape, - else: - print "no_shape", - print - -def output_types(debugger, i, node, *ths): - print "output types:", - for r in node.outputs: - print r.type, - print - - -def test0(): - linker = 'c|py' - #linker = partial(theano.gof.DebugLinker, linkers = [theano.gof.OpWiseCLinker], - # debug_pre = [separator, what, parents, input_types, input_shapes], - # debug_post = [output_shapes, output_types], - # compare_fn = lambda x, y: numpy.all(x == y)) - - nnet = OneHiddenLayerNNetClassifier(10,2,.001,1000, linker = linker) - training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - fprop=nnet(training_set) - - output_ds = fprop(training_set) - - for fieldname in output_ds.fieldNames(): - print fieldname+"=",output_ds[fieldname] - -def test1(): - nnet = ManualNNet(2, 10,3,.1,1000) - training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - fprop=nnet(training_set) - - output_ds = fprop(training_set) - - for fieldname in output_ds.fieldNames(): - print fieldname+"=",output_ds[fieldname] - -def test2(): - training_set = dataset.ArrayDataSet(numpy.array([[0, 0, 0], - [0, 1, 1], - [1, 0, 1], - [1, 1, 1]]), - {'input':slice(2),'target':2}) - nin, nhid=2, 10 - def sigm_layer(input): - W1 = t.matrix('W1') - b1 = t.vector('b1') - return (nnet_ops.sigmoid(b1 + t.dot(input, W1)), - [W1, b1], - [(numpy.random.rand(nin, nhid) -0.5) * 0.001, numpy.zeros(nhid)]) - nnet = ManualNNet(nin, nhid, 3, .1, 1000, hidden_layer=sigm_layer) - fprop=nnet(training_set) - - output_ds = fprop(training_set) - - for fieldname in output_ds.fieldNames(): - print fieldname+"=",output_ds[fieldname] - -def test_interface_0(): - learner = ManualNNet(2, 10, 3, .1, 1000) - - model = learner(training_set) - - model2 = learner(training_set) # trains model a second time - - learner.update(additional_data) # modifies nnet and model by side-effect - - -def test_interface2_1(): - learn_algo = ManualNNet(2, 10, 3, .1, 1000) - - prior = learn_algo() - - model1 = learn_algo(training_set1) - - model2 = learn_algo(training_set2) - - model2.update(additional_data) - - n_match = 0 - for o1, o2 in zip(model1.use(test_data), model2.use(test_data)): - n_match += (o1 == o2) - - print n_match - -test1() -test2() -