Mercurial > pylearn
view mlp.py @ 249:e93e511deb9a
merged
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Tue, 03 Jun 2008 13:18:33 -0400 |
parents | d1359de1ea13 |
children |
line wrap: on
line source
""" A straightforward classicial feedforward one-hidden-layer neural net, with L2 regularization. This is one of the simplest example of L{Learner}, and illustrates the use of theano. """ from learner import * from theano import tensor as t from nnet_ops import * import math from misc import * def function(inputs, outputs, linker='c&py'): return theano.function(inputs, outputs, unpack_single=False,linker=linker) def randshape(*shape): return (numpy.random.rand(*shape) -0.5) * 0.001 class ManualNNet(object): def __init__(self, ninputs, nhid, nclass, lr, nepochs, linker='c&yp', hidden_layer=None): class Vars: def __init__(self, lr, l2coef=0.0): lr = t.constant(lr) l2coef = t.constant(l2coef) input = t.matrix('input') # n_examples x n_inputs target = t.ivector('target') # n_examples x 1 W2 = t.matrix('W2') b2 = t.vector('b2') if hidden_layer: hid, hid_params, hid_ivals, hid_regularization = hidden_layer(input) else: W1 = t.matrix('W1') b1 = t.vector('b1') hid = t.tanh(b1 + t.dot(input, W1)) hid_params = [W1, b1] hid_regularization = l2coef * t.sum(W1*W1) hid_ivals = [randshape(ninputs, nhid), randshape(nhid)] params = [W2, b2] + hid_params ivals = [randshape(nhid, nclass), randshape(nclass)]\ + hid_ivals nll, predictions = crossentropy_softmax_1hot( b2 + t.dot(hid, W2), target) regularization = l2coef * t.sum(W2*W2) + hid_regularization output_class = t.argmax(predictions,1) loss_01 = t.neq(output_class, target) g_params = t.grad(nll + regularization, params) new_params = [t.sub_inplace(p, lr * gp) for p,gp in zip(params, g_params)] self.__dict__.update(locals()); del self.self self.nhid = nhid self.nclass = nclass self.nepochs = nepochs self.v = Vars(lr) self.params = None def update(self, trainset): params = self.v.ivals update_fn = function( [self.v.input, self.v.target] + self.v.params, [self.v.nll] + self.v.new_params) for i in xrange(self.nepochs): for input, target in trainset.minibatches(['input', 'target'], minibatch_size=min(32, len(trainset))): dummy = update_fn(input, target[:,0], *params) if 0: print dummy[0] #the nll return self.use __call__ = update def use(self, dset, output_fieldnames=['output_class'], test_stats_collector=None, copy_inputs=False, put_stats_in_output_dataset=True, output_attributes=[]): inputs = [self.v.input, self.v.target] + self.v.params fn = function(inputs, [getattr(self.v, name) for name in output_fieldnames]) target = dset.fields()['target'] if ('target' in dset.fields()) else numpy.zeros((1,1),dtype='int64') return ApplyFunctionDataSet(dset, lambda input, target: fn(input, target[:,0], *self.v.ivals), output_fieldnames) class OneHiddenLayerNNetClassifier(OnlineGradientTLearner): """ Implement a straightforward classicial feedforward one-hidden-layer neural net, with L2 regularization. The predictor parameters are obtained by minibatch/online gradient descent. Training can proceed sequentially (with multiple calls to update with different disjoint subsets of the training sets). Hyper-parameters: - L2_regularizer - learning_rate - n_hidden For each (input_t,output_t) pair in a minibatch,:: output_activations_t = b2+W2*tanh(b1+W1*input_t) output_t = softmax(output_activations_t) output_class_t = argmax(output_activations_t) class_error_t = 1_{output_class_t != target_t} nll_t = -log(output_t[target_t]) and the training criterion is:: loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by stochastic minibatch gradient descent:: parameters[i] -= learning_rate * dloss/dparameters[i] The fields and attributes expected and produced by use and update are the following: - Input and output fields (example-wise quantities): - 'input' (always expected by use and update) - 'target' (optionally expected by use and always by update) - 'output' (optionally produced by use) - 'output_class' (optionally produced by use) - 'class_error' (optionally produced by use) - 'nll' (optionally produced by use) - optional attributes (optionally expected as input_dataset attributes) (warning, this may be dangerous, the 'use' method will use those provided in the input_dataset rather than those learned during 'update'; currently no support for providing these to update): - 'L2_regularizer' - 'b1' - 'W1' - 'b2' - 'W2' - 'parameters' = [b1, W1, b2, W2] - 'regularization_term' """ def __init__(self,n_hidden,n_classes,learning_rate,max_n_epochs,L2_regularizer=0,init_range=1.,n_inputs=None,minibatch_size=None,linker='c|py'): self._n_inputs = n_inputs self._n_outputs = n_classes self._n_hidden = n_hidden self._init_range = init_range self._max_n_epochs = max_n_epochs self._minibatch_size = minibatch_size self.learning_rate = learning_rate # this is the float self.L2_regularizer = L2_regularizer self._learning_rate = t.scalar('learning_rate') # this is the symbol self._input = t.matrix('input') # n_examples x n_inputs self._target = t.lmatrix('target') # n_examples x 1 self._target_vector = self._target[:,0] self._L2_regularizer = t.scalar('L2_regularizer') self._W1 = t.matrix('W1') self._W2 = t.matrix('W2') self._b1 = t.row('b1') self._b2 = t.row('b2') self._regularization_term = self._L2_regularizer * (t.sum(self._W1*self._W1) + t.sum(self._W2*self._W2)) self._output_activations =self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T) self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target_vector) self._output_class = t.argmax(self._output,1) self._class_error = t.neq(self._output_class,self._target_vector) self._minibatch_criterion = self._nll + self._regularization_term / t.shape(self._input)[0] OnlineGradientTLearner.__init__(self, linker = linker) def attributeNames(self): return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"] def parameterAttributes(self): return ["b1","W1", "b2", "W2"] def updateMinibatchInputFields(self): return ["input","target"] def updateMinibatchInputAttributes(self): return OnlineGradientTLearner.updateMinibatchInputAttributes(self)+["L2_regularizer"] def updateEndOutputAttributes(self): return ["regularization_term"] def lossAttribute(self): return "minibatch_criterion" def defaultOutputFields(self, input_fields): output_fields = ["output", "output_class",] if "target" in input_fields: output_fields += ["class_error", "nll"] return output_fields def updateMinibatch(self,minibatch): MinibatchUpdatesTLearner.updateMinibatch(self,minibatch) #print self.nll def allocate(self,minibatch): minibatch_n_inputs = minibatch["input"].shape[1] if not self._n_inputs: self._n_inputs = minibatch_n_inputs self.b1 = numpy.zeros((1,self._n_hidden)) self.b2 = numpy.zeros((1,self._n_outputs)) self.forget() elif self._n_inputs!=minibatch_n_inputs: # if the input changes dimension on the fly, we resize and forget everything self.forget() def forget(self): if self._n_inputs: r = self._init_range/math.sqrt(self._n_inputs) self.W1 = numpy.random.uniform(low=-r,high=r, size=(self._n_hidden,self._n_inputs)) r = self._init_range/math.sqrt(self._n_hidden) self.W2 = numpy.random.uniform(low=-r,high=r, size=(self._n_outputs,self._n_hidden)) self.b1[:]=0 self.b2[:]=0 self._n_epochs=0 def isLastEpoch(self): self._n_epochs +=1 return self._n_epochs>=self._max_n_epochs def debug_updateMinibatch(self,minibatch): # make sure all required fields are allocated and initialized self.allocate(minibatch) input_attributes = self.names2attributes(self.updateMinibatchInputAttributes()) input_fields = minibatch(*self.updateMinibatchInputFields()) print 'input attributes', input_attributes print 'input fields', input_fields results = self.update_minibatch_function(*(input_attributes+input_fields)) print 'output attributes', self.updateMinibatchOutputAttributes() print 'results', results self.setAttributes(self.updateMinibatchOutputAttributes(), results) if 0: print 'n0', self.names2OpResults(self.updateMinibatchOutputAttributes()+ self.updateMinibatchInputFields()) print 'n1', self.names2OpResults(self.updateMinibatchOutputAttributes()) print 'n2', self.names2OpResults(self.updateEndInputAttributes()) print 'n3', self.names2OpResults(self.updateEndOutputAttributes())