pylearn: diff mlp.py @ 118:d0a1bd0378c6

Finished draft of OneHiddenLayerNNetClassifier to debut learner.py

author    Yoshua Bengio <bengioy@iro.umontreal.ca>
date      Wed, 07 May 2008 15:07:56 -0400
parents   88257dfedf8c
children  2ca8dccba270
--- a/mlp.py	Wed May 07 09:16:04 2008 -0400
+++ b/mlp.py	Wed May 07 15:07:56 2008 -0400
@@ -2,6 +2,7 @@
 from learner import *
 from theano import tensor as t
 from theano.scalar import as_scalar
+from nnet_ops import *
 
 # this is one of the simplest example of learner, and illustrates
 # the use of theano
@@ -82,64 +83,61 @@
     def updateMinibatchInputFields(self):
         return ["input","target"]
 
-    def updateMinibatchInputAttributes(self):
-        return self.parameterAttributes()
-
-    def updateMinibatchOutputAttributes(self):
-        return self.parameterAttributes()
-
-    def updateEndInputAttributes(self):
-        return self.parameterAttributes()
-
     def updateEndOutputAttributes(self):
         return ["regularization_term"]
 
+    def lossAttribute(self):
+        return "minibatch_criterion"
+
     def defaultOutputFields(self, input_fields):
         output_fields = ["output", "output_class",]
         if "target" in input_fields:
             output_fields += ["class_error", "nll"]
         return output_fields
 
-    def __init__(self):
+    def __init__(self,n_hidden,n_classes,learning_rate,init_range=1.):
+        self._n_outputs = n_classes
+        self._n_hidden = n_hidden
+        self._init_range = init_range
+        self.learning_rate = learning_rate # this is the float
+        self._learning_rate = t.scalar('learning_rate') # this is the symbol
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
-        self._theta = t.matrix('theta')
-        self._W = self._theta[:,1:]
-        self._b = self._theta[:,0]
-        self._XtX = t.matrix('XtX')
-        self._XtY = t.matrix('XtY')
-        self._extended_input = t.prepend_one_to_each_row(self._input)
-        self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix
-        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
-        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
-        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
-        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
-
-        OneShotTLearner.__init__(self)
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
+        self._W1 = t.matrix('W1')
+        self._W2 = t.matrix('W2')
+        self._b1 = t.row('b1')
+        self._b2 = t.row('b2')
+        self._regularizer = self._L2_regularizer * (t.dot(self._W1,self._W1) + t.dot(self._W2,self._W2))
+        self._output_activations = self._b2+t.dot(t.tanh(self._b1+t.dot(self._input,self._W1.T)),self._W2.T)
+        self._output = t.softmax(self._output_activations)
+        self._output_class = t.argmax(self._output,1)
+        self._class_error = self._output_class != self._target
+        self._nll,self._output = crossentropy_softmax_1hot(self._output_activations,self._target)
+        self._minibatch_criterion = self._nll + self._regularizer / t.shape(self._input)[0]
+        MinibatchUpdatesTLearner.__init__(self)
 
     def allocate(self,minibatch):
         minibatch_n_inputs  = minibatch["input"].shape[1]
-        minibatch_n_outputs = minibatch["target"].shape[1]
         if not self._n_inputs:
-            self._n_inputs = minibatch_n_inputs
-            self._n_outputs = minibatch_n_outputs
-            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
-            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
-            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self._n_inputs = minibatch_n_inputs
+            self.b1 = numpy.zeros(self._n_hidden)
+            self.b2 = numpy.zeros(self._n_outputs)
            self.forget()
-        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
-            # if the input or target changes dimension on the fly, we resize and forget everything
+        elif self._n_inputs!=minibatch_n_inputs:
+            # if the input changes dimension on the fly, we resize and forget everything
             self.forget()
 
     def forget(self):
-        if self._n_inputs and self._n_outputs:
-            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
-            self.XtY.resize((1+self.n_inputs,self.n_outputs))
-            self.XtX.data[:,:]=0
-            self.XtY.data[:,:]=0
-            numpy.diag(self.XtX.data)[1:]=self.lambda
+        if self._n_inputs:
+            r = self._init_range/math.sqrt(self._n_inputs)
+            self.W1 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_hidden,self._n_inputs))
+            r = self._init_range/math.sqrt(self._n_hidden)
+            self.W2 = numpy.random.uniform(low=-r,high=r,
+                                           size=(self._n_outputs,self._n_hidden))
+            self.b1[:]=0
+            self.b2[:]=0
 
 class MLP(MinibatchUpdatesTLearner):
@@ -236,7 +234,7 @@
     def __init__(self):
         self._input = t.matrix('input') # n_examples x n_inputs
         self._target = t.matrix('target') # n_examples x n_outputs
-        self._lambda = as_scalar(0.,'lambda')
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
         self._theta = t.matrix('theta')
         self._W = self._theta[:,1:]
         self._b = self._theta[:,0]
@@ -245,7 +243,7 @@
         self._extended_input = t.prepend_one_to_each_row(self._input)
         self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix
         self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
-        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
         self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
         self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
         self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
@@ -272,5 +270,5 @@
         self.XtY.resize((1+self.n_inputs,self.n_outputs))
         self.XtX.data[:,:]=0
         self.XtY.data[:,:]=0
-        numpy.diag(self.XtX.data)[1:]=self.lambda
+        numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
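
For readers who would rather not trace the symbolic graph, here is a minimal NumPy sketch of the computation the new OneHiddenLayerNNetClassifier expressions describe: the fan-in-scaled uniform initialization from forget(), the tanh hidden layer with softmax output built in __init__, and the regularized minibatch criterion. The function names (init_params, minibatch_criterion) are hypothetical, introduced only for this illustration; the real class builds Theano expressions and uses the fused crossentropy_softmax_1hot op from nnet_ops rather than the explicit softmax and log written here. Note also that the diff's regularizer is written as t.dot(W,W); the sketch assumes the standard L2 penalty (sum of squared weights) was the intent.

import math
import numpy

def init_params(n_inputs, n_hidden, n_outputs, init_range=1.0, rng=numpy.random):
    # Mirrors forget(): weights uniform in +-init_range/sqrt(fan-in), biases zero.
    r = init_range / math.sqrt(n_inputs)
    W1 = rng.uniform(low=-r, high=r, size=(n_hidden, n_inputs))
    r = init_range / math.sqrt(n_hidden)
    W2 = rng.uniform(low=-r, high=r, size=(n_outputs, n_hidden))
    b1 = numpy.zeros(n_hidden)
    b2 = numpy.zeros(n_outputs)
    return W1, b1, W2, b2

def minibatch_criterion(x, targets, W1, b1, W2, b2, L2_regularizer=0.0):
    # Forward pass, as in self._output_activations:
    # tanh hidden layer, then affine output activations.
    activations = b2 + numpy.dot(numpy.tanh(b1 + numpy.dot(x, W1.T)), W2.T)
    # Numerically stabilized softmax (what t.softmax denotes).
    a = activations - activations.max(axis=1, keepdims=True)
    p = numpy.exp(a)
    p /= p.sum(axis=1, keepdims=True)
    # Per-example negative log-likelihood of the integer class targets;
    # crossentropy_softmax_1hot fuses this step with the softmax above.
    nll = -numpy.log(p[numpy.arange(x.shape[0]), targets])
    # L2 penalty (sum of squared weights, assumed intent of t.dot(W,W)),
    # amortized over the minibatch as in self._minibatch_criterion.
    penalty = L2_regularizer * ((W1 ** 2).sum() + (W2 ** 2).sum())
    return nll + penalty / x.shape[0], p.argmax(axis=1)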
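
A quick usage example for the sketch, with made-up sizes:

W1, b1, W2, b2 = init_params(n_inputs=784, n_hidden=50, n_outputs=10)
x = numpy.random.randn(16, 784)            # a minibatch of 16 examples
targets = numpy.random.randint(0, 10, 16)  # integer class labels
criterion, predicted_class = minibatch_criterion(x, targets, W1, b1, W2, b2,
                                                 L2_regularizer=1e-4)

The learning_rate float stored in __init__ suggests the minibatch update is a gradient step on the mean of this criterion with respect to W1, b1, W2 and b2; in the real class Theano derives those gradients from the symbolic graph, so no hand-written backprop appears in the diff.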