pylearn: mlp.py comparison

comparison mlp.py @ 111:88257dfedf8c

Added another work in progress, for mlp's

author	bengioy@bengiomac.local
date	Wed, 07 May 2008 09:16:04 -0400
parents
children	d0a1bd0378c6

comparison

equal deleted inserted replaced

-:8fa1ef2411a0
+:88257dfedf8c
+from learner import *
+from theano import tensor as t
+from theano.scalar import as_scalar
+# This is one of the simplest examples of a learner, and it illustrates
+# the use of theano.
+class OneHiddenLayerNNetClassifier(MinibatchUpdatesTLearner):
+"""
+Implement a straightforward classicial feedforward
+one-hidden-layer neural net, with L2 regularization.
+The predictor parameters are obtained by minibatch/online gradient descent.
+Training can proceed sequentially (with multiple calls to update with
+different disjoint subsets of the training sets).
+Hyper-parameters:
+- L2_regularizer
+- learning_rate
+- n_hidden
+For each (input_t,output_t) pair in a minibatch,::
+output_activations_t = b2+W2*tanh(b1+W1*input_t)
+output_t = softmax(output_activations_t)
+output_class_t = argmax(output_activations_t)
+class_error_t = 1_{output_class_t != target_t}
+nll_t = -log(output_t[target_t])
+and the training criterion is::
+loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t
+The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by
+stochastic minibatch gradient descent::
+parameters[i] -= learning_rate * dloss/dparameters[i]
+The fields and attributes expected and produced by use and update are the following:
+- Input and output fields (example-wise quantities):
+- 'input' (always expected by use and update)
+- 'target' (optionally expected by use and always by update)
+- 'output' (optionally produced by use)
+- 'output_class' (optionally produced by use)
+- 'class_error' (optionally produced by use)
+- 'nll' (optionally produced by use)
+- optional attributes (optionally expected as input_dataset attributes)
+(warning, this may be dangerous, the 'use' method will use those provided in the
+input_dataset rather than those learned during 'update'; currently no support
+for providing these to update):
+- 'L2_regularizer'
+- 'b1'
+- 'W1'
+- 'b2'
+- 'W2'
+- 'parameters' = [b1, W1, b2, W2]
+- 'regularization_term'
+"""
+def attributeNames(self):
+return ["parameters","b1","W2","b2","W2", "L2_regularizer","regularization_term"]
+def parameterAttributes(self):
+return ["b1","W1", "b2", "W2"]
+def useInputAttributes(self):
+return self.parameterAttributes()
+def useOutputAttributes(self):
+return []
+def updateInputAttributes(self):
+return self.parameterAttributes() + ["L2_regularizer"]
+def updateMinibatchInputFields(self):
+return ["input","target"]
+def updateMinibatchInputAttributes(self):
+return self.parameterAttributes()
+def updateMinibatchOutputAttributes(self):
+return self.parameterAttributes()
+def updateEndInputAttributes(self):
+return self.parameterAttributes()
+def updateEndOutputAttributes(self):
+return ["regularization_term"]
+def defaultOutputFields(self, input_fields):
+output_fields = ["output", "output_class",]
+if "target" in input_fields:
+output_fields += ["class_error", "nll"]
+return output_fields
+def __init__(self):
+self._input = t.matrix('input') # n_examples x n_inputs
+self._target = t.matrix('target') # n_examples x n_outputs
+self._lambda = as_scalar(0.,'lambda')
+self._theta = t.matrix('theta')
+self._W = self._theta[:,1:]
+self._b = self._theta[:,0]
+self._XtX = t.matrix('XtX')
+self._XtY = t.matrix('XtY')
+self._extended_input = t.prepend_one_to_each_row(self._input)
+self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
+self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
+self._regularizer = self._lambda * t.dot(self._W,self._W)
+self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
+self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
+self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
+OneShotTLearner.__init__(self)
+def allocate(self,minibatch):
+minibatch_n_inputs  = minibatch["input"].shape[1]
+minibatch_n_outputs = minibatch["target"].shape[1]
+if not self._n_inputs:
+self._n_inputs = minibatch_n_inputs
+self._n_outputs = minibatch_n_outputs
+self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
+self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
+self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+self.forget()
+elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
+# if the input or target changes dimension on the fly, we resize and forget everything
+self.forget()
+def forget(self):
+if self._n_inputs and self._n_outputs:
+self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
+self.XtY.resize((1+self.n_inputs,self.n_outputs))
+self.XtX.data[:,:]=0
+self.XtY.data[:,:]=0
+numpy.diag(self.XtX.data)[1:]=self.lambda
+class MLP(MinibatchUpdatesTLearner):
+"""
+Implement a feedforward multi-layer perceptron, with or without L1 and/or L2 regularization.
+The predictor parameters are obtained by minibatch/online gradient descent.
+Training can proceed sequentially (with multiple calls to update with
+different disjoint subsets of the training sets).
+Hyper-parameters:
+- L1_regularizer
+- L2_regularizer
+- neuron_sparsity_regularizer
+- initial_learning_rate
+- learning_rate_decrease_rate
+- n_hidden_per_layer (a list of integers)
+- activation_function ("sigmoid","tanh", or "ratio")
+The output/task type (classification, regression, etc.) is obtained by specializing MLP.
+For each (input[t],output[t]) pair in a minibatch,::
+activation[0] = input_t
+for k=1 to n_hidden_layers:
+activation[k]=activation_function(b[k]+ W[k]*activation[k-1])
+output_t = output_activation_function(b[n_hidden_layers+1]+W[n_hidden_layers+1]*activation[n_hidden_layers])
+and the b and W are obtained by minimizing the following by stochastic minibatch gradient descent::
+L2_regularizer sum_{ijk} W_{kij}^2  + L1_regularizer sum_{kij} |W_{kij}|
++ neuron_sparsity_regularizer sum_{ki} |b_{ki} + infinity|
+- sum_t log P_{output_model}(target_t | output_t)
+The fields and attributes expected and produced by use and update are the following:
+- Input and output fields (example-wise quantities):
+- 'input' (always expected by use and update)
+- 'target' (optionally expected by use and always by update)
+- 'output' (optionally produced by use)
+- error fields produced by sub-class of MLP
+- optional attributes (optionally expected as input_dataset attributes)
+(warning, this may be dangerous, the 'use' method will use those provided in the
+input_dataset rather than those learned during 'update'; currently no support
+for providing these to update):
+- 'L1_regularizer'
+- 'L2_regularizer'
+- 'b'
+- 'W'
+- 'parameters' = [b[1], W[1], b[2], W[2], ...]
+- 'regularization_term'
+"""
+def attributeNames(self):
+return ["parameters","b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer","regularization_term"]
+def useInputAttributes(self):
+return ["b","W"]
+def useOutputAttributes(self):
+return []
+def updateInputAttributes(self):
+return ["b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer"]
+def updateMinibatchInputFields(self):
+return ["input","target"]
+def updateMinibatchInputAttributes(self):
+return ["b","W"]
+def updateMinibatchOutputAttributes(self):
+return ["new_XtX","new_XtY"]
+def updateEndInputAttributes(self):
+return ["theta","XtX","XtY"]
+def updateEndOutputAttributes(self):
+return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
+def parameterAttributes(self):
+return ["b","W"]
+def defaultOutputFields(self, input_fields):
+output_fields = ["output"]
+if "target" in input_fields:
+output_fields.append("squared_error")
+return output_fields
+def __init__(self):
+self._input = t.matrix('input') # n_examples x n_inputs
+self._target = t.matrix('target') # n_examples x n_outputs
+self._lambda = as_scalar(0.,'lambda')
+self._theta = t.matrix('theta')
+self._W = self._theta[:,1:]
+self._b = self._theta[:,0]
+self._XtX = t.matrix('XtX')
+self._XtY = t.matrix('XtY')
+self._extended_input = t.prepend_one_to_each_row(self._input)
+self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
+self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
+self._regularizer = self._lambda * t.dot(self._W,self._W)
+self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
+self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
+self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
+OneShotTLearner.__init__(self)
+def allocate(self,minibatch):
+minibatch_n_inputs  = minibatch["input"].shape[1]
+minibatch_n_outputs = minibatch["target"].shape[1]
+if not self._n_inputs:
+self._n_inputs = minibatch_n_inputs
+self._n_outputs = minibatch_n_outputs
+self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
+self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
+self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+self.forget()
+elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
+# if the input or target changes dimension on the fly, we resize and forget everything
+self.forget()
+def forget(self):
+if self._n_inputs and self._n_outputs:
+self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
+self.XtY.resize((1+self.n_inputs,self.n_outputs))
+self.XtX.data[:,:]=0
+self.XtY.data[:,:]=0
+numpy.diag(self.XtX.data)[1:]=self.lambda

Mercurial > pylearn

comparison mlp.py @ 111:88257dfedf8c