Mercurial > pylearn
view mlp.py @ 116:9330d941fa1f
added function load_pmat_as_array_dataset and save_array_dataset_as_pmat
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Wed, 07 May 2008 13:07:33 -0400 |
parents | 88257dfedf8c |
children | d0a1bd0378c6 |
line wrap: on
line source
from learner import *
from theano import tensor as t
from theano.scalar import as_scalar

# numpy is used directly by allocate()/forget() below; import it explicitly
# instead of relying on it leaking through `from learner import *`.
import numpy

# This is one of the simplest examples of a learner, and illustrates
# the use of theano.


class OneHiddenLayerNNetClassifier(MinibatchUpdatesTLearner):
    """
    Implement a straightforward classical feedforward
    one-hidden-layer neural net, with L2 regularization.

    The predictor parameters are obtained by minibatch/online gradient descent.
    Training can proceed sequentially (with multiple calls to update with
    different disjoint subsets of the training sets).

    Hyper-parameters:
      - L2_regularizer
      - learning_rate
      - n_hidden

    For each (input_t,output_t) pair in a minibatch,::

       output_activations_t = b2+W2*tanh(b1+W1*input_t)
       output_t = softmax(output_activations_t)
       output_class_t = argmax(output_activations_t)
       class_error_t = 1_{output_class_t != target_t}
       nll_t = -log(output_t[target_t])

    and the training criterion is::

       loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t

    The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by
    stochastic minibatch gradient descent::

       parameters[i] -= learning_rate * dloss/dparameters[i]

    The fields and attributes expected and produced by use and update are the following:

     - Input and output fields (example-wise quantities):

       - 'input' (always expected by use and update)
       - 'target' (optionally expected by use and always by update)
       - 'output' (optionally produced by use)
       - 'output_class' (optionally produced by use)
       - 'class_error' (optionally produced by use)
       - 'nll' (optionally produced by use)

     - optional attributes (optionally expected as input_dataset attributes)
       (warning, this may be dangerous, the 'use' method will use those provided in the
       input_dataset rather than those learned during 'update'; currently no support
       for providing these to update):

       - 'L2_regularizer'
       - 'b1'
       - 'W1'
       - 'b2'
       - 'W2'
       - 'parameters' = [b1, W1, b2, W2]
       - 'regularization_term'

    NOTE(review): the symbolic graph built in __init__ and the numeric state
    handled by allocate()/forget() (theta, XtX, XtY) still describe a linear
    least-squares model, not the tanh/softmax network documented above.
    """

    def attributeNames(self):
        """Return the names of all attributes this learner exposes."""
        # "W1" was previously misspelled as a second "W2".
        return ["parameters", "b1", "W1", "b2", "W2",
                "L2_regularizer", "regularization_term"]

    def parameterAttributes(self):
        """Return the names of the learned parameters."""
        return ["b1", "W1", "b2", "W2"]

    def useInputAttributes(self):
        """Return the attribute names read by 'use' (the parameters)."""
        return self.parameterAttributes()

    def useOutputAttributes(self):
        """Return the attribute names written by 'use' (none)."""
        return []

    def updateInputAttributes(self):
        """Return the attribute names read by 'update'."""
        return self.parameterAttributes() + ["L2_regularizer"]

    def updateMinibatchInputFields(self):
        """Return the example-wise fields read for each minibatch update."""
        return ["input", "target"]

    def updateMinibatchInputAttributes(self):
        """Return the attribute names read for each minibatch update."""
        return self.parameterAttributes()

    def updateMinibatchOutputAttributes(self):
        """Return the attribute names written by each minibatch update."""
        return self.parameterAttributes()

    def updateEndInputAttributes(self):
        """Return the attribute names read at the end of 'update'."""
        return self.parameterAttributes()

    def updateEndOutputAttributes(self):
        """Return the attribute names written at the end of 'update'."""
        return ["regularization_term"]

    def defaultOutputFields(self, input_fields):
        """Return the default output field names for the given input fields.

        The error fields are only included when 'target' is available.
        """
        output_fields = ["output", "output_class"]
        if "target" in input_fields:
            output_fields += ["class_error", "nll"]
        return output_fields

    def __init__(self):
        """Build the symbolic variables, then initialise the base learner."""
        # Sizes are discovered from the first minibatch seen by allocate();
        # define them up front so allocate() can test them safely.
        self._n_inputs = None
        self._n_outputs = None

        self._input = t.matrix('input')  # n_examples x n_inputs
        self._target = t.matrix('target')  # n_examples x n_outputs
        self._lambda = as_scalar(0., 'lambda')
        self._theta = t.matrix('theta')
        self._W = self._theta[:, 1:]
        self._b = self._theta[:, 0]
        self._XtX = t.matrix('XtX')
        self._XtY = t.matrix('XtY')
        self._extended_input = t.prepend_one_to_each_row(self._input)
        self._output = t.dot(self._input, self._W.T) + self._b  # (n_examples , n_outputs) matrix
        self._squared_error = t.sum_within_rows(t.sqr(self._output - self._target))  # (n_examples ) vector
        # NOTE(review): dot(W, W) is only defined for a square W; a sum of
        # squared weights was presumably intended -- confirm before relying on it.
        self._regularizer = self._lambda * t.dot(self._W, self._W)
        # NOTE(review): add_inplace is not defined in this file; presumably it
        # is provided by `from learner import *` -- confirm.
        self._new_XtX = add_inplace(self._XtX, t.dot(self._extended_input.T, self._extended_input))
        self._new_XtY = add_inplace(self._XtY, t.dot(self._extended_input.T, self._target))
        self._new_theta = t.solve_inplace(self._theta, self._XtX, self._XtY)
        # Initialise the class this learner actually derives from (the
        # original called OneShotTLearner.__init__, which is not the base).
        MinibatchUpdatesTLearner.__init__(self)

    def allocate(self, minibatch):
        """Allocate or re-size the numeric state to match `minibatch`.

        The sizes are read from the second dimension of minibatch["input"]
        and minibatch["target"]. On first use the arrays are created; if the
        sizes later change, everything is resized and reset.
        """
        minibatch_n_inputs = minibatch["input"].shape[1]
        minibatch_n_outputs = minibatch["target"].shape[1]
        if not self._n_inputs:
            self._n_inputs = minibatch_n_inputs
            self._n_outputs = minibatch_n_outputs
            self.XtX = numpy.zeros((1 + self._n_inputs, 1 + self._n_inputs))
            self.XtY = numpy.zeros((1 + self._n_inputs, self._n_outputs))
            self.theta = numpy.zeros((self._n_outputs, 1 + self._n_inputs))
            self.forget()
        elif self._n_inputs != minibatch_n_inputs or self._n_outputs != minibatch_n_outputs:
            # if the input or target changes dimension on the fly, we resize and forget everything.
            # The new sizes must be recorded first, otherwise forget() would
            # resize the arrays to the shapes they already have.
            self._n_inputs = minibatch_n_inputs
            self._n_outputs = minibatch_n_outputs
            self.theta.resize((self._n_outputs, 1 + self._n_inputs))
            self.theta[:, :] = 0
            self.forget()

    def forget(self):
        """Reset XtX and XtY, leaving only the L2 penalty on XtX's diagonal.

        Does nothing until allocate() has recorded the input/output sizes.
        """
        if self._n_inputs and self._n_outputs:
            n_extended = 1 + self._n_inputs
            self.XtX.resize((n_extended, n_extended))
            self.XtY.resize((n_extended, self._n_outputs))
            self.XtX[:, :] = 0
            self.XtY[:, :] = 0
            # `self.lambda` is not valid Python (`lambda` is a keyword); use
            # the 'L2_regularizer' attribute this learner advertises. It
            # defaults to 0., matching the symbolic self._lambda.
            l2_coefficient = getattr(self, "L2_regularizer", 0.)
            # Write the penalty straight into XtX (numpy.diag would hand back
            # a copy), skipping entry [0, 0], which belongs to the bias column.
            weight_idx = numpy.arange(1, n_extended)
            self.XtX[weight_idx, weight_idx] = l2_coefficient


class MLP(MinibatchUpdatesTLearner):
    """
    Implement a feedforward multi-layer perceptron, with or without L1 and/or L2 regularization.

    The predictor parameters are obtained by minibatch/online gradient descent.
    Training can proceed sequentially (with multiple calls to update with
    different disjoint subsets of the training sets).

    Hyper-parameters:
      - L1_regularizer
      - L2_regularizer
      - neuron_sparsity_regularizer
      - initial_learning_rate
      - learning_rate_decrease_rate
      - n_hidden_per_layer (a list of integers)
      - activation_function ("sigmoid","tanh", or "ratio")

    The output/task type (classification, regression, etc.) is obtained by specializing MLP.

    For each (input[t],output[t]) pair in a minibatch,::

       activation[0] = input_t
       for k=1 to n_hidden_layers:
          activation[k]=activation_function(b[k]+ W[k]*activation[k-1])
       output_t = output_activation_function(b[n_hidden_layers+1]+W[n_hidden_layers+1]*activation[n_hidden_layers])

    and the b and W are obtained by minimizing the following by stochastic minibatch gradient descent::

       L2_regularizer sum_{ijk} W_{kij}^2  + L1_regularizer sum_{kij} |W_{kij}|
       + neuron_sparsity_regularizer sum_{ki} |b_{ki} + infinity|
       - sum_t log P_{output_model}(target_t | output_t)

    The fields and attributes expected and produced by use and update are the following:

     - Input and output fields (example-wise quantities):

       - 'input' (always expected by use and update)
       - 'target' (optionally expected by use and always by update)
       - 'output' (optionally produced by use)
       - error fields produced by sub-class of MLP

     - optional attributes (optionally expected as input_dataset attributes)
       (warning, this may be dangerous, the 'use' method will use those provided in the
       input_dataset rather than those learned during 'update'; currently no support
       for providing these to update):

       - 'L1_regularizer'
       - 'L2_regularizer'
       - 'b'
       - 'W'
       - 'parameters' = [b[1], W[1], b[2], W[2], ...]
       - 'regularization_term'

    NOTE(review): the symbolic graph built in __init__ and the numeric state
    handled by allocate()/forget() (theta, XtX, XtY) still describe a linear
    least-squares model, not the multi-layer network documented above.
    """

    def attributeNames(self):
        """Return the names of all attributes this learner exposes."""
        return ["parameters", "b", "W", "L1_regularizer", "L2_regularizer",
                "neuron_sparsity_regularizer", "regularization_term"]

    def useInputAttributes(self):
        """Return the attribute names read by 'use'."""
        return ["b", "W"]

    def useOutputAttributes(self):
        """Return the attribute names written by 'use' (none)."""
        return []

    def updateInputAttributes(self):
        """Return the attribute names read by 'update'."""
        return ["b", "W", "L1_regularizer", "L2_regularizer",
                "neuron_sparsity_regularizer"]

    def updateMinibatchInputFields(self):
        """Return the example-wise fields read for each minibatch update."""
        return ["input", "target"]

    def updateMinibatchInputAttributes(self):
        """Return the attribute names read for each minibatch update."""
        return ["b", "W"]

    def updateMinibatchOutputAttributes(self):
        """Return the attribute names written by each minibatch update."""
        return ["new_XtX", "new_XtY"]

    def updateEndInputAttributes(self):
        """Return the attribute names read at the end of 'update'."""
        return ["theta", "XtX", "XtY"]

    def updateEndOutputAttributes(self):
        """Return the attribute names written at the end of 'update'."""
        # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
        return ["new_theta", "b", "W", "regularization_term"]

    def parameterAttributes(self):
        """Return the names of the learned parameters."""
        return ["b", "W"]

    def defaultOutputFields(self, input_fields):
        """Return the default output field names for the given input fields.

        'squared_error' is only included when 'target' is available.
        """
        output_fields = ["output"]
        if "target" in input_fields:
            output_fields.append("squared_error")
        return output_fields

    def __init__(self):
        """Build the symbolic variables, then initialise the base learner."""
        # Sizes are discovered from the first minibatch seen by allocate();
        # define them up front so allocate() can test them safely.
        self._n_inputs = None
        self._n_outputs = None

        self._input = t.matrix('input')  # n_examples x n_inputs
        self._target = t.matrix('target')  # n_examples x n_outputs
        self._lambda = as_scalar(0., 'lambda')
        self._theta = t.matrix('theta')
        self._W = self._theta[:, 1:]
        self._b = self._theta[:, 0]
        self._XtX = t.matrix('XtX')
        self._XtY = t.matrix('XtY')
        self._extended_input = t.prepend_one_to_each_row(self._input)
        self._output = t.dot(self._input, self._W.T) + self._b  # (n_examples , n_outputs) matrix
        self._squared_error = t.sum_within_rows(t.sqr(self._output - self._target))  # (n_examples ) vector
        # NOTE(review): dot(W, W) is only defined for a square W; a sum of
        # squared weights was presumably intended -- confirm before relying on it.
        self._regularizer = self._lambda * t.dot(self._W, self._W)
        # NOTE(review): add_inplace is not defined in this file; presumably it
        # is provided by `from learner import *` -- confirm.
        self._new_XtX = add_inplace(self._XtX, t.dot(self._extended_input.T, self._extended_input))
        self._new_XtY = add_inplace(self._XtY, t.dot(self._extended_input.T, self._target))
        self._new_theta = t.solve_inplace(self._theta, self._XtX, self._XtY)
        # Initialise the class this learner actually derives from (the
        # original called OneShotTLearner.__init__, which is not the base).
        MinibatchUpdatesTLearner.__init__(self)

    def allocate(self, minibatch):
        """Allocate or re-size the numeric state to match `minibatch`.

        The sizes are read from the second dimension of minibatch["input"]
        and minibatch["target"]. On first use the arrays are created; if the
        sizes later change, everything is resized and reset.
        """
        minibatch_n_inputs = minibatch["input"].shape[1]
        minibatch_n_outputs = minibatch["target"].shape[1]
        if not self._n_inputs:
            self._n_inputs = minibatch_n_inputs
            self._n_outputs = minibatch_n_outputs
            self.XtX = numpy.zeros((1 + self._n_inputs, 1 + self._n_inputs))
            self.XtY = numpy.zeros((1 + self._n_inputs, self._n_outputs))
            self.theta = numpy.zeros((self._n_outputs, 1 + self._n_inputs))
            self.forget()
        elif self._n_inputs != minibatch_n_inputs or self._n_outputs != minibatch_n_outputs:
            # if the input or target changes dimension on the fly, we resize and forget everything.
            # The new sizes must be recorded first, otherwise forget() would
            # resize the arrays to the shapes they already have.
            self._n_inputs = minibatch_n_inputs
            self._n_outputs = minibatch_n_outputs
            self.theta.resize((self._n_outputs, 1 + self._n_inputs))
            self.theta[:, :] = 0
            self.forget()

    def forget(self):
        """Reset XtX and XtY, leaving only the L2 penalty on XtX's diagonal.

        Does nothing until allocate() has recorded the input/output sizes.
        """
        if self._n_inputs and self._n_outputs:
            n_extended = 1 + self._n_inputs
            self.XtX.resize((n_extended, n_extended))
            self.XtY.resize((n_extended, self._n_outputs))
            self.XtX[:, :] = 0
            self.XtY[:, :] = 0
            # `self.lambda` is not valid Python (`lambda` is a keyword); use
            # the 'L2_regularizer' attribute this learner advertises. It
            # defaults to 0., matching the symbolic self._lambda.
            l2_coefficient = getattr(self, "L2_regularizer", 0.)
            # Write the penalty straight into XtX (numpy.diag would hand back
            # a copy), skipping entry [0, 0], which belongs to the bias column.
            weight_idx = numpy.arange(1, n_extended)
            self.XtX[weight_idx, weight_idx] = l2_coefficient