# HG changeset patch # User Yoshua Bengio # Date 1210712453 14400 # Node ID 4afb41e61fcfa510d720a92e92ffe4abb5264e56 # Parent 1b06bc2c3ca979ca47229ed5ca94a40bbbf71ae3 strange bug in linker obtained by 'python test_mlp.py' diff -r 1b06bc2c3ca9 -r 4afb41e61fcf mlp.py --- a/mlp.py Tue May 13 15:49:39 2008 -0400 +++ b/mlp.py Tue May 13 17:00:53 2008 -0400 @@ -117,6 +117,10 @@ output_fields += ["class_error", "nll"] return output_fields + def updateMinibatch(self,minibatch): + MinibatchUpdatesTLearner.updateMinibatch(self,minibatch) + print self.nll + def allocate(self,minibatch): minibatch_n_inputs = minibatch["input"].shape[1] if not self._n_inputs: @@ -163,135 +167,3 @@ print 'n2', self.names2OpResults(self.updateEndInputAttributes()) print 'n3', self.names2OpResults(self.updateEndOutputAttributes()) -class MLP(MinibatchUpdatesTLearner): - """ - Implement a feedforward multi-layer perceptron, with or without L1 and/or L2 regularization. - - The predictor parameters are obtained by minibatch/online gradient descent. - Training can proceed sequentially (with multiple calls to update with - different disjoint subsets of the training sets). - - Hyper-parameters: - - L1_regularizer - - L2_regularizer - - neuron_sparsity_regularizer - - initial_learning_rate - - learning_rate_decrease_rate - - n_hidden_per_layer (a list of integers) - - activation_function ("sigmoid","tanh", or "ratio") - - The output/task type (classification, regression, etc.) is obtained by specializing MLP. - - For each (input[t],output[t]) pair in a minibatch,:: - - activation[0] = input_t - for k=1 to n_hidden_layers: - activation[k]=activation_function(b[k]+ W[k]*activation[k-1]) - output_t = output_activation_function(b[n_hidden_layers+1]+W[n_hidden_layers+1]*activation[n_hidden_layers]) - - and the b and W are obtained by minimizing the following by stochastic minibatch gradient descent:: - - L2_regularizer sum_{ijk} W_{kij}^2 + L1_regularizer sum_{kij} |W_{kij}| - + neuron_sparsity_regularizer sum_{ki} |b_{ki} + infinity| - - sum_t log P_{output_model}(target_t | output_t) - - The fields and attributes expected and produced by use and update are the following: - - - Input and output fields (example-wise quantities): - - - 'input' (always expected by use and update) - - 'target' (optionally expected by use and always by update) - - 'output' (optionally produced by use) - - error fields produced by sub-class of MLP - - - optional attributes (optionally expected as input_dataset attributes) - (warning, this may be dangerous, the 'use' method will use those provided in the - input_dataset rather than those learned during 'update'; currently no support - for providing these to update): - - - 'L1_regularizer' - - 'L2_regularizer' - - 'b' - - 'W' - - 'parameters' = [b[1], W[1], b[2], W[2], ...] - - 'regularization_term' - - """ - - def attributeNames(self): - return ["parameters","b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer","regularization_term"] - - def useInputAttributes(self): - return ["b","W"] - - def useOutputAttributes(self): - return [] - - def updateInputAttributes(self): - return ["b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer"] - - def updateMinibatchInputFields(self): - return ["input","target"] - - def updateMinibatchInputAttributes(self): - return ["b","W"] - - def updateMinibatchOutputAttributes(self): - return ["new_XtX","new_XtY"] - - def updateEndInputAttributes(self): - return ["theta","XtX","XtY"] - - def updateEndOutputAttributes(self): - return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ? - - def parameterAttributes(self): - return ["b","W"] - - def defaultOutputFields(self, input_fields): - output_fields = ["output"] - if "target" in input_fields: - output_fields.append("squared_error") - return output_fields - - def __init__(self): - self._input = t.matrix('input') # n_examples x n_inputs - self._target = t.matrix('target') # n_examples x n_outputs - self._L2_regularizer = t.scalar('L2_regularizer') - self._theta = t.matrix('theta') - self._W = self._theta[:,1:] - self._b = self._theta[:,0] - self._XtX = t.matrix('XtX') - self._XtY = t.matrix('XtY') - self._extended_input = t.prepend_one_to_each_row(self._input) - self._output = t.dot(self._input,self._W.T) + self._b # (n_examples , n_outputs) matrix - self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector - self._regularizer = self._L2_regularizer * t.dot(self._W,self._W) - self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input)) - self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target)) - self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY) - - OneShotTLearner.__init__(self) - - def allocate(self,minibatch): - minibatch_n_inputs = minibatch["input"].shape[1] - minibatch_n_outputs = minibatch["target"].shape[1] - if not self._n_inputs: - self._n_inputs = minibatch_n_inputs - self._n_outputs = minibatch_n_outputs - self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs)) - self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs)) - self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs)) - self.forget() - elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs: - # if the input or target changes dimension on the fly, we resize and forget everything - self.forget() - - def forget(self): - if self._n_inputs and self._n_outputs: - self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) - self.XtY.resize((1+self.n_inputs,self.n_outputs)) - self.XtX.data[:,:]=0 - self.XtY.data[:,:]=0 - numpy.diag(self.XtX.data)[1:]=self.L2_regularizer -