# HG changeset patch
# User Frederic Bastien
# Date 1210176768 14400
# Node ID b6bc1e769b36d2e6a2cf7f73c69fbc870a794b1f
# Parent  88257dfedf8cc756a7e95b904f1c632abd76722e
# Parent  5bfcb7e5df4ab9f15f5a7b458ae3808aca56f671
Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn

diff -r 5bfcb7e5df4a -r b6bc1e769b36 dataset.py
--- a/dataset.py	Wed May 07 12:12:43 2008 -0400
+++ b/dataset.py	Wed May 07 12:12:48 2008 -0400
@@ -9,7 +9,22 @@
 class AbstractFunction (Exception): """Derived class must override this function"""
 class NotImplementedYet (NotImplementedError): """Work in progress, this should eventually be implemented"""
 
-class DataSet(object):
+class AttributesHolder(object):
+    def __init__(self): pass
+
+    def attributeNames(self):
+        raise AbstractFunction()
+
+    def setAttributes(self,attribute_names,attribute_values,make_copies=False):
+        if make_copies:
+            for name,value in zip(attribute_names,attribute_values):
+                self.__setattr__(name,copy.deepcopy(value))
+        else:
+            for name,value in zip(attribute_names,attribute_values):
+                self.__setattr__(name,value)
+
+
+class DataSet(AttributesHolder):
     """A virtual base class for datasets.
 
     A DataSet can be seen as a generalization of a matrix, meant to be used in conjunction
@@ -149,10 +164,6 @@
     def attributeNames(self): return self._attribute_names
 
-    def setAttributes(self,attribute_names,attribute_values):
-        for name,value in zip(attribute_names,attribute_values):
-            self.__setattr__(name,value)
-
 class MinibatchToSingleExampleIterator(object):
     """
     Converts the result of minibatch iterator with minibatch_size==1 into
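The new AttributesHolder base class factors attributeNames()/setAttributes() out of DataSet so that learners and datasets can export attributes the same way. As a quick illustration of the intended usage, here is a stand-alone sketch (the Stats subclass is hypothetical and not part of this changeset):

    import copy

    class AttributesHolder(object):
        def attributeNames(self):
            raise NotImplementedError()
        def setAttributes(self, attribute_names, attribute_values, make_copies=False):
            # copy each value into an attribute of the same name, optionally deep-copying
            for name, value in zip(attribute_names, attribute_values):
                setattr(self, name, copy.deepcopy(value) if make_copies else value)

    class Stats(AttributesHolder):          # hypothetical subclass, for illustration only
        def attributeNames(self):
            return ["mean", "count"]

    s = Stats()
    s.setAttributes(s.attributeNames(), [0.5, [10]], make_copies=True)
    print(s.mean, s.count)                   # -> 0.5 [10]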
diff -r 5bfcb7e5df4a -r b6bc1e769b36 learner.py
--- a/learner.py	Wed May 07 12:12:43 2008 -0400
+++ b/learner.py	Wed May 07 12:12:48 2008 -0400
@@ -1,7 +1,8 @@
 
 from dataset import *
+from compile import Function
 
-class Learner(object):
+class Learner(AttributesHolder):
     """Base class for learning algorithms, provides an interface
     that allows various algorithms to be applicable to generic learning
     algorithms.
@@ -61,14 +62,47 @@
         """
         A Learner may have attributes that it wishes to export to other objects. To automate
         such export, sub-classes should define here the names (list of strings) of these attributes.
+
+        @todo By default, attributeNames looks for all dictionary entries whose name does not start with _.
         """
         return []
 
+    def updateInputAttributes(self):
+        """
+        A subset of self.attributeNames() which are the names of attributes needed by update() in order
+        to do its work.
+        """
+        raise AbstractFunction()
+
+    def useInputAttributes(self):
+        """
+        A subset of self.attributeNames() which are the names of attributes needed by use() in order
+        to do its work.
+        """
+        raise AbstractFunction()
+
+    def updateOutputAttributes(self):
+        """
+        A subset of self.attributeNames() which are the names of attributes modified/created by update()
+        in order to do its work.
+
+        By default these are inferred from the various update output attributes:
+        """
+        return ["parameters"] + self.updateMinibatchOutputAttributes() + self.updateEndOutputAttributes()
+
+    def useOutputAttributes(self):
+        """
+        A subset of self.attributeNames() which are the names of attributes modified/created by use() in order
+        to do its work.
+        """
+        raise AbstractFunction()
+
+
 class TLearner(Learner):
     """
     TLearner is a virtual class of Learners that attempts to factor out of the definition of a learner
     the steps that are common to many implementations of learning algorithms,
-    so as to leave only "the equations" to define in particular sub-classes, using Theano.
+    so as to leave only 'the equations' to define in particular sub-classes, using Theano.
 
     In the default implementations of use and update, it is assumed that the 'use' and 'update' methods
     visit examples in the input dataset sequentially. In the 'use' method only one pass through the dataset is done,
@@ -85,14 +119,6 @@
          or by a stats collector.
        - defaultOutputFields(input_fields): return a list of default dataset output fields when
          None are provided by the caller of use.
-       - update_start(), update_end(), update_minibatch(minibatch): functions
-         executed at the beginning, the end, and in the middle
-         (for each minibatch) of the update method. This model only
-         works for 'online' or one-short learning that requires
-         going only once through the training data. For more complicated
-         models, more specialized subclasses of TLearner should be used
-         or a learning-algorithm specific update method should be defined.
-
     The following naming convention is assumed and important.
     Attributes whose names are listed in attributeNames() can be of any type,
     but those that can be referenced as input/output dataset fields or as
@@ -102,58 +128,107 @@
     the TLearner, created in the sub-class constructor) should be _<name>.
     Typically <name> will be a numpy ndarray and _<name> will be the corresponding Theano Tensor
     (for symbolic manipulation).
+
+    @todo push into Learner everything that can be moved there without
+    depending on Theano
     """
 
     def __init__(self):
         Learner.__init__(self)
+
+    def defaultOutputFields(self, input_fields):
+        """
+        Return a default list of output field names (to put in the output dataset).
+        This will be used when None are provided (as output_fields) by the caller of the 'use' method.
+        This may involve looking at the input_fields (names) available in the
+        input_dataset.
+        """
+        raise AbstractFunction()
+
+    def allocate(self, minibatch):
+        """
+        This function is called at the beginning of each updateMinibatch
+        and should be used to check that all required attributes have been
+        allocated and initialized (usually this function calls forget()
+        when it has to do an initialization).
+        """
+        raise AbstractFunction()
 
-    def _minibatchwise_use_functions(self, input_fields, output_fields, stats_collector):
+    def minibatchwise_use_functions(self, input_fields, output_fields, stats_collector):
         """
         Private helper function called by the generic TLearner.use. It returns a function
         that can map the given input fields to the given output fields (along with the
-        attributes that the stats collector needs for its computation.
+        attributes that the stats collector needs for its computation). The returned function
+        also automatically reads the self.useInputAttributes() and
+        sets the self.useOutputAttributes().
         """
         if not output_fields:
             output_fields = self.defaultOutputFields(input_fields)
         if stats_collector:
-            stats_collector_inputs = stats_collector.inputUpdateAttributes()
+            stats_collector_inputs = stats_collector.input2UpdateAttributes()
             for attribute in stats_collector_inputs:
                 if attribute not in input_fields:
                     output_fields.append(attribute)
         key = (input_fields,output_fields)
         if key not in self.use_functions_dictionary:
-            self.use_functions_dictionary[key]=Function(self._names2attributes(input_fields),
-                                                        self._names2attributes(output_fields))
+            use_input_attributes = self.useInputAttributes()
+            use_output_attributes = self.useOutputAttributes()
+            complete_f = Function(self.names2OpResults(input_fields+use_input_attributes),
+                                  self.names2OpResults(output_fields+use_output_attributes))
+            def f(*input_field_values):
+                input_attribute_values = self.names2attributes(use_input_attributes)
+                results = complete_f(*(input_field_values + input_attribute_values))
+                output_field_values = results[0:len(output_fields)]
+                output_attribute_values = results[len(output_fields):len(results)]
+                if use_output_attributes:
+                    self.setAttributes(use_output_attributes,output_attribute_values)
+                return output_field_values
+            self.use_functions_dictionary[key]=f
         return self.use_functions_dictionary[key]
 
     def attributes(self,return_copy=False):
         """
         Return a list with the values of the learner's attributes (or optionally, a deep copy).
         """
-        return self.names2attributes(self.attributeNames())
-
-    def _names2attributes(self,names,return_Result=False, return_copy=False):
+        return self.names2attributes(self.attributeNames(),return_copy)
+
+    def names2attributes(self,names,return_copy=False):
+        """
+        Private helper function that maps a list of attribute names to a list
+        of (optionally copied) values of attributes.
+        """
+        if return_copy:
+            return [copy.deepcopy(self.__getattr__(name).data) for name in names]
+        else:
+            return [self.__getattr__(name).data for name in names]
+
+    def names2OpResults(self,names):
         """
         Private helper function that maps a list of attribute names to a list
-        of (optionally copies) values or of the Result objects that own these values.
+        of corresponding Op Results (with the same name but with a '_' prefix).
+        """
+        return [self.__getattr__('_'+name).data for name in names]
+
+    def use(self,input_dataset,output_fieldnames=None,output_attributes=[],
+            test_stats_collector=None,copy_inputs=True, put_stats_in_output_dataset=True):
         """
-        if return_Result:
-            if return_copy:
-                return [copy.deepcopy(self.__getattr__(name)) for name in names]
-            else:
-                return [self.__getattr__(name) for name in names]
-        else:
-            if return_copy:
-                return [copy.deepcopy(self.__getattr__(name).data) for name in names]
-            else:
-                return [self.__getattr__(name).data for name in names]
+        The learner tries to compute in the output dataset the output fields specified.
+
+        @todo check if some of the learner attributes are actually SPECIFIED
+        as attributes of the input_dataset, and if so use their values instead
+        of the ones in the learner.
 
-    def use(self,input_dataset,output_fieldnames=None,output_attributes=None,
-            test_stats_collector=None,copy_inputs=True):
+        The learner tries to compute in the output dataset the output fields specified.
+        If None is specified then self.defaultOutputFields(input_dataset.fieldNames())
+        is called to determine the output fields.
+
+        Attributes of the learner can also optionally be copied into the output dataset.
+        If output_attributes is None then all of the attributes in self.attributeNames()
+        are copied into the output dataset, but if it is [] (the default), then none are copied.
+        If a test_stats_collector is provided, then its attributes (test_stats_collector.attributeNames())
+        are also copied into the output dataset attributes.
         """
-        The learner tries to compute in the output dataset the output fields specified
-        """
-        minibatchwise_use_function = _minibatchwise_use_functions(input_dataset.fieldNames(),
+        minibatchwise_use_function = minibatchwise_use_functions(input_dataset.fieldNames(),
                                                                   output_fieldnames,
                                                                   test_stats_collector)
         virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
@@ -165,31 +240,124 @@
         if copy_inputs:
             output_dataset = input_dataset | output_dataset
         # copy the wanted attributes in the dataset
+        if output_attributes is None:
+            output_attributes = self.attributeNames()
         if output_attributes:
-            assert set(output_attributes) <= set(self.attributeNames())
+            assert set(output_attributes) <= set(self.attributeNames())
             output_dataset.setAttributes(output_attributes,
-                                         self._names2attributes(output_attributes,return_copy=True))
+                                         self.names2attributes(output_attributes,return_copy=True))
         if test_stats_collector:
             test_stats_collector.update(output_dataset)
-            output_dataset.setAttributes(test_stats_collector.attributeNames(),
-                                         test_stats_collector.attributes())
+            if put_stats_in_output_dataset:
+                output_dataset.setAttributes(test_stats_collector.attributeNames(),
+                                             test_stats_collector.attributes())
         return output_dataset
 
-    def update_start(self): pass
-    def update_end(self): pass
-    def update_minibatch(self,minibatch):
+
+class MinibatchUpdatesTLearner(TLearner):
+    """
+    This adds to TLearner:
+      - updateStart(), updateEnd(), updateMinibatch(minibatch), isLastEpoch():
+        functions executed at the beginning, the end, in the middle
+        (for each minibatch) of the update method, and at the end
+        of each epoch. This model only
+        works for 'online' or one-shot learning that requires
+        going only once through the training data. For more complicated
+        models, more specialized subclasses of TLearner should be used
+        or a learning-algorithm specific update method should be defined.
+
+      - a 'parameters' attribute which is a list of parameters (whose names are
+        specified by the user's subclass with the parameterAttributes() method)
+
+    """
+
+    def __init__(self):
+        TLearner.__init__(self)
+        self.update_minibatch_function = Function(self.names2OpResults(self.updateMinibatchOutputAttributes()+
+                                                                       self.updateMinibatchInputFields()),
+                                                  self.names2OpResults(self.updateMinibatchOutputAttributes()))
+        self.update_end_function = Function(self.names2OpResults(self.updateEndInputAttributes()),
+                                            self.names2OpResults(self.updateEndOutputAttributes()))
+
+    def updateMinibatchInputFields(self):
+        raise AbstractFunction()
+
+    def updateMinibatchInputAttributes(self):
         raise AbstractFunction()
 
+    def updateMinibatchOutputAttributes(self):
+        raise AbstractFunction()
+
+    def updateEndInputAttributes(self):
+        raise AbstractFunction()
+
+    def updateEndOutputAttributes(self):
+        raise AbstractFunction()
+
+    def parameterAttributes(self):
+        raise AbstractFunction()
+
+    def updateStart(self): pass
+
+    def updateEnd(self):
+        self.setAttributes(self.updateEndOutputAttributes(),
+                           self.update_end_function(self.names2attributes(self.updateEndInputAttributes())))
+        self.parameters = self.names2attributes(self.parameterAttributes())
+
+    def updateMinibatch(self,minibatch):
+        # make sure all required fields are allocated and initialized
+        self.allocate(minibatch)
+        self.setAttributes(self.updateMinibatchOutputAttributes(),
+                           self.update_minibatch_function(*(self.names2attributes(self.updateMinibatchInputAttributes())
+                                                            + minibatch(self.updateMinibatchInputFields()))))
+
+    def isLastEpoch(self):
+        """
+        This method is called at the end of each epoch (cycling over the training set).
+        It returns a boolean to indicate if this is the last epoch.
+        By default just do one epoch.
+        """
+        return True
+
     def update(self,training_set,train_stats_collector=None):
-
-        self.update_start()
-        for minibatch in training_set.minibatches(self.training_set_input_fields,
-                                                  minibatch_size=self.minibatch_size):
-            self.update_minibatch(minibatch)
+        """
+        @todo check if some of the learner attributes are actually SPECIFIED
+        as attributes of the training_set.
+        """
+        self.updateStart(training_set)
+        stop=False
+        while not stop:
             if train_stats_collector:
-                minibatch_set = minibatch.examples()
-                minibatch_set.setAttributes(self.attributeNames(),self.attributes())
-                train_stats_collector.update(minibatch_set)
-        self.update_end()
+                train_stats_collector.forget() # restart stats collecting at the beginning of each epoch
+            for minibatch in training_set.minibatches(self.training_set_input_fields,
                                                       minibatch_size=self.minibatch_size):
+                self.updateMinibatch(minibatch)
+                if train_stats_collector:
+                    minibatch_set = minibatch.examples()
+                    minibatch_set.setAttributes(self.attributeNames(),self.attributes())
+                    train_stats_collector.update(minibatch_set)
+            stop = self.isLastEpoch()
+        self.updateEnd()
         return self.use
 
+
+class OnlineGradientBasedTLearner(MinibatchUpdatesTLearner):
+    """
+    Specialization of MinibatchUpdatesTLearner in which the minibatch updates
+    are obtained by performing an online (minibatch-based) gradient step.
+
+    Sub-classes must define the following methods:
+
+    """
+    def __init__(self,truly_online=False):
+        """
+        If truly_online then only one pass is made through the training set passed to update().
+
+        """
+        self.truly_online=truly_online
+
+    def isLastEpoch(self):
+        return self.truly_online
+
+
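The control flow that MinibatchUpdatesTLearner.update() adds (epochs of minibatches, allocation on the first minibatch, stopping when isLastEpoch() returns True) can be illustrated without Theano or the DataSet machinery. The following is only a stand-alone sketch with hypothetical stand-ins (ToyMinibatchLearner, plain lists as minibatches), not code from this changeset:

    class ToyMinibatchLearner:
        def __init__(self):
            self.n_seen = 0                      # plays the role of an exported attribute
        def updateMinibatch(self, minibatch):
            self.n_seen += len(minibatch)        # one "update" per minibatch
        def isLastEpoch(self):
            return True                          # default behaviour: a single epoch
        def update(self, training_set, minibatch_size=2):
            stop = False
            while not stop:
                for start in range(0, len(training_set), minibatch_size):
                    self.updateMinibatch(training_set[start:start + minibatch_size])
                stop = self.isLastEpoch()

    learner = ToyMinibatchLearner()
    learner.update(list(range(5)))
    print(learner.n_seen)                        # -> 5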
diff -r 5bfcb7e5df4a -r b6bc1e769b36 linear_regression.py
--- a/linear_regression.py	Wed May 07 12:12:43 2008 -0400
+++ b/linear_regression.py	Wed May 07 12:12:48 2008 -0400
@@ -1,12 +1,11 @@
 
 from learner import *
 from theano import tensor as t
-from compile import Function
 from theano.scalar import as_scalar
 
 # this is one of the simplest example of learner, and illustrates
 # the use of theano
-class LinearRegression(Learner):
+class LinearRegression(MinibatchUpdatesTLearner):
     """
     Implement linear regression, with or without L2 regularization
     (the former is called Ridge Regression and the latter Ordinary Least Squares).
@@ -18,14 +17,13 @@
     of all the training sets passed to update since construction or since
     the last call to forget).
 
-    The L2 regularization coefficient is obtained analytically.
     For each (input[t],output[t]) pair in a minibatch,::
 
        output_t = b + W * input_t
 
     where b and W are obtained by minimizing::
 
-       lambda sum_{ij} W_{ij}^2  + sum_t ||output_t - target_t||^2
+       L2_regularizer sum_{ij} W_{ij}^2  + sum_t ||output_t - target_t||^2
 
     Let X be the whole training set inputs matrix (one input example per row),
     with the first column full of 1's, and Let Y the whole training set
@@ -36,7 +34,7 @@
        XtX * theta[:,i] = XtY[:,i]
 
     where XtX is a (n_inputs+1)x(n_inputs+1) matrix containing X'*X
-    plus lambda on the diagonal except at (0,0),
+    plus L2_regularizer on the diagonal except at (0,0),
     and XtY is a (n_inputs+1)*n_outputs matrix containing X'*Y.
 
     The fields and attributes expected and produced by use and update are the following:
@@ -48,123 +46,95 @@
       - 'output' (optionally produced by use as an output dataset field)
       - 'squared_error' (optionally produced by use as an output dataset field, needs 'target')
        = example-wise squared error
 
-    - optional input attributes (optionally expected as input_dataset attributes)
-
     - optional attributes (optionally expected as input_dataset attributes)
      (warning, this may be dangerous, the 'use' method will use those provided in the
      input_dataset rather than those learned during 'update'; currently no support
      for providing these to update):
 
-      - 'lambda'
+      - 'L2_regularizer'
       - 'b'
-      - 'W'
-      - 'regularization_term'
+      - 'W'
+      - 'parameters' = [b, W]
+      - 'regularization_term'
      - 'XtX'
      - 'XtY'
+
     """
 
     def attributeNames(self):
-        return ["lambda","b","W","regularization_term","XtX","XtY"]
-
-# definitions specifiques a la regression lineaire:
+        return ["L2_regularizer","parameters","b","W","regularization_term","XtX","XtY"]
 
+    def useInputAttributes(self):
+        return ["b","W"]
 
-    def global_inputs(self):
-        self.lambda = as_scalar(0.,'lambda')
-        self.theta = t.matrix('theta')
-        self.W = self.theta[:,1:]
-        self.b = self.theta[:,0]
-        self.XtX = t.matrix('XtX')
-        self.XtY = t.matrix('XtY')
+    def useOutputAttributes(self):
+        return []
 
-    def global_outputs(self):
-        self.regularizer = self.lambda * t.dot(self.W,self.W)
-        self.loss = self.regularizer + t.sum(self.squared_error) # this only makes sense if the whole training set fits in memory in a minibatch
-        self.loss_function = Function([self.W,self.lambda,self.squared_error],[self.loss])
+    def updateInputAttributes(self):
+        return ["L2_regularizer","XtX","XtY"]
 
-    def initialize(self):
-        self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
-        self.XtY.resize((1+self.n_inputs,self.n_outputs))
-        self.XtX.data[:,:]=0
-        self.XtY.data[:,:]=0
-        numpy.diag(self.XtX.data)[1:]=self.lambda.data
-
-    def updated_variables(self):
-        self.new_XtX = self.XtX + t.dot(self.extended_input.T,self.extended_input)
-        self.new_XtY = self.XtY + t.dot(self.extended_input.T,self.target)
-        self.new_theta = t.solve(self.XtX,self.XtY)
+    def updateMinibatchInputFields(self):
+        return ["input","target"]
+
+    def updateMinibatchInputAttributes(self):
+        return ["XtX","XtY"]
+
+    def updateMinibatchOutputAttributes(self):
+        return ["new_XtX","new_XtY"]
 
-    def minibatch_wise_inputs(self):
-        self.input = t.matrix('input') # n_examples x n_inputs
-        self.target = t.matrix('target') # n_examples x n_outputs
-
-    def minibatch_wise_outputs(self):
-        # self.input is a (n_examples, n_inputs) minibatch matrix
-        self.extended_input = t.prepend_one_to_each_row(self.input)
-        self.output = t.dot(self.input,self.W.T) + self.b  # (n_examples , n_outputs) matrix
-        self.squared_error = t.sum_within_rows(t.sqr(self.output-self.target)) # (n_examples ) vector
+    def updateEndInputAttributes(self):
+        return ["theta","XtX","XtY"]
 
-    def attributeNames(self):
-        return ["lambda","b","W","regularization_term","XtX","XtY"]
+    def updateEndOutputAttributes(self):
+        return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
 
+    def parameterAttributes(self):
+        return ["b","W"]
+
     def defaultOutputFields(self, input_fields):
         output_fields = ["output"]
         if "target" in input_fields:
             output_fields.append("squared_error")
         return output_fields
 
-    # poutine generale basee sur ces fonctions
+    def __init__(self):
+        self._input = t.matrix('input') # n_examples x n_inputs
+        self._target = t.matrix('target') # n_examples x n_outputs
+        self._L2_regularizer = as_scalar(0.,'L2_regularizer')
+        self._theta = t.matrix('theta')
+        self._W = self._theta[:,1:]
+        self._b = self._theta[:,0]
+        self._XtX = t.matrix('XtX')
+        self._XtY = t.matrix('XtY')
+        self._extended_input = t.prepend_one_to_each_row(self._input)
+        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
+        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
+        self._regularizer = self._L2_regularizer * t.dot(self._W,self._W)
+        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
+        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
+        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
 
-    def __init__(self,lambda=0.,max_memory_use=500):
-        """
-        @type lambda: float
-        @param lambda: regularization coefficient
-        """
-
-        W=t.matrix('W')
-        # b is a broadcastable row vector (can be replicated into
-        # as many rows as there are examples in the minibach)
-        b=t.row('b')
-        minibatch_input = t.matrix('input')  # n_examples x n_inputs
-        minibatch_target = t.matrix('target') # n_examples x n_outputs
-        minibatch_output = t.dot(minibatch_input,W.T) + b  # n_examples x n_outputs
-        lambda = as_scalar(lambda)
-        regularizer = self.lambda * t.dot(W,W)
-        example_squared_error = t.sum_within_rows(t.sqr(minibatch_output-minibatch_target))
-        self.output_function = Function([W,b,minibatch_input],[minibatch_output])
-        self.squared_error_function = Function([minibatch_output,minibatch_target],[self.example_squared_error])
-        self.loss_function = Function([W,squared_error],[self.regularizer + t.sum(self.example_squared_error)])
-        self.W=None
-        self.b=None
-        self.XtX=None
-        self.XtY=None
-
+        OneShotTLearner.__init__(self)
+
+    def allocate(self,minibatch):
+        minibatch_n_inputs  = minibatch["input"].shape[1]
+        minibatch_n_outputs = minibatch["target"].shape[1]
+        if not self._n_inputs:
+            self._n_inputs = minibatch_n_inputs
+            self._n_outputs = minibatch_n_outputs
+            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
+            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
+            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self.forget()
+        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
+            # if the input or target changes dimension on the fly, we resize and forget everything
+            self.forget()
+
     def forget(self):
-        if self.W:
-            self.XtX *= 0
-            self.XtY *= 0
+        if self._n_inputs and self._n_outputs:
+            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
+            self.XtY.resize((1+self.n_inputs,self.n_outputs))
+            self.XtX.data[:,:]=0
+            self.XtY.data[:,:]=0
+            numpy.diag(self.XtX.data)[1:]=self.L2_regularizer
 
-    def use(self,input_dataset,output_fieldnames=None,copy_inputs=True):
-        input_fieldnames = input_dataset.fieldNames()
-        assert "input" in input_fieldnames
-        if not output_fields:
-            output_fields = ["output"]
-            if "target" in input_fieldnames:
-                output_fields += ["squared_error"]
-        else:
-            if "squared_error" in output_fields or "total_loss" in output_fields:
-                assert "target" in input_fieldnames
-
-        use_functions = []
-        for output_fieldname in output_fieldnames:
-            if output_fieldname=="output":
-                use_functions.append(self.output_function)
-            elif output_fieldname=="squared_error":
-                use_functions.append(lambda self.output_function)
-
-        n_examples = len(input_dataset)
-
-        for minibatch in input_dataset.minibatches(minibatch_size=minibatch_size, allow_odd_last_minibatch=True):
-            use_function(
-
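The update rule documented in the LinearRegression docstring (accumulate XtX and XtY over minibatches, then solve XtX * theta[:,i] = XtY[:,i], with L2_regularizer added on the diagonal except at (0,0)) can be checked with plain numpy. This is only an illustrative stand-alone sketch of the math, not the Theano graph built in __init__; ridge_fit is a hypothetical helper:

    import numpy

    def ridge_fit(inputs, targets, L2_regularizer=0.1):
        n, d = inputs.shape
        X = numpy.hstack([numpy.ones((n, 1)), inputs])        # prepend a column of 1's
        XtX = numpy.dot(X.T, X)
        # add the regularizer on the diagonal, but not at (0,0) (the bias is not regularized)
        XtX[numpy.arange(1, d + 1), numpy.arange(1, d + 1)] += L2_regularizer
        XtY = numpy.dot(X.T, targets)
        theta = numpy.linalg.solve(XtX, XtY).T                 # one row [b, W] per output
        return theta[:, 0], theta[:, 1:]                       # b, W

    b, W = ridge_fit(numpy.random.randn(20, 3), numpy.random.randn(20, 2))
    print(b.shape, W.shape)                                     # -> (2,) (2, 3)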
diff -r 5bfcb7e5df4a -r b6bc1e769b36 lookup_list.py
--- a/lookup_list.py	Wed May 07 12:12:43 2008 -0400
+++ b/lookup_list.py	Wed May 07 12:12:48 2008 -0400
@@ -9,6 +9,7 @@
     following syntactic constructions work as one would expect:
        example = LookupList(['x','y','z'],[1,2,3])
        example['x'] = [1, 2, 3] # set or change a field
+       print example('z','y') # prints [3,2]
        x, y, z = example
        x = example[0]
        x = example["x"]
@@ -88,7 +89,6 @@
             new_example.append_keyval(item[0],item[1])
         return new_example
 
-
     def __eq__(self, other):
         return self._values==other._values and self._name2index==other._name2index and self._names==other._names
 
@@ -97,3 +97,9 @@
 
     def __hash__():
         raise NotImplementedError()
+
+    def __call__(self,*names):
+        """
+        Return a list of values associated with the given names (which must all be keys of the lookup list).
+        """
+        return [self[name] for name in names]
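The __call__ method added to LookupList selects several values by name at once. A minimal stand-in (MiniLookupList is hypothetical, not the real LookupList class) showing the behaviour of the docstring example:

    class MiniLookupList:
        def __init__(self, names, values):
            self._data = dict(zip(names, values))
        def __getitem__(self, name):
            return self._data[name]
        def __call__(self, *names):
            # same behaviour as the __call__ added by this patch
            return [self[name] for name in names]

    example = MiniLookupList(['x', 'y', 'z'], [1, 2, 3])
    print(example('z', 'y'))   # -> [3, 2]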
diff -r 5bfcb7e5df4a -r b6bc1e769b36 mlp.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mlp.py	Wed May 07 12:12:48 2008 -0400
@@ -0,0 +1,276 @@
+
+from learner import *
+from theano import tensor as t
+from theano.scalar import as_scalar
+
+# this is one of the simplest examples of learner, and illustrates
+# the use of theano
+
+
+class OneHiddenLayerNNetClassifier(MinibatchUpdatesTLearner):
+    """
+    Implement a straightforward classical feedforward
+    one-hidden-layer neural net, with L2 regularization.
+
+    The predictor parameters are obtained by minibatch/online gradient descent.
+    Training can proceed sequentially (with multiple calls to update with
+    different disjoint subsets of the training sets).
+
+    Hyper-parameters:
+      - L2_regularizer
+      - learning_rate
+      - n_hidden
+
+    For each (input_t,output_t) pair in a minibatch,::
+
+       output_activations_t = b2+W2*tanh(b1+W1*input_t)
+       output_t = softmax(output_activations_t)
+       output_class_t = argmax(output_activations_t)
+       class_error_t = 1_{output_class_t != target_t}
+       nll_t = -log(output_t[target_t])
+
+    and the training criterion is::
+
+       loss = L2_regularizer*(||W1||^2 + ||W2||^2) + sum_t nll_t
+
+    The parameters are [b1,W1,b2,W2] and are obtained by minimizing the loss by
+    stochastic minibatch gradient descent::
+
+       parameters[i] -= learning_rate * dloss/dparameters[i]
+
+    The fields and attributes expected and produced by use and update are the following:
+
+    - Input and output fields (example-wise quantities):
+
+      - 'input' (always expected by use and update)
+      - 'target' (optionally expected by use and always by update)
+      - 'output' (optionally produced by use)
+      - 'output_class' (optionally produced by use)
+      - 'class_error' (optionally produced by use)
+      - 'nll' (optionally produced by use)
+
+    - optional attributes (optionally expected as input_dataset attributes)
+      (warning, this may be dangerous, the 'use' method will use those provided in the
+      input_dataset rather than those learned during 'update'; currently no support
+      for providing these to update):
+
+      - 'L2_regularizer'
+      - 'b1'
+      - 'W1'
+      - 'b2'
+      - 'W2'
+      - 'parameters' = [b1, W1, b2, W2]
+      - 'regularization_term'
+
+    """
+
+    def attributeNames(self):
+        return ["parameters","b1","W1","b2","W2", "L2_regularizer","regularization_term"]
+
+    def parameterAttributes(self):
+        return ["b1","W1", "b2", "W2"]
+
+    def useInputAttributes(self):
+        return self.parameterAttributes()
+
+    def useOutputAttributes(self):
+        return []
+
+    def updateInputAttributes(self):
+        return self.parameterAttributes() + ["L2_regularizer"]
+
+    def updateMinibatchInputFields(self):
+        return ["input","target"]
+
+    def updateMinibatchInputAttributes(self):
+        return self.parameterAttributes()
+
+    def updateMinibatchOutputAttributes(self):
+        return self.parameterAttributes()
+
+    def updateEndInputAttributes(self):
+        return self.parameterAttributes()
+
+    def updateEndOutputAttributes(self):
+        return ["regularization_term"]
+
+    def defaultOutputFields(self, input_fields):
+        output_fields = ["output", "output_class",]
+        if "target" in input_fields:
+            output_fields += ["class_error", "nll"]
+        return output_fields
+
+    def __init__(self):
+        self._input = t.matrix('input') # n_examples x n_inputs
+        self._target = t.matrix('target') # n_examples x n_outputs
+        self._lambda = as_scalar(0.,'lambda')
+        self._theta = t.matrix('theta')
+        self._W = self._theta[:,1:]
+        self._b = self._theta[:,0]
+        self._XtX = t.matrix('XtX')
+        self._XtY = t.matrix('XtY')
+        self._extended_input = t.prepend_one_to_each_row(self._input)
+        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
+        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
+        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
+        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
+        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
+
+        OneShotTLearner.__init__(self)
+
+    def allocate(self,minibatch):
+        minibatch_n_inputs  = minibatch["input"].shape[1]
+        minibatch_n_outputs = minibatch["target"].shape[1]
+        if not self._n_inputs:
+            self._n_inputs = minibatch_n_inputs
+            self._n_outputs = minibatch_n_outputs
+            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
+            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
+            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self.forget()
+        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
+            # if the input or target changes dimension on the fly, we resize and forget everything
+            self.forget()
+
+    def forget(self):
+        if self._n_inputs and self._n_outputs:
+            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
+            self.XtY.resize((1+self.n_inputs,self.n_outputs))
+            self.XtX.data[:,:]=0
+            self.XtY.data[:,:]=0
+            numpy.diag(self.XtX.data)[1:]=self.lambda
+
+
+class MLP(MinibatchUpdatesTLearner):
+    """
+    Implement a feedforward multi-layer perceptron, with or without L1 and/or L2 regularization.
+
+    The predictor parameters are obtained by minibatch/online gradient descent.
+    Training can proceed sequentially (with multiple calls to update with
+    different disjoint subsets of the training sets).
+
+    Hyper-parameters:
+      - L1_regularizer
+      - L2_regularizer
+      - neuron_sparsity_regularizer
+      - initial_learning_rate
+      - learning_rate_decrease_rate
+      - n_hidden_per_layer (a list of integers)
+      - activation_function ("sigmoid","tanh", or "ratio")
+
+    The output/task type (classification, regression, etc.) is obtained by specializing MLP.
+
+    For each (input[t],output[t]) pair in a minibatch,::
+
+       activation[0] = input_t
+       for k=1 to n_hidden_layers:
+          activation[k]=activation_function(b[k]+ W[k]*activation[k-1])
+       output_t = output_activation_function(b[n_hidden_layers+1]+W[n_hidden_layers+1]*activation[n_hidden_layers])
+
+    and the b and W are obtained by minimizing the following by stochastic minibatch gradient descent::
+
+       L2_regularizer sum_{ijk} W_{kij}^2  + L1_regularizer sum_{kij} |W_{kij}|
+        + neuron_sparsity_regularizer sum_{ki} |b_{ki} + infinity|
+        - sum_t log P_{output_model}(target_t | output_t)
+
+    The fields and attributes expected and produced by use and update are the following:
+
+    - Input and output fields (example-wise quantities):
+
+      - 'input' (always expected by use and update)
+      - 'target' (optionally expected by use and always by update)
+      - 'output' (optionally produced by use)
+      - error fields produced by sub-class of MLP
+
+    - optional attributes (optionally expected as input_dataset attributes)
+      (warning, this may be dangerous, the 'use' method will use those provided in the
+      input_dataset rather than those learned during 'update'; currently no support
+      for providing these to update):
+
+      - 'L1_regularizer'
+      - 'L2_regularizer'
+      - 'b'
+      - 'W'
+      - 'parameters' = [b[1], W[1], b[2], W[2], ...]
+      - 'regularization_term'
+
+    """
+
+    def attributeNames(self):
+        return ["parameters","b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer","regularization_term"]
+
+    def useInputAttributes(self):
+        return ["b","W"]
+
+    def useOutputAttributes(self):
+        return []
+
+    def updateInputAttributes(self):
+        return ["b","W","L1_regularizer","L2_regularizer","neuron_sparsity_regularizer"]
+
+    def updateMinibatchInputFields(self):
+        return ["input","target"]
+
+    def updateMinibatchInputAttributes(self):
+        return ["b","W"]
+
+    def updateMinibatchOutputAttributes(self):
+        return ["new_XtX","new_XtY"]
+
+    def updateEndInputAttributes(self):
+        return ["theta","XtX","XtY"]
+
+    def updateEndOutputAttributes(self):
+        return ["new_theta","b","W","regularization_term"] # CHECK: WILL b AND W CONTAIN OLD OR NEW THETA? @todo i.e. order of computation = ?
+
+    def parameterAttributes(self):
+        return ["b","W"]
+
+    def defaultOutputFields(self, input_fields):
+        output_fields = ["output"]
+        if "target" in input_fields:
+            output_fields.append("squared_error")
+        return output_fields
+
+    def __init__(self):
+        self._input = t.matrix('input') # n_examples x n_inputs
+        self._target = t.matrix('target') # n_examples x n_outputs
+        self._lambda = as_scalar(0.,'lambda')
+        self._theta = t.matrix('theta')
+        self._W = self._theta[:,1:]
+        self._b = self._theta[:,0]
+        self._XtX = t.matrix('XtX')
+        self._XtY = t.matrix('XtY')
+        self._extended_input = t.prepend_one_to_each_row(self._input)
+        self._output = t.dot(self._input,self._W.T) + self._b  # (n_examples , n_outputs) matrix
+        self._squared_error = t.sum_within_rows(t.sqr(self._output-self._target)) # (n_examples ) vector
+        self._regularizer = self._lambda * t.dot(self._W,self._W)
+        self._new_XtX = add_inplace(self._XtX,t.dot(self._extended_input.T,self._extended_input))
+        self._new_XtY = add_inplace(self._XtY,t.dot(self._extended_input.T,self._target))
+        self._new_theta = t.solve_inplace(self._theta,self._XtX,self._XtY)
+
+        OneShotTLearner.__init__(self)
+
+    def allocate(self,minibatch):
+        minibatch_n_inputs  = minibatch["input"].shape[1]
+        minibatch_n_outputs = minibatch["target"].shape[1]
+        if not self._n_inputs:
+            self._n_inputs = minibatch_n_inputs
+            self._n_outputs = minibatch_n_outputs
+            self.XtX = numpy.zeros((1+self._n_inputs,1+self._n_inputs))
+            self.XtY = numpy.zeros((1+self._n_inputs,self._n_outputs))
+            self.theta = numpy.zeros((self._n_outputs,1+self._n_inputs))
+            self.forget()
+        elif self._n_inputs!=minibatch_n_inputs or self._n_outputs!=minibatch_n_outputs:
+            # if the input or target changes dimension on the fly, we resize and forget everything
+            self.forget()
+
+    def forget(self):
+        if self._n_inputs and self._n_outputs:
+            self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
+            self.XtY.resize((1+self.n_inputs,self.n_outputs))
+            self.XtX.data[:,:]=0
+            self.XtY.data[:,:]=0
+            numpy.diag(self.XtX.data)[1:]=self.lambda
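The forward pass and training criterion in the OneHiddenLayerNNetClassifier docstring can be written directly in numpy. The sketch below only illustrates those equations (tanh hidden layer, softmax output, nll_t = -log(output_t[target_t])); it is a stand-alone example with made-up parameter shapes, not the Theano graph the class is meant to build:

    import numpy

    def forward(x, b1, W1, b2, W2):
        h = numpy.tanh(b1 + numpy.dot(W1, x))            # hidden layer activations
        a = b2 + numpy.dot(W2, h)                        # output activations
        p = numpy.exp(a - a.max())                       # softmax (shifted for stability)
        return p / p.sum()

    def nll(p, target):
        return -numpy.log(p[target])                     # nll_t = -log(output_t[target_t])

    rng = numpy.random.RandomState(0)
    n_in, n_hid, n_out = 4, 3, 2
    b1, W1 = numpy.zeros(n_hid), rng.randn(n_hid, n_in)
    b2, W2 = numpy.zeros(n_out), rng.randn(n_out, n_hid)
    p = forward(rng.randn(n_in), b1, W1, b2, W2)
    print(p.sum(), nll(p, target=1))                     # probabilities sum to 1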