Mercurial > pylearn
comparison learner.py @ 128:ee5507af2c60
minor edits
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Wed, 07 May 2008 20:51:24 -0400 |
parents | 4efe6d36c061 |
children | 4c2280edcaf5 3d8e40e7ed18 |
comparison
equal
deleted
inserted
replaced
127:f959ad58facc | 128:ee5507af2c60 |
---|---|
45 and return the learned function. | 45 and return the learned function. |
46 """ | 46 """ |
47 self.forget() | 47 self.forget() |
48 return self.update(learning_task,train_stats_collector) | 48 return self.update(learning_task,train_stats_collector) |
49 | 49 |
50 def use(self,input_dataset,output_fields=None,copy_inputs=True): | 50 def use(self,input_dataset,output_fieldnames=None, |
51 """Once a Learner has been trained by one or more calls to 'update', it can | 51 test_stats_collector=None,copy_inputs=True, |
52 be used with one or more calls to 'use'. The argument is a DataSet (possibly | 52 put_stats_in_output_dataset=True, |
53 containing a single example) and the result is a DataSet of the same length. | 53 output_attributes=[]): |
54 If output_fields is specified, it may be used to indicate which fields should | 54 """ |
55 Once a Learner has been trained by one or more calls to 'update', it can | |
56 be used with one or more calls to 'use'. The argument is an input DataSet (possibly | |
57 containing a single example) and the result is an output DataSet of the same length. | |
58 If output_fieldnames is specified, it may be used to indicate which fields should | |
55 be constructed in the output DataSet (for example ['output','classification_error']). | 59 be constructed in the output DataSet (for example ['output','classification_error']). |
60 Otherwise, self.defaultOutputFields is called to choose the output fields. | |
56 Optionally, if copy_inputs, the input fields (of the input_dataset) can be made | 61 Optionally, if copy_inputs, the input fields (of the input_dataset) can be made |
57 visible in the output DataSet returned by this method. | 62 visible in the output DataSet returned by this method. |
58 """ | 63 Optionally, attributes of the learner can be copied in the output dataset, |
59 raise AbstractFunction() | 64 and statistics computed by the stats collector also put in the output dataset. |
60 | 65 Note the distinction between fields (which are example-wise quantities, e.g. 'input') |
66 and attributes (which are not, e.g. 'regularization_term'). | |
67 | |
68 We provide here a default implementation that does all this using | |
69 a sub-class defined method: minibatchwiseUseFunction. | |
70 | |
71 @todo check if some of the learner attributes are actually SPECIFIED | |
72 as attributes of the input_dataset, and if so use their values instead | |
73 of the ones in the learner. | |
74 | |
75 The learner tries to compute in the output dataset the output fields specified. | |
76 If None is specified then self.defaultOutputFields(input_dataset.fieldNames()) | |
77 is called to determine the output fields. | |
78 | |
79 Attributes of the learner can also optionally be copied into the output dataset. | |
80 If output_attributes is None then all of the attributes in self.AttributeNames() | |
81 are copied in the output dataset, but if it is [] (the default), then none are copied. | |
82 If a test_stats_collector is provided, then its attributes (test_stats_collector.AttributeNames()) | |
83 are also copied into the output dataset attributes. | |
84 """ | |
85 minibatchwise_use_function = self.minibatchwiseUseFunction(input_dataset.fieldNames(), | |
86 output_fieldnames, | |
87 test_stats_collector) | |
88 virtual_output_dataset = ApplyFunctionDataSet(input_dataset, | |
89 minibatchwise_use_function, | |
90 True,DataSet.numpy_vstack, | |
91 DataSet.numpy_hstack) | |
92 # actually force the computation | |
93 output_dataset = CachedDataSet(virtual_output_dataset,True) | |
94 if copy_inputs: | |
95 output_dataset = input_dataset | output_dataset | |
96 # copy the wanted attributes in the dataset | |
97 if output_attributes is None: | |
98 output_attributes = self.attributeNames() | |
99 if output_attributes: | |
100 assert set(attribute_names) <= set(self.attributeNames()) | |
101 output_dataset.setAttributes(output_attributes, | |
102 self.names2attributes(output_attributes,return_copy=True)) | |
103 if test_stats_collector: | |
104 test_stats_collector.update(output_dataset) | |
105 if put_stats_in_output_dataset: | |
106 output_dataset.setAttributes(test_stats_collector.attributeNames(), | |
107 test_stats_collector.attributes()) | |
108 return output_dataset | |
109 | |
110 def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector): | |
111 """ | |
112 Returns a function that can map the given input fields to the given output fields | |
113 and to the attributes that the stats collector needs for its computation. | |
114 That function is expected to operate on minibatches. | |
115 The function returned makes use of the self.useInputAttributes() and | |
116 sets the attributes specified by self.useOutputAttributes(). | |
117 """ | |
61 def attributeNames(self): | 118 def attributeNames(self): |
62 """ | 119 """ |
63 A Learner may have attributes that it wishes to export to other objects. To automate | 120 A Learner may have attributes that it wishes to export to other objects. To automate |
64 such export, sub-classes should define here the names (list of strings) of these attributes. | 121 such export, sub-classes should define here the names (list of strings) of these attributes. |
65 | 122 |
66 @todo By default, attributeNames looks for all dictionary entries whose name does not start with _. | 123 @todo By default, attributeNames looks for all dictionary entries whose name does not start with _. |
67 """ | 124 """ |
68 return [] | 125 return [] |
126 | |
127 def attributes(self,return_copy=False): | |
128 """ | |
129 Return a list with the values of the learner's attributes (or optionally, a deep copy). | |
130 """ | |
131 return self.names2attributes(self.attributeNames(),return_copy) | |
132 | |
133 def names2attributes(self,names,return_copy=False): | |
134 """ | |
135 Private helper function that maps a list of attribute names to a list | |
136 of (optionally copied) values of attributes. | |
137 """ | |
138 if return_copy: | |
139 return [copy.deepcopy(self.__getattr__(name).data) for name in names] | |
140 else: | |
141 return [self.__getattr__(name).data for name in names] | |
69 | 142 |
70 def updateInputAttributes(self): | 143 def updateInputAttributes(self): |
71 """ | 144 """ |
72 A subset of self.attributeNames() which are the names of attributes needed by update() in order | 145 A subset of self.attributeNames() which are the names of attributes needed by update() in order |
73 to do its work. | 146 to do its work. |
143 This may involve looking at the input_fields (names) available in the | 216 This may involve looking at the input_fields (names) available in the |
144 input_dataset. | 217 input_dataset. |
145 """ | 218 """ |
146 raise AbstractFunction() | 219 raise AbstractFunction() |
147 | 220 |
148 def allocate(self, minibatch): | 221 def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector): |
149 """ | 222 """ |
150 This function is called at the beginning of each updateMinibatch | 223 Implement minibatchwiseUseFunction by exploiting Theano compilation |
151 and should be used to check that all required attributes have been | 224 and the expression graph defined by a sub-class constructor. |
152 allocated and initialized (usually this function calls forget() | |
153 when it has to do an initialization). | |
154 """ | |
155 raise AbstractFunction() | |
156 | |
157 def minibatchwise_use_functions(self, input_fields, output_fields, stats_collector): | |
158 """ | |
159 Private helper function called by the generic TLearner.use. It returns a function | |
160 that can map the given input fields to the given output fields (along with the | |
161 attributes that the stats collector needs for its computation). The function | |
162 called also automatically makes use of the self.useInputAttributes() and | |
163 sets the self.useOutputAttributes(). | |
164 """ | 225 """ |
165 if not output_fields: | 226 if not output_fields: |
166 output_fields = self.defaultOutputFields(input_fields) | 227 output_fields = self.defaultOutputFields(input_fields) |
167 if stats_collector: | 228 if stats_collector: |
168 stats_collector_inputs = stats_collector.input2UpdateAttributes() | 229 stats_collector_inputs = stats_collector.input2UpdateAttributes() |
184 self.setAttributes(use_output_attributes,output_attribute_values) | 245 self.setAttributes(use_output_attributes,output_attribute_values) |
185 return output_field_values | 246 return output_field_values |
186 self.use_functions_dictionary[key]=f | 247 self.use_functions_dictionary[key]=f |
187 return self.use_functions_dictionary[key] | 248 return self.use_functions_dictionary[key] |
188 | 249 |
189 def attributes(self,return_copy=False): | |
190 """ | |
191 Return a list with the values of the learner's attributes (or optionally, a deep copy). | |
192 """ | |
193 return self.names2attributes(self.attributeNames(),return_copy) | |
194 | |
195 def names2attributes(self,names,return_copy=False): | |
196 """ | |
197 Private helper function that maps a list of attribute names to a list | |
198 of (optionally copied) values of attributes. | |
199 """ | |
200 if return_copy: | |
201 return [copy.deepcopy(self.__getattr__(name).data) for name in names] | |
202 else: | |
203 return [self.__getattr__(name).data for name in names] | |
204 | |
205 def names2OpResults(self,names): | 250 def names2OpResults(self,names): |
206 """ | 251 """ |
207 Private helper function that maps a list of attribute names to a list | 252 Private helper function that maps a list of attribute names to a list |
208 of corresponding Op Results (with the same name but with a '_' prefix). | 253 of corresponding Op Results (with the same name but with a '_' prefix). |
209 """ | 254 """ |
210 return [self.__getattr__('_'+name).data for name in names] | 255 return [self.__getattr__('_'+name).data for name in names] |
211 | |
212 def use(self,input_dataset,output_fieldnames=None,output_attributes=[], | |
213 test_stats_collector=None,copy_inputs=True, put_stats_in_output_dataset=True): | |
214 """ | |
215 The learner tries to compute in the output dataset the output fields specified | |
216 | |
217 @todo check if some of the learner attributes are actually SPECIFIED | |
218 as attributes of the input_dataset, and if so use their values instead | |
219 of the ones in the learner. | |
220 | |
221 The learner tries to compute in the output dataset the output fields specified. | |
222 If None is specified then self.defaultOutputFields(input_dataset.fieldNames()) | |
223 is called to determine the output fields. | |
224 | |
225 Attributes of the learner can also optionally be copied into the output dataset. | |
226 If output_attributes is None then all of the attributes in self.AttributeNames() | |
227 are copied in the output dataset, but if it is [] (the default), then none are copied. | |
228 If a test_stats_collector is provided, then its attributes (test_stats_collector.AttributeNames()) | |
229 are also copied into the output dataset attributes. | |
230 """ | |
231 minibatchwise_use_function = self.minibatchwise_use_functions(input_dataset.fieldNames(), | |
232 output_fieldnames, | |
233 test_stats_collector) | |
234 virtual_output_dataset = ApplyFunctionDataSet(input_dataset, | |
235 minibatchwise_use_function, | |
236 True,DataSet.numpy_vstack, | |
237 DataSet.numpy_hstack) | |
238 # actually force the computation | |
239 output_dataset = CachedDataSet(virtual_output_dataset,True) | |
240 if copy_inputs: | |
241 output_dataset = input_dataset | output_dataset | |
242 # copy the wanted attributes in the dataset | |
243 if output_attributes is None: | |
244 output_attributes = self.attributeNames() | |
245 if output_attributes: | |
246 assert set(attribute_names) <= set(self.attributeNames()) | |
247 output_dataset.setAttributes(output_attributes, | |
248 self.names2attributes(output_attributes,return_copy=True)) | |
249 if test_stats_collector: | |
250 test_stats_collector.update(output_dataset) | |
251 if put_stats_in_output_dataset: | |
252 output_dataset.setAttributes(test_stats_collector.attributeNames(), | |
253 test_stats_collector.attributes()) | |
254 return output_dataset | |
255 | 256 |
256 | 257 |
257 class MinibatchUpdatesTLearner(TLearner): | 258 class MinibatchUpdatesTLearner(TLearner): |
258 """ | 259 """ |
259 This adds to TLearner a | 260 This adds to TLearner a |
279 self.names2OpResults(self.updateMinibatchOutputAttributes())) | 280 self.names2OpResults(self.updateMinibatchOutputAttributes())) |
280 self.update_end_function = compile.function | 281 self.update_end_function = compile.function |
281 (self.names2OpResults(self.updateEndInputAttributes()), | 282 (self.names2OpResults(self.updateEndInputAttributes()), |
282 self.names2OpResults(self.updateEndOutputAttributes())) | 283 self.names2OpResults(self.updateEndOutputAttributes())) |
283 | 284 |
285 def allocate(self, minibatch): | |
286 """ | |
287 This function is called at the beginning of each updateMinibatch | |
288 and should be used to check that all required attributes have been | |
289 allocated and initialized (usually this function calls forget() | |
290 when it has to do an initialization). | |
291 """ | |
292 raise AbstractFunction() | |
293 | |
284 def updateMinibatchInputFields(self): | 294 def updateMinibatchInputFields(self): |
285 raise AbstractFunction() | 295 raise AbstractFunction() |
286 | 296 |
287 def updateMinibatchInputAttributes(self): | 297 def updateMinibatchInputAttributes(self): |
288 raise AbstractFunction() | 298 raise AbstractFunction() |