pylearn: learner.py comparison

comparison learner.py @ 92:c4726e19b8ec

Finished first draft of TLearner

author	Yoshua Bengio <bengioy@iro.umontreal.ca>
date	Mon, 05 May 2008 18:14:32 -0400
parents	3499918faa9d
children	c4916445e025

comparison

equal deleted inserted replaced

-:aa9e786ee849
+:c4726e19b8ec
 Optionally, if copy_inputs, the input fields (of the input_dataset) can be made
 visible in the output DataSet returned by this method.
 """
 raise NotImplementedError
-def attribute_names(self):
def attributeNames(self):
    """
    Names of the attributes that this Learner exports to other objects.

    Sub-classes that want such export to be automated should override this
    method and return the attribute names as a list of strings. This default
    implementation exports nothing.
    """
    exported_names = []
    return exported_names
 The sub-class constructor defines the relations between
 the Theano variables that may be used by 'use' and 'update'
 or by a stats collector.
 - defaultOutputFields(input_fields): return a list of default dataset output fields when
 None are provided by the caller of use.
--
+- update_start(), update_end(), update_minibatch(minibatch): functions
+executed at the beginning, the end, and in the middle
+(for each minibatch) of the update method. This model only
+works for 'online' or one-shot learning that requires
+going only once through the training data. For more complicated
+models, more specialized subclasses of TLearner should be used
+or a learning-algorithm specific update method should be defined.
+The following naming convention is assumed and important.
+Attributes whose names are listed in attributeNames() can be of any type,
+but those that can be referenced as input/output dataset fields or as
+output attributes in 'use' or as input attributes in the stats collector
+should be associated with a Theano Result variable. If the exported attribute
+name is <name>, the corresponding Result name (an internal attribute of
+the TLearner, created in the sub-class constructor) should be _<name>.
+Typically <name> will be a numpy ndarray and _<name> will be the corresponding
+Theano Tensor (for symbolic manipulation).
 """
def __init__(self):
    """
    Initialise the generic Learner state and make sure the cache of compiled
    'use' functions exists.
    """
    Learner.__init__(self)
    # _minibatchwise_use_functions reads and fills use_functions_dictionary,
    # so it must exist before the first call to use(). It is only created when
    # absent, in case Learner.__init__ or a sub-class already provided one.
    if not hasattr(self, 'use_functions_dictionary'):
        self.use_functions_dictionary = {}
def _minibatchwise_use_functions(self, input_fields, output_fields, stats_collector):
    """
    Private helper function called by the generic TLearner.use. It returns a function
    that can map the given input fields to the given output fields (along with the
    attributes that the stats collector needs for its computation).

    input_fields: names of the fields available as inputs.
    output_fields: names of the wanted output fields; when empty or None,
        self.defaultOutputFields(input_fields) is used instead.
    stats_collector: optional object whose inputUpdateAttributes() names are
        added to the outputs unless they are already inputs or outputs.

    The function built for a given (inputs, outputs) combination is cached in
    self.use_functions_dictionary and reused on later calls.
    """
    if not output_fields:
        output_fields = self.defaultOutputFields(input_fields)
    # Work on a private copy so that the caller's list (or the list returned by
    # defaultOutputFields) is not modified by the appends below.
    output_fields = list(output_fields)
    if stats_collector:
        for attribute in stats_collector.inputUpdateAttributes():
            if attribute not in input_fields and attribute not in output_fields:
                output_fields.append(attribute)
    # Lists are not hashable, so the cache key is built from tuples.
    key = (tuple(input_fields), tuple(output_fields))
    if key not in self.use_functions_dictionary:
        # NOTE(review): the Result objects are requested here on the assumption
        # that Function is built from symbolic variables rather than from their
        # current values -- confirm against the Function implementation in use.
        self.use_functions_dictionary[key] = Function(
            self._names2attributes(input_fields, return_Result=True),
            self._names2attributes(output_fields, return_Result=True))
    return self.use_functions_dictionary[key]
def attributes(self, return_copy=False):
    """
    Return a list with the values of the learner's attributes (or optionally, a deep copy).

    The attributes are those named by self.attributeNames(), in that order.
    """
    # The helper is the private _names2attributes; return_copy is forwarded so
    # that the deep-copy option promised above is actually honoured.
    return self._names2attributes(self.attributeNames(), return_copy=return_copy)
def _names2attributes(self, names, return_Result=False, return_copy=False):
    """
    Private helper function that maps a list of attribute names to a list
    of (optionally copies) values or of the Result objects that own these values.

    names: attribute names to look up on self.
    return_Result: if true, return the looked-up objects themselves; otherwise
        return their .data member.
    return_copy: if true, each returned item is a copy.deepcopy of the original.
    """
    # getattr() performs the normal attribute lookup. Calling self.__getattr__
    # directly is wrong: that hook is only a fallback for failed lookups and
    # does not exist at all on classes that do not define it.
    # NOTE(review): the TLearner class docstring says the Result associated with
    # <name> is stored as _<name>, whereas this helper looks up <name> itself --
    # confirm which attribute is intended.
    found = [getattr(self, name) for name in names]
    if not return_Result:
        found = [result.data for result in found]
    if return_copy:
        found = [copy.deepcopy(item) for item in found]
    return found
def use(self, input_dataset, output_fieldnames=None, output_attributes=None,
        test_stats_collector=None, copy_inputs=True):
    """
    The learner tries to compute in the output dataset the output fields specified
    by output_fieldnames, and returns that dataset.

    input_dataset: dataset whose fields (input_dataset.fieldNames()) are the inputs.
    output_fieldnames: names of the wanted output fields (may be None).
    output_attributes: optional names, which must all be in self.attributeNames(),
        of learner attributes whose copied values are set on the output dataset.
    test_stats_collector: optional collector that is updated with the output
        dataset and whose attributes are then also set on the output dataset.
    copy_inputs: if true, the result is input_dataset | output_dataset.
    """
    # The helper is a method, so it has to be reached through self.
    minibatchwise_use_function = self._minibatchwise_use_functions(
        input_dataset.fieldNames(),
        output_fieldnames,
        test_stats_collector)
    virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
                                                  minibatchwise_use_function,
                                                  True, DataSet.numpy_vstack,
                                                  DataSet.numpy_hstack)
    # actually force the computation
    output_dataset = CachedDataSet(virtual_output_dataset, True)
    if copy_inputs:
        output_dataset = input_dataset | output_dataset
    # copy the wanted attributes in the dataset
    if output_attributes:
        assert set(output_attributes) <= set(self.attributeNames())
        output_dataset.setAttributes(output_attributes,
                                     self._names2attributes(output_attributes, return_copy=True))
    if test_stats_collector:
        test_stats_collector.update(output_dataset)
        output_dataset.setAttributes(test_stats_collector.attributeNames(),
                                     test_stats_collector.attributes())
    return output_dataset
def update_start(self):
    """Hook called by update() before the first minibatch; does nothing by default."""
    return None
def update_end(self):
    """Hook called by update() after the last minibatch; does nothing by default."""
    return None
def update_minibatch(self, minibatch):
    """
    Hook called by update() once for each minibatch.

    This base version always raises AbstractFunction, so a sub-class has to
    provide the actual per-minibatch learning step.
    """
    not_implemented = AbstractFunction()
    raise not_implemented
def update(self, training_set, train_stats_collector=None):
    """
    Go once through training_set, minibatch by minibatch, and return self.use.

    update_start() is called first, then update_minibatch() on every minibatch
    of self.training_set_input_fields (of size self.minibatch_size), then
    update_end(). When a train_stats_collector is given, after each minibatch
    it is updated with that minibatch's examples, on which the learner's
    current attributes have been set.
    """
    self.update_start()
    batches = training_set.minibatches(self.training_set_input_fields,
                                       minibatch_size=self.minibatch_size)
    for minibatch in batches:
        self.update_minibatch(minibatch)
        if not train_stats_collector:
            continue
        minibatch_set = minibatch.examples()
        names = self.attributeNames()
        values = self.attributes()
        minibatch_set.setAttributes(names, values)
        train_stats_collector.update(minibatch_set)
    self.update_end()
    return self.use

Mercurial > pylearn

comparison learner.py @ 92:c4726e19b8ec