comparison linear_regression.py @ 92:c4726e19b8ec

Finished first draft of TLearner
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Mon, 05 May 2008 18:14:32 -0400
parents 3499918faa9d
children c4916445e025
comparison
equal deleted inserted replaced
84:aa9e786ee849 92:c4726e19b8ec
class LinearRegression(Learner):
    """
    Implement linear regression, with or without L2 regularization
    (the former is called Ridge Regression and the latter Ordinary Least Squares).

    The predictor parameters are obtained analytically from the training set.
    Training can proceed sequentially (with multiple calls to update with
    different disjoint subsets of the training sets). After each call to
    update the predictor is ready to be used (and optimized for the union
    of all the training sets passed to update since construction or since
    the last call to forget).

    The L2 regularization coefficient is obtained analytically.
    For each (input[t],output[t]) pair in a minibatch,::

       output_t = b + W * input_t

    - 'output' (optionally produced by use as an output dataset field)
    - 'squared_error' (optionally produced by use as an output dataset field, needs 'target') = example-wise squared error

    - optional input attributes (optionally expected as input_dataset attributes)

    - optional attributes (optionally expected as input_dataset attributes)
      (warning, this may be dangerous, the 'use' method will use those provided in the
      input_dataset rather than those learned during 'update'; currently no support
      for providing these to update):

      - 'lambda'
      - 'b'
      - 'W'
      - 'regularization_term'
      - 'XtX'
      - 'XtY'
    """
61 65
def attributeNames(self):
    """Return the names of the attributes this learner exposes (see class docstring)."""
    return "lambda b W regularization_term XtX XtY".split()
# definitions specific to linear regression:
70
63 71
64 def global_inputs(self): 72 def global_inputs(self):
65 self.lambda = as_scalar(0.,'lambda') 73 self.lambda = as_scalar(0.,'lambda')
66 self.theta = t.matrix('theta') 74 self.theta = t.matrix('theta')
67 self.W = self.theta[:,1:] 75 self.W = self.theta[:,1:]
105 output_fields.append("squared_error") 113 output_fields.append("squared_error")
106 return output_fields 114 return output_fields
107 115
# general machinery built on top of these functions
109 117
def minibatchwise_use_functions(self, input_fields, output_fields, stats_collector):
    """
    Return (compiling and caching it on first use) a Function mapping the
    given input fields to the given output fields, applied one minibatch
    at a time.

    If output_fields is empty/None, the default output fields for these
    inputs are used.  If a stats_collector is given, any attribute it
    needs that is not already an input is added to the outputs so the
    collector can be updated from the result.
    """
    if not output_fields:
        output_fields = self.defaultOutputFields(input_fields)
    if stats_collector:
        # make sure the fields the stats collector needs are also computed
        stats_collector_inputs = stats_collector.inputUpdateAttributes()
        for attribute in stats_collector_inputs:
            if attribute not in input_fields:
                output_fields.append(attribute)
    # BUG FIX: the field-name containers may be lists, which are unhashable
    # and cannot serve as dictionary keys; convert to tuples first.
    key = (tuple(input_fields), tuple(output_fields))
    if key not in self.use_functions_dictionary:
        self.use_functions_dictionary[key] = Function(self.names2attributes(input_fields),
                                                      self.names2attributes(output_fields))
    return self.use_functions_dictionary[key]
123
def attributes(self, return_copy=False):
    """
    Return the values of this learner's attributes, in the order given by
    attributeNames().

    @param return_copy: if True, return deep copies so the caller cannot
        mutate the learner's internal state.
    """
    # BUG FIX: return_copy was accepted but ignored; forward it so callers
    # such as use(...return_copy=True...) actually get copies.
    return self.names2attributes(self.attributeNames(), return_copy=return_copy)
126
def names2attributes(self, names, return_Result=False, return_copy=False):
    """
    Map attribute names to their values on this learner.

    If return_Result is True, the Result objects themselves are returned;
    otherwise their .data fields.  If return_copy is True, deep copies of
    the values are returned instead of the values themselves.
    """
    if return_Result:
        values = [self.__getattr__(name) for name in names]
    else:
        values = [self.__getattr__(name).data for name in names]
    if return_copy:
        return [copy.deepcopy(value) for value in values]
    return values
138
def use(self, input_dataset, output_fieldnames=None, test_stats_collector=None, copy_inputs=True):
    """
    Apply the predictor to input_dataset and return a dataset with the
    requested output fields (defaultOutputFields if none are given),
    prepending the input fields when copy_inputs is True.

    The learner's attributes are copied onto the returned dataset; if
    test_stats_collector is given it is updated on the result and its
    attributes are copied onto the returned dataset as well.
    """
    # BUG FIX: minibatchwise_use_functions is a method of self; the bare
    # name is not defined at module scope and raised NameError.
    minibatchwise_use_function = self.minibatchwise_use_functions(input_dataset.fieldNames(),
                                                                  output_fieldnames,
                                                                  test_stats_collector)
    virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
                                                  minibatchwise_use_function,
                                                  True, DataSet.numpy_vstack,
                                                  DataSet.numpy_hstack)
    # actually force the computation
    output_dataset = CachedDataSet(virtual_output_dataset, True)
    if copy_inputs:
        output_dataset = input_dataset | output_dataset
    # copy the attributes that should travel with the dataset
    output_dataset.setAttributes(self.attributeNames(), self.attributes(return_copy=True))
    if test_stats_collector:
        test_stats_collector.update(output_dataset)
        for attribute in test_stats_collector.attributeNames():
            output_dataset[attribute] = copy.deepcopy(test_stats_collector[attribute])
    return output_dataset
156
def update(self, training_set, train_stats_collector=None):
    """
    Train (or continue training) on training_set, one minibatch at a time.

    After each minibatch the optional train_stats_collector is updated
    with the current learner attributes attached to that minibatch.
    Returns the use method, so the freshly trained predictor can be
    applied immediately.
    """
    self.update_start()
    minibatch_stream = training_set.minibatches(self.training_set_input_fields,
                                                minibatch_size=self.minibatch_size)
    for minibatch in minibatch_stream:
        self.update_minibatch(minibatch)
        if train_stats_collector:
            minibatch_set = minibatch.examples()
            minibatch_set.setAttributes(self.attributeNames(), self.attributes())
            train_stats_collector.update(minibatch_set)
    self.update_end()
    return self.use
167 118
168 def __init__(self,lambda=0.,max_memory_use=500): 119 def __init__(self,lambda=0.,max_memory_use=500):
169 """ 120 """
170 @type lambda: float 121 @type lambda: float
171 @param lambda: regularization coefficient 122 @param lambda: regularization coefficient