pylearn: diff linear_regression.py @ 92:c4726e19b8ec
Finished first draft of TLearner
author    Yoshua Bengio <bengioy@iro.umontreal.ca>
date      Mon, 05 May 2008 18:14:32 -0400
parents   3499918faa9d
children  c4916445e025
--- a/linear_regression.py	Mon May 05 11:49:40 2008 -0400
+++ b/linear_regression.py	Mon May 05 18:14:32 2008 -0400
@@ -11,7 +11,12 @@
     Implement linear regression, with or without L2 regularization
     (the former is called Ridge Regression and the latter Ordinary Least Squares).
 
-    The predictor is obtained analytically.
+    The predictor parameters are obtained analytically from the training set.
+    Training can proceed sequentially (with multiple calls to update with
+    different disjoint subsets of the training sets). After each call to
+    update the predictor is ready to be used (and optimized for the union
+    of all the training sets passed to update since construction or since
+    the last call to forget).
 
     The L2 regularization coefficient is obtained analytically.
     For each (input[t],output[t]) pair in a minibatch,::
@@ -45,22 +50,25 @@
-    - optional input attributes (optionally expected as input_dataset attributes)
-      - 'lambda' (only used by update)
-      - 'b' (only used by use)
-      - 'W' (only used by use)
-
-    - optional output attributes (available in self and optionally in output dataset)
-
-      - 'b' (only set by update)
-      - 'W' (only set by update)
-      - 'regularization_term' (only set by update)
-      - 'XtX' (only set by update)
-      - 'XtY' (only set by update)
+    - optional attributes (optionally expected as input_dataset attributes)
+      (warning, this may be dangerous, the 'use' method will use those provided in the
+      input_dataset rather than those learned during 'update'; currently no support
+      for providing these to update):
+      - 'lambda'
+      - 'b'
+      - 'W'
+      - 'regularization_term'
+      - 'XtX'
+      - 'XtY'
     """
 
+    def attributeNames(self):
+        return ["lambda","b","W","regularization_term","XtX","XtY"]
+
     # definitions specifiques a la regression lineaire:
 
     def global_inputs(self):
         self.lambda = as_scalar(0.,'lambda')
         self.theta = t.matrix('theta')
@@ -107,63 +115,6 @@
 
     # poutine generale basee sur ces fonctions
 
-    def minibatchwise_use_functions(self, input_fields, output_fields, stats_collector):
-        if not output_fields:
-            output_fields = self.defaultOutputFields(input_fields)
-        if stats_collector:
-            stats_collector_inputs = stats_collector.inputUpdateAttributes()
-            for attribute in stats_collector_inputs:
-                if attribute not in input_fields:
-                    output_fields.append(attribute)
-        key = (input_fields,output_fields)
-        if key not in self.use_functions_dictionary:
-            self.use_functions_dictionary[key]=Function(self.names2attributes(input_fields),
-                                                        self.names2attributes(output_fields))
-        return self.use_functions_dictionary[key]
-
-    def attributes(self,return_copy=False):
-        return self.names2attributes(self.attributeNames())
-
-    def names2attributes(self,names,return_Result=False, return_copy=False):
-        if return_Result:
-            if return_copy:
-                return [copy.deepcopy(self.__getattr__(name)) for name in names]
-            else:
-                return [self.__getattr__(name) for name in names]
-        else:
-            if return_copy:
-                return [copy.deepcopy(self.__getattr__(name).data) for name in names]
-            else:
-                return [self.__getattr__(name).data for name in names]
-
-    def use(self,input_dataset,output_fieldnames=None,test_stats_collector=None,copy_inputs=True):
-        minibatchwise_use_function = minibatchwise_use_functions(input_dataset.fieldNames(),output_fieldnames,test_stats_collector)
-        virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
-                                                      minibatchwise_use_function,
-                                                      True,DataSet.numpy_vstack,
-                                                      DataSet.numpy_hstack)
-        # actually force the computation
-        output_dataset = CachedDataSet(virtual_output_dataset,True)
-        if copy_inputs:
-            output_dataset = input_dataset | output_dataset
-        # compute the attributes that should be copied in the dataset
-        output_dataset.setAttributes(self.attributeNames(),self.attributes(return_copy=True))
-        if test_stats_collector:
-            test_stats_collector.update(output_dataset)
-            for attribute in test_stats_collector.attributeNames():
-                output_dataset[attribute] = copy.deepcopy(test_stats_collector[attribute])
-        return output_dataset
-
-    def update(self,training_set,train_stats_collector=None):
-        self.update_start()
-        for minibatch in training_set.minibatches(self.training_set_input_fields, minibatch_size=self.minibatch_size):
-            self.update_minibatch(minibatch)
-            if train_stats_collector:
-                minibatch_set = minibatch.examples()
-                minibatch_set.setAttributes(self.attributeNames(),self.attributes())
-                train_stats_collector.update(minibatch_set)
-        self.update_end()
-        return self.use
-
     def __init__(self,lambda=0.,max_memory_use=500):
         """
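Editor's note: the new docstring describes sequential training via repeated update calls over disjoint subsets of the training set, with the predictor re-derived analytically after each call from the accumulated statistics XtX and XtY. Below is a minimal numpy sketch of that scheme (hypothetical names; this is not pylearn's actual DataSet/Learner API). It folds each subset into XtX and XtY, then solves theta = (XtX + lambda*I)^-1 XtY, so the model is "optimized for the union of all the training sets passed to update" without storing past minibatches.

    import numpy as np

    class SequentialRidgeSketch:
        """Hypothetical stand-in for the update/use/forget cycle described above."""

        def __init__(self, n_inputs, n_outputs, lambda_=0.0):
            # 'lambda' itself is a reserved word in Python, hence the underscore.
            self.lambda_ = lambda_
            # Sufficient statistics; a column of ones appended to the inputs
            # absorbs the bias b into the weight matrix theta.
            self.XtX = np.zeros((n_inputs + 1, n_inputs + 1))
            self.XtY = np.zeros((n_inputs + 1, n_outputs))
            self.theta = np.zeros((n_inputs + 1, n_outputs))

        def forget(self):
            # Discard everything learned since construction.
            self.XtX[:] = 0.0
            self.XtY[:] = 0.0
            self.theta[:] = 0.0

        def update(self, X, Y):
            # Fold one disjoint subset of the training set into the statistics.
            Xb = np.hstack([np.ones((len(X), 1)), X])
            self.XtX += Xb.T @ Xb
            self.XtY += Xb.T @ Y
            # Analytic ridge solve: theta = (XtX + lambda*I)^-1 XtY, so the
            # predictor is ready to use immediately after each update.
            reg = self.lambda_ * np.eye(len(self.XtX))
            reg[0, 0] = 0.0  # by convention, leave the bias unregularized
            self.theta = np.linalg.solve(self.XtX + reg, self.XtY)

        def use(self, X):
            Xb = np.hstack([np.ones((len(X), 1)), X])
            return Xb @ self.theta

    model = SequentialRidgeSketch(n_inputs=3, n_outputs=1, lambda_=0.1)
    X, Y = np.random.randn(100, 3), np.random.randn(100, 1)
    model.update(X[:50], Y[:50])   # first disjoint subset
    model.update(X[50:], Y[50:])   # second subset
    pred = model.use(X)

Because XtX and XtY are additive across disjoint subsets, the two update calls above yield exactly the predictor a single call on the full set would, which is what makes the sequential scheme in the docstring possible.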