diff linear_regression.py @ 78:3499918faa9d

In the middle of designing TLearner
author bengioy@bengiomac.local
date Mon, 05 May 2008 09:35:30 -0400
parents 1e2bb5bad636
children c4726e19b8ec
line wrap: on
line diff
--- a/linear_regression.py	Sun May 04 15:09:22 2008 -0400
+++ b/linear_regression.py	Mon May 05 09:35:30 2008 -0400
@@ -96,10 +96,10 @@
         self.output = t.dot(self.input,self.W.T) + self.b  # (n_examples , n_outputs) matrix
         self.squared_error = t.sum_within_rows(t.sqr(self.output-self.target)) # (n_examples ) vector
 
-    def attribute_names(self):
+    def attributeNames(self):  # names of the attributes that attributes() looks up on self
         return ["lambda","b","W","regularization_term","XtX","XtY"]
 
-    def default_output_fields(self, input_fields):
+    def defaultOutputFields(self, input_fields):
         output_fields = ["output"]
         if "target" in input_fields:
             output_fields.append("squared_error")
@@ -107,23 +107,37 @@
         
     # poutine generale basee sur ces fonctions
 
-    def minibatchwise_use_functions(self, input_fields, output_fields):
+    def minibatchwise_use_functions(self, input_fields, output_fields, stats_collector=None):  # cached Function mapping input fields to output fields
         if not output_fields:
-            output_fields = self.default_output_fields(input_fields)
-        key = (input_fields,output_fields)
-        if key not in use_functions_dictionary:
-            use_functions_dictionary[key]=Function(self.names2attributes(input_fields),
+            output_fields = self.defaultOutputFields(input_fields)
+        if stats_collector:
+            # also output what the collector consumes; build a new list so the caller's list is not mutated
+            output_fields = list(output_fields) + [
+                attribute for attribute in stats_collector.inputUpdateAttributes()
+                if attribute not in input_fields]
+        key = (tuple(input_fields),tuple(output_fields))  # lists are unhashable; tuples can key the cache
+        if key not in self.use_functions_dictionary:
+            self.use_functions_dictionary[key]=Function(self.names2attributes(input_fields),
                                                     self.names2attributes(output_fields))
-        return use_functions_dictionary[key]
+        return self.use_functions_dictionary[key]
 
-    def names2attributes(self,names,return_Result=True):
+    def attributes(self,return_copy=False):  # values of attributeNames(), deep-copied when return_copy is true
+        return self.names2attributes(self.attributeNames(),return_copy=return_copy)  # forward the flag instead of dropping it
+            
+    def names2attributes(self,names,return_Result=False, return_copy=False):  # attribute objects (return_Result) or their .data; deep copies if return_copy
         if return_Result:
-            return [self.__getattr__(name) for name in names]
+            if return_copy:
+                return [copy.deepcopy(getattr(self,name)) for name in names]
+            else:
+                return [getattr(self,name) for name in names]  # getattr(): normal lookup, still falls back to __getattr__
         else:
-            return [self.__getattr__(name).data for name in names]
+            if return_copy:
+                return [copy.deepcopy(getattr(self,name).data) for name in names]
+            else:
+                return [getattr(self,name).data for name in names]
 
     def use(self,input_dataset,output_fieldnames=None,test_stats_collector=None,copy_inputs=True):
-        minibatchwise_use_function = use_functions(input_dataset.fieldNames(),output_fieldnames)
+        minibatchwise_use_function = self.minibatchwise_use_functions(input_dataset.fieldNames(),output_fieldnames,test_stats_collector)  # method call: the bare name would raise NameError
         virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
                                                       minibatchwise_use_function,
                                                       True,DataSet.numpy_vstack,
@@ -133,17 +147,23 @@
         if copy_inputs:
             output_dataset = input_dataset | output_dataset
         # compute the attributes that should be copied in the dataset
-        for attribute in self.attribute_names():
-            # .data assumes that all attributes are Result objects
-            output_dataset.__setattr__(attribute) = copy.deepcopy(self.__getattr__(attribute).data)
+        output_dataset.setAttributes(self.attributeNames(),self.attributes(return_copy=True))
         if test_stats_collector:
             test_stats_collector.update(output_dataset)
-            for attribute in test_stats_collector.attribute_names():
+            for attribute in test_stats_collector.attributeNames():
                 output_dataset[attribute] = copy.deepcopy(test_stats_collector[attribute])
         return output_dataset
 
     def update(self,training_set,train_stats_collector=None):
-        
+        self.update_start()  # called once, before iterating over the minibatches
+        for minibatch in training_set.minibatches(self.training_set_input_fields, minibatch_size=self.minibatch_size):
+            self.update_minibatch(minibatch)
+            if train_stats_collector:  # optional: pass each minibatch, tagged with the current attributes, to the collector
+                minibatch_set = minibatch.examples()
+                minibatch_set.setAttributes(self.attributeNames(),self.attributes())  # attributes() is called without return_copy here
+                train_stats_collector.update(minibatch_set)
+        self.update_end()  # called once, after the last minibatch
+        return self.use  # returns the bound use method itself, not a dataset
     
     def __init__(self,lambda=0.,max_memory_use=500):
         """