pylearn: comparison linear_regression.py @ 77:1e2bb5bad636
toying with different ways to implement learners
author | bengioy@bengiomac.local |
---|---|
date | Sun, 04 May 2008 15:09:22 -0400 |
parents | 90e4c0784d6e |
children | 3499918faa9d |
76:ccd6ae89a7c4 | 77:1e2bb5bad636 |
---|---|
51 | 51 |
52 - optional output attributes (available in self and optionally in output dataset) | 52 - optional output attributes (available in self and optionally in output dataset) |
53 | 53 |
54 - 'b' (only set by update) | 54 - 'b' (only set by update) |
55 - 'W' (only set by update) | 55 - 'W' (only set by update) |
56 - 'total_squared_error' (set by use and by update) = sum over examples of example_wise_squared_error | 56 - 'regularization_term' (only set by update) |
57 - 'total_loss' (set by use and by update) = regularizer + total_squared_error | |
58 - 'XtX' (only set by update) | 57 - 'XtX' (only set by update) |
59 - 'XtY' (only set by update) | 58 - 'XtY' (only set by update) |
60 | 59 |
61 """ | 60 """ |
62 | 61 |
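Taken together, the methods inserted below compute the ridge-regression solution from the accumulated statistics XtX and XtY, with the bias column left unregularized. As a reference for what they compute, here is a minimal standalone numpy sketch (the function name and shapes are illustrative, not part of this file):

```python
import numpy as np

def ridge_solve(X, Y, lambda_=0.1):
    """Closed-form ridge regression with a bias column: a standalone
    numpy sketch of the XtX/XtY bookkeeping used in this file."""
    Xe = np.hstack([np.ones((X.shape[0], 1)), X])  # prepend 1 to each row
    XtX = Xe.T @ Xe                                # (1+n_inputs, 1+n_inputs)
    XtY = Xe.T @ Y                                 # (1+n_inputs, n_outputs)
    idx = np.arange(1, XtX.shape[0])
    XtX[idx, idx] += lambda_                       # regularize all but the bias
    theta = np.linalg.solve(XtX, XtY).T            # (n_outputs, 1+n_inputs)
    return theta[:, 0], theta[:, 1:]               # b, W
```

Predictions are then `X @ W.T + b`, matching `minibatch_wise_outputs` further down.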
63 def __init__(self,lambda_=0.): | 62 # definitions specific to linear regression: |
| 63 |
| 64 def global_inputs(self): |
| 65 self.lambda_ = as_scalar(0.,'lambda') # 'lambda' is a reserved word in Python, hence the trailing underscore |
| 66 self.theta = t.matrix('theta') |
| 67 self.W = self.theta[:,1:] |
| 68 self.b = self.theta[:,0] |
| 69 self.XtX = t.matrix('XtX') |
| 70 self.XtY = t.matrix('XtY') |
| 71 |
| 72 def global_outputs(self): |
| 73 self.regularization_term = self.lambda_ * t.sum(t.sqr(self.W)) # L2 penalty: lambda * ||W||^2 |
| 74 self.loss = self.regularization_term + t.sum(self.squared_error) # this only makes sense if the whole training set fits in memory in a minibatch |
| 75 self.loss_function = Function([self.W,self.lambda_,self.squared_error],[self.loss]) |
| 76 |
| 77 def initialize(self): |
| 78 self.XtX.resize((1+self.n_inputs,1+self.n_inputs)) |
| 79 self.XtY.resize((1+self.n_inputs,self.n_outputs)) |
| 80 self.XtX.data[:,:]=0 |
| 81 self.XtY.data[:,:]=0 |
| 82 numpy.fill_diagonal(self.XtX.data[1:,1:],self.lambda_.data) # write the regularizer through a view of the diagonal |
| 83 |
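A note on line 82: `numpy.diag` does not hand back a writable view of the diagonal (it is a copy in old NumPy and a read-only view in recent versions), so assigning into it never reaches `XtX`. Writing through a slice view does work:

```python
import numpy as np

A = np.zeros((3, 3))
# np.diag(A)[1:] = 5.0 would not modify A (copy or read-only view,
# depending on the NumPy version); a slice is a genuine view instead:
np.fill_diagonal(A[1:, 1:], 5.0)
assert A[1, 1] == A[2, 2] == 5.0 and A[0, 0] == 0.0
```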
| 84 def updated_variables(self): |
| 85 self.new_XtX = self.XtX + t.dot(self.extended_input.T,self.extended_input) |
| 86 self.new_XtY = self.XtY + t.dot(self.extended_input.T,self.target) |
| 87 self.new_theta = t.solve(self.new_XtX,self.new_XtY).T # solve with the updated statistics; transposed so rows index outputs, as in W = theta[:,1:] |
| 88 |
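`updated_variables` is what makes the learner online: each minibatch only adds its contribution to XtX and XtY, so the whole training set never has to be held in memory at once. A numpy equivalent of one such update, with illustrative names:

```python
import numpy as np

def update_stats(XtX, XtY, X_batch, Y_batch):
    """Accumulate one minibatch into the sufficient statistics and
    re-solve for theta (numpy sketch of updated_variables)."""
    Xe = np.hstack([np.ones((X_batch.shape[0], 1)), X_batch])
    XtX += Xe.T @ Xe                    # in-place accumulation
    XtY += Xe.T @ Y_batch
    return np.linalg.solve(XtX, XtY).T  # rows index outputs, as in theta
```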
| 89 def minibatch_wise_inputs(self): |
| 90 self.input = t.matrix('input') # n_examples x n_inputs |
| 91 self.target = t.matrix('target') # n_examples x n_outputs |
| 92 |
| 93 def minibatch_wise_outputs(self): |
| 94 # self.input is a (n_examples, n_inputs) minibatch matrix |
| 95 self.extended_input = t.prepend_one_to_each_row(self.input) |
| 96 self.output = t.dot(self.input,self.W.T) + self.b # (n_examples, n_outputs) matrix |
| 97 self.squared_error = t.sum_within_rows(t.sqr(self.output-self.target)) # (n_examples,) vector |
| 98 |
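`minibatch_wise_outputs` is a plain affine map plus a per-example error; in standalone numpy terms (illustrative names):

```python
import numpy as np

def forward(X, W, b):
    """Affine prediction, as in minibatch_wise_outputs (numpy sketch)."""
    return X @ W.T + b                            # (n_examples, n_outputs)

def squared_error(output, target):
    """Sum of squared errors within each row."""
    return ((output - target) ** 2).sum(axis=1)   # (n_examples,) vector
```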
| 99 def attribute_names(self): |
| 100 return ["lambda_","b","W","regularization_term","XtX","XtY"] |
| 101 |
| 102 def default_output_fields(self, input_fields): |
| 103 output_fields = ["output"] |
| 104 if "target" in input_fields: |
| 105 output_fields.append("squared_error") |
| 106 return output_fields |
| 107 |
| 108 # generic machinery built on the methods above |
| 109 |
| 110 def minibatchwise_use_functions(self, input_fields, output_fields): |
| 111 if not output_fields: |
| 112 output_fields = self.default_output_fields(input_fields) |
| 113 key = (tuple(input_fields),tuple(output_fields)) # tuples, so the cache key is hashable |
| 114 if key not in self.use_functions_dictionary: |
| 115 self.use_functions_dictionary[key]=Function(self.names2attributes(input_fields), |
| 116 self.names2attributes(output_fields)) |
| 117 return self.use_functions_dictionary[key] |
| 118 |
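`minibatchwise_use_functions` compiles one function per (input fields, output fields) signature and memoizes it, so repeated `use` calls with the same fields reuse the compiled function. The caching pattern in isolation, with a stand-in `compile_fn` for `Function`:

```python
def get_use_function(cache, input_fields, output_fields, compile_fn):
    """Memoize compiled functions by field signature (sketch)."""
    key = (tuple(input_fields), tuple(output_fields))  # tuples are hashable
    if key not in cache:
        cache[key] = compile_fn(input_fields, output_fields)
    return cache[key]
```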
| 119 def names2attributes(self,names,return_Result=True): |
| 120 if return_Result: |
| 121 return [getattr(self,name) for name in names] |
| 122 else: |
| 123 return [getattr(self,name).data for name in names] |
| 124 |
| 125 def use(self,input_dataset,output_fieldnames=None,test_stats_collector=None,copy_inputs=True): |
| 126 minibatchwise_use_function = self.minibatchwise_use_functions(input_dataset.fieldNames(),output_fieldnames) |
| 127 virtual_output_dataset = ApplyFunctionDataSet(input_dataset, |
| 128 minibatchwise_use_function, |
| 129 True,DataSet.numpy_vstack, |
| 130 DataSet.numpy_hstack) |
| 131 # actually force the computation |
| 132 output_dataset = CachedDataSet(virtual_output_dataset,True) |
| 133 if copy_inputs: |
| 134 output_dataset = input_dataset | output_dataset |
| 135 # copy into the output dataset the attributes computed by the learner |
| 136 for attribute in self.attribute_names(): |
| 137 # .data assumes that all attributes are Result objects |
| 138 setattr(output_dataset,attribute,copy.deepcopy(getattr(self,attribute).data)) |
| 139 if test_stats_collector: |
| 140 test_stats_collector.update(output_dataset) |
| 141 for attribute in test_stats_collector.attribute_names(): |
| 142 output_dataset[attribute] = copy.deepcopy(test_stats_collector[attribute]) |
| 143 return output_dataset |
| 144 |
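Putting it together, a `use` call might look like the following. This is a hypothetical sketch: the class name, constructor signature, and field names are assumptions based on this file and pylearn's DataSet conventions, and `update` has no body yet in this revision.

```python
# Hypothetical usage sketch (names assumed, not confirmed by this file):
learner = LinearRegression(lambda_=0.1)
learner.update(train_set)       # would accumulate XtX/XtY and re-solve theta
out = learner.use(test_set)     # dataset with an "output" field; also has
                                # "squared_error" if "target" was an input field
```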
| 145 def update(self,training_set,train_stats_collector=None): |
| 146 pass # body not yet written in this revision |
| 147 |
| 148 def __init__(self,lambda_=0.,max_memory_use=500): |
64 """ | 149 """ |
65 @type lambda_: float | 150 @type lambda_: float |
66 @param lambda_: regularization coefficient | 151 @param lambda_: regularization coefficient |
67 """ | 152 """ |
68 | 153 |
105 if output_fieldname=="output": | 190 if output_fieldname=="output": |
106 use_functions.append(self.output_function) | 191 use_functions.append(self.output_function) |
107 elif output_fieldname=="squared_error": | 192 elif output_fieldname=="squared_error": |
108 use_functions.append(self.output_function) # FIXME: incomplete WIP; squared_error needs its own compiled function | 193 use_functions.append(self.output_function) # FIXME: incomplete WIP; squared_error needs its own compiled function |
109 | 194 |
| 195 n_examples = len(input_dataset) |
| 196 |
| 197 for minibatch in input_dataset.minibatches(minibatch_size=minibatch_size, allow_odd_last_minibatch=True): |
| 198 use_function( |
| 199 |