pylearn: diff linear_regression.py @ 77:1e2bb5bad636
toying with different ways to implement learners
| author   | bengioy@bengiomac.local         |
|----------|---------------------------------|
| date     | Sun, 04 May 2008 15:09:22 -0400 |
| parents  | 90e4c0784d6e                    |
| children | 3499918faa9d                    |
```diff
--- a/linear_regression.py	Sat May 03 22:00:37 2008 -0400
+++ b/linear_regression.py	Sun May 04 15:09:22 2008 -0400
@@ -53,14 +53,99 @@
     - 'b' (only set by update)
     - 'W' (only set by update)
-    - 'total_squared_error' (set by use and by update) = sum over examples of example_wise_squared_error
-    - 'total_loss' (set by use and by update) = regularizer + total_squared_error
+    - 'regularization_term' (only set by update)
     - 'XtX' (only set by update)
     - 'XtY' (only set by update)
     """
 
-    def __init__(self,lambda=0.):
+#   definitions specific to linear regression:
+
+    def global_inputs(self):
+        self.lambda = as_scalar(0.,'lambda')
+        self.theta = t.matrix('theta')
+        self.W = self.theta[:,1:]
+        self.b = self.theta[:,0]
+        self.XtX = t.matrix('XtX')
+        self.XtY = t.matrix('XtY')
+
+    def global_outputs(self):
+        self.regularizer = self.lambda * t.dot(self.W,self.W)
+        self.loss = self.regularizer + t.sum(self.squared_error) # this only makes sense if the whole training set fits in memory in a minibatch
+        self.loss_function = Function([self.W,self.lambda,self.squared_error],[self.loss])
+
+    def initialize(self):
+        self.XtX.resize((1+self.n_inputs,1+self.n_inputs))
+        self.XtY.resize((1+self.n_inputs,self.n_outputs))
+        self.XtX.data[:,:]=0
+        self.XtY.data[:,:]=0
+        numpy.diag(self.XtX.data)[1:]=self.lambda.data
+
+    def updated_variables(self):
+        self.new_XtX = self.XtX + t.dot(self.extended_input.T,self.extended_input)
+        self.new_XtY = self.XtY + t.dot(self.extended_input.T,self.target)
+        self.new_theta = t.solve(self.XtX,self.XtY)
+
+    def minibatch_wise_inputs(self):
+        self.input = t.matrix('input') # n_examples x n_inputs
+        self.target = t.matrix('target') # n_examples x n_outputs
+
+    def minibatch_wise_outputs(self):
+        # self.input is a (n_examples, n_inputs) minibatch matrix
+        self.extended_input = t.prepend_one_to_each_row(self.input)
+        self.output = t.dot(self.input,self.W.T) + self.b # (n_examples, n_outputs) matrix
+        self.squared_error = t.sum_within_rows(t.sqr(self.output-self.target)) # (n_examples,) vector
+
+    def attribute_names(self):
+        return ["lambda","b","W","regularization_term","XtX","XtY"]
+
+    def default_output_fields(self, input_fields):
+        output_fields = ["output"]
+        if "target" in input_fields:
+            output_fields.append("squared_error")
+        return output_fields
+
+    # general plumbing based on these functions
+
+    def minibatchwise_use_functions(self, input_fields, output_fields):
+        if not output_fields:
+            output_fields = self.default_output_fields(input_fields)
+        key = (input_fields,output_fields)
+        if key not in use_functions_dictionary:
+            use_functions_dictionary[key]=Function(self.names2attributes(input_fields),
+                                                   self.names2attributes(output_fields))
+        return use_functions_dictionary[key]
+
+    def names2attributes(self,names,return_Result=True):
+        if return_Result:
+            return [self.__getattr__(name) for name in names]
+        else:
+            return [self.__getattr__(name).data for name in names]
+
+    def use(self,input_dataset,output_fieldnames=None,test_stats_collector=None,copy_inputs=True):
+        minibatchwise_use_function = use_functions(input_dataset.fieldNames(),output_fieldnames)
+        virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
+                                                      minibatchwise_use_function,
+                                                      True,DataSet.numpy_vstack,
+                                                      DataSet.numpy_hstack)
+        # actually force the computation
+        output_dataset = CachedDataSet(virtual_output_dataset,True)
+        if copy_inputs:
+            output_dataset = input_dataset | output_dataset
+        # compute the attributes that should be copied in the dataset
+        for attribute in self.attribute_names():
+            # .data assumes that all attributes are Result objects
+            output_dataset.__setattr__(attribute) = copy.deepcopy(self.__getattr__(attribute).data)
+        if test_stats_collector:
+            test_stats_collector.update(output_dataset)
+            for attribute in test_stats_collector.attribute_names():
+                output_dataset[attribute] = copy.deepcopy(test_stats_collector[attribute])
+        return output_dataset
+
+    def update(self,training_set,train_stats_collector=None):
+
+
+    def __init__(self,lambda=0.,max_memory_use=500):
         """
         @type lambda: float
         @param lambda: regularization coefficient
@@ -107,3 +192,8 @@
         elif output_fieldname=="squared_error":
             use_functions.append(lambda self.output_function)
 
+        n_examples = len(input_dataset)
+
+        for minibatch in input_dataset.minibatches(minibatch_size=minibatch_size, allow_odd_last_minibatch=True):
+            use_function(
+
```
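A few of the added lines are not valid Python as written: `lambda` is a reserved word, so `def __init__(self,lambda=0.,...)` and `self.lambda = ...` cannot parse, and `output_dataset.__setattr__(attribute) = ...` assigns to a call expression, which is a SyntaxError. A minimal sketch of how those two spots would read in runnable form; `lambda_` and the class name are stand-ins, not pylearn API:

```python
import copy

class LinearRegressionSketch:
    """Hedged sketch of the syntax fixes only; names are stand-ins."""

    def __init__(self, lambda_=0., max_memory_use=500):
        # 'lambda' is a Python keyword, so the regularization coefficient
        # needs another identifier (here 'lambda_') throughout.
        self.lambda_ = lambda_
        self.max_memory_use = max_memory_use

    def copy_attributes_to(self, dst, names):
        # A call is not an assignment target, so
        #   dst.__setattr__(name) = value   # SyntaxError
        # must become setattr(dst, name, value):
        for name in names:
            setattr(dst, name, copy.deepcopy(getattr(self, name)))
```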
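Algorithmically, `initialize`, `updated_variables`, and the solve implement ridge regression via accumulated sufficient statistics. With the inputs extended by a leading column of ones so that the first row of the solution plays the role of the bias:

```latex
\mathrm{XtX} \mathrel{+}= \tilde{X}^{\top}\tilde{X}, \qquad
\mathrm{XtY} \mathrel{+}= \tilde{X}^{\top}Y, \qquad
\theta^{\top} = \mathrm{XtX}^{-1}\,\mathrm{XtY}, \qquad
\tilde{X} = [\,\mathbf{1}\;\; X\,]
```

Here `initialize` seeds the diagonal of XtX with \(\lambda\) starting at index 1, so the bias is left unregularized. Two spots look like work-in-progress rather than the intended math: `t.dot(self.W,self.W)` presumably means the squared Frobenius norm \(\lambda \sum_{ij} W_{ij}^2\) (a matrix product of W with itself is not even shape-compatible in general), and `t.solve(self.XtX,self.XtY)` uses the pre-update statistics where `new_XtX`/`new_XtY` appear intended.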
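For readers who want the accumulate-then-solve scheme without the pylearn dataset machinery, here is a self-contained NumPy sketch; `fit_linear_regression` and its arguments are illustrative names, not part of the repository:

```python
import numpy as np

def fit_linear_regression(minibatches, n_inputs, n_outputs, lam=0.0):
    """minibatches yields (X, Y) pairs; X: (m, n_inputs), Y: (m, n_outputs)."""
    d = 1 + n_inputs
    XtX = np.zeros((d, d))
    XtY = np.zeros((d, n_outputs))
    # Regularize every weight except the bias: put lam on the diagonal from
    # index 1 on. (Assigning into numpy.diag(XtX)[1:], as the diff does,
    # writes to a copy of the diagonal and would not update XtX in place.)
    idx = np.arange(1, d)
    XtX[idx, idx] = lam
    for X, Y in minibatches:
        # prepend a column of ones so theta's first row acts as the bias b
        Xe = np.hstack([np.ones((X.shape[0], 1)), X])
        XtX += Xe.T @ Xe
        XtY += Xe.T @ Y
    # theta has shape (1+n_inputs, n_outputs): row 0 is b, rows 1: are W.T
    return np.linalg.solve(XtX, XtY)

# usage: two minibatches of a noisy linear mapping
rng = np.random.default_rng(0)
W_true, b_true = np.array([[2.0], [-1.0]]), 0.5
def batches():
    for _ in range(2):
        X = rng.normal(size=(50, 2))
        yield X, X @ W_true + b_true + 0.01 * rng.normal(size=(50, 1))

theta = fit_linear_regression(batches(), n_inputs=2, n_outputs=1, lam=0.1)
```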
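Finally, `minibatchwise_use_functions` memoizes one compiled function per field combination, but its key `(input_fields,output_fields)` is built from lists, which are unhashable, and `use_functions_dictionary` is never defined. A hedged sketch of the caching pattern, with a stand-in `compile_fn` in place of pylearn's `Function`:

```python
class UseFunctionCache:
    """Memoize one compiled function per (input_fields, output_fields) pair.
    'compile_fn' stands in for pylearn's Function(...) compilation step."""

    def __init__(self, compile_fn):
        self.compile_fn = compile_fn
        self._cache = {}

    def get(self, input_fields, output_fields):
        # lists are unhashable, so normalize the dict key to tuples
        key = (tuple(input_fields), tuple(output_fields))
        if key not in self._cache:
            self._cache[key] = self.compile_fn(input_fields, output_fields)
        return self._cache[key]

# a second request with the same fields reuses the compiled object
cache = UseFunctionCache(lambda ins, outs: object())
f1 = cache.get(["input", "target"], ["output", "squared_error"])
f2 = cache.get(["input", "target"], ["output", "squared_error"])
assert f1 is f2
```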