comparison linear_regression.py @ 77:1e2bb5bad636

toying with different ways to implement learners
author bengioy@bengiomac.local
date Sun, 04 May 2008 15:09:22 -0400
parents 90e4c0784d6e
children 3499918faa9d
comparison
equal deleted inserted replaced
76:ccd6ae89a7c4 77:1e2bb5bad636
51 51
52 - optional output attributes (available in self and optionally in output dataset) 52 - optional output attributes (available in self and optionally in output dataset)
53 53
54 - 'b' (only set by update) 54 - 'b' (only set by update)
55 - 'W' (only set by update) 55 - 'W' (only set by update)
56 - 'total_squared_error' (set by use and by update) = sum over examples of example_wise_squared_error 56 - 'regularization_term' (only set by update)
57 - 'total_loss' (set by use and by update) = regularizer + total_squared_error
58 - 'XtX' (only set by update) 57 - 'XtX' (only set by update)
59 - 'XtY' (only set by update) 58 - 'XtY' (only set by update)
60 59
61 """ 60 """
62 61
63 def __init__(self,lambda=0.): 62 # definitions specifiques a la regression lineaire:
63
def global_inputs(self):
    """Declare the global (training-set-wide) symbolic inputs of the model.

    NOTE(review): the original wrote ``self.lambda``, which cannot parse
    (``lambda`` is a reserved keyword in Python); renamed to ``lambda_``
    following the PEP 8 trailing-underscore convention.
    """
    # Regularization coefficient (scalar); the symbolic name stays 'lambda'.
    self.lambda_ = as_scalar(0., 'lambda')
    # Parameter matrix: column 0 holds the biases, the rest the weights.
    self.theta = t.matrix('theta')
    self.W = self.theta[:, 1:]
    self.b = self.theta[:, 0]
    # Sufficient statistics accumulated over the training set.
    self.XtX = t.matrix('XtX')
    self.XtY = t.matrix('XtY')
71
def global_outputs(self):
    """Define the global symbolic outputs: regularizer and total loss."""
    # NOTE(review): t.dot(W, W) equals the sum of squared weights only when
    # W is a vector; confirm the intended regularizer for matrix-shaped W.
    self.regularizer = self.lambda_ * t.dot(self.W, self.W)
    # Total loss = regularizer + sum of per-example squared errors.  This
    # only makes sense if the whole training set fits in one minibatch.
    self.loss = self.regularizer + t.sum(self.squared_error)
    self.loss_function = Function([self.W, self.lambda_, self.squared_error],
                                  [self.loss])
76
def initialize(self):
    """Reset the sufficient statistics XtX and XtY to their initial state."""
    n = 1 + self.n_inputs  # extended input has a leading column of ones
    self.XtX.resize((n, n))
    self.XtY.resize((n, self.n_outputs))
    self.XtX.data[:, :] = 0
    self.XtY.data[:, :] = 0
    # Put lambda on the diagonal of the weight rows (skip row 0, the bias,
    # which is conventionally not regularized).  The original wrote
    # ``numpy.diag(self.XtX.data)[1:] = ...`` which assigns into a copy and
    # has no effect; index the diagonal in place instead.
    d = numpy.arange(1, n)
    self.XtX.data[d, d] = self.lambda_.data
83
def updated_variables(self):
    """Symbolic update equations for the sufficient statistics and theta."""
    self.new_XtX = self.XtX + t.dot(self.extended_input.T, self.extended_input)
    self.new_XtY = self.XtY + t.dot(self.extended_input.T, self.target)
    # NOTE(review): this solves with the *old* XtX/XtY; it looks like it
    # should use new_XtX/new_XtY — confirm against the update() logic
    # (not fully visible here) before changing.
    self.new_theta = t.solve(self.XtX, self.XtY)
88
def minibatch_wise_inputs(self):
    """Declare the per-minibatch symbolic inputs."""
    self.input = t.matrix('input')    # n_examples x n_inputs
    self.target = t.matrix('target')  # n_examples x n_outputs
92
def minibatch_wise_outputs(self):
    """Define the per-minibatch symbolic outputs (prediction and error)."""
    # self.input is a (n_examples, n_inputs) minibatch matrix; prepend a 1
    # to each row so the bias is absorbed into the same dot product.
    self.extended_input = t.prepend_one_to_each_row(self.input)
    self.output = t.dot(self.input, self.W.T) + self.b  # (n_examples, n_outputs)
    # One squared-error value per example: (n_examples,) vector.
    self.squared_error = t.sum_within_rows(t.sqr(self.output - self.target))
98
def attribute_names(self):
    """Return the names of the attributes this learner exposes.

    These names are resolved with getattr (see names2attributes), so each
    must be a real attribute name:
    - "lambda" is renamed "lambda_" (``lambda`` is a reserved keyword and
      can never be an attribute accessed with dot syntax);
    - "regularization_term" is replaced by "regularizer", the attribute
      actually set in global_outputs().
    """
    return ["lambda_", "b", "W", "regularizer", "XtX", "XtY"]
101
def default_output_fields(self, input_fields):
    """Return the output field names computed by default.

    Always produces "output"; additionally produces "squared_error" when a
    "target" field is available to compare the prediction against.
    """
    output_fields = ["output"]
    if "target" in input_fields:
        output_fields.append("squared_error")
    return output_fields
107
108 # general machinery built on top of the functions above
109
def minibatchwise_use_functions(self, input_fields, output_fields):
    """Return (caching on first use) a compiled Function that maps the given
    input fields to the given output fields.

    Fixes relative to the original:
    - the cache dict was referenced as a bare ``use_functions_dictionary``
      (undefined name); it is an instance attribute, so use ``self.``;
    - field-name lists are unhashable, so the cache key is built from tuples.
    """
    if not output_fields:
        output_fields = self.default_output_fields(input_fields)
    key = (tuple(input_fields), tuple(output_fields))
    if key not in self.use_functions_dictionary:
        self.use_functions_dictionary[key] = Function(
            self.names2attributes(input_fields),
            self.names2attributes(output_fields))
    return self.use_functions_dictionary[key]
118
def names2attributes(self, names, return_Result=True):
    """Map attribute names to their values on this object.

    Returns the attribute objects themselves when *return_Result* is true,
    otherwise their underlying ``.data``.  Uses the ``getattr`` builtin
    instead of calling ``self.__getattr__`` directly: ``__getattr__`` is
    only a missing-attribute fallback hook and raises AttributeError for
    attributes that exist normally (or when the class does not define it).
    """
    if return_Result:
        return [getattr(self, name) for name in names]
    return [getattr(self, name).data for name in names]
124
def use(self, input_dataset, output_fieldnames=None, test_stats_collector=None, copy_inputs=True):
    """Apply the learner to *input_dataset* and return an output dataset.

    @param input_dataset: dataset providing the fields named by fieldNames()
    @param output_fieldnames: fields to compute; defaults per
        default_output_fields() when None/empty
    @param test_stats_collector: optional stats collector updated on the
        output dataset; its attributes are copied into the result
    @param copy_inputs: when true, the input fields are included in the
        returned dataset (via the ``|`` dataset-join operator)
    @return: a cached dataset holding the computed output fields plus the
        learner's attributes
    """
    # Original called an undefined bare name ``use_functions``; the method
    # defined on this class is minibatchwise_use_functions.
    minibatchwise_use_function = self.minibatchwise_use_functions(
        input_dataset.fieldNames(), output_fieldnames)
    virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
                                                  minibatchwise_use_function,
                                                  True, DataSet.numpy_vstack,
                                                  DataSet.numpy_hstack)
    # Actually force the computation.
    output_dataset = CachedDataSet(virtual_output_dataset, True)
    if copy_inputs:
        output_dataset = input_dataset | output_dataset
    # Copy the learner's attributes into the dataset.  ``.data`` assumes all
    # attributes are Result objects.  The original wrote
    # ``output_dataset.__setattr__(attribute) = value`` — assignment to a
    # call expression, a syntax error; use the setattr builtin.
    for attribute in self.attribute_names():
        setattr(output_dataset, attribute,
                copy.deepcopy(getattr(self, attribute).data))
    if test_stats_collector:
        test_stats_collector.update(output_dataset)
        for attribute in test_stats_collector.attribute_names():
            output_dataset[attribute] = copy.deepcopy(test_stats_collector[attribute])
    return output_dataset
144
145 def update(self,training_set,train_stats_collector=None):
146
147
148 def __init__(self,lambda=0.,max_memory_use=500):
64 """ 149 """
65 @type lambda: float 150 @type lambda: float
66 @param lambda: regularization coefficient 151 @param lambda: regularization coefficient
67 """ 152 """
68 153
105 if output_fieldname=="output": 190 if output_fieldname=="output":
106 use_functions.append(self.output_function) 191 use_functions.append(self.output_function)
107 elif output_fieldname=="squared_error": 192 elif output_fieldname=="squared_error":
108 use_functions.append(lambda self.output_function) 193 use_functions.append(lambda self.output_function)
109 194
195 n_examples = len(input_dataset)
196
197 for minibatch in input_dataset.minibatches(minibatch_size=minibatch_size, allow_odd_last_minibatch=True):
198 use_function(
199