comparison learner.py @ 207:c5a7105fa40b
trying to merge
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Fri, 16 May 2008 16:38:15 -0400 |
parents | cb6b945acf5a |
children | 50a8302addaf |
206:f2ddc795ec49 | 207:c5a7105fa40b |
---|---|
1 | 1 |
2 from exceptions import * | 2 from exceptions import * |
3 from dataset import AttributesHolder,ApplyFunctionDataSet,DataSet,CachedDataSet | |
4 import theano | |
5 from theano import compile | |
6 from theano import tensor as t | |
7 from misc import Print | |
8 Print = lambda x: lambda y: y # disable debug printing: make Print a no-op | |
9 | 3 |
10 class Learner(AttributesHolder): | 4 |
5 class LearningAlgorithm(object): | |
11 """ | 6 """ |
12 Base class for learning algorithms, provides an interface | 7 Base class for learning algorithms, provides an interface |
13 that allows various algorithms to be applicable to generic learning | 8 that allows various algorithms to be applicable to generic learning |
14 algorithms. | 9 algorithms. It is only given here to define the expected semantics. |
15 | 10 |
16 A L{Learner} can be seen as a learning algorithm, a function that when | 11 A L{Learner} can be seen as a learning algorithm, a function that when |
17 applied to training data returns a learned function (which is an object that | 12 applied to training data returns a learned function (which is an object that |
18 can be applied to other data and return some output data). | 13 can be applied to other data and return some output data). |
14 | |
15 There are two main ways of using a learning algorithm, and some learning | |
16 algorithms only support one of them. The first is the way of the standard | |
17 machine learning framework, in which a learning algorithm is applied | |
18 to a training dataset, | |
19 | |
20 model = learning_algorithm(training_set) | |
21 | |
22 resulting in a fully trained model that can be applied to another dataset: | |
23 | |
24 output_dataset = model(input_dataset) | |
25 | |
26 Note that applying the model to a dataset has no side-effect on the model. | |
27 In that example, the training set may for example have 'input' and 'target' | |
28 fields while the input dataset may have only 'input' (or both 'input' and | |
29 'target') and the output dataset would contain some default output fields defined | |
30 by the learning algorithm (e.g. 'output' and 'error'). | |
31 | |
32 The second way of using a learning algorithm is in the online or | |
33 adaptive framework, where the training data are only revealed in pieces | |
34 (maybe one example or a batch of examples at a time): | |
35 | |
36 model = learning_algorithm() | |
37 | |
38 results in a fresh model. The model can be adapted by presenting | |
39 it with some training data, | |
40 | |
41 model.update(some_training_data) | |
42 ... | |
43 model.update(some_more_training_data) | |
44 ... | |
45 model.update(yet_more_training_data) | |
46 | |
47 and at any point one can use the model to perform some computation: | |
48 | |
49 output_dataset = model(input_dataset) | |
50 | |
19 """ | 51 """ |
52 | |
53 def __init__(self): pass | |
54 | |
55 def __call__(self, training_dataset=None): | |
56 """ | |
57 Return a LearnerModel, either fresh (if training_dataset is None) or fully trained (otherwise). | |
58 """ | |
59 raise AbstractFunction() | |
20 | 60 |
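A minimal sketch of the two usage patterns described in the docstring above. The MeanSubtractor and MeanSubtractorModel names are hypothetical, and plain Python lists stand in for DataSet objects; this illustrates the intended semantics rather than code from this module.

class MeanSubtractor(object):
    """Toy learning algorithm: the learned model subtracts the training mean."""
    def __call__(self, training_set=None):
        model = MeanSubtractorModel()
        if training_set is not None:
            model.update(training_set)   # standard framework: fully trained model
        return model                     # online framework: fresh, untrained model

class MeanSubtractorModel(object):
    def __init__(self):
        self.total, self.count = 0.0, 0
    def update(self, some_training_data):
        # adapt the model with a new piece of training data
        for x in some_training_data:
            self.total += x
            self.count += 1
    def __call__(self, input_dataset):
        # applying the model to data has no side-effect on the model
        mean = self.total / max(self.count, 1)
        return [x - mean for x in input_dataset]

# standard machine learning framework:
model = MeanSubtractor()([1.0, 2.0, 3.0])
print(model([4.0]))        # [2.0]

# online / adaptive framework:
model = MeanSubtractor()()
model.update([1.0, 2.0])
model.update([3.0])
print(model([4.0]))        # [2.0]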
61 class LearnerModel(AttributesHolder): | |
62 """ | |
63 LearnerModel is a base class for models returned by instances of a LearningAlgorithm subclass. | |
64 It is only given here to define the expected semantics. | |
65 """ | |
21 def __init__(self): | 66 def __init__(self): |
22 pass | 67 pass |
23 | |
24 def forget(self): | |
25 """ | |
26 Reset the state of the learner to a blank slate, before seeing | |
27 training data. The operation may be non-deterministic if the | |
28 learner has a random number generator that is set to use a | |
29 different seed each time forget() is called. | |
30 """ | |
31 raise NotImplementedError | |
32 | 68 |
33 def update(self,training_set,train_stats_collector=None): | 69 def update(self,training_set,train_stats_collector=None): |
34 """ | 70 """ |
35 Continue training a learner, with the evidence provided by the given training set. | 71 Continue training a learner, with the evidence provided by the given training set. |
36 Hence update can be called multiple times. This is particularly useful in the | 72 Hence update can be called multiple times. This is the main method used for training in the |
37 on-line setting or the sequential (Bayesian or not) settings. | 73 on-line setting or the sequential (Bayesian or not) settings. |
38 The result is a function that can be applied on data, with the same | 74 |
39 semantics as the Learner.use method. | 75 This function has the side effect that self(data) will behave differently, |
76 according to the adaptation achieved by update(). | |
40 | 77 |
41 The user may optionally provide a training L{StatsCollector} that is used to record | 78 The user may optionally provide a training L{StatsCollector} that is used to record |
42 some statistics of the outputs computed during training. It is update(d) during | 79 some statistics of the outputs computed during training. It is update(d) during |
43 training. | 80 training. |
44 """ | 81 """ |
45 return self.use # default behavior is 'non-adaptive', i.e. update does not do anything | 82 raise AbstractFunction() |
46 | 83 |
47 | 84 def __call__(self,input_dataset,output_fieldnames=None, |
48 def __call__(self,training_set,train_stats_collector=None): | 85 test_stats_collector=None,copy_inputs=False, |
86 put_stats_in_output_dataset=True, | |
87 output_attributes=[]): | |
49 """ | 88 """ |
50 Train a learner from scratch using the provided training set, | 89 A trained or partially trained L{Model} can be used with |
51 and return the learned function. | 90 one or more calls to it. The argument is an input L{DataSet} (possibly |
52 """ | |
53 self.forget() | |
54 return self.update(training_set,train_stats_collector) | |
55 | |
56 def use(self,input_dataset,output_fieldnames=None, | |
57 test_stats_collector=None,copy_inputs=False, | |
58 put_stats_in_output_dataset=True, | |
59 output_attributes=[]): | |
60 """ | |
61 Once a L{Learner} has been trained by one or more call to 'update', it can | |
62 be used with one or more calls to 'use'. The argument is an input L{DataSet} (possibly | |
63 containing a single example) and the result is an output L{DataSet} of the same length. | 91 containing a single example) and the result is an output L{DataSet} of the same length. |
64 If output_fieldnames is specified, it may be used to indicate which fields should | 92 If output_fieldnames is specified, it may be used to indicate which fields should |
65 be constructed in the output L{DataSet} (for example ['output','classification_error']). | 93 be constructed in the output L{DataSet} (for example ['output','classification_error']). |
66 Otherwise, self.defaultOutputFields is called to choose the output fields. | 94 Otherwise, some default output fields are produced (possibly depending on the input |
95 fields available in the input_dataset). | |
67 Optionally, if copy_inputs, the input fields (of the input_dataset) can be made | 96 Optionally, if copy_inputs, the input fields (of the input_dataset) can be made |
68 visible in the output L{DataSet} returned by this method. | 97 visible in the output L{DataSet} returned by this method. |
69 Optionally, attributes of the learner can be copied in the output dataset, | 98 Optionally, attributes of the learner can be copied in the output dataset, |
70 and statistics computed by the stats collector also put in the output dataset. | 99 and statistics computed by the stats collector also put in the output dataset. |
71 Note the distinction between fields (which are example-wise quantities, e.g. 'input') | 100 Note the distinction between fields (which are example-wise quantities, e.g. 'input') |
72 and attributes (which are not, e.g. 'regularization_term'). | 101 and attributes (which are not, e.g. 'regularization_term'). |
73 | |
74 We provide here a default implementation that does all this using | |
75 a sub-class defined method: minibatchwiseUseFunction. | |
76 | |
77 @todo check if some of the learner attributes are actually SPECIFIED | |
78 as attributes of the input_dataset, and if so use their values instead | |
79 of the ones in the learner. | |
80 | |
81 The learner tries to compute in the output dataset the output fields specified. | |
82 If None is specified then self.defaultOutputFields(input_dataset.fieldNames()) | |
83 is called to determine the output fields. | |
84 | |
85 Attributes of the learner can also optionally be copied into the output dataset. | |
86 If output_attributes is None then all of the attributes in self.attributeNames() | |
87 are copied in the output dataset, but if it is [] (the default), then none are copied. | |
88 If a test_stats_collector is provided, then its attributes (test_stats_collector.AttributeNames()) | |
89 are also copied into the output dataset attributes. | |
90 """ | |
91 input_fieldnames = input_dataset.fieldNames() | |
92 if not output_fieldnames: | |
93 output_fieldnames = self.defaultOutputFields(input_fieldnames) | |
94 | |
95 minibatchwise_use_function = self.minibatchwiseUseFunction(input_fieldnames, | |
96 output_fieldnames, | |
97 test_stats_collector) | |
98 virtual_output_dataset = ApplyFunctionDataSet(input_dataset, | |
99 minibatchwise_use_function, | |
100 output_fieldnames, | |
101 True,DataSet.numpy_vstack, | |
102 DataSet.numpy_hstack) | |
103 # actually force the computation | |
104 output_dataset = CachedDataSet(virtual_output_dataset,True) | |
105 if copy_inputs: | |
106 output_dataset = input_dataset | output_dataset | |
107 # copy the wanted attributes in the dataset | |
108 if output_attributes is None: | |
109 output_attributes = self.attributeNames() | |
110 if output_attributes: | |
111 assert set(output_attributes) <= set(self.attributeNames()) | |
112 output_dataset.setAttributes(output_attributes, | |
113 self.names2attributes(output_attributes,return_copy=True)) | |
114 if test_stats_collector: | |
115 test_stats_collector.update(output_dataset) | |
116 if put_stats_in_output_dataset: | |
117 output_dataset.setAttributes(test_stats_collector.attributeNames(), | |
118 test_stats_collector.attributes()) | |
119 return output_dataset | |
120 | |
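To make the fields/attributes distinction described in the docstring above concrete, here is a hypothetical sketch in which dicts of numpy arrays stand in for DataSet objects (none of these names come from this module):

import numpy

# fields: example-wise quantities, one row per example
input_dataset = {'input': numpy.random.randn(4, 3),
                 'target': numpy.array([0., 1., 0., 1.])}

# what a use()-style call might build, for output_fieldnames=['output','error']
output = input_dataset['input'].sum(axis=1)            # one value per example
error = (output - input_dataset['target']) ** 2        # one value per example
output_dataset = {'output': output, 'error': error}

# attributes: quantities attached to the whole dataset rather than per example,
# e.g. a regularization term copied from the learner or stats-collector results
output_dataset_attributes = {'regularization_term': 0.01,
                             'mean_error': error.mean()}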
121 def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector): | |
122 """ | |
123 Returns a function that can map the given input fields to the given output fields | |
124 and to the attributes that the stats collector needs for its computation. | |
125 That function is expected to operate on minibatches. | |
126 The function returned makes use of the attributes named by self.useInputAttributes() and | |
127 sets the attributes specified by self.useOutputAttributes(). | |
128 """ | 102 """ |
129 raise AbstractFunction() | 103 raise AbstractFunction() |
130 | |
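For illustration, a minibatch-wise use-function of the kind minibatchwiseUseFunction is expected to return might look like the sketch below (hypothetical field choices 'input'/'target' in, 'output'/'classification_error' out; numpy arrays as minibatch values):

import numpy

def toy_minibatchwise_use_function(input, target):
    # one argument per input field ('input', 'target'),
    # one returned value per output field ('output', 'classification_error')
    output = (input.sum(axis=1) > 0).astype('float64')
    classification_error = (output != target).astype('float64')
    return [output, classification_error]

minibatch_input = numpy.random.randn(8, 5)
minibatch_target = numpy.ones(8)
out, err = toy_minibatchwise_use_function(minibatch_input, minibatch_target)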
131 def attributeNames(self): | |
132 """ | |
133 A Learner may have attributes that it wishes to export to other objects. To automate | |
134 such export, sub-classes should define here the names (list of strings) of these attributes. | |
135 | |
136 @todo By default, attributeNames looks for all dictionary entries whose name does not start with _. | |
137 """ | |
138 return [] | |
139 | |
140 def attributes(self,return_copy=False): | |
141 """ | |
142 Return a list with the values of the learner's attributes (or optionally, a deep copy). | |
143 """ | |
144 return self.names2attributes(self.attributeNames(),return_copy) | |
145 | |
146 def names2attributes(self,names,return_copy=False): | |
147 """ | |
148 Private helper function that maps a list of attribute names to a list | |
149 of attribute values (deep copies of them if return_copy is True). | |
150 """ | |
151 import copy | |
152 values = [self.__getattribute__(name) for name in names] | |
153 if return_copy: | |
154 return [copy.deepcopy(value) for value in values] | |
155 return values | |
156 | |
157 def useInputAttributes(self): | |
158 """ | |
159 A subset of self.attributeNames() which are the names of attributes needed by use() in order | |
160 to do its work. | |
161 """ | |
162 raise AbstractFunction() | |
163 | |
164 def useOutputAttributes(self): | |
165 """ | |
166 A subset of self.attributeNames() which are the names of attributes modified/created by use() in order | |
167 to do its work. | |
168 """ | |
169 raise AbstractFunction() | |
170 | |
171 | |
172 class TLearner(Learner): | |
173 """ | |
174 TLearner is a virtual class of L{Learner}s that attempts to factor | |
175 out of the definition of a learner the steps that are common to | |
176 many implementations of learning algorithms, so as to leave only | |
177 'the equations' to be defined in particular sub-classes, using Theano. | |
178 | |
179 In the default implementations of use and update, it is assumed | |
180 that the 'use' and 'update' methods visit examples in the input | |
181 dataset sequentially. In the 'use' method only one pass through the | |
182 dataset is done, whereas the sub-learner may wish to iterate over | |
183 the examples multiple times. Subclasses where this basic model is | |
184 not appropriate can simply redefine update or use. | |
185 | |
186 Sub-classes must provide the following functions and functionalities: | |
187 - attributeNames(): defines all the names of attributes which can | |
188 be used as fields or | |
189 attributes in input/output datasets or in | |
190 stats collectors. All these attributes | |
191 are expected to be theano.Result objects | |
192 (with a .data property and recognized by | |
193 theano.function for compilation). The sub-class | |
194 constructor defines the relations between the | |
195 Theano variables that may be used by 'use' | |
196 and 'update' or by a stats collector. | |
197 - defaultOutputFields(input_fields): return a list of default | |
198 dataset output fields when | |
199 None are provided by the caller of use. | |
200 The following naming convention is assumed and important. Attributes | |
201 whose names are listed in attributeNames() can be of any type, | |
202 but those that can be referenced as input/output dataset fields or | |
203 as output attributes in 'use' or as input attributes in the stats | |
204 collector should be associated with a Theano Result variable. If the | |
205 exported attribute name is <name>, the corresponding Result name | |
206 (an internal attribute of the TLearner, created in the sub-class | |
207 constructor) should be _<name>. Typically <name> will be a numpy | |
208 ndarray and _<name> will be the corresponding Theano Tensor (for | |
209 symbolic manipulation). | |
210 | |
211 @todo push down into Learner all the machinery that can be moved there | |
212 without depending on Theano | |
213 """ | |
214 | |
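As a concrete illustration of the <name>/_<name> naming convention described above, a hypothetical sub-class constructor could look like this sketch (no real Theano calls are made; None stands in for the Theano Result):

import numpy

class ToyTLearnerSubclass(object):       # stands in for a TLearner sub-class
    def __init__(self):
        # exported attribute 'b': a numpy ndarray holding the current value
        self.b = numpy.zeros(3)
        # the corresponding Theano Result would be created here and stored as '_b';
        # a None placeholder is used in this sketch instead of a real Theano variable
        self._b = None

    def names2OpResults(self, names):
        # same mapping as TLearner.names2OpResults: '<name>' -> '_<name>'
        return [getattr(self, '_' + name) for name in names]

    def attributeNames(self):
        return ['b']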
215 def __init__(self,linker="c|py"): | |
216 Learner.__init__(self) | |
217 self.use_functions_dictionary={} | |
218 self.linker=linker | |
219 | |
220 def defaultOutputFields(self, input_fields): | |
221 """ | |
222 Return a default list of output field names (to put in the output dataset). | |
223 This will be used when None are provided (as output_fields) by the caller of the 'use' method. | |
224 This may involve looking at the input_fields (names) available in the | |
225 input_dataset. | |
226 """ | |
227 raise AbstractFunction() | |
228 | |
229 def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector): | |
230 """ | |
231 Implement minibatchwiseUseFunction by exploiting Theano compilation | |
232 and the expression graph defined by a sub-class constructor. | |
233 """ | |
234 if stats_collector: | |
235 stats_collector_inputs = stats_collector.input2UpdateAttributes() | |
236 for attribute in stats_collector_inputs: | |
237 if attribute not in input_fields: | |
238 output_fields.append(attribute) | |
239 key = (tuple(input_fields),tuple(output_fields)) | |
240 if key not in self.use_functions_dictionary: | |
241 use_input_attributes = self.useInputAttributes() | |
242 use_output_attributes = self.useOutputAttributes() | |
243 complete_f = compile.function(self.names2OpResults(input_fields+use_input_attributes), | |
244 self.names2OpResults(output_fields+use_output_attributes), | |
245 self.linker) | |
246 def f(*input_field_values): | |
247 input_attribute_values = self.names2attributes(use_input_attributes) | |
248 results = complete_f(*(list(input_field_values) + input_attribute_values)) | |
249 output_field_values = results[0:len(output_fields)] | |
250 output_attribute_values = results[len(output_fields):len(results)] | |
251 if use_output_attributes: | |
252 self.setAttributes(use_output_attributes,output_attribute_values) | |
253 return output_field_values | |
254 self.use_functions_dictionary[key]=f | |
255 return self.use_functions_dictionary[key] | |
256 | |
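The key-based caching used above amounts to memoizing one compiled function per distinct (input_fields, output_fields) combination; a hypothetical Theano-free sketch of that pattern:

use_functions_dictionary = {}

def compile_counting(input_fields, output_fields):
    # stand-in for compile.function: just counts how often compilation happens
    compile_counting.calls += 1
    return lambda *field_values: field_values
compile_counting.calls = 0

def minibatchwise_use_function(input_fields, output_fields):
    # compile at most once per distinct (input_fields, output_fields) pair
    key = (tuple(input_fields), tuple(output_fields))
    if key not in use_functions_dictionary:
        use_functions_dictionary[key] = compile_counting(input_fields, output_fields)
    return use_functions_dictionary[key]

minibatchwise_use_function(['input'], ['output'])
minibatchwise_use_function(['input'], ['output'])
print(compile_counting.calls)    # 1: the second call reused the cached function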
257 def names2OpResults(self,names): | |
258 """ | |
259 Private helper function that maps a list of attribute names to a list | |
260 of corresponding Op Results (with the same name but with a '_' prefix). | |
261 """ | |
262 return [self.__getattribute__('_'+name) for name in names] | |
263 | |
264 | |
265 class MinibatchUpdatesTLearner(TLearner): | |
266 """ | |
267 This adds the following functions to a L{TLearner}: | |
268 - updateStart(), updateEnd(), updateMinibatch(minibatch), isLastEpoch(): | |
269 functions executed at the beginning, the end, in the middle (for | |
270 each minibatch) of the update method, and at the end of each | |
271 epoch. This model only works for 'online' or one-shot learning | |
272 that requires going only once through the training data. For more | |
273 complicated models, more specialized subclasses of TLearner should | |
274 be used or a learning-algorithm specific update method should | |
275 be defined. | |
276 | |
277 - a 'parameters' attribute which is a list of parameters | |
278 (whose names are specified by the user's subclass with the | |
279 parameterAttributes() method) | |
280 | |
281 """ | |
282 | |
283 def __init__(self,linker="c|py"): | |
284 TLearner.__init__(self,linker) | |
285 self.update_minibatch_function = compile.function(self.names2OpResults(self.updateMinibatchInputAttributes()+ | |
286 self.updateMinibatchInputFields()), | |
287 self.names2OpResults(self.updateMinibatchOutputAttributes()), | |
288 linker) | |
289 self.update_end_function = compile.function(self.names2OpResults(self.updateEndInputAttributes()), | |
290 self.names2OpResults(self.updateEndOutputAttributes()), | |
291 linker) | |
292 | |
293 def allocate(self, minibatch): | |
294 """ | |
295 This function is called at the beginning of each L{updateMinibatch} | |
296 and should be used to check that all required attributes have been | |
297 allocated and initialized (usually this function calls forget() | |
298 when it has to do an initialization). | |
299 """ | |
300 raise AbstractFunction() | |
301 | |
302 def updateMinibatchInputFields(self): | |
303 raise AbstractFunction() | |
304 | |
305 def updateMinibatchInputAttributes(self): | |
306 raise AbstractFunction() | |
307 | |
308 def updateMinibatchOutputAttributes(self): | |
309 raise AbstractFunction() | |
310 | |
311 def updateEndInputAttributes(self): | |
312 raise AbstractFunction() | |
313 | |
314 def updateEndOutputAttributes(self): | |
315 raise AbstractFunction() | |
316 | |
317 def parameterAttributes(self): | |
318 raise AbstractFunction() | |
319 | |
320 def updateStart(self,training_set): | |
321 pass | |
322 | |
323 def updateEnd(self): | |
324 self.setAttributes(self.updateEndOutputAttributes(), | |
325 self.update_end_function(*self.names2attributes(self.updateEndInputAttributes()))) | |
326 self.parameters = self.names2attributes(self.parameterAttributes()) | |
327 | |
328 def updateMinibatch(self,minibatch): | |
329 # make sure all required fields are allocated and initialized | |
330 self.allocate(minibatch) | |
331 input_attributes = self.names2attributes(self.updateMinibatchInputAttributes()) | |
332 input_fields = minibatch(*self.updateMinibatchInputFields()) | |
333 self.setAttributes(self.updateMinibatchOutputAttributes(), | |
334 # concatenate the attribute values and field values and then apply update fn | |
335 self.update_minibatch_function(*(input_attributes+input_fields))) | |
336 | |
337 def isLastEpoch(self): | |
338 """ | |
339 This method is called at the end of each epoch (cycling over the training set). | |
340 It returns a boolean to indicate if this is the last epoch. | |
341 By default just do one epoch. | |
342 """ | |
343 return True | |
344 | |
345 def update(self,training_set,train_stats_collector=None): | |
346 """ | |
347 @todo check if some of the learner attributes are actually SPECIFIED | |
348 as attributes of the training_set. | |
349 """ | |
350 self.updateStart(training_set) | |
351 stop=False | |
352 if hasattr(self,'_minibatch_size') and self._minibatch_size: | |
353 minibatch_size=self._minibatch_size | |
354 else: | |
355 minibatch_size=min(100,len(training_set)) | |
356 while not stop: | |
357 if train_stats_collector: | |
358 train_stats_collector.forget() # restart stats collection at the beginning of each epoch | |
359 for minibatch in training_set.minibatches(minibatch_size=minibatch_size): | |
360 self.updateMinibatch(minibatch) | |
361 if train_stats_collector: | |
362 minibatch_set = minibatch.examples() | |
363 minibatch_set.setAttributes(self.attributeNames(),self.attributes()) | |
364 train_stats_collector.update(minibatch_set) | |
365 stop = self.isLastEpoch() | |
366 self.updateEnd() | |
367 return self.use | |
368 | |
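A simplified sketch of the hook sequence that update() above drives (updateStart, then updateMinibatch for each minibatch, isLastEpoch at the end of each epoch, finally updateEnd), using a hypothetical toy averaging learner and plain Python lists in place of DataSet minibatches:

class ToyMinibatchLearner(object):
    def __init__(self):
        self.total, self.n, self.epochs = 0.0, 0, 0
    def updateStart(self, training_set):
        pass                                # e.g. allocate / reset state here
    def updateMinibatch(self, minibatch):
        self.total += sum(minibatch)
        self.n += len(minibatch)
    def isLastEpoch(self):
        self.epochs += 1
        return self.epochs >= 2             # e.g. stop after two passes over the data
    def updateEnd(self):
        self.mean = self.total / self.n     # e.g. finalize parameters here
    def update(self, training_set, minibatch_size=2):
        # same control flow as MinibatchUpdatesTLearner.update above
        self.updateStart(training_set)
        stop = False
        while not stop:
            for i in range(0, len(training_set), minibatch_size):
                self.updateMinibatch(training_set[i:i + minibatch_size])
            stop = self.isLastEpoch()
        self.updateEnd()

learner = ToyMinibatchLearner()
learner.update([1.0, 2.0, 3.0, 4.0])
print(learner.mean)                         # 2.5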
369 class OnlineGradientTLearner(MinibatchUpdatesTLearner): | |
370 """ | |
371 Specialization of L{MinibatchUpdatesTLearner} in which the minibatch updates | |
372 are obtained by performing an online (minibatch-based) gradient step. | |
373 | |
374 Sub-classes must define the following: | |
375 - self._learning_rate (may be changed by the sub-class between epochs or minibatches) | |
376 - self.lossAttribute() = name of the loss field | |
377 """ | |
378 def __init__(self,truly_online=False,linker="c|py"): | |
379 """ | |
380 If truly_online then only one pass is made through the training set passed to update(). | |
381 | |
382 SUBCLASSES SHOULD CALL THIS CONSTRUCTOR ONLY AFTER HAVING DEFINED ALL THEIR THEANO FORMULAS | |
383 """ | |
384 self.truly_online=truly_online | |
385 | |
386 # create the formulas for the gradient update | |
387 old_params = [self.__getattribute__("_"+name) for name in self.parameterAttributes()] | |
388 new_params_names = ["_new_"+name for name in self.parameterAttributes()] | |
389 loss = self.__getattribute__("_"+self.lossAttribute()) | |
390 self.setAttributes(new_params_names, | |
391 [t.add_inplace(param,-self._learning_rate*Print("grad("+param.name+")")(t.grad(loss,param))) | |
392 for param in old_params]) | |
393 MinibatchUpdatesTLearner.__init__(self,linker) | |
394 | |
395 | |
396 def namesOfAttributesToComputeOutputs(self,output_names): | |
397 """ | |
398 The output_names are attribute names (not the corresponding Result names, which have leading _). | |
399 Return the corresponding input names | |
400 """ | |
401 all_inputs = t.gof.graph.inputs(self.names2OpResults(output_names)) | |
402 # remove constants and leading '_' in name | |
403 | |
404 return [r.name for r in all_inputs if isinstance(r,theano.Result) and \ | |
405 not isinstance(r,theano.Constant) and not isinstance(r,theano.Value)] | |
406 #inputs = [] | |
407 #for r in all_inputs: | |
408 # if isinstance(r,theano.Result) and \ | |
409 # not isinstance(r,theano.Constant) and not isinstance(r,theano.Value): | |
410 # inputs.append(r.name) | |
411 #return inputs | |
412 | |
413 def isLastEpoch(self): | |
414 return self.truly_online | |
415 | |
416 def updateMinibatchInputAttributes(self): | |
417 return self.parameterAttributes()+["learning_rate"] | |
418 | |
419 def updateMinibatchOutputAttributes(self): | |
420 return ["new_"+name for name in self.parameterAttributes()] | |
421 | |
422 def updateEndInputAttributes(self): | |
423 return self.namesOfAttributesToComputeOutputs(self.updateEndOutputAttributes()) | |
424 | |
425 def useInputAttributes(self): | |
426 return self.parameterAttributes() | |
427 | |
428 def useOutputAttributes(self): | |
429 return [] | |
430 |
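The constructor above builds, for each parameter, the symbolic update new_<name> = <name> - learning_rate * d(loss)/d(<name>). A plain-numpy sketch of that minibatch gradient step, with a hypothetical squared-error loss on a linear model (not part of this module):

import numpy

def minibatch_gradient_step(w, x, y, learning_rate):
    # loss = 0.5 * ||x.dot(w) - y||^2 over the minibatch;
    # gradient of the loss with respect to w:
    grad_w = x.T.dot(x.dot(w) - y)
    # same update as t.add_inplace(param, -learning_rate * t.grad(loss, param))
    w -= learning_rate * grad_w
    return w

w = numpy.zeros(3)
x = numpy.random.randn(16, 3)               # a minibatch of 16 examples
y = x.dot(numpy.array([1.0, -2.0, 0.5]))    # targets from a known linear map
for _ in range(200):
    w = minibatch_gradient_step(w, x, y, 0.01)
print(w)                                    # approaches [1.0, -2.0, 0.5]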