comparison learner.py @ 193:cb6b945acf5a

Complete redesign of learner...
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Thu, 15 May 2008 12:55:21 -0400
parents 2698c0feeb54
children 50a8302addaf
comparison of 192:f62a03c9d485 with 193:cb6b945acf5a
 
 from exceptions import *
-from dataset import AttributesHolder,ApplyFunctionDataSet,DataSet,CachedDataSet
-import theano
-from theano import compile
-from theano import tensor as t
-from misc import Print
-Print = lambda x: lambda y: y  # stub out the debugging Print with an identity wrapper
+from dataset import AttributesHolder  # still needed below: LearnerModel subclasses it
 
-class Learner(AttributesHolder):
+
+class LearningAlgorithm(object):
11 """ 6 """
12 Base class for learning algorithms, provides an interface 7 Base class for learning algorithms, provides an interface
13 that allows various algorithms to be applicable to generic learning 8 that allows various algorithms to be applicable to generic learning
14 algorithms. 9 algorithms. It is only given here to define the expected semantics.
15 10
16 A L{Learner} can be seen as a learning algorithm, a function that when 11 A L{Learner} can be seen as a learning algorithm, a function that when
17 applied to training data returns a learned function (which is an object that 12 applied to training data returns a learned function (which is an object that
18 can be applied to other data and return some output data). 13 can be applied to other data and return some output data).
14
15 There are two main ways of using a learning algorithms, and some learning
16 algorithms only support one of them. The first is the way of the standard
17 machine learning framework, in which a learning algorithm is applied
18 to a training dataset,
19
20 model = learning_algorithm(training_set)
21
22 resulting in a fully trained model that can be applied to another dataset:
23
24 output_dataset = model(input_dataset)
25
26 Note that the application of a dataset has no side-effect on the model.
27 In that example, the training set may for example have 'input' and 'target'
28 fields while the input dataset may have only 'input' (or both 'input' and
29 'target') and the output dataset would contain some default output fields defined
30 by the learning algorithm (e.g. 'output' and 'error').
31
32 The second way of using a learning algorithm is in the online or
33 adaptive framework, where the training data are only revealed in pieces
34 (maybe one example or a batch of example at a time):
35
36 model = learning_algorithm()
37
38 results in a fresh model. The model can be adapted by presenting
39 it with some training data,
40
41 model.update(some_training_data)
42 ...
43 model.update(some_more_training_data)
44 ...
45 model.update(yet_more_training_data)
46
47 and at any point one can use the model to perform some computation:
48
49 output_dataset = model(input_dataset)
50
19 """ 51 """
+
+    def __init__(self): pass
+
+    def __call__(self, training_dataset=None):
+        """
+        Return a LearnerModel, either fresh (if training_dataset is None) or fully trained (otherwise).
+        """
+        raise AbstractFunction()
 
+class LearnerModel(AttributesHolder):
+    """
+    LearnerModel is a base class for models returned by instances of a LearningAlgorithm subclass.
+    It is only given here to define the expected semantics.
+    """
     def __init__(self):
         pass
-
-    def forget(self):
-        """
-        Reset the state of the learner to a blank slate, before seeing
-        training data. The operation may be non-deterministic if the
-        learner has a random number generator that is set to use a
-        different seed each time forget() is called.
-        """
-        raise NotImplementedError
 
     def update(self,training_set,train_stats_collector=None):
         """
         Continue training a learner, with the evidence provided by the given training set.
-        Hence update can be called multiple times. This is particularly useful in the
+        Hence update can be called multiple times. This is the main method used for training in the
         on-line setting or the sequential (Bayesian or not) settings.
-        The result is a function that can be applied on data, with the same
-        semantics as the Learner.use method.
+
+        This function has the side effect that self(data) will subsequently behave
+        differently, according to the adaptation achieved by update().
 
         The user may optionally provide a training L{StatsCollector} that is used to record
         some statistics of the outputs computed during training. It is updated during
         training.
         """
-        return self.use # default behavior is 'non-adaptive', i.e. update does not do anything
+        raise AbstractFunction()
 
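
As a reading aid, a minimal sketch of what implementing update() can look like (illustrative only: CountingModel is a hypothetical name, plain iterables stand in for datasets, and the collector is reduced to any object with an update() method; the real L{StatsCollector} interface is defined elsewhere in pylearn):

    class CountingModel(LearnerModel):
        """Illustrative model: update() accumulates per-target counts."""
        def __init__(self):
            LearnerModel.__init__(self)
            self.counts = {}
        def update(self, training_set, train_stats_collector=None):
            for target in training_set:
                self.counts[target] = self.counts.get(target, 0) + 1
            if train_stats_collector is not None:
                # let the collector record what was just processed
                train_stats_collector.update(training_set)

    m = CountingModel()
    m.update(['a', 'b', 'a'])
    m.update(['a'])
    print m.counts    # -> {'a': 3, 'b': 1}
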
 
-    def __call__(self,training_set,train_stats_collector=None):
-        """
-        Train a learner from scratch using the provided training set,
-        and return the learned function.
-        """
-        self.forget()
-        return self.update(training_set,train_stats_collector)
-
-    def use(self,input_dataset,output_fieldnames=None,
-            test_stats_collector=None,copy_inputs=False,
-            put_stats_in_output_dataset=True,
-            output_attributes=[]):
-        """
-        Once a L{Learner} has been trained by one or more calls to 'update', it can
-        be used with one or more calls to 'use'. The argument is an input L{DataSet} (possibly
+    def __call__(self,input_dataset,output_fieldnames=None,
+                 test_stats_collector=None,copy_inputs=False,
+                 put_stats_in_output_dataset=True,
+                 output_attributes=[]):
+        """
+        A trained or partially trained L{LearnerModel} can be used
+        with one or more calls to it. The argument is an input L{DataSet} (possibly
         containing a single example) and the result is an output L{DataSet} of the same length.
         If output_fieldnames is specified, it may be used to indicate which fields should
         be constructed in the output L{DataSet} (for example ['output','classification_error']).
-        Otherwise, self.defaultOutputFields is called to choose the output fields.
+        Otherwise, some default output fields are produced (possibly depending on the input
+        fields available in the input_dataset).
         Optionally, if copy_inputs, the input fields (of the input_dataset) can be made
         visible in the output L{DataSet} returned by this method.
         Optionally, attributes of the learner can be copied in the output dataset,
         and statistics computed by the stats collector also put in the output dataset.
         Note the distinction between fields (which are example-wise quantities, e.g. 'input')
         and attributes (which are not, e.g. 'regularization_term').
-
-        We provide here a default implementation that does all this using
-        a sub-class defined method: minibatchwiseUseFunction.
-
-        @todo check if some of the learner attributes are actually SPECIFIED
-        as attributes of the input_dataset, and if so use their values instead
-        of the ones in the learner.
-
-        The learner tries to compute in the output dataset the output fields specified.
-        If None is specified then self.defaultOutputFields(input_dataset.fieldNames())
-        is called to determine the output fields.
-
-        Attributes of the learner can also optionally be copied into the output dataset.
-        If output_attributes is None then all of the attributes in self.attributeNames()
-        are copied in the output dataset, but if it is [] (the default), then none are copied.
-        If a test_stats_collector is provided, then its attributes (test_stats_collector.attributeNames())
-        are also copied into the output dataset attributes.
-        """
-        input_fieldnames = input_dataset.fieldNames()
-        if not output_fieldnames:
-            output_fieldnames = self.defaultOutputFields(input_fieldnames)
-
-        minibatchwise_use_function = self.minibatchwiseUseFunction(input_fieldnames,
-                                                                   output_fieldnames,
-                                                                   test_stats_collector)
-        virtual_output_dataset = ApplyFunctionDataSet(input_dataset,
-                                                      minibatchwise_use_function,
-                                                      output_fieldnames,
-                                                      True,DataSet.numpy_vstack,
-                                                      DataSet.numpy_hstack)
-        # actually force the computation
-        output_dataset = CachedDataSet(virtual_output_dataset,True)
-        if copy_inputs:
-            output_dataset = input_dataset | output_dataset
-        # copy the wanted attributes in the dataset
-        if output_attributes is None:
-            output_attributes = self.attributeNames()
-        if output_attributes:
-            assert set(output_attributes) <= set(self.attributeNames())
-            output_dataset.setAttributes(output_attributes,
-                                         self.names2attributes(output_attributes,return_copy=True))
-        if test_stats_collector:
-            test_stats_collector.update(output_dataset)
-            if put_stats_in_output_dataset:
-                output_dataset.setAttributes(test_stats_collector.attributeNames(),
-                                             test_stats_collector.attributes())
-        return output_dataset
-
-    def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector):
-        """
-        Returns a function that can map the given input fields to the given output fields
-        and to the attributes that the stats collector needs for its computation.
-        That function is expected to operate on minibatches.
-        The function returned makes use of self.useInputAttributes() and
-        sets the attributes specified by self.useOutputAttributes().
         """
         raise AbstractFunction()
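
For concreteness, here is what a call under this interface might look like (a sketch only: model and test_set are assumed to already exist, the field names are illustrative, and the output_attributes=None convention is taken from the old use() docstring above):

    result = model(test_set,
                   output_fieldnames=['output', 'classification_error'],
                   copy_inputs=True,                 # keep the input fields visible in result
                   output_attributes=None,           # None: copy all learner attributes
                   put_stats_in_output_dataset=True)
    print result.fieldNames()
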
-
-    def attributeNames(self):
-        """
-        A Learner may have attributes that it wishes to export to other objects. To automate
-        such export, sub-classes should define here the names (list of strings) of these attributes.
-
-        @todo By default, attributeNames should look up all instance dictionary entries whose
-        name does not start with '_'.
-        """
-        return []
-
-    def attributes(self,return_copy=False):
-        """
-        Return a list with the values of the learner's attributes (or optionally, a deep copy).
-        """
-        return self.names2attributes(self.attributeNames(),return_copy)
-
-    def names2attributes(self,names,return_copy=False):
-        """
-        Private helper function that maps a list of attribute names to a list
-        of (optionally copied) values of attributes.
-        """
-        from copy import deepcopy
-        res=[]
-        for name in names:
-            assert name in self.attributeNames()
-            value = self.__getattribute__(name)
-            if return_copy:
-                res.append(deepcopy(value))
-            else:
-                res.append(value)
-        return res
-
-    def useInputAttributes(self):
-        """
-        A subset of self.attributeNames() which are the names of attributes needed by use() in order
-        to do its work.
-        """
-        raise AbstractFunction()
-
-    def useOutputAttributes(self):
-        """
-        A subset of self.attributeNames() which are the names of attributes modified/created by use() in order
-        to do its work.
-        """
-        raise AbstractFunction()
-
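
To illustrate the attribute-export protocol implemented by the three methods above (against the old-revision Learner class that defines them), a small hypothetical subclass:

    class TinyLearner(Learner):
        """Hypothetical subclass exporting two scalar attributes."""
        def __init__(self):
            Learner.__init__(self)
            self.regularization_term = 0.01
            self.n_updates = 0
        def attributeNames(self):
            return ['regularization_term', 'n_updates']

    learner = TinyLearner()
    print learner.attributes()                      # -> [0.01, 0]
    print learner.names2attributes(['n_updates'])   # -> [0]
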
-
-
-class TLearner(Learner):
-    """
-    TLearner is a virtual class of L{Learner}s that attempts to factor
-    out of the definition of a learner the steps that are common to
-    many implementations of learning algorithms, so as to leave only
-    'the equations' to define in particular sub-classes, using Theano.
-
-    In the default implementations of use and update, it is assumed
-    that the 'use' and 'update' methods visit examples in the input
-    dataset sequentially. In the 'use' method only one pass through the
-    dataset is done, whereas the sub-learner may wish to iterate over
-    the examples multiple times. Subclasses where this basic model is
-    not appropriate can simply redefine update or use.
-
-    Sub-classes must provide the following functions and functionalities:
-      - attributeNames(): defines all the names of attributes which can
-        be used as fields or attributes in input/output datasets or in
-        stats collectors. All these attributes are expected to be
-        theano.Result objects (with a .data property and recognized by
-        theano.function for compilation). The sub-class constructor
-        defines the relations between the Theano variables that may be
-        used by 'use' and 'update' or by a stats collector.
-      - defaultOutputFields(input_fields): return a list of default
-        dataset output fields when None are provided by the caller of use.
-
-    The following naming convention is assumed and important. Attributes
-    whose names are listed in attributeNames() can be of any type,
-    but those that can be referenced as input/output dataset fields or
-    as output attributes in 'use' or as input attributes in the stats
-    collector should be associated with a Theano Result variable. If the
-    exported attribute name is <name>, the corresponding Result name
-    (an internal attribute of the TLearner, created in the sub-class
-    constructor) should be _<name>. Typically <name> will be a numpy
-    ndarray and _<name> will be the corresponding Theano Tensor (for
-    symbolic manipulation); see the sketch just after this docstring.
-
-    @todo Push into Learner everything that can go there without
-    depending on Theano.
-    """
-
-    def __init__(self,linker="c|py"):
-        Learner.__init__(self)
-        self.use_functions_dictionary={}  # cache of compiled use functions, keyed by field names
-        self.linker=linker
-
-    def defaultOutputFields(self, input_fields):
-        """
-        Return a default list of output field names (to put in the output dataset).
-        This will be used when None are provided (as output_fields) by the caller of the 'use' method.
-        This may involve looking at the input_fields (names) available in the
-        input_dataset.
-        """
-        raise AbstractFunction()
-
-    def minibatchwiseUseFunction(self, input_fields, output_fields, stats_collector):
-        """
-        Implement minibatchwiseUseFunction by exploiting Theano compilation
-        and the expression graph defined by a sub-class constructor.
-        """
-        if stats_collector:
-            stats_collector_inputs = stats_collector.input2UpdateAttributes()
-            for attribute in stats_collector_inputs:
-                if attribute not in input_fields:
-                    output_fields.append(attribute)
-        key = (tuple(input_fields),tuple(output_fields))
-        if key not in self.use_functions_dictionary:
-            use_input_attributes = self.useInputAttributes()
-            use_output_attributes = self.useOutputAttributes()
-            complete_f = compile.function(self.names2OpResults(input_fields+use_input_attributes),
-                                          self.names2OpResults(output_fields+use_output_attributes),
-                                          self.linker)
-            def f(*input_field_values):
-                input_attribute_values = self.names2attributes(use_input_attributes)
-                results = complete_f(*(list(input_field_values) + input_attribute_values))
-                output_field_values = results[0:len(output_fields)]
-                output_attribute_values = results[len(output_fields):len(results)]
-                if use_output_attributes:
-                    self.setAttributes(use_output_attributes,output_attribute_values)
-                return output_field_values
-            self.use_functions_dictionary[key]=f
-        return self.use_functions_dictionary[key]
-
-    def names2OpResults(self,names):
-        """
-        Private helper function that maps a list of attribute names to a list
-        of corresponding Op Results (with the same name but with a '_' prefix).
-        """
-        return [self.__getattribute__('_'+name) for name in names]
-
-
-class MinibatchUpdatesTLearner(TLearner):
-    """
-    This adds the following functions to a L{TLearner}:
-      - updateStart(), updateEnd(), updateMinibatch(minibatch), isLastEpoch():
-        functions executed at the beginning, the end, in the middle (for
-        each minibatch) of the update method, and at the end of each
-        epoch. This model only works for 'online' or one-shot learning
-        that requires going only once through the training data. For more
-        complicated models, more specialized subclasses of TLearner should
-        be used or a learning-algorithm specific update method should
-        be defined.
-      - a 'parameters' attribute which is a list of parameters
-        (whose names are specified by the user's subclass with the
-        parameterAttributes() method)
-    """
-
-    def __init__(self,linker="c|py"):
-        TLearner.__init__(self,linker)
-        self.update_minibatch_function = compile.function(self.names2OpResults(self.updateMinibatchInputAttributes()+
-                                                                               self.updateMinibatchInputFields()),
-                                                          self.names2OpResults(self.updateMinibatchOutputAttributes()),
-                                                          linker)
-        self.update_end_function = compile.function(self.names2OpResults(self.updateEndInputAttributes()),
-                                                    self.names2OpResults(self.updateEndOutputAttributes()),
-                                                    linker)
-
-    def allocate(self, minibatch):
-        """
-        This function is called at the beginning of each L{updateMinibatch}
-        and should be used to check that all required attributes have been
-        allocated and initialized (usually this function calls forget()
-        when it has to do an initialization).
-        """
-        raise AbstractFunction()
-
-    def updateMinibatchInputFields(self):
-        raise AbstractFunction()
-
-    def updateMinibatchInputAttributes(self):
-        raise AbstractFunction()
-
-    def updateMinibatchOutputAttributes(self):
-        raise AbstractFunction()
-
-    def updateEndInputAttributes(self):
-        raise AbstractFunction()
-
-    def updateEndOutputAttributes(self):
-        raise AbstractFunction()
-
-    def parameterAttributes(self):
-        raise AbstractFunction()
-
-    def updateStart(self,training_set):
-        pass
-
-    def updateEnd(self):
-        self.setAttributes(self.updateEndOutputAttributes(),
-                           self.update_end_function(*self.names2attributes(self.updateEndInputAttributes())))
-        self.parameters = self.names2attributes(self.parameterAttributes())
-
-    def updateMinibatch(self,minibatch):
-        # make sure all required fields are allocated and initialized
-        self.allocate(minibatch)
-        input_attributes = self.names2attributes(self.updateMinibatchInputAttributes())
-        input_fields = minibatch(*self.updateMinibatchInputFields())
-        self.setAttributes(self.updateMinibatchOutputAttributes(),
-                           # concatenate the attribute values and field values and then apply update fn
-                           self.update_minibatch_function(*(input_attributes+input_fields)))
-
-    def isLastEpoch(self):
-        """
-        This method is called at the end of each epoch (cycling over the training set).
-        It returns a boolean to indicate if this is the last epoch.
-        By default just do one epoch.
-        """
-        return True
-
-    def update(self,training_set,train_stats_collector=None):
-        """
-        @todo check if some of the learner attributes are actually SPECIFIED
-        as attributes of the training_set.
-        """
-        self.updateStart(training_set)
-        stop=False
-        if hasattr(self,'_minibatch_size') and self._minibatch_size:
-            minibatch_size=self._minibatch_size
-        else:
-            minibatch_size=min(100,len(training_set))
-        while not stop:
-            if train_stats_collector:
-                train_stats_collector.forget() # restart stats collection at the beginning of each epoch
-            for minibatch in training_set.minibatches(minibatch_size=minibatch_size):
-                self.updateMinibatch(minibatch)
-                if train_stats_collector:
-                    minibatch_set = minibatch.examples()
-                    minibatch_set.setAttributes(self.attributeNames(),self.attributes())
-                    train_stats_collector.update(minibatch_set)
-            stop = self.isLastEpoch()
-        self.updateEnd()
-        return self.use
-
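
The control flow of update() above, reduced to a schematic (a sketch only: list slicing stands in for DataSet.minibatches, and stats collection is omitted):

    def schematic_update(model, training_set, minibatch_size=100):
        # same hook ordering as MinibatchUpdatesTLearner.update()
        model.updateStart(training_set)
        while True:
            for start in range(0, len(training_set), minibatch_size):
                model.updateMinibatch(training_set[start:start + minibatch_size])
            if model.isLastEpoch():
                break
        model.updateEnd()
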
-class OnlineGradientTLearner(MinibatchUpdatesTLearner):
-    """
-    Specialization of L{MinibatchUpdatesTLearner} in which the minibatch updates
-    are obtained by performing an online (minibatch-based) gradient step.
-
-    Sub-classes must define the following:
-      - self._learning_rate (may be changed by the sub-class between epochs or minibatches)
-      - self.lossAttribute() = name of the loss attribute
-    """
-    def __init__(self,truly_online=False,linker="c|py"):
-        """
-        If truly_online then only one pass is made through the training set passed to update().
-
-        SUBCLASSES SHOULD CALL THIS CONSTRUCTOR ONLY AFTER HAVING DEFINED ALL THEIR THEANO FORMULAS
-        """
-        self.truly_online=truly_online
-
-        # create the formulas for the gradient update
-        old_params = [self.__getattribute__("_"+name) for name in self.parameterAttributes()]
-        new_params_names = ["_new_"+name for name in self.parameterAttributes()]
-        loss = self.__getattribute__("_"+self.lossAttribute())
-        self.setAttributes(new_params_names,
-                           [t.add_inplace(param,-self._learning_rate*Print("grad("+param.name+")")(t.grad(loss,param)))
-                            for param in old_params])
-        MinibatchUpdatesTLearner.__init__(self,linker)
-
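
Numerically, the symbolic update built in this constructor amounts to an in-place stochastic gradient step. A plain numpy sketch (the gradient is supplied by the caller here, whereas the class derives it symbolically with t.grad):

    import numpy

    def sgd_step(params, grads, learning_rate):
        # in-place update, the numeric analogue of t.add_inplace(param, -lr*grad)
        for param, grad in zip(params, grads):
            param -= learning_rate * grad
        return params

    w = numpy.array([1.0, -2.0])
    print sgd_step([w], [numpy.array([0.5, 0.5])], 0.1)[0]   # -> [ 0.95 -2.05]
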
-
-    def namesOfAttributesToComputeOutputs(self,output_names):
-        """
-        The output_names are attribute names (not the corresponding Result names, which have a leading _).
-        Return the corresponding input attribute names.
-        """
-        all_inputs = t.gof.graph.inputs(self.names2OpResults(output_names))
-        # keep only non-constant inputs, and strip the leading '_' from each Result name
-        names = []
-        for r in all_inputs:
-            if isinstance(r,theano.Result) and \
-               not isinstance(r,theano.Constant) and not isinstance(r,theano.Value):
-                name = r.name
-                if name.startswith('_'):
-                    name = name[1:]  # map Result name '_<x>' back to attribute name '<x>'
-                names.append(name)
-        return names
-
-    def isLastEpoch(self):
-        return self.truly_online
-
-    def updateMinibatchInputAttributes(self):
-        return self.parameterAttributes()+["learning_rate"]
-
-    def updateMinibatchOutputAttributes(self):
-        return ["new_"+name for name in self.parameterAttributes()]
-
-    def updateEndInputAttributes(self):
-        return self.namesOfAttributesToComputeOutputs(self.updateEndOutputAttributes())
-
-    def useInputAttributes(self):
-        return self.parameterAttributes()
-
-    def useOutputAttributes(self):
-        return []