comparison learner.py @ 132:f6505ec32dc3

Updated documentation slightly
author Joseph Turian <turian@gmail.com>
date Thu, 08 May 2008 00:54:14 -0400
parents 57e6492644ec
children b4657441dd65
@@ -2,15 +2,16 @@
 from dataset import AttributesHolder,AbstractFunction
 import compile
 from theano import tensor as t
 
 class Learner(AttributesHolder):
-    """Base class for learning algorithms, provides an interface
+    """
+    Base class for learning algorithms, provides an interface
     that allows various algorithms to be applicable to generic learning
     algorithms.
 
-    A Learner can be seen as a learning algorithm, a function that when
+    A L{Learner} can be seen as a learning algorithm, a function that when
     applied to training data returns a learned function, an object that
     can be applied to other data and return some output data.
     """
 
     def __init__(self):
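To make the contract concrete: a learner's update() returns the learned function, which can then be applied to new data. A minimal toy sketch of that idea (illustrative names only, not the pylearn implementation):

    class MeanLearner:
        # Toy learner: "training" just estimates the mean of the inputs.
        def __init__(self):
            self.mean = 0.0

        def update(self, training_set):
            # Learn from the data, then return the learned function (self.use).
            self.mean = sum(training_set) / float(len(training_set))
            return self.use

        def use(self, input_dataset):
            # Apply the learned function: here, center each example.
            return [x - self.mean for x in input_dataset]

    predict = MeanLearner().update([1.0, 2.0, 3.0])
    print(predict([2.0, 4.0]))   # [0.0, 2.0]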
@@ -31,11 +32,11 @@
         Hence update can be called multiple times. This is particularly useful in the
         on-line setting or the sequential (Bayesian or not) settings.
         The result is a function that can be applied on data, with the same
         semantics as the Learner.use method.
 
-        The user may optionally provide a training StatsCollector that is used to record
+        The user may optionally provide a training L{StatsCollector} that is used to record
         some statistics of the outputs computed during training. It is update(d) during
         training.
         """
         return self.use # default behavior is 'non-adaptive', i.e. update does not do anything
 
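A sketch of the stats collector's role, assuming only that it exposes an update() method that is called during training (the class below is hypothetical, not the pylearn StatsCollector):

    class SumStatsCollector:
        # Hypothetical stand-in for a training StatsCollector: keeps a running mean.
        def __init__(self):
            self.n = 0
            self.total = 0.0

        def update(self, outputs):
            # Called once per minibatch of training outputs ("It is update(d)").
            self.n += len(outputs)
            self.total += sum(outputs)

    stats = SumStatsCollector()
    for outputs in [[0.5, 0.25], [0.125]]:
        stats.update(outputs)
    print(stats.total / stats.n)   # 0.2916...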
@@ -51,18 +52,18 @@
     def use(self,input_dataset,output_fieldnames=None,
             test_stats_collector=None,copy_inputs=True,
             put_stats_in_output_dataset=True,
             output_attributes=[]):
         """
-        Once a Learner has been trained by one or more calls to 'update', it can
-        be used with one or more calls to 'use'. The argument is an input DataSet (possibly
-        containing a single example) and the result is an output DataSet of the same length.
+        Once a L{Learner} has been trained by one or more calls to 'update', it can
+        be used with one or more calls to 'use'. The argument is an input L{DataSet} (possibly
+        containing a single example) and the result is an output L{DataSet} of the same length.
         If output_fieldnames is specified, it may be used to indicate which fields should
-        be constructed in the output DataSet (for example ['output','classification_error']).
+        be constructed in the output L{DataSet} (for example ['output','classification_error']).
         Otherwise, self.defaultOutputFields is called to choose the output fields.
         Optionally, if copy_inputs, the input fields (of the input_dataset) can be made
-        visible in the output DataSet returned by this method.
+        visible in the output L{DataSet} returned by this method.
         Optionally, attributes of the learner can be copied in the output dataset,
         and statistics computed by the stats collector also put in the output dataset.
         Note the distinction between fields (which are example-wise quantities, e.g. 'input')
         and attributes (which are not, e.g. 'regularization_term').
 
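A toy rendering of the 'use' semantics described above, with datasets simplified to lists of dicts; the helper below is hypothetical and only mirrors the output_fieldnames and copy_inputs behaviour:

    def use(learner_fn, input_dataset, output_fieldnames=None, copy_inputs=True):
        # Toy version of the 'use' contract: one output row per input example,
        # with selectable output fields and optional copies of the input fields.
        fieldnames = output_fieldnames or ['output']
        output_dataset = []
        for example in input_dataset:
            row = {}
            if copy_inputs:
                row['input'] = example
            if 'output' in fieldnames:
                row['output'] = learner_fn(example)
            output_dataset.append(row)
        return output_dataset

    rows = use(lambda x: 2 * x, [1, 2, 3])
    print(rows)   # same length as the input: [{'input': 1, 'output': 2}, ...]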
@@ -256,11 +257,11 @@
         return [self.__getattribute__('_'+name).data for name in names]
 
 
 class MinibatchUpdatesTLearner(TLearner):
     """
-    This adds to TLearner a
+    This adds to L{TLearner} a
     - updateStart(), updateEnd(), updateMinibatch(minibatch), isLastEpoch():
       functions executed at the beginning, the end, in the middle
       (for each minibatch) of the update method, and at the end
       of each epoch. This model only
       works for 'online' or one-shot learning that requires
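The hooks above imply an update() driver of roughly this shape; the following is a reconstruction for illustration only, and the minibatches() iterator is an assumed interface, not taken from this file:

    def update(self, training_set):
        # Plausible driver implied by the hooks: set up, loop over epochs of
        # minibatches until the stopping test says so, finish, and return the
        # learned function.
        self.updateStart()
        while True:
            for minibatch in training_set.minibatches():
                self.updateMinibatch(minibatch)
            if self.isLastEpoch():
                break
        self.updateEnd()
        return self.use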
@@ -283,11 +284,11 @@
                 (self.names2OpResults(self.updateEndInputAttributes()),
                  self.names2OpResults(self.updateEndOutputAttributes()))
 
     def allocate(self, minibatch):
         """
-        This function is called at the beginning of each updateMinibatch
+        This function is called at the beginning of each L{updateMinibatch}
         and should be used to check that all required attributes have been
         allocated and initialized (usually this function calls forget()
         when it has to do an initialization).
         """
         raise AbstractFunction()
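A hypothetical subclass showing how allocate() might check the required attributes and lazily initialize them via forget() (all names below are invented for illustration):

    class AllocateSketch:
        # Hypothetical subclass showing the allocate()/forget() pattern.
        def __init__(self):
            self._w = None
            self._n_inputs = 0

        def forget(self):
            # (Re)initialize the parameters.
            self._w = [0.0] * self._n_inputs

        def allocate(self, minibatch):
            # Check that parameters match the minibatch shape; initialize if needed.
            n_inputs = len(minibatch[0])
            if self._w is None or self._n_inputs != n_inputs:
                self._n_inputs = n_inputs
                self.forget()

    learner = AllocateSketch()
    learner.allocate([[1.0, 2.0], [3.0, 4.0]])
    print(learner._w)   # [0.0, 0.0]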
@@ -356,19 +357,16 @@
         self.updateEnd()
         return self.use
 
 class OnlineGradientTLearner(MinibatchUpdatesTLearner):
     """
-    Specialization of MinibatchUpdatesTLearner in which the minibatch updates
+    Specialization of L{MinibatchUpdatesTLearner} in which the minibatch updates
     are obtained by performing an online (minibatch-based) gradient step.
 
     Sub-classes must define the following:
-
-    self._learning_rate (may be changed by the sub-class between epochs or minibatches)
-
-    self.lossAttribute() = name of the loss field
-
+    - self._learning_rate (may be changed by the sub-class between epochs or minibatches)
+    - self.lossAttribute() = name of the loss field
     """
     def __init__(self,truly_online=False):
         """
         If truly_online then only one pass is made through the training set passed to update().
 
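A hypothetical sketch of the two requirements, with a hand-derived gradient step for a one-dimensional linear model; none of the names below come from the pylearn source:

    class LeastSquaresSketch:
        # Hypothetical OnlineGradientTLearner-style subclass (illustration only).
        def __init__(self):
            self._learning_rate = 0.1   # may be adjusted between epochs/minibatches
            self._w = 0.0

        def lossAttribute(self):
            return 'squared_error'      # name of the loss field

        def updateMinibatch(self, minibatch):
            # One gradient step on the minibatch-average squared error
            # (gradient hand-derived for the 1-D linear model w*x).
            n = float(len(minibatch))
            grad = sum(2.0 * (self._w * x - y) * x for x, y in minibatch) / n
            self._w -= self._learning_rate * grad

    learner = LeastSquaresSketch()
    for _ in range(100):
        learner.updateMinibatch([(1.0, 2.0), (2.0, 4.0)])
    print(learner._w)   # converges toward 2.0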