comparison learner.py @ 10:80bf5492e571

Rewrote learner.py according to the specs in the wiki for learners.
author bengioy@esprit.iro.umontreal.ca
date Tue, 25 Mar 2008 11:39:02 -0400
parents 2cd82666b9a7
children 633453635d51
comparison
equal deleted inserted replaced
9:de616c423dbd 10:80bf5492e571
5 class Learner(object): 5 class Learner(object):
6 """Base class for learning algorithms, provides an interface 6 """Base class for learning algorithms, provides an interface
7 that allows various algorithms to be applicable to generic learning 7 that allows various algorithms to be applicable to generic learning
8 algorithms. 8 algorithms.
9 9
10 A Learner often works in stages, and the user can control when 10 A Learner can be seen as a learning algorithm, a function that when
11 each stage is executed by calling train repeatedly (with a different 11 applied to training data returns a learned function, an object that
12 target_stage). 12 can be applied to other data and return some output data.
13 """ 13 """
14 14
15 __init__(self,training_set=None,verbosity=0): 15 def __init__(self):
16 self.stage=0 16 pass
17 self.training_set=training_set 17
18 if training_set: 18 def forget(self):
19 assert isinstance(training_set,DataSet), "training_set should be a DataSet" 19 """
20 self.verbosity=verbosity 20 Reset the state of the learner to a blank slate, before seeing
21 training data. The operation may be non-deterministic if the
22 learner has a random number generator that is set to use a
23 different seed each time forget() is called.
24 """
25 raise NotImplementedError
26
27 def update(self,training_set):
28 """
29 Continue training a learner, with the evidence provided by the given training set.
30 Hence update can be called multiple times. This is particularly useful in the
31 on-line setting or the sequential (Bayesian or not) settings.
32 The result is a function that can be applied on data, with the same
33 semantics as the Learner.use method.
34 """
35 return self.use
36
37
38 def __call__(self,training_set):
39 """
40 Train a learner from scratch using the provided training set,
41 and return the learned function.
42 """
43 self.forget()
44 return self.update(learning_task)
21 45
22 46
23 train(self,target_stage=None,training_set=None,from_scratch=True,train_stats=None): 47 def use(self,input_dataset,output_fields=None):
24 """The main method of a learner, to 'train' it. This modifies the Learner 48 """Once a Learner has been trained by one or more calls to 'update', it can
25 so that its behavior upon a next call to use may be different.
26 The training set may have already been provided (by setting the training_set
27 attribute) or a new / different training set may be provided here (which will
28 set the training_set attribute). Training may occur in stages. The current
29 stage may be set by the user and all the stages until and including the target_stage
30 will be performed. If from_scratch then stage is set to 0 initially.
31 If the train_stats argument is provided, it should be a StatsCollector object.
32 In that case, performance statistics will be computed on the training set
33 and accumulated into the train_stats object. If from_scratch though, the
34 train_stats object will be cleared automatically. The same train_stats object
35 can thus be reused on consecutive calls to train (where all but the first
36 are from scratch).
37
38 Subclasses may call Learner.train to set stage and training_set according
39 to the above arguments. The actual training should then be performed
40 within the subclass train method.
41 """
42 if from_scratch:
43 stage=0
44 if train_stats:
45 train_stats.clear()
46 if training_set:
47 if training_set:
48 self.training_set=training_set
49 assert isinstance(training_set,DataSet), "training_set should be a DataSet"
50
51 return
52
53
54 use(self,input_dataset,test_stats=None,output_fields=None):
55 """Once a Learner has been trained by one or more calls to 'train', it can
56 be used with one or more calls to 'use'. The argument is a DataSet (possibly 49 be used with one or more calls to 'use'. The argument is a DataSet (possibly
57 containing a single example) and the result is a DataSet of the same size. 50 containing a single example) and the result is a DataSet of the same length.
58 If output_fields is specified, it may be used to indicate which fields should 51 If output_fields is specified, it may be used to indicate which fields should
59 be constructed in the output DataSet (for example ['output','classification_error']). 52 be constructed in the output DataSet (for example ['output','classification_error']).
60 If a test_stats object is provided, then performance statistics will be computed to 53 """
61 measure the performance of the learner on the given dataset, and accumulated into
62 the test_stats (hence it must be cleared before the call)."""
63 raise NotImplementedError 54 raise NotImplementedError