comparison learner.py @ 1:2cd82666b9a7

Added statscollector and started writing dataset and learner.
author bengioy@esprit.iro.umontreal.ca
date Fri, 14 Mar 2008 11:28:08 -0400
parents 586dcaa4b2df
children 80bf5492e571
comparison
equal deleted inserted replaced
0:586dcaa4b2df 1:2cd82666b9a7
1
2 from dataset import *
3 from statscollector import *
4
class Learner(object):
    """Base class for learning algorithms.

    Provides a common interface (train / use) that allows various
    learning algorithms to be applied generically to datasets.

    A Learner often works in stages, and the user can control when
    each stage is executed by calling train repeatedly (with a different
    target_stage).

    Attributes set by this base class:
      stage        -- index of the current training stage (0 = untrained).
      training_set -- the DataSet used for training, or None if not yet given.
      verbosity    -- verbosity level stored for use by subclasses.
    """

    def __init__(self, training_set=None, verbosity=0):
        """Initialize the learner at stage 0.

        training_set, if given, must be a DataSet (checked with an assert).
        verbosity is simply stored on the instance.
        """
        # Compare against None rather than relying on truthiness, so that an
        # empty (falsy) container is still type-checked.
        if training_set is not None:
            assert isinstance(training_set, DataSet), "training_set should be a DataSet"
        self.stage = 0
        self.training_set = training_set
        self.verbosity = verbosity

    def train(self, target_stage=None, training_set=None, from_scratch=True, train_stats=None):
        """The main method of a learner, to 'train' it.

        This modifies the Learner so that its behavior upon a next call to
        use may be different.

        The training set may have already been provided (by setting the
        training_set attribute) or a new / different training set may be
        provided here (which will set the training_set attribute).

        Training may occur in stages. The current stage may be set by the
        user and all the stages until and including the target_stage are
        meant to be performed. If from_scratch then stage is reset to 0
        initially.

        If the train_stats argument is provided, it should be a
        StatsCollector object. In that case, performance statistics are
        meant to be computed on the training set and accumulated into the
        train_stats object. If from_scratch, the train_stats object is
        cleared automatically first, so the same object can be reused
        across consecutive calls to train.

        Subclasses may call Learner.train to set stage and training_set
        according to the above arguments. The actual training should then
        be performed within the subclass train method; this base
        implementation does no training, ignores target_stage, and
        returns None.
        """
        # Validate before touching any state so that a bad argument leaves
        # the learner (and train_stats) unchanged.
        if training_set is not None:
            assert isinstance(training_set, DataSet), "training_set should be a DataSet"
            self.training_set = training_set

        if from_scratch:
            # Must reset the instance attribute, not a local variable.
            self.stage = 0
            if train_stats is not None:
                train_stats.clear()

        return

    def use(self, input_dataset, test_stats=None, output_fields=None):
        """Apply a trained learner to a dataset.

        Once a Learner has been trained by one or more calls to 'train', it
        can be used with one or more calls to 'use'. The argument is a
        DataSet (possibly containing a single example) and the result is a
        DataSet of the same size.

        If output_fields is specified, it may be used to indicate which
        fields should be constructed in the output DataSet (for example
        ['output','classification_error']).

        If a test_stats object is provided, then performance statistics
        will be computed to measure the performance of the learner on the
        given dataset, and accumulated into the test_stats (hence it must
        be cleared before the call).

        This base implementation always raises NotImplementedError;
        subclasses must override it.
        """
        raise NotImplementedError