# HG changeset patch
# User bengioy@esprit.iro.umontreal.ca
# Date 1206459542 14400
# Node ID 80bf5492e571d48cc7cfc5c0b914fb4d9fd9ea2a
# Parent de616c423dbd1025d67340b110ea35570115e2dc
Rewrote learner.py according to the specs in the wiki for learners.

diff -r de616c423dbd -r 80bf5492e571 learner.py
--- a/learner.py	Mon Mar 24 16:52:47 2008 -0400
+++ b/learner.py	Tue Mar 25 11:39:02 2008 -0400
@@ -7,57 +7,48 @@
     that allows various algorithms to be applicable to generic learning
     algorithms.
 
-    A Learner often works in stages, and the user can control when
-    each stage is executed by calling train repetively (with a different
-    target_stage).
+    A Learner can be seen as a learning algorithm, a function that when
+    applied to training data returns a learned function, an object that
+    can be applied to other data and return some output data.
     """
 
-    __init__(self,training_set=None,verbosity=0):
-        self.stage=0
-        self.training_set=training_set
-        if training_set:
-            assert isinstance(training_set,DataSet), "training_set should be a DataSet"
-        self.verbosity=verbosity
+    def __init__(self):
+        pass
+
+    def forget(self):
+        """
+        Reset the state of the learner to a blank slate, before seeing
+        training data. The operation may be non-deterministic if the
+        learner has a random number generator that is set to use a
+        different seed each time forget() is called.
+        """
+        raise NotImplementedError
+
+    def update(self,training_set):
+        """
+        Continue training a learner, with the evidence provided by the given training set.
+        Hence update can be called multiple times. This is particularly useful in the
+        on-line setting or the sequential (Bayesian or not) settings.
+        The result is a function that can be applied on data, with the same
+        semantics as the Learner.use method.
+        """
+        return self.use
+
+    def __call__(self,training_set):
+        """
+        Train a learner from scratch using the provided training set,
+        and return the learned function.
+        """
+        self.forget()
+        return self.update(training_set)
 
-    train(self,target_stage=None,training_set=None,from_scratch=True,train_stats=None):
-        """The main method of a learner, to 'train' it. This modifies the Learner
-        so that its behavior upon a next call to use may be different.
-        The training set may have already been provided (by setting the training_set
-        attribute) or a new / different training set may be provided here (which will
-        set the training_set attribute). Training may occur in stages. The current
-        stage may be set by the user and all the stages until and including the target_stage
-        will be performed. If from_scratch then stage is set to 0 initially.
-        If the train_stats argument is provided, it should be a StatsCollector object.
-        In that case, performance statistics will be computed on the training set
-        and accumulated into the train_stats object. If from_scratch though, the
-        train_stats object will be cleared automatically. The same train_stats object
-        can thus be reused on consecutive calls to train (where all but the first
-        are from scratch).
-
-        Subclasses may call Learner.train to set stage and training_set according
-        to the above arguments. The actual training should then be performed
-        within the subclass train method.
-        """
-        if from_scratch:
-            stage=0
-            if train_stats:
-                train_stats.clear()
-        if training_set:
-            if training_set:
-                self.training_set=training_set
-                assert isinstance(training_set,DataSet), "training_set should be a DataSet"
-
-        return
-
-
-    use(self,input_dataset,test_stats=None,output_fields=None):
-        """Once a Learner has been trained by one or more call to 'train', it can
+    def use(self,input_dataset,output_fields=None):
+        """Once a Learner has been trained by one or more calls to 'update', it can
         be used with one or more calls to 'use'. The argument is a DataSet (possibly
-        containing a single example) and the result is a DataSet of the same size.
+        containing a single example) and the result is a DataSet of the same length.
         If output_fields is specified, it may be used to indicate which fields should
        be constructed in the output DataSet (for example ['output','classification_error']).
-        If a test_stats object is provided, then performance statistics will be computed to
-        measure the performance of the learner on the given dataset, and accumulated into
-        the test_stats (hence it must be cleared before the call)."""
+        """
        raise NotImplementedError