diff learner.py @ 10:80bf5492e571

Rewrote learner.py according to the specs in the wiki for learners.
author bengioy@esprit.iro.umontreal.ca
date Tue, 25 Mar 2008 11:39:02 -0400
parents 2cd82666b9a7
children 633453635d51
--- a/learner.py	Mon Mar 24 16:52:47 2008 -0400
+++ b/learner.py	Tue Mar 25 11:39:02 2008 -0400
@@ -7,57 +7,48 @@
     that allows various algorithms to be applicable to generic learning
     algorithms.
 
-    A Learner often works in stages, and the user can control when
-    each stage is executed by calling train repetively (with a different
-    target_stage).
+    A Learner can be seen as a learning algorithm: a function that, when
+    applied to training data, returns a learned function, i.e. an object
+    that can be applied to other data to produce some output data.
     """
     
-    __init__(self,training_set=None,verbosity=0):
-        self.stage=0
-        self.training_set=training_set
-        if training_set:
-            assert isinstance(training_set,DataSet), "training_set should be a DataSet"
-        self.verbosity=verbosity
+    def __init__(self):
+        pass
+
+    def forget(self):
+        """
+        Reset the state of the learner to a blank slate, before seeing
+        training data. The operation may be non-deterministic if the
+        learner has a random number generator that is set to use a
+        different seed each time forget() is called.
+        """
+        raise NotImplementedError
+
+    def update(self,training_set):
+        """
+        Continue training a learner, with the evidence provided by the given training set.
+        Hence update can be called multiple times. This is particularly useful in the
+        on-line setting or in sequential (Bayesian or not) settings.
+        The result is a function that can be applied on data, with the same
+        semantics as the Learner.use method.
+        """
+        return self.use
+    
+    
+    def __call__(self,training_set):
+        """
+        Train a learner from scratch using the provided training set,
+        and return the learned function.
+        """
+        self.forget()
+        return self.update(training_set)
 
 
-    train(self,target_stage=None,training_set=None,from_scratch=True,train_stats=None):
-        """The main method of a learner, to 'train' it. This modifies the Learner
-        so that its behavior upon a next call to use may be different.
-        The training set may have already been provided (by setting the training_set
-        attribute) or a new / different training set may be provided here (which will
-        set the training_set attribute). Training may occur in stages. The current
-        stage may be set by the user and all the stages until and including the target_stage
-        will be performed. If from_scratch then stage is set to 0 initially.
-        If the train_stats argument is provided, it should be a StatsCollector object.
-        In that case, performance statistics will be computed on the training set
-        and accumulated into the train_stats object. If from_scratch though, the
-        train_stats object will be cleared automatically. The same train_stats object
-        can thus be reused on consecutive calls to train (where all but the first
-        are from scratch).
-
-        Subclasses may call Learner.train to set stage and training_set according
-        to the above arguments. The actual training should then be performed
-        within the subclass train method.
-        """
-        if from_scratch:
-            stage=0
-            if train_stats:
-                train_stats.clear()
-        if training_set:
-        if training_set:
-            self.training_set=training_set
-            assert isinstance(training_set,DataSet), "training_set should be a DataSet"
-            
-        return
-        
-
-    use(self,input_dataset,test_stats=None,output_fields=None):
-        """Once a Learner has been trained by one or more call to 'train', it can
+    def use(self,input_dataset,output_fields=None):
+        """Once a Learner has been trained by one or more call to 'update', it can
         be used with one or more calls to 'use'. The argument is a DataSet (possibly
-        containing a single example) and the result is a DataSet of the same size.
+        containing a single example) and the result is a DataSet of the same length.
         If output_fields is specified, it may be used to indicate which fields should
         be constructed in the output DataSet (for example ['output','classification_error']).
-        If a test_stats object is provided, then performance statistics will be computed to
-        measure the performance of the learner on the given dataset, and accumulated into
-        the test_stats (hence it must be cleared before the call)."""
+        """
         raise NotImplementedError
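
For reference, a minimal sketch of how the new forget/update/use interface could be exercised. The ConstantPredictor class below is hypothetical and not part of this changeset; it assumes the Learner base class above is importable and that a DataSet can be iterated to yield numeric examples, with a plain list standing in for the output DataSet that a real implementation would build.

class ConstantPredictor(Learner):
    """Toy learner that predicts the mean of all training examples seen so far."""

    def __init__(self):
        Learner.__init__(self)
        self.forget()

    def forget(self):
        # Blank slate: no training evidence accumulated yet.
        self.total = 0.0
        self.count = 0

    def update(self, training_set):
        # Accumulate evidence from this training set; may be called repeatedly
        # (on-line or sequential settings).
        for example in training_set:   # assumes the DataSet yields numbers when iterated
            self.total += example
            self.count += 1
        return self.use                # same convention as Learner.update

    def use(self, input_dataset, output_fields=None):
        mean = self.total / self.count if self.count else 0.0
        # One output per input example; a plain list stands in for the output DataSet.
        return [mean for _ in input_dataset]

Usage follows the __call__ convention defined above (forget, then update):

learner = ConstantPredictor()
predict = learner([1.0, 2.0, 3.0])   # train from scratch, returns the learned function
print(predict([10.0, 20.0]))         # -> [2.0, 2.0]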