changeset 374:2b16604ffad9

split LearningAlgorithm into OfflineLearningAlgorithm and OnlineLearningAlgorithm
author bengioy@bengiomac.local
date Tue, 03 Jun 2008 21:27:32 -0400
parents 2b91638a11d3
children 12ce29abf27d
files learner.py
diffstat 1 files changed, 63 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/learner.py	Tue Jun 03 16:52:06 2008 -0400
+++ b/learner.py	Tue Jun 03 21:27:32 2008 -0400
@@ -3,24 +3,24 @@
 from exceptions import *
 from dataset import AttributesHolder
 
-class LearningAlgorithm(object):
+class OfflineLearningAlgorithm(object):
     """
-    Base class for learning algorithms, provides an interface
+    Base class for offline learning algorithms, provides an interface
     that allows various algorithms to be applicable to generic learning
     algorithms. It is only given here to define the expected semantics.
 
-    A L{Learner} can be seen as a learning algorithm, a function that when
+    An offline learning algorithm can be seen as a function that when
     applied to training data returns a learned function (which is an object that
     can be applied to other data and return some output data).
 
-    There are two main ways of using a learning algorithms, and some learning
-    algorithms only support one of them. The first is the way of the standard
-    machine learning framework, in which a learning algorithm is applied
+    The offline learning scenario is the standard and most common one 
+    in machine learning:  an offline learning algorithm is applied
     to a training dataset, 
 
        model = learning_algorithm(training_set)
         
-    resulting in a fully trained model that can be applied to another dataset:
+    resulting in a fully trained model that can be applied to another dataset
+    in order to perform some desired computation:
 
         output_dataset = model(input_dataset)
 
@@ -28,10 +28,58 @@
     In that example, the training set may for example have 'input' and 'target'
     fields while the input dataset may have only 'input' (or both 'input' and
     'target') and the output dataset would contain some default output fields defined
-    by the learning algorithm (e.g. 'output' and 'error').
+    by the learning algorithm (e.g. 'output' and 'error'). The user may specify
+    what the output dataset should contain either by setting options in the
+    model, by the presence of particular fields in the input dataset, or with
+    keyword options of the __call__ method of the model (see TrainedModel.__call__).
+
+    """
+
+    def __init__(self): pass
+
+    def __call__(self, training_dataset):
+        """
+        Return a fully trained TrainedModel.
+        """
+        raise AbstractFunction()
+    
+class TrainedModel(AttributesHolder):
+    """
+    TrainedModel is a base class for models returned by instances of an
+    OfflineLearningAlgorithm subclass. It is only given here to define the expected semantics.
+    """
+    def __init__(self):
+        pass
 
-    The second way of using a learning algorithm is in the online or
-    adaptive framework, where the training data are only revealed in pieces
+    def __call__(self,input_dataset,output_fieldnames=None,
+                 test_stats_collector=None,copy_inputs=False,
+                 put_stats_in_output_dataset=True,
+                 output_attributes=[]):
+        """
+        A L{TrainedModel} can be used
+        with one or more calls to it. The main argument is an input L{DataSet} (possibly
+        containing a single example) and the result is an output L{DataSet} of the same length.
+        If output_fieldnames is specified, it may be used to indicate which fields should
+        be constructed in the output L{DataSet} (for example ['output','classification_error']).
+        Otherwise, some default output fields are produced (possibly depending on the input
+        fields available in the input_dataset).
+        Optionally, if copy_inputs, the input fields (of the input_dataset) can be made
+        visible in the output L{DataSet} returned by this method.
+        Optionally, attributes of the learner can be copied in the output dataset,
+        and statistics computed by the stats collector also put in the output dataset.
+        Note the distinction between fields (which are example-wise quantities, e.g. 'input')
+        and attributes (which are not, e.g. 'regularization_term').
+        """
+        raise AbstractFunction()
+
+
+class OnlineLearningAlgorithm(object):
+    """
+    Base class for online learning algorithms, provides an interface
+    that allows various algorithms to be applicable to generic online learning
+    algorithms. It is only given here to define the expected semantics.
+
+    The basic setting is that the training data are only revealed in pieces
     (maybe one example or a batch of example at a time):
 
        model = learning_algorithm()
@@ -49,6 +97,9 @@
     
        output_dataset = model(input_dataset)
 
+    The model should be a LearnerModel subclass instance, and LearnerModel
+    is a subclass of LearnedModel.
+
     """
 
     def __init__(self): pass
@@ -59,7 +110,7 @@
         """
         raise AbstractFunction()
     
-class LearnerModel(AttributesHolder):
+class LearnerModel(LearnedModel):
     """
     LearnerModel is a base class for models returned by instances of a LearningAlgorithm subclass.
     It is only given here to define the expected semantics.
@@ -69,7 +120,7 @@
 
     def update(self,training_set,train_stats_collector=None):
         """
-        Continue training a learner, with the evidence provided by the given training set.
+        Continue training a learner model, with the evidence provided by the given training set.
         Hence update can be called multiple times. This is the main method used for training in the
         on-line setting or the sequential (Bayesian or not) settings.
 
@@ -82,23 +133,3 @@
         """
         raise AbstractFunction()
     
-    def __call__(self,input_dataset,output_fieldnames=None,
-                 test_stats_collector=None,copy_inputs=False,
-                 put_stats_in_output_dataset=True,
-                 output_attributes=[]):
-        """
-        A trained or partially trained L{Model} can be used with
-        with one or more calls to it. The argument is an input L{DataSet} (possibly
-        containing a single example) and the result is an output L{DataSet} of the same length.
-        If output_fieldnames is specified, it may be use to indicate which fields should
-        be constructed in the output L{DataSet} (for example ['output','classification_error']).
-        Otherwise, some default output fields are produced (possibly depending on the input
-        fields available in the input_dataset).
-        Optionally, if copy_inputs, the input fields (of the input_dataset) can be made
-        visible in the output L{DataSet} returned by this method.
-        Optionally, attributes of the learner can be copied in the output dataset,
-        and statistics computed by the stats collector also put in the output dataset.
-        Note the distinction between fields (which are example-wise quantities, e.g. 'input')
-        and attributes (which are not, e.g. 'regularization_term').
-        """
-        raise AbstractFunction()