diff learner.py @ 1:2cd82666b9a7

Added statscollector and started writing dataset and learner.
author bengioy@esprit.iro.umontreal.ca
date Fri, 14 Mar 2008 11:28:08 -0400
parents 586dcaa4b2df
children 80bf5492e571
line wrap: on
line diff
--- a/learner.py	Fri Mar 14 10:07:50 2008 -0400
+++ b/learner.py	Fri Mar 14 11:28:08 2008 -0400
@@ -0,0 +1,63 @@
+
+from dataset import *
+from statscollector import *
+    
+class Learner(object):
+    """Base class for learning algorithms.
+
+    It provides a common train / use interface so that various algorithms
+    can be applied interchangeably to generic DataSets.
+
+    A Learner often works in stages, and the user can control when
+    each stage is executed by calling train repeatedly (with a different
+    target_stage).
+    """
+
+    def __init__(self,training_set=None,verbosity=0):
+        """Start at stage 0 and store the optional training_set (asserted to be a DataSet) and verbosity."""
+        # Test against None, not truthiness: an empty DataSet could be falsy.
+        if training_set is not None:
+            assert isinstance(training_set,DataSet), "training_set should be a DataSet"
+        self.stage=0
+        self.training_set=training_set
+        self.verbosity=verbosity
+
+    def train(self,target_stage=None,training_set=None,from_scratch=True,train_stats=None):
+        """The main method of a learner, to 'train' it. This modifies the Learner
+        so that its behavior upon a next call to use may be different.
+        The training set may have already been provided (by setting the training_set
+        attribute) or a new / different training set may be provided here (which will
+        set the training_set attribute). Training may occur in stages. The current
+        stage may be set by the user and all the stages until and including the target_stage
+        will be performed. If from_scratch then stage is set to 0 initially.
+        If the train_stats argument is provided, it should be a StatsCollector object.
+        In that case, performance statistics will be computed on the training set
+        and accumulated into the train_stats object. If from_scratch though, the
+        train_stats object will be cleared automatically. The same train_stats object
+        can thus be reused on consecutive calls to train (where all but the first
+        are from scratch).
+
+        Subclasses may call Learner.train to set stage and training_set according
+        to the above arguments. The actual training should then be performed
+        within the subclass train method; this base method ignores target_stage.
+        """
+        if from_scratch:
+            # Reset the attribute (a bare local 'stage' would be silently discarded).
+            self.stage=0
+            if train_stats is not None:
+                train_stats.clear()
+        if training_set is not None:
+            # Validate before storing so a bad argument leaves the learner untouched.
+            assert isinstance(training_set,DataSet), "training_set should be a DataSet"
+            self.training_set=training_set
+
+    def use(self,input_dataset,test_stats=None,output_fields=None):
+        """Once a Learner has been trained by one or more calls to 'train', it can
+        be used with one or more calls to 'use'. The argument is a DataSet (possibly
+        containing a single example) and the result is a DataSet of the same size.
+        If output_fields is specified, it may be used to indicate which fields should
+        be constructed in the output DataSet (for example ['output','classification_error']).
+        If a test_stats object is provided, then performance statistics will be computed to
+        measure the performance of the learner on the given dataset, and accumulated into
+        the test_stats (hence it must be cleared before the call). Subclasses must override."""
+        raise NotImplementedError