view learner.py @ 2:3fddb1c8f955

Rewrote DataSet interface and created FiniteDataSet interface.
author bengioy@bengiomac.local
date Sat, 22 Mar 2008 22:21:59 -0400
parents 2cd82666b9a7
children 80bf5492e571
line wrap: on
line source


from dataset import *
from statscollector import *
    
class Learner(object):
    """Base class for learning algorithms.

    Provides a common interface so that generic code can drive many
    different learning algorithms in the same way.

    A Learner often works in stages, and the user can control when
    each stage is executed by calling train repeatedly (with a different
    target_stage).

    Attributes set by this base class:
      stage        -- current training stage, 0 for an untrained learner.
      training_set -- the DataSet used for training, or None if not yet given.
      verbosity    -- verbosity level stored as given by the caller.
    """

    def __init__(self, training_set=None, verbosity=0):
        """Initialize the learner at stage 0.

        training_set, if given, must be a DataSet (checked with an assert).
        verbosity is stored as-is on the instance.
        """
        # Compare against None rather than relying on truthiness: a DataSet
        # that defines __len__ would be falsy when empty and skip the check.
        if training_set is not None:
            assert isinstance(training_set, DataSet), "training_set should be a DataSet"
        self.stage = 0
        self.training_set = training_set
        self.verbosity = verbosity

    def train(self, target_stage=None, training_set=None, from_scratch=True, train_stats=None):
        """The main method of a learner, to 'train' it. This modifies the Learner
        so that its behavior upon a next call to use may be different.
        The training set may have already been provided (by setting the training_set
        attribute) or a new / different training set may be provided here (which will
        set the training_set attribute). Training may occur in stages. The current
        stage may be set by the user and all the stages until and including the target_stage
        will be performed. If from_scratch then stage is set to 0 initially.
        If the train_stats argument is provided, it should be a StatsCollector object.
        In that case, performance statistics will be computed on the training set
        and accumulated into the train_stats object. If from_scratch though, the
        train_stats object will be cleared automatically. The same train_stats object
        can thus be reused on consecutive calls to train (where all but the first
        are from scratch).

        Subclasses may call Learner.train to set stage and training_set according
        to the above arguments. The actual training should then be performed
        within the subclass train method.

        This base implementation only does that bookkeeping: it does not use
        target_stage, performs no training, and returns None.
        """
        if from_scratch:
            # Reset the stage on the instance; a bare local assignment
            # would be discarded when the method returns.
            self.stage = 0
            if train_stats is not None:
                train_stats.clear()
        if training_set is not None:
            # Validate before storing so that a rejected argument does not
            # replace a previously valid training set.
            assert isinstance(training_set, DataSet), "training_set should be a DataSet"
            self.training_set = training_set

    def use(self, input_dataset, test_stats=None, output_fields=None):
        """Once a Learner has been trained by one or more call to 'train', it can
        be used with one or more calls to 'use'. The argument is a DataSet (possibly
        containing a single example) and the result is a DataSet of the same size.
        If output_fields is specified, it may be used to indicate which fields should
        be constructed in the output DataSet (for example ['output','classification_error']).
        If a test_stats object is provided, then performance statistics will be computed to
        measure the performance of the learner on the given dataset, and accumulated into
        the test_stats (hence it must be cleared before the call).

        The base class provides no implementation and always raises
        NotImplementedError; subclasses must override this method.
        """
        raise NotImplementedError