view learner.py @ 17:759d17112b23

more comments, looping ArrayDataSet iterator, bugfixes to lookup_list, more tests
author bergstrj@iro.umontreal.ca
date Wed, 26 Mar 2008 21:05:14 -0400
parents 80bf5492e571
children 633453635d51
line wrap: on
line source


from dataset import *
from statscollector import *
    
class Learner(object):
    """Base class for learning algorithms, provides an interface
    that allows various algorithms to be applicable to generic learning
    algorithms.

    A Learner can be seen as a learning algorithm, a function that when
    applied to training data returns a learned function, an object that
    can be applied to other data and return some output data.
    """
    
    def __init__(self):
        pass

    def forget(self):
        """
        Reset the state of the learner to a blank slate, before seeing
        training data. The operation may be non-deterministic if the
        learner has a random number generator that is set to use a
        different seed each time it forget() is called.
        """
        raise NotImplementedError

    def update(self,training_set):
        """
        Continue training a learner, with the evidence provided by the given training set.
        Hence update can be called multiple times. This is particularly useful in the
        on-line setting or the sequential (Bayesian or not) settings.
        The result is a function that can be applied on data, with the same
        semantics of the Learner.use method.
        """
        return self.use
    
    
    def __call__(self,training_set):
        """
        Train a learner from scratch using the provided training set,
        and return the learned function.
        """
        self.forget()
        return self.update(learning_task)


    def use(self,input_dataset,output_fields=None):
        """Once a Learner has been trained by one or more call to 'update', it can
        be used with one or more calls to 'use'. The argument is a DataSet (possibly
        containing a single example) and the result is a DataSet of the same length.
        If output_fields is specified, it may be use to indicate which fields should
        be constructed in the output DataSet (for example ['output','classification_error']).
        """
        raise NotImplementedError