view learner.py @ 453:ce6b4fd3ab29

Fixed typo in help
author delallea@valhalla.apstat.com
date Thu, 04 Sep 2008 13:48:47 -0400
parents fe57b96f33d4
children
line wrap: on
line source



from exceptions import *
from dataset import AttributesHolder

class OfflineLearningAlgorithm(object):
    """
    Interface definition for offline (batch) learning algorithms.

    This base class does no learning by itself: it only spells out the
    semantics expected from concrete subclasses, so that generic code can
    work with any of them interchangeably.

    An offline learning algorithm can be viewed as a function which, applied
    to training data, returns a learned function (an object that can itself
    be applied to other data in order to produce some output data).

    This is the standard and most common scenario in machine learning.
    The learning algorithm is first applied to a training dataset,

        model = learning_algorithm(training_set)

    which yields a fully trained model. That model can then be applied to
    another dataset to perform the desired computation:

        output_dataset = model(input_dataset)

    Applying the model to a dataset has no side-effect on the model.
    In the example above, the training set could have 'input' and 'target'
    fields, the input dataset could have only 'input' (or both 'input' and
    'target'), and the output dataset would hold default output fields
    chosen by the learning algorithm (e.g. 'output' and 'error'). The user
    may specify what the output dataset should contain by setting options in
    the model, through the presence of particular fields in the input
    dataset, or with keyword options of the model's __call__ method
    (see TrainedModel.__call__).

    """

    def __init__(self):
        """The abstract base class has no state to initialize."""

    def __call__(self, training_dataset):
        """
        Train on training_dataset and return a fully trained TrainedModel.

        Abstract in this base class: it unconditionally raises
        AbstractFunction.
        """
        raise AbstractFunction()
    
class TrainedModel(AttributesHolder):
    """
    TrainedModel is a base class for models returned by instances of an
    OfflineLearningAlgorithm subclass. It is only given here to define the
    expected semantics.
    """
    def __init__(self):
        # NOTE(review): AttributesHolder.__init__ is not invoked here; confirm
        # that this is intentional.
        pass

    def __call__(self,input_dataset,output_fieldnames=None,
                 test_stats_collector=None,copy_inputs=False,
                 put_stats_in_output_dataset=True,
                 output_attributes=()):
        """
        A L{TrainedModel} can be used with one or more calls to it.

        The main argument is an input L{DataSet} (possibly containing a single
        example) and the result is an output L{DataSet} of the same length.

        Note the distinction between fields (which are example-wise quantities,
        e.g. 'input') and attributes (which are not, e.g. 'regularization_term').

        @param input_dataset: the L{DataSet} the model is applied to.
        @param output_fieldnames: if specified, it may be used to indicate which
            fields should be constructed in the output L{DataSet} (for example
            ['output','classification_error']). Otherwise, some default output
            fields are produced (possibly depending on the input fields
            available in the input_dataset).
        @param test_stats_collector: optional stats collector; the statistics
            it computes can also be put in the output dataset.
        @param copy_inputs: if true, the input fields (of the input_dataset)
            can be made visible in the output L{DataSet} returned by this
            method.
        @param put_stats_in_output_dataset: whether the statistics computed by
            the stats collector are also put in the output dataset.
        @param output_attributes: attributes of the learner to be copied in the
            output dataset. The default is an immutable empty sequence, so
            that no mutable object is shared between calls.
        @raise AbstractFunction: always, in this base class.
        """
        raise AbstractFunction()


class OnlineLearningAlgorithm(object):
    """
    Interface definition for online learning algorithms.

    This base class does no learning by itself: it only spells out the
    semantics expected from concrete subclasses, so that generic code can
    work with any online algorithm interchangeably.

    In the basic setting the training data are revealed only piece by piece
    (perhaps one example, or one batch of examples, at a time). Calling the
    algorithm without data,

       model = learning_algorithm()

    gives a fresh model. The model is then adapted by presenting it with
    some training data,

       model.update(some_training_data)
       ...
       model.update(some_more_training_data)
       ...
       model.update(yet_more_training_data)

    and, at any point, it can be used to perform some computation:

       output_dataset = model(input_dataset)

    The model should be an instance of a LearnerModel subclass; LearnerModel
    is itself a subclass of TrainedModel.

    """

    def __init__(self):
        """The abstract base class has no state to initialize."""

    def __call__(self, training_dataset=None):
        """
        Return a LearnerModel: a fresh one when training_dataset is None,
        a fully trained one otherwise.

        Abstract in this base class: it unconditionally raises
        AbstractFunction.
        """
        raise AbstractFunction()
    
class LearnerModel(TrainedModel):
    """
    Base class for the models returned by instances of an
    OnlineLearningAlgorithm subclass.
    Like its parent, it is only given here to define the expected semantics.
    """
    def __init__(self):
        """The abstract base class has no state to initialize."""

    def update(self,training_set,train_stats_collector=None):
        """
        Continue training this learner model, using the evidence provided by
        the given training set.

        update can therefore be called multiple times. It is the main method
        used for training in the on-line setting or in the sequential
        (Bayesian or not) settings.

        As a side effect, self(data) will behave differently afterwards,
        according to the adaptation achieved by update().

        Optionally, the user may provide a training L{StatsCollector}, used
        to record some statistics of the outputs computed during training.
        It is update(d) during training.

        Abstract in this base class: it unconditionally raises
        AbstractFunction.
        """
        raise AbstractFunction()