view statscollector.py @ 17:759d17112b23

more comments, looping ArrayDataSet iterator, bugfixes to lookup_list, more tests
author bergstrj@iro.umontreal.ca
date Wed, 26 Mar 2008 21:05:14 -0400
parents 2cd82666b9a7
children f62a03c9d485
line wrap: on
line source


from numpy import *

class StatsCollector(object):
    """Record performance statistics during training or testing of a learner.

    A StatsCollector can be configured to measure different things and
    accumulate the appropriate statistics. From these statistics it can be
    interrogated to obtain performance measures of interest (such as maxima,
    minima, mean, standard deviation, standard error, etc.). Optionally, the
    observations can be weighted (yielding weighted mean, weighted variance,
    etc., where applicable). The statistics that are desired can be specified
    among a list supported by the StatsCollector class or subclass. When some
    statistics are requested, others become automatically available (e.g.,
    sum or mean).

    This base class only defines the interface: `clear` and `update` raise
    NotImplementedError and no statistic can be retrieved until a subclass
    implements them.
    """

    # Statistics are identified by name (strings), which is also how they are
    # requested through attribute access (see __getattr__).
    default_statistics = ['mean', 'standard_deviation', 'min', 'max']

    def __init__(self, n_quantities_observed, statistics=default_statistics):
        """Set up a collector.

        n_quantities_observed -- number of quantities in each observation
            vector passed to `update`.
        statistics -- iterable of statistic names to collect; defaults to
            `default_statistics`.
        """
        self.n_quantities_observed = n_quantities_observed
        # Keep a private copy: the default is a list shared at class level,
        # and must not be altered when one instance edits its own selection.
        self.statistics = list(statistics)

    def clear(self):
        """Hook for discarding what has been accumulated so far.

        Not implemented in this base class; subclasses are expected to
        override it.
        """
        raise NotImplementedError

    def update(self, observations):
        """The observations is a numpy vector of length n_quantities_observed. Some
        entries can be 'missing' (with a NaN entry) and will not be counted in the
        statistics.

        Not implemented in this base class; subclasses are expected to
        override it.
        """
        raise NotImplementedError

    def __getattr__(self, statistic):
        """Return a particular statistic, which may be inferred from the collected statistics.
        The argument is a string naming that statistic.

        Python only calls this method after normal attribute lookup has
        failed. The base class computes nothing, so it raises AttributeError;
        returning None instead would make hasattr() succeed for every name and
        confuse protocol probes such as those done by copy and pickle.
        """
        raise AttributeError(
            "%s has no statistic or attribute named %r"
            % (type(self).__name__, statistic))