changeset 213:a96fa4de06d2

Renommé mon module de séries
author fsavard
date Wed, 10 Mar 2010 16:52:22 -0500
parents e390b0454515
children 1faae5079522 4c137f16b013
files utils/seriestables/__init__.py utils/seriestables/series.py utils/seriestables/test_series.py utils/tables_series/__init__.py utils/tables_series/series.py utils/tables_series/test_series.py
diffstat 6 files changed, 491 insertions(+), 491 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/seriestables/__init__.py	Wed Mar 10 16:52:22 2010 -0500
@@ -0,0 +1,2 @@
+from series import ErrorSeries, BasicStatisticsSeries, AccumulatorSeriesWrapper, SeriesArrayWrapper, SharedParamsStatisticsWrapper, DummySeries
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/seriestables/series.py	Wed Mar 10 16:52:22 2010 -0500
@@ -0,0 +1,316 @@
+from tables import *
+import numpy
+
+'''
+The way these "IsDescription constructor" work is simple: write the
+code as if it were in a file, then exec()ute it, leaving us with
+a local-scoped LocalDescription which may be used to call createTable.
+
+It's a small hack, but it's necessary as the names of the columns
+are retrieved based on the variable name, which we can't programmatically set
+otherwise.
+'''
+
+def get_beginning_description_n_ints(int_names, int_width=64):
+    """
+    Begins the source text of a LocalDescription(IsDescription) class with
+    one int column per name in int_names, at positions 0..len(int_names)-1.
+
+    int_width 32 selects Int32Col, anything else Int64Col. See Series().__init__.
+    """
+    int_constructor = "Int64Col"
+    if int_width == 32:
+        int_constructor = "Int32Col"
+
+    toexec = "class LocalDescription(IsDescription):\n"
+
+    # Each column gets its own pos; the counter previously stayed at 0
+    # for every column because it was never incremented.
+    for pos, n in enumerate(int_names):
+        toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n"
+
+    return toexec
+
+def get_description_with_n_ints_n_floats(int_names, float_names, int_width=64, float_width=32):
+    """
+    Constructs a class to be used when constructing a table with PyTables.
+
+    This is useful to construct a series with an index with multiple levels.
+    E.g. if you want to index your "validation error" with "epoch" first, then
+    "minibatch_index" second, you'd use two "int_names".
+
+    Parameters
+    ----------
+    int_names : tuple of str
+        Names of the int (e.g. index) columns
+    float_names : tuple of str
+        Names of the float (e.g. error) columns
+    int_width : {32, 64}
+        Width in bits of the int columns (an int, not a string).
+    float_width : {32, 64}
+        Width in bits of the float columns (an int, not a string).
+
+    Returns
+    -------
+    A class object, to pass to createTable()
+    """
+
+    toexec = get_beginning_description_n_ints(int_names, int_width=int_width)
+
+    float_constructor = "Float32Col"
+    if float_width == 64:
+        float_constructor = "Float64Col"
+
+    pos = len(int_names)
+    # Float columns follow the int columns; offset advances pos per column.
+    for offset, n in enumerate(float_names):
+        toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos + offset) + ")\n"
+
+    exec(toexec)
+
+    return LocalDescription
+
+class Series():
+    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'):
+        """Basic arguments each Series must get.
+
+        Parameters
+        ----------
+        table_name : str
+            Name of the table to create under group "hdf5_group" (other parameter). No spaces, i.e. follow variable naming restrictions.
+        hdf5_file : open HDF5 file
+            File opened with openFile() in PyTables (i.e. return value of openFile).
+        index_names : tuple of str
+            Columns to use as index for elements in the series, other example would be ('epoch', 'minibatch'). This would then allow you to call append(index, element) with index made of two ints, one for epoch index, one for minibatch index in epoch.
+        title : str
+            Title to attach to this table as metadata. Can contain spaces and be longer than the table_name.
+        hdf5_group : str
+            Path of the group (kind of a file) in the HDF5 file under which to create the table. Accepted here but not stored by this base class.
+        """
+        self.table_name = table_name
+        self.hdf5_file = hdf5_file
+        self.index_names = index_names
+        self.title = title
+
+    def append(self, index, element):
+        raise NotImplementedError
+
+# To put in a series dictionary instead of a real series, to do nothing
+# when we don't want a given series to be saved.
+class DummySeries():
+    def append(self, index, element):
+        pass
+
+class ErrorSeries(Series):
+    def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'):
+        Series.__init__(self, table_name, hdf5_file, index_names, title)
+
+        self.error_name = error_name
+
+        table_description = self._get_table_description()
+
+        self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title)
+
+    def _get_table_description(self):
+        return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,))
+
+    def append(self, index, error):
+        """Appends one row (index columns plus the error column) and flushes the HDF5 file.
+        Parameters
+        ----------
+        index : tuple of int
+            Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
+        error : float
+            Next error in the series.
+        """
+        if len(index) != len(self.index_names):
+            raise ValueError("index provided does not have the right length (expected " \
+                            + str(len(self.index_names)) + " got " + str(len(index)) + ")")
+
+        newrow = self._table.row
+
+        # Columns for index in table are based on index_names
+        for col_name, value in zip(self.index_names, index):
+            newrow[col_name] = value
+        newrow[self.error_name] = error
+
+        newrow.append()
+
+        self.hdf5_file.flush()
+
+# Does not inherit from Series because it does not itself need to
+# access the hdf5_file and does not need a series_name (provided
+# by the base_series.)
+class AccumulatorSeriesWrapper():
+    """
+    Buffers elements passed to append() and forwards one reduced value to base_series every reduce_every elements.
+    """
+    def __init__(self, base_series, reduce_every, reduce_function=numpy.mean):
+        """
+        Parameters
+        ----------
+        base_series : Series
+            This object must have an append(index, value) function.
+        reduce_every : int
+            Apply the reduction function (e.g. mean()) every time we get this number of elements. E.g. if this is 100, then every 100 numbers passed to append(), we'll take the mean and call append(index, this_mean) on base_series.
+        reduce_function : function
+            Must take as input an array of "elements", as passed to (this accumulator's) append(). Basic case would be to take an array of floats and sum them into one float, for example.
+        """
+        self.base_series = base_series
+        self.reduce_function = reduce_function
+        self.reduce_every = reduce_every
+
+        self._buffer = []
+
+    
+    def append(self, index, element):
+        """
+        Parameters
+        ----------
+        index : tuple of int
+            The index used is the one of the last element reduced. E.g. if
+            you accumulate over the first 1000 minibatches, the index
+            passed to the base_series.append() function will be 1000.
+        element : float
+            Element that will be accumulated.
+        """
+        self._buffer.append(element)
+
+        if len(self._buffer) == self.reduce_every:
+            reduced = self.reduce_function(self._buffer)
+            self.base_series.append(index, reduced)
+            self._buffer = []
+
+        # This should never happen, except if lists
+        # were appended, which should be a red flag.
+        assert len(self._buffer) < self.reduce_every
+
+# Outside of class to fix an issue with exec in Python 2.6.
+# My sorries to the God of pretty code.
+def _BasicStatisticsSeries_construct_table_toexec(index_names):
+    toexec = get_beginning_description_n_ints(index_names)
+
+    bpos = len(index_names)
+    toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n"
+    toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n"
+    toexec += "\tmax = Float32Col(pos=" + str(bpos+2) + ")\n"
+    toexec += "\tstd = Float32Col(pos=" + str(bpos+3) + ")\n"
+    
+    # This creates "LocalDescription", which we may then use
+    exec(toexec)
+
+    return LocalDescription
+
+basic_stats_functions = {'mean': lambda x: numpy.mean(x),
+                    'min': lambda x: numpy.min(x),
+                    'max': lambda x: numpy.max(x),
+                    'std': lambda x: numpy.std(x)}
+
+class BasicStatisticsSeries(Series):
+    """
+    Parameters
+    ----------
+    table_name : str
+        Name of the table created under hdf5_group; title (if given) is attached to that table.
+    stats_functions : dict, optional
+        Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float).
+    """
+    def __init__(self, table_name, hdf5_file, stats_functions=basic_stats_functions, index_names=('epoch',), title="", hdf5_group='/'):
+        Series.__init__(self, table_name, hdf5_file, index_names, title)
+
+        self.hdf5_group = hdf5_group
+
+        self.stats_functions = stats_functions
+
+        self._construct_table()
+
+    def _construct_table(self):
+        table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names)
+        # Pass the title along, as ErrorSeries does; it used to be dropped here.
+        self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description, title=self.title)
+
+    def append(self, index, array):
+        """Appends one row (index columns plus mean/min/max/std of array) and flushes the HDF5 file.
+        Parameters
+        ----------
+        index : tuple of int
+            Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
+        array
+            Is of whatever type the stats_functions passed to __init__ can take. Default is anything numpy.mean(), min(), max(), std() can take.
+        """
+        if len(index) != len(self.index_names):
+            raise ValueError("index provided does not have the right length (expected " \
+                            + str(len(self.index_names)) + " got " + str(len(index)) + ")")
+
+        newrow = self._table.row
+
+        for col_name, value in zip(self.index_names, index):
+            newrow[col_name] = value
+
+        newrow["mean"] = self.stats_functions['mean'](array)
+        newrow["min"] = self.stats_functions['min'](array)
+        newrow["max"] = self.stats_functions['max'](array)
+        newrow["std"] = self.stats_functions['std'](array)
+
+        newrow.append()
+
+        self.hdf5_file.flush()
+
+class SeriesArrayWrapper():
+    """
+    Simply redistributes any number of elements to sub-series to respective append()s.
+
+    To use if you have many elements to append in similar series, e.g. if you have an array containing [train_error, valid_error, test_error], and 3 corresponding series, this allows you to simply pass this array of 3 values to append() instead of passing each element to each individual series in turn.
+    """
+
+    def __init__(self, base_series_list):
+        self.base_series_list = base_series_list
+
+    def append(self, index, elements):
+        if len(elements) != len(self.base_series_list):
+            raise ValueError("not enough or too much elements provided (expected " \
+                            + str(len(self.base_series_list)) + " got " + str(len(elements)) + ")")
+        # elements[i] goes to base_series_list[i]; every series gets the same index.
+        for series, el in zip(self.base_series_list, elements):
+            series.append(index, el)
+
+class SharedParamsStatisticsWrapper(SeriesArrayWrapper):
+    '''Save mean, min/max, std of shared parameters placed in an array.
+
+    This is specifically for cases where we have _shared_ parameters,
+    as we take the .value of each array'''
+
+    def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch',), title=""):
+        """
+        Parameters
+        ----------
+        arrays_names : array of str
+            Name of each array, in order of the array passed to append(). E.g. ('layer1_b', 'layer1_W', 'layer2_b', 'layer2_W')
+        new_group_name : str
+            Name of a new HDF5 group which will be created under base_group to store the new series.
+        base_group : str
+            Path of the group under which to create the new group which will store the series.
+        title : str
+            Here the title is attached to the new group, not a table.
+        """
+        base_series_list = []
+
+        new_group = hdf5_file.createGroup(base_group, new_group_name, title=title)
+
+        stats_functions = {'mean': lambda x: numpy.mean(x.value),
+                    'min': lambda x: numpy.min(x.value),
+                    'max': lambda x: numpy.max(x.value),
+                    'std': lambda x: numpy.std(x.value)}
+
+        for name in arrays_names:
+            base_series_list.append(
+                        BasicStatisticsSeries(
+                                table_name=name,
+                                hdf5_file=hdf5_file,
+                                index_names=index_names,
+                                stats_functions=stats_functions,
+                                hdf5_group=new_group._v_pathname))
+
+        SeriesArrayWrapper.__init__(self, base_series_list)
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/seriestables/test_series.py	Wed Mar 10 16:52:22 2010 -0500
@@ -0,0 +1,173 @@
+import tempfile
+import numpy
+import numpy.random
+
+from jobman import DD
+
+from tables import *
+
+from series import *
+
+
+def compare_floats(f1,f2):
+    # True when f1 and f2 differ by less than 1e-3. abs() is required:
+    # the signed difference f1-f2 passed whenever f1 was smaller than f2.
+    return abs(f1-f2) < 1e-3
+
+def compare_lists(it1, it2, floats=False):
+    if len(it1) != len(it2):
+        return False
+
+    for el1,  el2 in zip(it1, it2):
+        if floats:
+            if not compare_floats(el1,el2):
+                return False
+        elif el1 != el2:
+            return False
+
+    return True
+
+def test_ErrorSeries_common_case(h5f=None):
+    if not h5f:
+        h5f_path = tempfile.NamedTemporaryFile().name
+        h5f = openFile(h5f_path, "w")
+
+    validation_error = ErrorSeries(error_name="validation_error", table_name="validation_error",
+                                hdf5_file=h5f, index_names=('epoch','minibatch'),
+                                title="Validation error indexed by epoch and minibatch")
+
+    # (1,1), (1,2) etc. are (epoch, minibatch) index
+    validation_error.append((1,1), 32.0)
+    validation_error.append((1,2), 30.0)
+    validation_error.append((2,1), 28.0)
+    validation_error.append((2,2), 26.0)
+
+    h5f.close()
+
+    h5f = openFile(h5f_path, "r")
+    
+    table = h5f.getNode('/', 'validation_error')
+
+    assert compare_lists(table.cols.epoch[:], [1,1,2,2])
+    assert compare_lists(table.cols.minibatch[:], [1,2,1,2])
+    assert compare_lists(table.cols.validation_error[:], [32.0, 30.0, 28.0, 26.0])
+
+def test_AccumulatorSeriesWrapper_common_case(h5f=None):
+    if not h5f:
+        h5f_path = tempfile.NamedTemporaryFile().name
+        h5f = openFile(h5f_path, "w")
+
+    validation_error = ErrorSeries(error_name="accumulated_validation_error",
+                                table_name="accumulated_validation_error",
+                                hdf5_file=h5f,
+                                index_names=('epoch','minibatch'),
+                                title="Validation error, summed every 3 minibatches, indexed by epoch and minibatch")
+
+    accumulator = AccumulatorSeriesWrapper(base_series=validation_error,
+                                    reduce_every=3, reduce_function=numpy.sum)
+
+    # (1,1), (1,2) etc. are (epoch, minibatch) index
+    accumulator.append((1,1), 32.0)
+    accumulator.append((1,2), 30.0)
+    accumulator.append((2,1), 28.0)
+    accumulator.append((2,2), 26.0)
+    accumulator.append((3,1), 24.0)
+    accumulator.append((3,2), 22.0)
+
+    h5f.close()
+
+    h5f = openFile(h5f_path, "r")
+    
+    table = h5f.getNode('/', 'accumulated_validation_error')
+
+    assert compare_lists(table.cols.epoch[:], [2,3])
+    assert compare_lists(table.cols.minibatch[:], [1,2])
+    assert compare_lists(table.cols.accumulated_validation_error[:], [90.0,72.0], floats=True)
+
+def test_BasicStatisticsSeries_common_case(h5f=None):
+    if not h5f:
+        h5f_path = tempfile.NamedTemporaryFile().name
+        h5f = openFile(h5f_path, "w")
+
+    stats_series = BasicStatisticsSeries(table_name="b_vector_statistics",
+                                hdf5_file=h5f, index_names=('epoch','minibatch'),
+                                title="Basic statistics for b vector indexed by epoch and minibatch")
+
+    # (1,1), (1,2) etc. are (epoch, minibatch) index
+    stats_series.append((1,1), [0.15, 0.20, 0.30])
+    stats_series.append((1,2), [-0.18, 0.30, 0.58])
+    stats_series.append((2,1), [0.18, -0.38, -0.68])
+    stats_series.append((2,2), [0.15, 0.02, 1.9])
+
+    h5f.close()
+
+    h5f = openFile(h5f_path, "r")
+    
+    table = h5f.getNode('/', 'b_vector_statistics')
+
+    assert compare_lists(table.cols.epoch[:], [1,1,2,2])
+    assert compare_lists(table.cols.minibatch[:], [1,2,1,2])
+    assert compare_lists(table.cols.mean[:], [0.21666667,  0.23333333, -0.29333332,  0.69], floats=True)
+    assert compare_lists(table.cols.min[:], [0.15000001, -0.18000001, -0.68000001,  0.02], floats=True)
+    assert compare_lists(table.cols.max[:], [0.30, 0.58, 0.18, 1.9], floats=True)
+    assert compare_lists(table.cols.std[:], [0.06236095, 0.31382939,  0.35640177, 0.85724366], floats=True)
+
+def test_SharedParamsStatisticsWrapper_commoncase(h5f=None):
+    import numpy.random
+
+    if not h5f:
+        h5f_path = tempfile.NamedTemporaryFile().name
+        h5f = openFile(h5f_path, "w")
+
+    stats = SharedParamsStatisticsWrapper(new_group_name="params", base_group="/",
+                                arrays_names=('b1','b2','b3'), hdf5_file=h5f,
+                                index_names=('epoch','minibatch'))
+
+    b1 = DD({'value':numpy.random.rand(5)})
+    b2 = DD({'value':numpy.random.rand(5)})
+    b3 = DD({'value':numpy.random.rand(5)})
+    stats.append((1,1), [b1,b2,b3])
+
+    h5f.close()
+
+    h5f = openFile(h5f_path, "r")
+
+    b1_table = h5f.getNode('/params', 'b1')
+    b3_table = h5f.getNode('/params', 'b3')
+    # abs(): a signed difference is < 1e-3 whenever the stored value is smaller.
+    assert abs(b1_table.cols.mean[0] - numpy.mean(b1.value)) < 1e-3
+    assert abs(b3_table.cols.mean[0] - numpy.mean(b3.value)) < 1e-3
+    assert abs(b1_table.cols.min[0] - numpy.min(b1.value)) < 1e-3
+    assert abs(b3_table.cols.min[0] - numpy.min(b3.value)) < 1e-3
+
+def test_get_desc():
+    h5f_path = tempfile.NamedTemporaryFile().name
+    h5f = openFile(h5f_path, "w")
+
+    desc = get_description_with_n_ints_n_floats(("col1","col2"), ("col3","col4"))
+
+    mytable = h5f.createTable('/', 'mytable', desc)
+
+    # just make sure the columns are there... otherwise this will throw an exception
+    mytable.cols.col1
+    mytable.cols.col2
+    mytable.cols.col3
+    mytable.cols.col4
+
+    # LocalDescription must stay local; catch only NameError (a bare except also hid the AssertionError).
+    try:
+        LocalDescription
+    except NameError:
+        leaked = False
+    else:
+        leaked = True
+    assert not leaked
+
+if __name__ == '__main__':
+    import tempfile
+    test_get_desc()
+    test_ErrorSeries_common_case()
+    test_BasicStatisticsSeries_common_case()
+    test_AccumulatorSeriesWrapper_common_case()
+    test_SharedParamsStatisticsWrapper_commoncase()
+
--- a/utils/tables_series/__init__.py	Wed Mar 10 16:17:59 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-from series import ErrorSeries, BasicStatisticsSeries, AccumulatorSeriesWrapper, SeriesArrayWrapper, SharedParamsStatisticsWrapper, DummySeries
-
--- a/utils/tables_series/series.py	Wed Mar 10 16:17:59 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,316 +0,0 @@
-from tables import *
-import numpy
-
-'''
-The way these "IsDescription constructor" work is simple: write the
-code as if it were in a file, then exec()ute it, leaving us with
-a local-scoped LocalDescription which may be used to call createTable.
-
-It's a small hack, but it's necessary as the names of the columns
-are retrieved based on the variable name, which we can't programmatically set
-otherwise.
-'''
-
-def get_beginning_description_n_ints(int_names, int_width=64):
-    """
-    Begins construction of a class inheriting from IsDescription
-    to construct an HDF5 table with index columns named with int_names.
-
-    See Series().__init__ to see how those are used.
-    """
-    int_constructor = "Int64Col"
-    if int_width == 32:
-        int_constructor = "Int32Col"
-
-    toexec = "class LocalDescription(IsDescription):\n"
-
-    pos = 0
-
-    for n in int_names:
-        toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n"
-
-    return toexec
-
-def get_description_with_n_ints_n_floats(int_names, float_names, int_width=64, float_width=32):
-    """
-    Constructs a class to be used when constructing a table with PyTables.
-
-    This is useful to construct a series with an index with multiple levels.
-    E.g. if you want to index your "validation error" with "epoch" first, then
-    "minibatch_index" second, you'd use two "int_names".
-
-    Parameters
-    ----------
-    int_names : tuple of str
-        Names of the int (e.g. index) columns
-    float_names : tuple of str
-        Names of the float (e.g. error) columns
-    int_width : {'32', '64'}
-        Type of ints.
-    float_width : {'32', '64'}
-        Type of floats.
-
-    Returns
-    -------
-    A class object, to pass to createTable()
-    """
-
-    toexec = get_beginning_description_n_ints(int_names, int_width=int_width)
-
-    float_constructor = "Float32Col"
-    if float_width == 64:
-        float_constructor = "Float64Col"
-    
-    pos = len(int_names)
-
-    for n in float_names:
-        toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n"
-
-    exec(toexec)
-
-    return LocalDescription
-
-class Series():
-    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
-        """Basic arguments each Series must get.
-
-        Parameters
-        ----------
-        table_name : str
-            Name of the table to create under group "hd5_group" (other parameter). No spaces, ie. follow variable naming restrictions.
-        hdf5_file : open HDF5 file
-            File opened with openFile() in PyTables (ie. return value of openFile).
-        index_names : tuple of str
-            Columns to use as index for elements in the series, other example would be ('epoch', 'minibatch'). This would then allow you to call append(index, element) with index made of two ints, one for epoch index, one for minibatch index in epoch.
-        title : str
-            Title to attach to this table as metadata. Can contain spaces and be longer then the table_name.
-        hdf5_group : str
-            Path of the group (kind of a file) in the HDF5 file under which to create the table.
-        """
-        self.table_name = table_name
-        self.hdf5_file = hdf5_file
-        self.index_names = index_names
-        self.title = title
-
-    def append(self, index, element):
-        raise NotImplementedError
-
-# To put in a series dictionary instead of a real series, to do nothing
-# when we don't want a given series to be saved.
-class DummySeries():
-    def append(self, index, element):
-        pass
-
-class ErrorSeries(Series):
-    def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
-        Series.__init__(self, table_name, hdf5_file, index_names, title)
-
-        self.error_name = error_name
-
-        table_description = self._get_table_description()
-
-        self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title)
-
-    def _get_table_description(self):
-        return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,))
-
-    def append(self, index, error):
-        """
-        Parameters
-        ----------
-        index : tuple of int
-            Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
-        error : float
-            Next error in the series.
-        """
-        if len(index) != len(self.index_names):
-            raise ValueError("index provided does not have the right length (expected " \
-                            + str(len(self.index_names)) + " got " + str(len(index)))
-
-        newrow = self._table.row
-
-        # Columns for index in table are based on index_names
-        for col_name, value in zip(self.index_names, index):
-            newrow[col_name] = value
-        newrow[self.error_name] = error
-
-        newrow.append()
-
-        self.hdf5_file.flush()
-
-# Does not inherit from Series because it does not itself need to
-# access the hdf5_file and does not need a series_name (provided
-# by the base_series.)
-class AccumulatorSeriesWrapper():
-    """
-    
-    """
-    def __init__(self, base_series, reduce_every, reduce_function=numpy.mean):
-        """
-        Parameters
-        ----------
-        base_series : Series
-            This object must have an append(index, value) function.
-        reduce_every : int
-            Apply the reduction function (e.g. mean()) every time we get this number of elements. E.g. if this is 100, then every 100 numbers passed to append(), we'll take the mean and call append(this_mean) on the BaseSeries.
-        reduce_function : function
-            Must take as input an array of "elements", as passed to (this accumulator's) append(). Basic case would be to take an array of floats and sum them into one float, for example.
-        """
-        self.base_series = base_series
-        self.reduce_function = reduce_function
-        self.reduce_every = reduce_every
-
-        self._buffer = []
-
-    
-    def append(self, index, element):
-        """
-        Parameters
-        ----------
-        index : tuple of int
-            The index used is the one of the last element reduced. E.g. if
-            you accumulate over the first 1000 minibatches, the index
-            passed to the base_series.append() function will be 1000.
-        element : float
-            Element that will be accumulated.
-        """
-        self._buffer.append(element)
-
-        if len(self._buffer) == self.reduce_every:
-            reduced = self.reduce_function(self._buffer)
-            self.base_series.append(index, reduced)
-            self._buffer = []
-
-        # This should never happen, except if lists
-        # were appended, which should be a red flag.
-        assert len(self._buffer) < self.reduce_every
-
-# Outside of class to fix an issue with exec in Python 2.6.
-# My sorries to the God of pretty code.
-def _BasicStatisticsSeries_construct_table_toexec(index_names):
-    toexec = get_beginning_description_n_ints(index_names)
-
-    bpos = len(index_names)
-    toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n"
-    toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n"
-    toexec += "\tmax = Float32Col(pos=" + str(bpos+2) + ")\n"
-    toexec += "\tstd = Float32Col(pos=" + str(bpos+3) + ")\n"
-    
-    # This creates "LocalDescription", which we may then use
-    exec(toexec)
-
-    return LocalDescription
-
-basic_stats_functions = {'mean': lambda(x): numpy.mean(x),
-                    'min': lambda(x): numpy.min(x),
-                    'max': lambda(x): numpy.max(x),
-                    'std': lambda(x): numpy.std(x)}
-
-class BasicStatisticsSeries(Series):
-    """
-    Parameters
-    ----------
-    series_name : str
-        Not optional here. Will be prepended with "Basic statistics for "
-    stats_functions : dict, optional
-        Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float).
-    """
-    def __init__(self, table_name, hdf5_file, stats_functions=basic_stats_functions, index_names=('epoch',), title=None, hdf5_group='/'):
-        Series.__init__(self, table_name, hdf5_file, index_names, title)
-
-        self.hdf5_group = hdf5_group
-
-        self.stats_functions = stats_functions
-
-        self._construct_table()
-
-    def _construct_table(self):
-        table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names)
-
-        self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description)
-
-    def append(self, index, array):
-        """
-        Parameters
-        ----------
-        index : tuple of int
-            Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
-        array
-            Is of whatever type the stats_functions passed to __init__ can take. Default is anything numpy.mean(), min(), max(), std() can take. 
-        """
-        if len(index) != len(self.index_names):
-            raise ValueError("index provided does not have the right length (expected " \
-                            + str(len(self.index_names)) + " got " + str(len(index)))
-
-        newrow = self._table.row
-
-        for col_name, value in zip(self.index_names, index):
-            newrow[col_name] = value
-
-        newrow["mean"] = self.stats_functions['mean'](array)
-        newrow["min"] = self.stats_functions['min'](array)
-        newrow["max"] = self.stats_functions['max'](array)
-        newrow["std"] = self.stats_functions['std'](array)
-
-        newrow.append()
-
-        self.hdf5_file.flush()
-
-class SeriesArrayWrapper():
-    """
-    Simply redistributes any number of elements to sub-series to respective append()s.
-
-    To use if you have many elements to append in similar series, e.g. if you have an array containing [train_error, valid_error, test_error], and 3 corresponding series, this allows you to simply pass this array of 3 values to append() instead of passing each element to each individual series in turn.
-    """
-
-    def __init__(self, base_series_list):
-        self.base_series_list = base_series_list
-
-    def append(self, index, elements):
-        if len(elements) != len(self.base_series_list):
-            raise ValueError("not enough or too much elements provided (expected " \
-                            + str(len(self.base_series_list)) + " got " + str(len(elements)))
-
-        for series, el in zip(self.base_series_list, elements):
-            series.append(index, el)
-
-class SharedParamsStatisticsWrapper(SeriesArrayWrapper):
-    '''Save mean, min/max, std of shared parameters place in an array.
-
-    This is specifically for cases where we have _shared_ parameters,
-    as we take the .value of each array'''
-
-    def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch',), title=""):
-        """
-        Parameters
-        ----------
-        array_names : array of str
-            Name of each array, in order of the array passed to append(). E.g. ('layer1_b', 'layer1_W', 'layer2_b', 'layer2_W')
-        new_group_name : str
-            Name of a new HDF5 group which will be created under base_group to store the new series.
-        base_group : str
-            Path of the group under which to create the new group which will store the series.
-        title : str
-            Here the title is attached to the new group, not a table.
-        """
-        base_series_list = []
-
-        new_group = hdf5_file.createGroup(base_group, new_group_name, title=title)
-
-        stats_functions = {'mean': lambda(x): numpy.mean(x.value),
-                    'min': lambda(x): numpy.min(x.value),
-                    'max': lambda(x): numpy.max(x.value),
-                    'std': lambda(x): numpy.std(x.value)}
-
-        for name in arrays_names:
-            base_series_list.append(
-                        BasicStatisticsSeries(
-                                table_name=name,
-                                hdf5_file=hdf5_file,
-                                index_names=index_names,
-                                stats_functions=stats_functions,
-                                hdf5_group=new_group._v_pathname))
-
-        SeriesArrayWrapper.__init__(self, base_series_list)
-
-
--- a/utils/tables_series/test_series.py	Wed Mar 10 16:17:59 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,173 +0,0 @@
-import tempfile
-import numpy
-import numpy.random
-
-from jobman import DD
-
-from tables import *
-
-from series import *
-
-
-def compare_floats(f1,f2):
-    if f1-f2 < 1e-3:
-        return True
-    return False
-
-def compare_lists(it1, it2, floats=False):
-    if len(it1) != len(it2):
-        return False
-
-    for el1,  el2 in zip(it1, it2):
-        if floats:
-            if not compare_floats(el1,el2):
-                return False
-        elif el1 != el2:
-            return False
-
-    return True
-
-def test_ErrorSeries_common_case(h5f=None):
-    if not h5f:
-        h5f_path = tempfile.NamedTemporaryFile().name
-        h5f = openFile(h5f_path, "w")
-
-    validation_error = ErrorSeries(error_name="validation_error", table_name="validation_error",
-                                hdf5_file=h5f, index_names=('epoch','minibatch'),
-                                title="Validation error indexed by epoch and minibatch")
-
-    # (1,1), (1,2) etc. are (epoch, minibatch) index
-    validation_error.append((1,1), 32.0)
-    validation_error.append((1,2), 30.0)
-    validation_error.append((2,1), 28.0)
-    validation_error.append((2,2), 26.0)
-
-    h5f.close()
-
-    h5f = openFile(h5f_path, "r")
-    
-    table = h5f.getNode('/', 'validation_error')
-
-    assert compare_lists(table.cols.epoch[:], [1,1,2,2])
-    assert compare_lists(table.cols.minibatch[:], [1,2,1,2])
-    assert compare_lists(table.cols.validation_error[:], [32.0, 30.0, 28.0, 26.0])
-
-def test_AccumulatorSeriesWrapper_common_case(h5f=None):
-    if not h5f:
-        h5f_path = tempfile.NamedTemporaryFile().name
-        h5f = openFile(h5f_path, "w")
-
-    validation_error = ErrorSeries(error_name="accumulated_validation_error",
-                                table_name="accumulated_validation_error",
-                                hdf5_file=h5f,
-                                index_names=('epoch','minibatch'),
-                                title="Validation error, summed every 3 minibatches, indexed by epoch and minibatch")
-
-    accumulator = AccumulatorSeriesWrapper(base_series=validation_error,
-                                    reduce_every=3, reduce_function=numpy.sum)
-
-    # (1,1), (1,2) etc. are (epoch, minibatch) index
-    accumulator.append((1,1), 32.0)
-    accumulator.append((1,2), 30.0)
-    accumulator.append((2,1), 28.0)
-    accumulator.append((2,2), 26.0)
-    accumulator.append((3,1), 24.0)
-    accumulator.append((3,2), 22.0)
-
-    h5f.close()
-
-    h5f = openFile(h5f_path, "r")
-    
-    table = h5f.getNode('/', 'accumulated_validation_error')
-
-    assert compare_lists(table.cols.epoch[:], [2,3])
-    assert compare_lists(table.cols.minibatch[:], [1,2])
-    assert compare_lists(table.cols.accumulated_validation_error[:], [90.0,72.0], floats=True)
-
-def test_BasicStatisticsSeries_common_case(h5f=None):
-    if not h5f:
-        h5f_path = tempfile.NamedTemporaryFile().name
-        h5f = openFile(h5f_path, "w")
-
-    stats_series = BasicStatisticsSeries(table_name="b_vector_statistics",
-                                hdf5_file=h5f, index_names=('epoch','minibatch'),
-                                title="Basic statistics for b vector indexed by epoch and minibatch")
-
-    # (1,1), (1,2) etc. are (epoch, minibatch) index
-    stats_series.append((1,1), [0.15, 0.20, 0.30])
-    stats_series.append((1,2), [-0.18, 0.30, 0.58])
-    stats_series.append((2,1), [0.18, -0.38, -0.68])
-    stats_series.append((2,2), [0.15, 0.02, 1.9])
-
-    h5f.close()
-
-    h5f = openFile(h5f_path, "r")
-    
-    table = h5f.getNode('/', 'b_vector_statistics')
-
-    assert compare_lists(table.cols.epoch[:], [1,1,2,2])
-    assert compare_lists(table.cols.minibatch[:], [1,2,1,2])
-    assert compare_lists(table.cols.mean[:], [0.21666667,  0.23333333, -0.29333332,  0.69], floats=True)
-    assert compare_lists(table.cols.min[:], [0.15000001, -0.18000001, -0.68000001,  0.02], floats=True)
-    assert compare_lists(table.cols.max[:], [0.30, 0.58, 0.18, 1.9], floats=True)
-    assert compare_lists(table.cols.std[:], [0.06236095, 0.31382939,  0.35640177, 0.85724366], floats=True)
-
-def test_SharedParamsStatisticsWrapper_commoncase(h5f=None):
-    import numpy.random
-
-    if not h5f:
-        h5f_path = tempfile.NamedTemporaryFile().name
-        h5f = openFile(h5f_path, "w")
-
-    stats = SharedParamsStatisticsWrapper(new_group_name="params", base_group="/",
-                                arrays_names=('b1','b2','b3'), hdf5_file=h5f,
-                                index_names=('epoch','minibatch'))
-
-    b1 = DD({'value':numpy.random.rand(5)})
-    b2 = DD({'value':numpy.random.rand(5)})
-    b3 = DD({'value':numpy.random.rand(5)})
-    stats.append((1,1), [b1,b2,b3])
-
-    h5f.close()
-
-    h5f = openFile(h5f_path, "r")
-
-    b1_table = h5f.getNode('/params', 'b1')
-    b3_table = h5f.getNode('/params', 'b3')
-
-    assert b1_table.cols.mean[0] - numpy.mean(b1.value) < 1e-3
-    assert b3_table.cols.mean[0] - numpy.mean(b3.value) < 1e-3
-    assert b1_table.cols.min[0] - numpy.min(b1.value) < 1e-3
-    assert b3_table.cols.min[0] - numpy.min(b3.value) < 1e-3
-
-def test_get_desc():
-    h5f_path = tempfile.NamedTemporaryFile().name
-    h5f = openFile(h5f_path, "w")
-
-    desc = get_description_with_n_ints_n_floats(("col1","col2"), ("col3","col4"))
-
-    mytable = h5f.createTable('/', 'mytable', desc)
-
-    # just make sure the columns are there... otherwise this will throw an exception
-    mytable.cols.col1
-    mytable.cols.col2
-    mytable.cols.col3
-    mytable.cols.col4
-
-    try:
-        # this should fail... LocalDescription must be local to get_desc_etc
-        test = LocalDescription
-        assert False
-    except:
-        assert True
-
-    assert True
-
-if __name__ == '__main__':
-    import tempfile
-    test_get_desc()
-    test_ErrorSeries_common_case()
-    test_BasicStatisticsSeries_common_case()
-    test_AccumulatorSeriesWrapper_common_case()
-    test_SharedParamsStatisticsWrapper_commoncase()
-