comparison utils/tables_series/series.py @ 208:acb942530923

Completely rewrote my series module, now based on HDF5 and PyTables (in a separate directory called 'tables_series' for backward compatibility of running code). Minor (inconsequential) changes to stacked_dae.
author fsavard
date Fri, 05 Mar 2010 18:07:20 -0500
parents
children dc0d77c8a878
comparison
equal deleted inserted replaced
205:10a801240bfc 208:acb942530923
1 from tables import *
2 import numpy
3
4 '''
The way these "IsDescription constructors" work is simple: write the
6 code as if it were in a file, then exec()ute it, leaving us with
7 a local-scoped LocalDescription which may be used to call createTable.
8
9 It's a small hack, but it's necessary as the names of the columns
10 are retrieved based on the variable name, which we can't programmatically set
11 otherwise.
12 '''
13
def get_beginning_description_n_ints(int_names, int_width=64):
    """
    Begin the source code of an IsDescription subclass.

    Returns the first lines of a "class LocalDescription(IsDescription):"
    code string, declaring one integer column per name in `int_names`,
    at positions 0..len(int_names)-1 (in the order given).

    Parameters
    ----------
    int_names : tuple of str
        Names of the int (e.g. index) columns to declare.
    int_width : {32, 64}
        Bit width of the integer columns (Int32Col or Int64Col).

    Returns
    -------
    str
        Partial class source, to be completed and exec()uted by a caller.
    """
    int_constructor = "Int64Col"
    if int_width == 32:
        int_constructor = "Int32Col"

    toexec = "class LocalDescription(IsDescription):\n"

    # BUGFIX: each column must get its own increasing position; the
    # previous version never incremented pos, so all columns got pos=0.
    for pos, n in enumerate(int_names):
        toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n"

    return toexec
27
def get_description_with_n_ints_n_floats(int_names, float_names, int_width=64, float_width=32):
    """
    Constructs a class to be used when constructing a table with PyTables.

    This is useful to construct a series with an index with multiple levels.
    E.g. if you want to index your "validation error" with "epoch" first, then
    "minibatch_index" second, you'd use two "int_names".

    Parameters
    ----------
    int_names : tuple of str
        Names of the int (e.g. index) columns
    float_names : tuple of str
        Names of the float (e.g. error) columns
    int_width : {32, 64}
        Type of ints.
    float_width : {32, 64}
        Type of floats.

    Returns
    -------
    A class object, to pass to createTable()
    """

    toexec = get_beginning_description_n_ints(int_names, int_width=int_width)

    float_constructor = "Float32Col"
    if float_width == 64:
        float_constructor = "Float64Col"

    # Float columns come after all int columns; each must get its own
    # position (the previous version never incremented pos, so all float
    # columns collided at pos=len(int_names)).
    pos = len(int_names)
    for n in float_names:
        toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n"
        pos += 1

    # exec() into an explicit namespace so the generated LocalDescription
    # can be retrieved reliably (reading a local created by a bare exec()
    # inside a function is not portable across Python versions).
    namespace = {}
    exec(toexec, globals(), namespace)

    return namespace['LocalDescription']
66
class Series():
    """
    Base class for a series of values stored as a table in an HDF5 file
    (via PyTables), indexed by one or more integer columns.
    """
    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
        """
        Parameters
        ----------
        table_name : str
            Used as metadata in the HDF5 file to identify the series.
        hdf5_file : open PyTables file
            File in which subclasses create their table.
        index_names : tuple of str
            Names of the integer index columns (e.g. ('epoch','minibatch')).
        title : str or None
            Title metadata for the table.
        hdf5_group : str
            Path of the HDF5 group the table belongs to.
        """
        self.table_name = table_name
        self.hdf5_file = hdf5_file
        self.index_names = index_names
        self.title = title
        # Previously this parameter was accepted but silently dropped;
        # store it so the base class keeps everything it was given.
        self.hdf5_group = hdf5_group

    def append(self, index, element):
        """Subclasses must implement appending one element at `index`."""
        raise NotImplementedError
77
class ErrorSeries(Series):
    """
    Series of scalar float values (e.g. a validation error), stored in a
    table with one int column per index name plus one float column.
    """
    def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
        """
        Parameters
        ----------
        error_name : str
            Name of the float column holding the recorded value.
        (other parameters: see Series.__init__)
        """
        Series.__init__(self, table_name, hdf5_file, index_names, title)

        self.error_name = error_name

        table_description = self._get_table_description()

        self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title)

    def _get_table_description(self):
        # One int column per index name, followed by a single float column.
        return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,))

    def append(self, index, error):
        """
        Append one row and flush the file.

        Parameters
        ----------
        index : tuple of int
            One value per entry of index_names.
        error : float
            Value stored under the error_name column.

        Raises
        ------
        ValueError if index does not match index_names in length.
        """
        if len(index) != len(self.index_names):
            # BUGFIX: the message previously lacked its closing parenthesis.
            raise ValueError("index provided does not have the right length (expected "
                             + str(len(self.index_names)) + " got " + str(len(index)) + ")")

        newrow = self._table.row

        for col_name, value in zip(self.index_names, index):
            newrow[col_name] = value
        newrow[self.error_name] = error

        newrow.append()

        self.hdf5_file.flush()
105
# Does not inherit from Series because it does not itself need to
# access the hdf5_file and does not need a series_name (provided
# by the base_series.)
class AccumulatorSeriesWrapper():
    """
    Wraps another series, buffering appended elements and forwarding only
    a reduced value (e.g. the mean) every `reduce_every` calls.
    """
    def __init__(self, base_series, reduce_every, reduce_function=numpy.mean):
        """
        Parameters
        ----------
        base_series : Series-like
            Object whose append(index, element) receives the reduced values.
        reduce_every : int
            Number of buffered elements per reduction.
        reduce_function : callable
            Maps the list of buffered elements to a single value
            (default: numpy.mean).
        """
        self.base_series = base_series
        self.reduce_function = reduce_function
        self.reduce_every = reduce_every

        self._buffer = []


    def append(self, index, element):
        """
        Buffer `element`; on every `reduce_every`-th call, append the
        reduced value to the base series and clear the buffer.

        Parameters
        ----------
        index : tuple of int
            The index used is the one of the last element reduced. E.g. if
            you accumulate over the first 1000 minibatches, the index
            passed to the base_series.append() function will be 1000.
        element : object
            Value to buffer; must be accepted by reduce_function.
        """
        self._buffer.append(element)

        if len(self._buffer) == self.reduce_every:
            reduced = self.reduce_function(self._buffer)
            self.base_series.append(index, reduced)
            self._buffer = []

        # This should never happen, except if lists
        # were appended, which should be a red flag.
        assert len(self._buffer) < self.reduce_every
140
# Outside of class to fix an issue with exec in Python 2.6.
# My sorries to the God of pretty code.
def BasicStatisticsSeries_construct_table_toexec(index_names):
    """
    Build the IsDescription subclass for a basic-statistics table:
    one int column per index name, followed by mean/min/max/std
    Float32 columns.

    Parameters
    ----------
    index_names : tuple of str
        Names of the int index columns.

    Returns
    -------
    A class object, to pass to createTable().
    """
    toexec = get_beginning_description_n_ints(index_names)

    # Statistics columns come right after the index columns.
    bpos = len(index_names)
    toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n"
    toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n"
    toexec += "\tmax = Float32Col(pos=" + str(bpos+2) + ")\n"
    toexec += "\tstd = Float32Col(pos=" + str(bpos+3) + ")\n"

    # This creates "LocalDescription"; exec into an explicit namespace so
    # the generated class can be retrieved portably (reading a local
    # created by a bare exec() is not reliable across Python versions).
    namespace = {}
    exec(toexec, globals(), namespace)

    return namespace['LocalDescription']
156
class BasicStatisticsSeries(Series):
    """
    Series that, for each appended array, stores the array's
    mean/min/max/std statistics rather than the array itself.

    Parameters
    ----------
    table_name : str
        Not optional here. Name of the table created in the HDF5 file.
    """
    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
        Series.__init__(self, table_name, hdf5_file, index_names, title)

        self.hdf5_group = hdf5_group

        self.construct_table()

    def construct_table(self):
        # Table layout: index int columns + mean/min/max/std float columns.
        table_description = BasicStatisticsSeries_construct_table_toexec(self.index_names)

        self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description)

    def append(self, index, array):
        """
        Append one row of statistics computed over `array`, then flush.

        Parameters
        ----------
        index : tuple of int
            One value per entry of index_names.
        array : array-like
            Values over which mean/min/max/std are computed.

        Raises
        ------
        ValueError if index does not match index_names in length.
        """
        if len(index) != len(self.index_names):
            # BUGFIX: the message previously lacked its closing parenthesis.
            raise ValueError("index provided does not have the right length (expected "
                             + str(len(self.index_names)) + " got " + str(len(index)) + ")")

        newrow = self._table.row

        for col_name, value in zip(self.index_names, index):
            newrow[col_name] = value

        newrow["mean"] = numpy.mean(array)
        newrow["min"] = numpy.min(array)
        newrow["max"] = numpy.max(array)
        newrow["std"] = numpy.std(array)

        newrow.append()

        self.hdf5_file.flush()
194
class SeriesArrayWrapper():
    """
    Simply redistributes any number of elements to sub-series to respective append()s.
    """

    def __init__(self, base_series_list):
        """
        Parameters
        ----------
        base_series_list : sequence of Series-like
            One sub-series per element later passed to append().
        """
        self.base_series_list = base_series_list

    def append(self, index, elements):
        """
        Forward elements[i] to base_series_list[i].append(index, ...).

        Raises
        ------
        ValueError if the number of elements does not match the number
        of sub-series.
        """
        if len(elements) != len(self.base_series_list):
            # BUGFIX: fixed grammar ("too much" -> "too many") and the
            # missing closing parenthesis in the message.
            raise ValueError("not enough or too many elements provided (expected "
                             + str(len(self.base_series_list)) + " got " + str(len(elements)) + ")")

        for series, el in zip(self.base_series_list, elements):
            series.append(index, el)
210
class ParamsStatisticsWrapper(SeriesArrayWrapper):
    """
    Creates one BasicStatisticsSeries per named array, all inside a newly
    created HDF5 group, and redistributes appended arrays to them.
    """
    def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch','minibatch'), title=""):
        """
        Parameters
        ----------
        arrays_names : sequence of str
            One statistics table (sub-series) is created per name.
        new_group_name : str
            Name of the HDF5 group created under base_group.
        hdf5_file : open PyTables file
            File in which the group and tables are created.
        base_group : str
            Path under which the new group is created.
        index_names : tuple of str
            Index columns of every sub-series. BUGFIX: this parameter was
            previously ignored and ('epoch','minibatch') was hard-coded;
            it is now honored, with the default changed to
            ('epoch','minibatch') so existing callers keep the same tables.
        title : str
            Title metadata for the new group.
        """
        base_series_list = []

        new_group = hdf5_file.createGroup(base_group, new_group_name, title=title)

        for name in arrays_names:
            base_series_list.append(
                BasicStatisticsSeries(
                    table_name=name,
                    hdf5_file=hdf5_file,
                    index_names=index_names,
                    hdf5_group=new_group._v_pathname))

        SeriesArrayWrapper.__init__(self, base_series_list)
226
227