comparison utils/seriestables/series.py @ 217:de3aef84714a

merge, second try
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Wed, 10 Mar 2010 17:08:50 -0500
parents a96fa4de06d2
children 4c137f16b013
comparison
equal deleted inserted replaced
216:c89004f9cab2 217:de3aef84714a
1 from tables import *
2 import numpy
3
'''
The way these "IsDescription constructors" work is simple: write the
code as if it were in a file, then exec()ute it, leaving us with
a locally scoped LocalDescription class which may be used to call createTable.

It's a small hack, but it's necessary because the names of the columns
are retrieved from the variable names, which we can't set programmatically
otherwise.
'''
13
def get_beginning_description_n_ints(int_names, int_width=64):
    """
    Begins construction of a class inheriting from IsDescription
    to construct an HDF5 table with index columns named with int_names.

    See Series().__init__ to see how those are used.

    Parameters
    ----------
    int_names : tuple of str
        Names of the int (index) columns, in the order they should
        appear in the table.
    int_width : int
        32 selects Int32Col; any other value selects Int64Col.

    Returns
    -------
    str
        Source code for the beginning of a "LocalDescription" class: the
        class header followed by one tab-indented column declaration per
        name. The caller may append more columns before exec()uting it.
    """
    int_constructor = "Int32Col" if int_width == 32 else "Int64Col"

    lines = ["class LocalDescription(IsDescription):\n"]

    # Give every column its own position (0, 1, 2, ...) so the column
    # order follows int_names instead of all columns sharing pos=0.
    for pos, name in enumerate(int_names):
        lines.append("\t" + name + " = " + int_constructor + "(pos=" + str(pos) + ")\n")

    return "".join(lines)
33
def get_description_with_n_ints_n_floats(int_names, float_names, int_width=64, float_width=32):
    """
    Constructs a class to be used when constructing a table with PyTables.

    This is useful to construct a series with an index with multiple levels.
    E.g. if you want to index your "validation error" with "epoch" first, then
    "minibatch_index" second, you'd use two "int_names".

    Parameters
    ----------
    int_names : tuple of str
        Names of the int (e.g. index) columns
    float_names : tuple of str
        Names of the float (e.g. error) columns
    int_width : {32, 64}
        Width of the int columns, as an int (forwarded to
        get_beginning_description_n_ints).
    float_width : {32, 64}
        Width of the float columns, as an int. 64 selects Float64Col;
        any other value selects Float32Col.

    Returns
    -------
    A class object, to pass to createTable()
    """
    toexec = get_beginning_description_n_ints(int_names, int_width=int_width)

    float_constructor = "Float64Col" if float_width == 64 else "Float32Col"

    # Float columns come right after the int columns; each one gets its
    # own position so that column order follows float_names.
    first_float_pos = len(int_names)
    for offset, name in enumerate(float_names):
        toexec += ("\t" + name + " = " + float_constructor
                   + "(pos=" + str(first_float_pos + offset) + ")\n")

    # NOTE: the column names are spliced into source code that is then
    # exec()uted, so they must be valid identifiers coming from trusted code.
    # Executing in an explicit namespace avoids relying on exec() injecting
    # "LocalDescription" into this function's local variables.
    namespace = {}
    exec(toexec, globals(), namespace)

    return namespace["LocalDescription"]
72
class Series():
    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'):
        """Record the arguments shared by every Series.

        Parameters
        ----------
        table_name : str
            Name of the table to create under group "hdf5_group" (other
            parameter). No spaces, i.e. follow variable naming restrictions.
        hdf5_file : open HDF5 file
            File opened with openFile() in PyTables (i.e. the return value
            of openFile).
        index_names : tuple of str
            Columns to use as index for elements in the series. Another
            example would be ('epoch', 'minibatch'), which would then allow
            calling append(index, element) with an index made of two ints:
            one for the epoch, one for the minibatch within the epoch.
        title : str
            Title to attach to this table as metadata. Can contain spaces
            and be longer than the table_name.
        hdf5_group : str
            Path of the group (kind of a file) in the HDF5 file under which
            to create the table. Accepted here but not stored by this
            base class.
        """
        (self.table_name,
         self.hdf5_file,
         self.index_names,
         self.title) = (table_name, hdf5_file, index_names, title)

    def append(self, index, element):
        """Add an element to the series; must be provided by subclasses."""
        raise NotImplementedError()
97
# Stand-in to put in a series dictionary in place of a real series:
# it does nothing, for when we don't want a given series to be saved.
class DummySeries():
    def append(self, index, element):
        """Accept the same arguments as a real series and discard them."""
        return None
103
class ErrorSeries(Series):
    """
    Series storing one float value (the "error") per index in an HDF5 table.

    The table is created in __init__ with one int column per entry of
    index_names followed by one float column named error_name.
    """
    def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'):
        """
        Parameters
        ----------
        error_name : str
            Name of the float column holding the error values.
        table_name, hdf5_file, index_names, title
            Forwarded to Series.__init__.
        hdf5_group : str
            Path of the group under which the table is created.
        """
        Series.__init__(self, table_name, hdf5_file, index_names, title)

        self.error_name = error_name

        table_description = self._get_table_description()

        self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title)

    def _get_table_description(self):
        """Build the table description: index columns plus the error column."""
        return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,))

    def append(self, index, error):
        """
        Parameters
        ----------
        index : tuple of int
            Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
        error : float
            Next error in the series.

        Raises
        ------
        ValueError
            If index does not have as many entries as index_names.
        """
        if len(index) != len(self.index_names):
            raise ValueError("index provided does not have the right length (expected "
                             + str(len(self.index_names)) + " got " + str(len(index)) + ")")

        newrow = self._table.row

        # Columns for index in table are based on index_names
        for col_name, value in zip(self.index_names, index):
            newrow[col_name] = value
        newrow[self.error_name] = error

        newrow.append()

        # Flush after every row so the value reaches the file right away.
        self.hdf5_file.flush()
140
# Deliberately not a Series subclass: this wrapper never touches the
# hdf5_file itself and needs no series_name of its own (the base_series
# provides those).
class AccumulatorSeriesWrapper():
    """
    Buffers the elements passed to append() and, every reduce_every
    elements, forwards a single reduced value to the wrapped base_series.
    """
    def __init__(self, base_series, reduce_every, reduce_function=numpy.mean):
        """
        Parameters
        ----------
        base_series : Series
            This object must have an append(index, value) function.
        reduce_every : int
            Apply the reduction function (e.g. mean()) every time we get
            this number of elements. E.g. if this is 100, then every 100
            numbers passed to append(), we'll take the mean and call
            append(index, this_mean) on the base_series.
        reduce_function : function
            Must take as input an array of "elements", as passed to (this
            accumulator's) append(). Basic case would be to take an array
            of floats and sum them into one float, for example.
        """
        self._buffer = []

        self.reduce_every = reduce_every
        self.reduce_function = reduce_function
        self.base_series = base_series

    def append(self, index, element):
        """
        Parameters
        ----------
        index : tuple of int
            The index used is the one of the last element reduced. E.g. if
            you accumulate over the first 1000 minibatches, the index
            passed to the base_series.append() function will be 1000.
        element : float
            Element that will be accumulated.
        """
        pending = self._buffer
        pending.append(element)

        if len(pending) == self.reduce_every:
            # Buffer is full: reduce it, hand the result on, start afresh.
            summary = self.reduce_function(pending)
            self.base_series.append(index, summary)
            self._buffer = []

        # This should never happen, except if lists
        # were appended, which should be a red flag.
        assert len(self._buffer) < self.reduce_every
187
# Outside of class to fix an issue with exec in Python 2.6.
# My sorries to the God of pretty code.
def _BasicStatisticsSeries_construct_table_toexec(index_names):
    """
    Build the table description class used by BasicStatisticsSeries.

    Parameters
    ----------
    index_names : tuple of str
        Names of the int index columns; they come first in the table.

    Returns
    -------
    A class object with the index columns followed by four Float32Col
    columns named "mean", "min", "max" and "std", in that order.
    """
    toexec = get_beginning_description_n_ints(index_names)

    # The statistics columns are positioned right after the index columns.
    bpos = len(index_names)
    for offset, stat_name in enumerate(("mean", "min", "max", "std")):
        toexec += "\t" + stat_name + " = Float32Col(pos=" + str(bpos + offset) + ")\n"

    # This creates "LocalDescription" inside an explicit namespace, so we
    # don't depend on exec() injecting names into this function's locals.
    namespace = {}
    exec(toexec, globals(), namespace)

    return namespace["LocalDescription"]
203
# Default statistics functions for BasicStatisticsSeries: each takes whatever
# is passed to append() and reduces it to a single number with numpy.
# (Written as "lambda x:" rather than "lambda(x):", which is not valid
# syntax on Python 3.)
basic_stats_functions = {'mean': lambda x: numpy.mean(x),
                         'min': lambda x: numpy.min(x),
                         'max': lambda x: numpy.max(x),
                         'std': lambda x: numpy.std(x)}
208
class BasicStatisticsSeries(Series):
    """
    Series storing the mean, min, max and std of each appended element.

    Parameters
    ----------
    table_name : str
        Name of the table to create under hdf5_group.
    hdf5_file : open HDF5 file
        File in which the table is created.
    stats_functions : dict, optional
        Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float).
    index_names : tuple of str
        Names of the int index columns.
    title : str
        Title attached to the table as metadata.
    hdf5_group : str
        Path of the group under which the table is created.
    """
    def __init__(self, table_name, hdf5_file, stats_functions=basic_stats_functions, index_names=('epoch',), title="", hdf5_group='/'):
        Series.__init__(self, table_name, hdf5_file, index_names, title)

        self.hdf5_group = hdf5_group

        self.stats_functions = stats_functions

        self._construct_table()

    def _construct_table(self):
        """Create the HDF5 table holding the index and statistics columns."""
        table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names)

        # Forward the title as well, as ErrorSeries does, so that it is
        # actually attached to the table instead of being silently dropped.
        self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description, title=self.title)

    def append(self, index, array):
        """
        Parameters
        ----------
        index : tuple of int
            Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
        array
            Is of whatever type the stats_functions passed to __init__ can take. Default is anything numpy.mean(), min(), max(), std() can take.

        Raises
        ------
        ValueError
            If index does not have as many entries as index_names.
        """
        if len(index) != len(self.index_names):
            raise ValueError("index provided does not have the right length (expected "
                             + str(len(self.index_names)) + " got " + str(len(index)) + ")")

        newrow = self._table.row

        for col_name, value in zip(self.index_names, index):
            newrow[col_name] = value

        # One column per statistic, each computed by its configured function.
        for stat_name in ("mean", "min", "max", "std"):
            newrow[stat_name] = self.stats_functions[stat_name](array)

        newrow.append()

        # Flush after every row so the values reach the file right away.
        self.hdf5_file.flush()
258
class SeriesArrayWrapper():
    """
    Simply redistributes any number of elements to sub-series to respective append()s.

    To use if you have many elements to append in similar series, e.g. if you have an array containing [train_error, valid_error, test_error], and 3 corresponding series, this allows you to simply pass this array of 3 values to append() instead of passing each element to each individual series in turn.
    """

    def __init__(self, base_series_list):
        """
        Parameters
        ----------
        base_series_list : list
            Objects with an append(index, element) function, one per
            element that will be passed to this wrapper's append().
        """
        self.base_series_list = base_series_list

    def append(self, index, elements):
        """
        Pass elements[i] to base_series_list[i].append(), all with the same index.

        Raises
        ------
        ValueError
            If elements does not have exactly one entry per base series.
        """
        if len(elements) != len(self.base_series_list):
            raise ValueError("not enough or too much elements provided (expected "
                             + str(len(self.base_series_list)) + " got " + str(len(elements)) + ")")

        for series, el in zip(self.base_series_list, elements):
            series.append(index, el)
276
class SharedParamsStatisticsWrapper(SeriesArrayWrapper):
    '''Save mean, min/max, std of shared parameters placed in an array.

    This is specifically for cases where we have _shared_ parameters,
    as we take the .value of each array'''

    def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch',), title=""):
        """
        Parameters
        ----------
        arrays_names : array of str
            Name of each array, in order of the array passed to append(). E.g. ('layer1_b', 'layer1_W', 'layer2_b', 'layer2_W')
        new_group_name : str
            Name of a new HDF5 group which will be created under base_group to store the new series.
        hdf5_file : open HDF5 file
            File in which the new group and its tables are created.
        base_group : str
            Path of the group under which to create the new group which will store the series.
        index_names : tuple of str
            Names of the int index columns, shared by every created series.
        title : str
            Here the title is attached to the new group, not a table.
        """
        new_group = hdf5_file.createGroup(base_group, new_group_name, title=title)

        # Each statistic is computed on the .value of the element passed to
        # append(). ("lambda x:" rather than "lambda(x):", which is not
        # valid syntax on Python 3.)
        stats_functions = {'mean': lambda x: numpy.mean(x.value),
                           'min': lambda x: numpy.min(x.value),
                           'max': lambda x: numpy.max(x.value),
                           'std': lambda x: numpy.std(x.value)}

        # One statistics table per array, all stored in the new group.
        base_series_list = [
            BasicStatisticsSeries(
                table_name=name,
                hdf5_file=hdf5_file,
                index_names=index_names,
                stats_functions=stats_functions,
                hdf5_group=new_group._v_pathname)
            for name in arrays_names]

        SeriesArrayWrapper.__init__(self, base_series_list)
315
316