Mercurial > ift6266
comparison utils/tables_series/series.py @ 210:dc0d77c8a878
Commented table_series code, changed ParamsStatisticsArray to take shared params instead, create DummySeries to use when we don't want to save a named series
author | savardf |
---|---|
date | Tue, 09 Mar 2010 10:15:19 -0500 |
parents | acb942530923 |
children |
comparison
equal
deleted
inserted
replaced
209:d982dfa583df | 210:dc0d77c8a878 |
---|---|
10 are retrieved based on the variable name, which we can't programmatically set | 10 are retrieved based on the variable name, which we can't programmatically set |
11 otherwise. | 11 otherwise. |
12 ''' | 12 ''' |
13 | 13 |
14 def get_beginning_description_n_ints(int_names, int_width=64): | 14 def get_beginning_description_n_ints(int_names, int_width=64): |
15 """ | |
16 Begins construction of a class inheriting from IsDescription | |
17 to construct an HDF5 table with index columns named with int_names. | |
18 | |
19 See Series().__init__ to see how those are used. | |
20 """ | |
15 int_constructor = "Int64Col" | 21 int_constructor = "Int64Col" |
16 if int_width == 32: | 22 if int_width == 32: |
17 int_constructor = "Int32Col" | 23 int_constructor = "Int32Col" |
18 | 24 |
19 toexec = "class LocalDescription(IsDescription):\n" | 25 toexec = "class LocalDescription(IsDescription):\n" |
64 | 70 |
65 return LocalDescription | 71 return LocalDescription |
66 | 72 |
class Series():
    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
        """Basic arguments each Series must get.

        Parameters
        ----------
        table_name : str
            Name of the table to create under group "hdf5_group" (other
            parameter). No spaces, i.e. follow variable naming restrictions.
        hdf5_file : open HDF5 file
            File opened with openFile() in PyTables (i.e. return value of
            openFile).
        index_names : tuple of str
            Columns to use as index for elements in the series; another
            example would be ('epoch', 'minibatch'). This would then allow
            you to call append(index, element) with index made of two ints,
            one for epoch index, one for minibatch index in epoch.
        title : str
            Title to attach to this table as metadata. Can contain spaces
            and be longer than the table_name.
        hdf5_group : str
            Path of the group (kind of a directory) in the HDF5 file under
            which to create the table.
        """
        self.table_name = table_name
        self.hdf5_file = hdf5_file
        self.index_names = index_names
        self.title = title
        # Fix: the original accepted hdf5_group but never stored it, forcing
        # every subclass to re-store it itself. Storing it here is
        # backward-compatible (subclasses that reassign it still work).
        self.hdf5_group = hdf5_group

    def append(self, index, element):
        """Abstract; concrete Series subclasses must implement this."""
        raise NotImplementedError
77 | 97 |
# Stand-in to put in a series dictionary instead of a real series:
# silently discards everything, for when a given series must not be saved.
class DummySeries():
    def append(self, index, element):
        """Accept and ignore the (index, element) pair; a no-op."""
        pass
103 | |
78 class ErrorSeries(Series): | 104 class ErrorSeries(Series): |
79 def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'): | 105 def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'): |
80 Series.__init__(self, table_name, hdf5_file, index_names, title) | 106 Series.__init__(self, table_name, hdf5_file, index_names, title) |
81 | 107 |
82 self.error_name = error_name | 108 self.error_name = error_name |
87 | 113 |
88 def _get_table_description(self): | 114 def _get_table_description(self): |
89 return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,)) | 115 return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,)) |
90 | 116 |
91 def append(self, index, error): | 117 def append(self, index, error): |
118 """ | |
119 Parameters | |
120 ---------- | |
121 index : tuple of int | |
122 Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size') | |
123 error : float | |
124 Next error in the series. | |
125 """ | |
92 if len(index) != len(self.index_names): | 126 if len(index) != len(self.index_names): |
93 raise ValueError("index provided does not have the right length (expected " \ | 127 raise ValueError("index provided does not have the right length (expected " \ |
94 + str(len(self.index_names)) + " got " + str(len(index))) | 128 + str(len(self.index_names)) + " got " + str(len(index))) |
95 | 129 |
96 newrow = self._table.row | 130 newrow = self._table.row |
97 | 131 |
132 # Columns for index in table are based on index_names | |
98 for col_name, value in zip(self.index_names, index): | 133 for col_name, value in zip(self.index_names, index): |
99 newrow[col_name] = value | 134 newrow[col_name] = value |
100 newrow[self.error_name] = error | 135 newrow[self.error_name] = error |
101 | 136 |
102 newrow.append() | 137 newrow.append() |
109 class AccumulatorSeriesWrapper(): | 144 class AccumulatorSeriesWrapper(): |
110 """ | 145 """ |
111 | 146 |
112 """ | 147 """ |
113 def __init__(self, base_series, reduce_every, reduce_function=numpy.mean): | 148 def __init__(self, base_series, reduce_every, reduce_function=numpy.mean): |
149 """ | |
150 Parameters | |
151 ---------- | |
152 base_series : Series | |
153 This object must have an append(index, value) function. | |
154 reduce_every : int | |
155 Apply the reduction function (e.g. mean()) every time we get this number of elements. E.g. if this is 100, then every 100 numbers passed to append(), we'll take the mean and call append(this_mean) on the BaseSeries. | |
156 reduce_function : function | |
157 Must take as input an array of "elements", as passed to (this accumulator's) append(). Basic case would be to take an array of floats and sum them into one float, for example. | |
158 """ | |
114 self.base_series = base_series | 159 self.base_series = base_series |
115 self.reduce_function = reduce_function | 160 self.reduce_function = reduce_function |
116 self.reduce_every = reduce_every | 161 self.reduce_every = reduce_every |
117 | 162 |
118 self._buffer = [] | 163 self._buffer = [] |
124 ---------- | 169 ---------- |
125 index : tuple of int | 170 index : tuple of int |
126 The index used is the one of the last element reduced. E.g. if | 171 The index used is the one of the last element reduced. E.g. if |
127 you accumulate over the first 1000 minibatches, the index | 172 you accumulate over the first 1000 minibatches, the index |
128 passed to the base_series.append() function will be 1000. | 173 passed to the base_series.append() function will be 1000. |
174 element : float | |
175 Element that will be accumulated. | |
129 """ | 176 """ |
130 self._buffer.append(element) | 177 self._buffer.append(element) |
131 | 178 |
132 if len(self._buffer) == self.reduce_every: | 179 if len(self._buffer) == self.reduce_every: |
133 reduced = self.reduce_function(self._buffer) | 180 reduced = self.reduce_function(self._buffer) |
138 # were appended, which should be a red flag. | 185 # were appended, which should be a red flag. |
139 assert len(self._buffer) < self.reduce_every | 186 assert len(self._buffer) < self.reduce_every |
140 | 187 |
141 # Outside of class to fix an issue with exec in Python 2.6. | 188 # Outside of class to fix an issue with exec in Python 2.6. |
142 # My sorries to the God of pretty code. | 189 # My sorries to the God of pretty code. |
143 def BasicStatisticsSeries_construct_table_toexec(index_names): | 190 def _BasicStatisticsSeries_construct_table_toexec(index_names): |
144 toexec = get_beginning_description_n_ints(index_names) | 191 toexec = get_beginning_description_n_ints(index_names) |
145 | 192 |
146 bpos = len(index_names) | 193 bpos = len(index_names) |
147 toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n" | 194 toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n" |
148 toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n" | 195 toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n" |
152 # This creates "LocalDescription", which we may then use | 199 # This creates "LocalDescription", which we may then use |
153 exec(toexec) | 200 exec(toexec) |
154 | 201 |
155 return LocalDescription | 202 return LocalDescription |
156 | 203 |
# Default reduction functions for BasicStatisticsSeries.
# Fix: the original used `lambda(x): numpy.mean(x)` — the parenthesized
# lambda parameter is Python-2-only syntax (SyntaxError under Python 3),
# and the lambdas were redundant wrappers; reference the functions directly.
basic_stats_functions = {'mean': numpy.mean,
                         'min': numpy.min,
                         'max': numpy.max,
                         'std': numpy.std}
208 | |
class BasicStatisticsSeries(Series):
    """
    Series storing, for each appended element, its mean, min, max and std.

    Parameters
    ----------
    table_name : str
        Not optional here; name of the HDF5 table holding the statistics.
        (Fix: the docstring previously documented a nonexistent
        ``series_name`` parameter left stale from an earlier revision.)
    stats_functions : dict, optional
        Dictionary with a function for each key "mean", "min", "max",
        "std". Each function must take whatever is passed to append(...)
        and return a single number (float).
    """
    def __init__(self, table_name, hdf5_file, stats_functions=basic_stats_functions, index_names=('epoch',), title=None, hdf5_group='/'):
        Series.__init__(self, table_name, hdf5_file, index_names, title)

        self.hdf5_group = hdf5_group

        self.stats_functions = stats_functions

        self._construct_table()

    def _construct_table(self):
        # Build an IsDescription subclass with one Int column per index
        # name followed by mean/min/max/std Float columns, then create
        # the PyTables table from it.
        table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names)

        self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description)

    def append(self, index, array):
        """
        Parameters
        ----------
        index : tuple of int
            Following index_names passed to __init__, e.g. (12, 15) if
            index_names were ('epoch', 'minibatch_size').
        array
            Of whatever type the stats_functions passed to __init__ can
            take. Default is anything numpy.mean(), min(), max(), std()
            can take.
        """
        if len(index) != len(self.index_names):
            raise ValueError("index provided does not have the right length (expected " \
                            + str(len(self.index_names)) + " got " + str(len(index)))

        newrow = self._table.row

        # Columns for index in table are based on index_names
        for col_name, value in zip(self.index_names, index):
            newrow[col_name] = value

        newrow["mean"] = self.stats_functions['mean'](array)
        newrow["min"] = self.stats_functions['min'](array)
        newrow["max"] = self.stats_functions['max'](array)
        newrow["std"] = self.stats_functions['std'](array)

        newrow.append()

        self.hdf5_file.flush()
194 | 258 |
195 class SeriesArrayWrapper(): | 259 class SeriesArrayWrapper(): |
196 """ | 260 """ |
197 Simply redistributes any number of elements to sub-series to respective append()s. | 261 Simply redistributes any number of elements to sub-series to respective append()s. |
262 | |
263 To use if you have many elements to append in similar series, e.g. if you have an array containing [train_error, valid_error, test_error], and 3 corresponding series, this allows you to simply pass this array of 3 values to append() instead of passing each element to each individual series in turn. | |
198 """ | 264 """ |
199 | 265 |
200 def __init__(self, base_series_list): | 266 def __init__(self, base_series_list): |
201 self.base_series_list = base_series_list | 267 self.base_series_list = base_series_list |
202 | 268 |
206 + str(len(self.base_series_list)) + " got " + str(len(elements))) | 272 + str(len(self.base_series_list)) + " got " + str(len(elements))) |
207 | 273 |
208 for series, el in zip(self.base_series_list, elements): | 274 for series, el in zip(self.base_series_list, elements): |
209 series.append(index, el) | 275 series.append(index, el) |
210 | 276 |
class SharedParamsStatisticsWrapper(SeriesArrayWrapper):
    '''Save mean, min/max, std of shared parameters placed in an array.

    This is specifically for cases where we have _shared_ parameters,
    as we take the .value of each array.'''

    def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch',), title=""):
        """
        Parameters
        ----------
        arrays_names : array of str
            Name of each array, in order of the array passed to append().
            E.g. ('layer1_b', 'layer1_W', 'layer2_b', 'layer2_W').
            (Fix: docstring previously said ``array_names``, which does not
            match the actual parameter name.)
        new_group_name : str
            Name of a new HDF5 group which will be created under base_group
            to store the new series.
        base_group : str
            Path of the group under which to create the new group which
            will store the series.
        title : str
            Here the title is attached to the new group, not a table.
        """
        base_series_list = []

        new_group = hdf5_file.createGroup(base_group, new_group_name, title=title)

        # Each stat reads the .value of a shared variable before reducing.
        # Fix: `lambda(x): ...` (parenthesized lambda parameter) is
        # Python-2-only syntax; use the portable `lambda x: ...` form.
        stats_functions = {'mean': lambda x: numpy.mean(x.value),
                           'min': lambda x: numpy.min(x.value),
                           'max': lambda x: numpy.max(x.value),
                           'std': lambda x: numpy.std(x.value)}

        for name in arrays_names:
            base_series_list.append(
                    BasicStatisticsSeries(
                            table_name=name,
                            hdf5_file=hdf5_file,
                            index_names=index_names,
                            stats_functions=stats_functions,
                            hdf5_group=new_group._v_pathname))

        SeriesArrayWrapper.__init__(self, base_series_list)
226 | 315 |
227 | 316 |