comparison utils/tables_series/series.py @ 210:dc0d77c8a878

Commented table_series code, changed ParamsStatisticsArray to take shared params instead, create DummySeries to use when we don't want to save a named series
author savardf
date Tue, 09 Mar 2010 10:15:19 -0500
parents acb942530923
children
comparison
equal deleted inserted replaced
209:d982dfa583df 210:dc0d77c8a878
10 are retrieved based on the variable name, which we can't programmatically set 10 are retrieved based on the variable name, which we can't programmatically set
11 otherwise. 11 otherwise.
12 ''' 12 '''
13 13
14 def get_beginning_description_n_ints(int_names, int_width=64): 14 def get_beginning_description_n_ints(int_names, int_width=64):
15 """
16 Begins construction of a class inheriting from IsDescription
17 to construct an HDF5 table with index columns named with int_names.
18
19 See Series().__init__ to see how those are used.
20 """
15 int_constructor = "Int64Col" 21 int_constructor = "Int64Col"
16 if int_width == 32: 22 if int_width == 32:
17 int_constructor = "Int32Col" 23 int_constructor = "Int32Col"
18 24
19 toexec = "class LocalDescription(IsDescription):\n" 25 toexec = "class LocalDescription(IsDescription):\n"
64 70
65 return LocalDescription 71 return LocalDescription
66 72
67 class Series(): 73 class Series():
68 def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'): 74 def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
69 """This is used as metadata in the HDF5 file to identify the series""" 75 """Basic arguments each Series must get.
76
77 Parameters
78 ----------
79 table_name : str
80 Name of the table to create under group "hdf5_group" (other parameter). No spaces, i.e. follow variable naming restrictions.
81 hdf5_file : open HDF5 file
82 File opened with openFile() in PyTables (ie. return value of openFile).
83 index_names : tuple of str
84 Columns to use as index for elements in the series, other example would be ('epoch', 'minibatch'). This would then allow you to call append(index, element) with index made of two ints, one for epoch index, one for minibatch index in epoch.
85 title : str
86 Title to attach to this table as metadata. Can contain spaces and be longer than the table_name.
87 hdf5_group : str
88 Path of the group (kind of a file) in the HDF5 file under which to create the table.
89 """
70 self.table_name = table_name 90 self.table_name = table_name
71 self.hdf5_file = hdf5_file 91 self.hdf5_file = hdf5_file
72 self.index_names = index_names 92 self.index_names = index_names
73 self.title = title 93 self.title = title
74 94
75 def append(self, index, element): 95 def append(self, index, element):
76 raise NotImplementedError 96 raise NotImplementedError
77 97
98 # To put in a series dictionary instead of a real series, to do nothing
99 # when we don't want a given series to be saved.
100 class DummySeries():
101 def append(self, index, element):
102 pass
103
78 class ErrorSeries(Series): 104 class ErrorSeries(Series):
79 def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'): 105 def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
80 Series.__init__(self, table_name, hdf5_file, index_names, title) 106 Series.__init__(self, table_name, hdf5_file, index_names, title)
81 107
82 self.error_name = error_name 108 self.error_name = error_name
87 113
88 def _get_table_description(self): 114 def _get_table_description(self):
89 return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,)) 115 return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,))
90 116
91 def append(self, index, error): 117 def append(self, index, error):
118 """
119 Parameters
120 ----------
121 index : tuple of int
122 Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
123 error : float
124 Next error in the series.
125 """
92 if len(index) != len(self.index_names): 126 if len(index) != len(self.index_names):
93 raise ValueError("index provided does not have the right length (expected " \ 127 raise ValueError("index provided does not have the right length (expected " \
94 + str(len(self.index_names)) + " got " + str(len(index))) 128 + str(len(self.index_names)) + " got " + str(len(index)))
95 129
96 newrow = self._table.row 130 newrow = self._table.row
97 131
132 # Columns for index in table are based on index_names
98 for col_name, value in zip(self.index_names, index): 133 for col_name, value in zip(self.index_names, index):
99 newrow[col_name] = value 134 newrow[col_name] = value
100 newrow[self.error_name] = error 135 newrow[self.error_name] = error
101 136
102 newrow.append() 137 newrow.append()
109 class AccumulatorSeriesWrapper(): 144 class AccumulatorSeriesWrapper():
110 """ 145 """
111 146
112 """ 147 """
113 def __init__(self, base_series, reduce_every, reduce_function=numpy.mean): 148 def __init__(self, base_series, reduce_every, reduce_function=numpy.mean):
149 """
150 Parameters
151 ----------
152 base_series : Series
153 This object must have an append(index, value) function.
154 reduce_every : int
155 Apply the reduction function (e.g. mean()) every time we get this number of elements. E.g. if this is 100, then every 100 numbers passed to append(), we'll take the mean and call append(this_mean) on the BaseSeries.
156 reduce_function : function
157 Must take as input an array of "elements", as passed to (this accumulator's) append(). Basic case would be to take an array of floats and sum them into one float, for example.
158 """
114 self.base_series = base_series 159 self.base_series = base_series
115 self.reduce_function = reduce_function 160 self.reduce_function = reduce_function
116 self.reduce_every = reduce_every 161 self.reduce_every = reduce_every
117 162
118 self._buffer = [] 163 self._buffer = []
124 ---------- 169 ----------
125 index : tuple of int 170 index : tuple of int
126 The index used is the one of the last element reduced. E.g. if 171 The index used is the one of the last element reduced. E.g. if
127 you accumulate over the first 1000 minibatches, the index 172 you accumulate over the first 1000 minibatches, the index
128 passed to the base_series.append() function will be 1000. 173 passed to the base_series.append() function will be 1000.
174 element : float
175 Element that will be accumulated.
129 """ 176 """
130 self._buffer.append(element) 177 self._buffer.append(element)
131 178
132 if len(self._buffer) == self.reduce_every: 179 if len(self._buffer) == self.reduce_every:
133 reduced = self.reduce_function(self._buffer) 180 reduced = self.reduce_function(self._buffer)
138 # were appended, which should be a red flag. 185 # were appended, which should be a red flag.
139 assert len(self._buffer) < self.reduce_every 186 assert len(self._buffer) < self.reduce_every
140 187
141 # Outside of class to fix an issue with exec in Python 2.6. 188 # Outside of class to fix an issue with exec in Python 2.6.
142 # My sorries to the God of pretty code. 189 # My sorries to the God of pretty code.
143 def BasicStatisticsSeries_construct_table_toexec(index_names): 190 def _BasicStatisticsSeries_construct_table_toexec(index_names):
144 toexec = get_beginning_description_n_ints(index_names) 191 toexec = get_beginning_description_n_ints(index_names)
145 192
146 bpos = len(index_names) 193 bpos = len(index_names)
147 toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n" 194 toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n"
148 toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n" 195 toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n"
152 # This creates "LocalDescription", which we may then use 199 # This creates "LocalDescription", which we may then use
153 exec(toexec) 200 exec(toexec)
154 201
155 return LocalDescription 202 return LocalDescription
156 203
204 basic_stats_functions = {'mean': lambda(x): numpy.mean(x),
205 'min': lambda(x): numpy.min(x),
206 'max': lambda(x): numpy.max(x),
207 'std': lambda(x): numpy.std(x)}
208
157 class BasicStatisticsSeries(Series): 209 class BasicStatisticsSeries(Series):
158 """ 210 """
159 Parameters 211 Parameters
160 ---------- 212 ----------
161 series_name : str 213 series_name : str
162 Not optional here. Will be prepended with "Basic statistics for " 214 Not optional here. Will be prepended with "Basic statistics for "
163 """ 215 stats_functions : dict, optional
164 def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'): 216 Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float).
217 """
218 def __init__(self, table_name, hdf5_file, stats_functions=basic_stats_functions, index_names=('epoch',), title=None, hdf5_group='/'):
165 Series.__init__(self, table_name, hdf5_file, index_names, title) 219 Series.__init__(self, table_name, hdf5_file, index_names, title)
166 220
167 self.hdf5_group = hdf5_group 221 self.hdf5_group = hdf5_group
168 222
169 self.construct_table() 223 self.stats_functions = stats_functions
170 224
171 def construct_table(self): 225 self._construct_table()
172 table_description = BasicStatisticsSeries_construct_table_toexec(self.index_names) 226
227 def _construct_table(self):
228 table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names)
173 229
174 self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description) 230 self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description)
175 231
176 def append(self, index, array): 232 def append(self, index, array):
233 """
234 Parameters
235 ----------
236 index : tuple of int
237 Following index_names passed to __init__, e.g. (12, 15) if index_names were ('epoch', 'minibatch_size')
238 array
239 Is of whatever type the stats_functions passed to __init__ can take. Default is anything numpy.mean(), min(), max(), std() can take.
240 """
177 if len(index) != len(self.index_names): 241 if len(index) != len(self.index_names):
178 raise ValueError("index provided does not have the right length (expected " \ 242 raise ValueError("index provided does not have the right length (expected " \
179 + str(len(self.index_names)) + " got " + str(len(index))) 243 + str(len(self.index_names)) + " got " + str(len(index)))
180 244
181 newrow = self._table.row 245 newrow = self._table.row
182 246
183 for col_name, value in zip(self.index_names, index): 247 for col_name, value in zip(self.index_names, index):
184 newrow[col_name] = value 248 newrow[col_name] = value
185 249
186 newrow["mean"] = numpy.mean(array) 250 newrow["mean"] = self.stats_functions['mean'](array)
187 newrow["min"] = numpy.min(array) 251 newrow["min"] = self.stats_functions['min'](array)
188 newrow["max"] = numpy.max(array) 252 newrow["max"] = self.stats_functions['max'](array)
189 newrow["std"] = numpy.std(array) 253 newrow["std"] = self.stats_functions['std'](array)
190 254
191 newrow.append() 255 newrow.append()
192 256
193 self.hdf5_file.flush() 257 self.hdf5_file.flush()
194 258
195 class SeriesArrayWrapper(): 259 class SeriesArrayWrapper():
196 """ 260 """
197 Simply redistributes any number of elements to sub-series to respective append()s. 261 Simply redistributes any number of elements to sub-series to respective append()s.
262
263 To use if you have many elements to append in similar series, e.g. if you have an array containing [train_error, valid_error, test_error], and 3 corresponding series, this allows you to simply pass this array of 3 values to append() instead of passing each element to each individual series in turn.
198 """ 264 """
199 265
200 def __init__(self, base_series_list): 266 def __init__(self, base_series_list):
201 self.base_series_list = base_series_list 267 self.base_series_list = base_series_list
202 268
206 + str(len(self.base_series_list)) + " got " + str(len(elements))) 272 + str(len(self.base_series_list)) + " got " + str(len(elements)))
207 273
208 for series, el in zip(self.base_series_list, elements): 274 for series, el in zip(self.base_series_list, elements):
209 series.append(index, el) 275 series.append(index, el)
210 276
211 class ParamsStatisticsWrapper(SeriesArrayWrapper): 277 class SharedParamsStatisticsWrapper(SeriesArrayWrapper):
278 '''Save mean, min/max, std of shared parameters placed in an array.
279
280 This is specifically for cases where we have _shared_ parameters,
281 as we take the .value of each array'''
282
212 def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch',), title=""): 283 def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch',), title=""):
284 """
285 Parameters
286 ----------
287 arrays_names : array of str
288 Name of each array, in order of the array passed to append(). E.g. ('layer1_b', 'layer1_W', 'layer2_b', 'layer2_W')
289 new_group_name : str
290 Name of a new HDF5 group which will be created under base_group to store the new series.
291 base_group : str
292 Path of the group under which to create the new group which will store the series.
293 title : str
294 Here the title is attached to the new group, not a table.
295 """
213 base_series_list = [] 296 base_series_list = []
214 297
215 new_group = hdf5_file.createGroup(base_group, new_group_name, title=title) 298 new_group = hdf5_file.createGroup(base_group, new_group_name, title=title)
299
300 stats_functions = {'mean': lambda(x): numpy.mean(x.value),
301 'min': lambda(x): numpy.min(x.value),
302 'max': lambda(x): numpy.max(x.value),
303 'std': lambda(x): numpy.std(x.value)}
216 304
217 for name in arrays_names: 305 for name in arrays_names:
218 base_series_list.append( 306 base_series_list.append(
219 BasicStatisticsSeries( 307 BasicStatisticsSeries(
220 table_name=name, 308 table_name=name,
221 hdf5_file=hdf5_file, 309 hdf5_file=hdf5_file,
222 index_names=('epoch','minibatch'), 310 index_names=index_names,
311 stats_functions=stats_functions,
223 hdf5_group=new_group._v_pathname)) 312 hdf5_group=new_group._v_pathname))
224 313
225 SeriesArrayWrapper.__init__(self, base_series_list) 314 SeriesArrayWrapper.__init__(self, base_series_list)
226 315
227 316