Mercurial > ift6266
comparison utils/tables_series/series.py @ 208:acb942530923
Completely rewrote my series module, now based on HDF5 and PyTables (in a separate directory called 'tables_series' for retrocompatibility of running code). Minor (inconsequential) changes to stacked_dae.
author | fsavard |
---|---|
date | Fri, 05 Mar 2010 18:07:20 -0500 |
parents | |
children | dc0d77c8a878 |
comparison
equal
deleted
inserted
replaced
205:10a801240bfc | 208:acb942530923 |
---|---|
1 from tables import * | |
2 import numpy | |
3 | |
'''
The way these "IsDescription constructors" work is simple: write the
code as if it were in a file, then exec()ute it, leaving us with
a locally-scoped LocalDescription class which may be used to call
createTable.

It's a small hack, but it's necessary as the names of the columns
are retrieved from the variable names, which we can't set
programmatically otherwise.
'''
13 | |
def get_beginning_description_n_ints(int_names, int_width=64):
    """
    Begins the source code of an IsDescription subclass, declaring one
    int column per name in ``int_names``.

    Parameters
    ----------
    int_names : tuple of str
        Names of the int (e.g. index) columns.
    int_width : {32, 64}
        Bit width of the int columns (Int32Col vs Int64Col).

    Returns
    -------
    str
        Partial class source, to be completed and exec()uted by the caller.
    """
    int_constructor = "Int64Col"
    if int_width == 32:
        int_constructor = "Int32Col"

    toexec = "class LocalDescription(IsDescription):\n"

    # 'pos' fixes the column order in the PyTables table. It must advance
    # for each column: the original code never incremented it, so every
    # int column collided at pos=0.
    for pos, n in enumerate(int_names):
        toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n"

    return toexec
27 | |
def get_description_with_n_ints_n_floats(int_names, float_names, int_width=64, float_width=32):
    """
    Constructs a class to be used when constructing a table with PyTables.

    This is useful to construct a series with an index with multiple levels.
    E.g. if you want to index your "validation error" with "epoch" first, then
    "minibatch_index" second, you'd use two "int_names".

    Parameters
    ----------
    int_names : tuple of str
        Names of the int (e.g. index) columns
    float_names : tuple of str
        Names of the float (e.g. error) columns
    int_width : {32, 64}
        Bit width of the int columns.
    float_width : {32, 64}
        Bit width of the float columns.

    Returns
    -------
    A class object, to pass to createTable()
    """
    toexec = get_beginning_description_n_ints(int_names, int_width=int_width)

    float_constructor = "Float32Col"
    if float_width == 64:
        float_constructor = "Float64Col"

    # Float columns go right after the int columns; 'pos' must keep
    # increasing (the original code never incremented it, so every float
    # column collided at the same position).
    pos = len(int_names)
    for n in float_names:
        toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n"
        pos += 1

    # exec() into an explicit namespace: relying on exec() to create a
    # function-local binding does not work in Python 3 and was already
    # fragile in 2.6 (see BasicStatisticsSeries_construct_table_toexec).
    namespace = {}
    exec(toexec, globals(), namespace)

    return namespace['LocalDescription']
66 | |
class Series():
    """Base class for all series; holds the metadata used in the HDF5
    file to identify the series."""

    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
        """This is used as metadata in the HDF5 file to identify the series"""
        # Note: hdf5_group is accepted for a uniform signature; subclasses
        # that create tables handle it themselves.
        self.title = title
        self.index_names = index_names
        self.hdf5_file = hdf5_file
        self.table_name = table_name

    def append(self, index, element):
        # Subclasses must define how an element is stored.
        raise NotImplementedError
77 | |
class ErrorSeries(Series):
    """
    Series storing a single float (e.g. an error measure) per append,
    indexed by one or more int columns.

    Creates one PyTables table named ``table_name`` under ``hdf5_group``,
    with the index columns followed by one float column ``error_name``.
    """
    def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
        Series.__init__(self, table_name, hdf5_file, index_names, title)

        self.error_name = error_name

        table_description = self._get_table_description()

        self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title)

    def _get_table_description(self):
        # Index columns first, then the single error column.
        return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,))

    def append(self, index, error):
        """
        Parameters
        ----------
        index : tuple of int
            One value per name in ``self.index_names``.
        error : float
            Value stored in the ``error_name`` column.

        Raises
        ------
        ValueError
            If ``index`` does not match ``index_names`` in length.
        """
        if len(index) != len(self.index_names):
            # Fixed message: the "(expected" paren was never closed.
            raise ValueError("index provided does not have the right length (expected " \
                    + str(len(self.index_names)) + " got " + str(len(index)) + ")")

        newrow = self._table.row

        for col_name, value in zip(self.index_names, index):
            newrow[col_name] = value
        newrow[self.error_name] = error

        newrow.append()

        # Flush immediately so the data survives a crash mid-run.
        self.hdf5_file.flush()
105 | |
# Does not inherit from Series because it does not itself need to
# access the hdf5_file and does not need a series_name (provided
# by the base_series.)
class AccumulatorSeriesWrapper():
    """
    Buffers appended elements and, every ``reduce_every`` calls, appends
    their reduction (mean by default) to a wrapped base series.
    """
    def __init__(self, base_series, reduce_every, reduce_function=numpy.mean):
        """
        Parameters
        ----------
        base_series : Series
            Series to which the reduced values are forwarded.
        reduce_every : int
            Number of buffered elements per reduction.
        reduce_function : callable
            Reduces a list of buffered elements to a single value
            (default: numpy.mean).
        """
        self.base_series = base_series
        self.reduce_function = reduce_function
        self.reduce_every = reduce_every

        self._buffer = []


    def append(self, index, element):
        """
        Parameters
        ----------
        index : tuple of int
            The index used is the one of the last element reduced. E.g. if
            you accumulate over the first 1000 minibatches, the index
            passed to the base_series.append() function will be 1000.
        element : float (or whatever reduce_function accepts)
            Element buffered until the next reduction.
        """
        self._buffer.append(element)

        if len(self._buffer) == self.reduce_every:
            reduced = self.reduce_function(self._buffer)
            self.base_series.append(index, reduced)
            self._buffer = []

        # This should never happen, except if lists
        # were appended, which should be a red flag.
        assert len(self._buffer) < self.reduce_every
140 | |
# Outside of class to fix an issue with exec in Python 2.6.
# My sorries to the God of pretty code.
def BasicStatisticsSeries_construct_table_toexec(index_names):
    """
    Builds the PyTables description class for a BasicStatisticsSeries:
    the int index columns followed by mean/min/max/std Float32 columns.
    """
    toexec = get_beginning_description_n_ints(index_names)

    # Statistics columns are placed right after the index columns.
    bpos = len(index_names)
    toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n"
    toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n"
    toexec += "\tmax = Float32Col(pos=" + str(bpos+2) + ")\n"
    toexec += "\tstd = Float32Col(pos=" + str(bpos+3) + ")\n"

    # This creates "LocalDescription", which we may then use.
    # exec() into an explicit namespace so the binding can be retrieved
    # reliably — exec() cannot create function locals in Python 3, which
    # is the same class of problem the 2.6 comment above alludes to.
    namespace = {}
    exec(toexec, globals(), namespace)

    return namespace['LocalDescription']
156 | |
class BasicStatisticsSeries(Series):
    """
    Series recording, per append, the mean/min/max/std of a whole array.

    Parameters
    ----------
    table_name : str
        Name of the PyTables table created under ``hdf5_group``.
        (The previous docstring documented a nonexistent ``series_name``
        parameter and a "Basic statistics for " prefix the code never
        applied.)
    """
    def __init__(self, table_name, hdf5_file, index_names=('epoch',), title=None, hdf5_group='/'):
        Series.__init__(self, table_name, hdf5_file, index_names, title)

        self.hdf5_group = hdf5_group

        self.construct_table()

    def construct_table(self):
        # Description: index columns + mean/min/max/std float columns.
        table_description = BasicStatisticsSeries_construct_table_toexec(self.index_names)

        self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description)

    def append(self, index, array):
        """
        Parameters
        ----------
        index : tuple of int
            One value per name in ``self.index_names``.
        array : array-like
            Data whose statistics (mean/min/max/std via numpy) are stored.

        Raises
        ------
        ValueError
            If ``index`` does not match ``index_names`` in length.
        """
        if len(index) != len(self.index_names):
            # Fixed message: the "(expected" paren was never closed.
            raise ValueError("index provided does not have the right length (expected " \
                    + str(len(self.index_names)) + " got " + str(len(index)) + ")")

        newrow = self._table.row

        for col_name, value in zip(self.index_names, index):
            newrow[col_name] = value

        newrow["mean"] = numpy.mean(array)
        newrow["min"] = numpy.min(array)
        newrow["max"] = numpy.max(array)
        newrow["std"] = numpy.std(array)

        newrow.append()

        # Flush immediately so the data survives a crash mid-run.
        self.hdf5_file.flush()
194 | |
class SeriesArrayWrapper():
    """
    Simply redistributes any number of elements to sub-series to respective append()s.
    """

    def __init__(self, base_series_list):
        """
        Parameters
        ----------
        base_series_list : sequence of Series
            One sub-series per element later passed to append().
        """
        self.base_series_list = base_series_list

    def append(self, index, elements):
        """
        Appends elements[i] to base_series_list[i], all with the same index.

        Raises
        ------
        ValueError
            If the number of elements does not match the number of series.
        """
        if len(elements) != len(self.base_series_list):
            # Fixed message: grammar ("too many", not "too much") and the
            # "(expected" paren was never closed.
            raise ValueError("not enough or too many elements provided (expected " \
                    + str(len(self.base_series_list)) + " got " + str(len(elements)) + ")")

        for series, el in zip(self.base_series_list, elements):
            series.append(index, el)
210 | |
class ParamsStatisticsWrapper(SeriesArrayWrapper):
    """
    Creates one BasicStatisticsSeries per name in ``arrays_names``, all
    stored as tables under a new HDF5 group, and redistributes each
    append()ed array to its series (via SeriesArrayWrapper).
    """
    def __init__(self, arrays_names, new_group_name, hdf5_file, base_group='/', index_names=('epoch',), title=""):
        base_series_list = []

        # All statistics tables live together under this new group.
        new_group = hdf5_file.createGroup(base_group, new_group_name, title=title)

        # NOTE(review): the 'index_names' parameter is accepted but never
        # used — each sub-series is hard-coded to ('epoch','minibatch').
        # Confirm whether that is intentional.
        for name in arrays_names:
            base_series_list.append(
                BasicStatisticsSeries(
                    table_name=name,
                    hdf5_file=hdf5_file,
                    index_names=('epoch','minibatch'),
                    hdf5_group=new_group._v_pathname))

        SeriesArrayWrapper.__init__(self, base_series_list)
227 |