Mercurial > ift6266
comparison utils/seriestables/series.py @ 218:4c137f16b013
Modifications pour stocker des timestamps/cpuclock avec chaque rangée créée, selon suggestion de Yoshua ce matin
author | fsavard |
---|---|
date | Wed, 10 Mar 2010 20:13:45 -0500 |
parents | a96fa4de06d2 |
children | e172ef73cdc5 |
comparison
equal
deleted
inserted
replaced
213:a96fa4de06d2 | 218:4c137f16b013 |
---|---|
1 from tables import * | 1 from tables import * |
2 import numpy | 2 import numpy |
3 import time | |
3 | 4 |
4 ''' | 5 ''' |
5 The way these "IsDescription constructors" work is simple: write the | 6 The way these "IsDescription constructors" work is simple: write the |
6 code as if it were in a file, then exec()ute it, leaving us with | 7 code as if it were in a file, then exec()ute it, leaving us with |
7 a local-scoped LocalDescription which may be used to call createTable. | 8 a local-scoped LocalDescription which may be used to call createTable. |
9 It's a small hack, but it's necessary as the names of the columns | 10 It's a small hack, but it's necessary as the names of the columns |
10 are retrieved based on the variable name, which we can't programmatically set | 11 are retrieved based on the variable name, which we can't programmatically set |
11 otherwise. | 12 otherwise. |
12 ''' | 13 ''' |
13 | 14 |
14 def get_beginning_description_n_ints(int_names, int_width=64): | 15 def _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock, pos=0): |
16 toexec = "" | |
17 | |
18 if store_timestamp: | |
19 toexec += "\ttimestamp = Time32Col(pos="+str(pos)+")\n" | |
20 pos += 1 | |
21 | |
22 if store_cpuclock: | |
23 toexec += "\tcpuclock = Float64Col(pos="+str(pos)+")\n" | |
24 pos += 1 | |
25 | |
26 return toexec, pos | |
27 | |
28 def _get_description_n_ints(int_names, int_width=64, pos=0): | |
15 """ | 29 """ |
16 Begins construction of a class inheriting from IsDescription | 30 Begins construction of a class inheriting from IsDescription |
17 to construct an HDF5 table with index columns named with int_names. | 31 to construct an HDF5 table with index columns named with int_names. |
18 | 32 |
19 See Series().__init__ to see how those are used. | 33 See Series().__init__ to see how those are used. |
20 """ | 34 """ |
21 int_constructor = "Int64Col" | 35 int_constructor = "Int64Col" |
22 if int_width == 32: | 36 if int_width == 32: |
23 int_constructor = "Int32Col" | 37 int_constructor = "Int32Col" |
24 | 38 |
25 toexec = "class LocalDescription(IsDescription):\n" | 39 toexec = "" |
26 | |
27 pos = 0 | |
28 | 40 |
29 for n in int_names: | 41 for n in int_names: |
30 toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n" | 42 toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n" |
31 | 43 pos += 1 |
32 return toexec | 44 |
33 | 45 return toexec, pos |
34 def get_description_with_n_ints_n_floats(int_names, float_names, int_width=64, float_width=32): | 46 |
47 def _get_description_with_n_ints_n_floats(int_names, float_names, | |
48 int_width=64, float_width=32, | |
49 store_timestamp=True, store_cpuclock=True): | |
35 """ | 50 """ |
36 Constructs a class to be used when constructing a table with PyTables. | 51 Constructs a class to be used when constructing a table with PyTables. |
37 | 52 |
38 This is useful to construct a series with an index with multiple levels. | 53 This is useful to construct a series with an index with multiple levels. |
39 E.g. if you want to index your "validation error" with "epoch" first, then | 54 E.g. if you want to index your "validation error" with "epoch" first, then |
47 Names of the float (e.g. error) columns | 62 Names of the float (e.g. error) columns |
48 int_width : {32, 64} | 63 int_width : {32, 64} |
49 Bit width of the int columns. | 64 Bit width of the int columns. |
50 float_width : {32, 64} | 65 float_width : {32, 64} |
51 Bit width of the float columns. | 66 Bit width of the float columns. |
67 store_timestamp : bool | |
68 See __init__ of Series | |
69 store_cpuclock : bool | |
70 See __init__ of Series | |
52 | 71 |
53 Returns | 72 Returns |
54 ------- | 73 ------- |
55 A class object, to pass to createTable() | 74 A class object, to pass to createTable() |
56 """ | 75 """ |
57 | 76 |
58 toexec = get_beginning_description_n_ints(int_names, int_width=int_width) | 77 toexec = "class LocalDescription(IsDescription):\n" |
78 | |
79 toexec_, pos = _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock) | |
80 toexec += toexec_ | |
81 | |
82 toexec_, pos = _get_description_n_ints(int_names, int_width=int_width, pos=pos) | |
83 toexec += toexec_ | |
59 | 84 |
60 float_constructor = "Float32Col" | 85 float_constructor = "Float32Col" |
61 if float_width == 64: | 86 if float_width == 64: |
62 float_constructor = "Float64Col" | 87 float_constructor = "Float64Col" |
63 | |
64 pos = len(int_names) | |
65 | 88 |
66 for n in float_names: | 89 for n in float_names: |
67 toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n" | 90 toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n" |
91 pos += 1 | |
68 | 92 |
69 exec(toexec) | 93 exec(toexec) |
70 | 94 |
71 return LocalDescription | 95 return LocalDescription |
72 | 96 |
73 class Series(): | 97 class Series(): |
74 def __init__(self, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'): | 98 def __init__(self, table_name, hdf5_file, index_names=('epoch',), |
99 title="", hdf5_group='/', | |
100 store_timestamp=True, store_cpuclock=True): | |
75 """Basic arguments each Series must get. | 101 """Basic arguments each Series must get. |
76 | 102 |
77 Parameters | 103 Parameters |
78 ---------- | 104 ---------- |
79 table_name : str | 105 table_name : str |
84 Columns to use as index for elements in the series; another example would be ('epoch', 'minibatch'). This would then allow you to call append(index, element) with index made of two ints, one for epoch index, one for minibatch index in epoch. | 110 Columns to use as index for elements in the series; another example would be ('epoch', 'minibatch'). This would then allow you to call append(index, element) with index made of two ints, one for epoch index, one for minibatch index in epoch. |
85 title : str | 111 title : str |
86 Title to attach to this table as metadata. Can contain spaces and be longer than the table_name. | 112 Title to attach to this table as metadata. Can contain spaces and be longer than the table_name. |
87 hdf5_group : str | 113 hdf5_group : str |
88 Path of the group (kind of a file) in the HDF5 file under which to create the table. | 114 Path of the group (kind of a file) in the HDF5 file under which to create the table. |
115 store_timestamp : bool | |
116 Whether to create a column for timestamps and store them with each record. | |
117 store_cpuclock : bool | |
118 Whether to create a column for cpu clock and store it with each record. | |
89 """ | 119 """ |
90 self.table_name = table_name | 120 self.table_name = table_name |
91 self.hdf5_file = hdf5_file | 121 self.hdf5_file = hdf5_file |
92 self.index_names = index_names | 122 self.index_names = index_names |
93 self.title = title | 123 self.title = title |
94 | 124 |
125 self.store_timestamp = store_timestamp | |
126 self.store_cpuclock = store_cpuclock | |
127 | |
95 def append(self, index, element): | 128 def append(self, index, element): |
96 raise NotImplementedError | 129 raise NotImplementedError |
130 | |
131 def _timestamp_cpuclock(self, newrow): | |
132 newrow["timestamp"] = time.time() | |
133 newrow["cpuclock"] = time.clock() | |
97 | 134 |
98 # To put in a series dictionary instead of a real series, to do nothing | 135 # To put in a series dictionary instead of a real series, to do nothing |
99 # when we don't want a given series to be saved. | 136 # when we don't want a given series to be saved. |
100 class DummySeries(): | 137 class DummySeries(): |
101 def append(self, index, element): | 138 def append(self, index, element): |
102 pass | 139 pass |
103 | 140 |
104 class ErrorSeries(Series): | 141 class ErrorSeries(Series): |
105 def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'): | 142 def __init__(self, error_name, table_name, |
106 Series.__init__(self, table_name, hdf5_file, index_names, title) | 143 hdf5_file, index_names=('epoch',), |
144 title="", hdf5_group='/', | |
145 store_timestamp=True, store_cpuclock=True): | |
146 Series.__init__(self, table_name, hdf5_file, index_names, title, store_timestamp, store_cpuclock) | |
107 | 147 |
108 self.error_name = error_name | 148 self.error_name = error_name |
109 | 149 |
110 table_description = self._get_table_description() | 150 table_description = self._get_table_description() |
111 | 151 |
112 self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title) | 152 self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title) |
113 | 153 |
114 def _get_table_description(self): | 154 def _get_table_description(self): |
115 return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,)) | 155 return _get_description_with_n_ints_n_floats(self.index_names, (self.error_name,)) |
116 | 156 |
117 def append(self, index, error): | 157 def append(self, index, error): |
118 """ | 158 """ |
119 Parameters | 159 Parameters |
120 ---------- | 160 ---------- |
131 | 171 |
132 # Columns for index in table are based on index_names | 172 # Columns for index in table are based on index_names |
133 for col_name, value in zip(self.index_names, index): | 173 for col_name, value in zip(self.index_names, index): |
134 newrow[col_name] = value | 174 newrow[col_name] = value |
135 newrow[self.error_name] = error | 175 newrow[self.error_name] = error |
176 | |
177 self._timestamp_cpuclock(newrow) | |
136 | 178 |
137 newrow.append() | 179 newrow.append() |
138 | 180 |
139 self.hdf5_file.flush() | 181 self.hdf5_file.flush() |
140 | 182 |
185 # were appended, which should be a red flag. | 227 # were appended, which should be a red flag. |
186 assert len(self._buffer) < self.reduce_every | 228 assert len(self._buffer) < self.reduce_every |
187 | 229 |
188 # Outside of class to fix an issue with exec in Python 2.6. | 230 # Outside of class to fix an issue with exec in Python 2.6. |
189 # My apologies to the God of pretty code. | 231 # My apologies to the God of pretty code. |
190 def _BasicStatisticsSeries_construct_table_toexec(index_names): | 232 def _BasicStatisticsSeries_construct_table_toexec(index_names, store_timestamp, store_cpuclock): |
191 toexec = get_beginning_description_n_ints(index_names) | 233 toexec = "class LocalDescription(IsDescription):\n" |
192 | 234 |
193 bpos = len(index_names) | 235 toexec_, pos = _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock) |
194 toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n" | 236 toexec += toexec_ |
195 toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n" | 237 |
196 toexec += "\tmax = Float32Col(pos=" + str(bpos+2) + ")\n" | 238 toexec_, pos = _get_description_n_ints(index_names, pos=pos) |
197 toexec += "\tstd = Float32Col(pos=" + str(bpos+3) + ")\n" | 239 toexec += toexec_ |
198 | 240 |
241 toexec += "\tmean = Float32Col(pos=" + str(pos) + ")\n" | |
242 toexec += "\tmin = Float32Col(pos=" + str(pos+1) + ")\n" | |
243 toexec += "\tmax = Float32Col(pos=" + str(pos+2) + ")\n" | |
244 toexec += "\tstd = Float32Col(pos=" + str(pos+3) + ")\n" | |
245 | |
199 # This creates "LocalDescription", which we may then use | 246 # This creates "LocalDescription", which we may then use |
200 exec(toexec) | 247 exec(toexec) |
201 | 248 |
202 return LocalDescription | 249 return LocalDescription |
203 | 250 |
213 series_name : str | 260 series_name : str |
214 Not optional here. Will be prepended with "Basic statistics for " | 261 Not optional here. Will be prepended with "Basic statistics for " |
215 stats_functions : dict, optional | 262 stats_functions : dict, optional |
216 Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float). | 263 Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float). |
217 """ | 264 """ |
218 def __init__(self, table_name, hdf5_file, stats_functions=basic_stats_functions, index_names=('epoch',), title="", hdf5_group='/'): | 265 def __init__(self, table_name, hdf5_file, |
219 Series.__init__(self, table_name, hdf5_file, index_names, title) | 266 stats_functions=basic_stats_functions, |
267 index_names=('epoch',), title="", hdf5_group='/', | |
268 store_timestamp=True, store_cpuclock=True): | |
269 Series.__init__(self, table_name, hdf5_file, index_names, title, store_timestamp, store_cpuclock) | |
220 | 270 |
221 self.hdf5_group = hdf5_group | 271 self.hdf5_group = hdf5_group |
222 | 272 |
223 self.stats_functions = stats_functions | 273 self.stats_functions = stats_functions |
224 | 274 |
225 self._construct_table() | 275 self._construct_table() |
226 | 276 |
227 def _construct_table(self): | 277 def _construct_table(self): |
228 table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names) | 278 table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names, self.store_timestamp, self.store_cpuclock) |
229 | 279 |
230 self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description) | 280 self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description) |
231 | 281 |
232 def append(self, index, array): | 282 def append(self, index, array): |
233 """ | 283 """ |
249 | 299 |
250 newrow["mean"] = self.stats_functions['mean'](array) | 300 newrow["mean"] = self.stats_functions['mean'](array) |
251 newrow["min"] = self.stats_functions['min'](array) | 301 newrow["min"] = self.stats_functions['min'](array) |
252 newrow["max"] = self.stats_functions['max'](array) | 302 newrow["max"] = self.stats_functions['max'](array) |
253 newrow["std"] = self.stats_functions['std'](array) | 303 newrow["std"] = self.stats_functions['std'](array) |
304 | |
305 self._timestamp_cpuclock(newrow) | |
254 | 306 |
255 newrow.append() | 307 newrow.append() |
256 | 308 |
257 self.hdf5_file.flush() | 309 self.hdf5_file.flush() |
258 | 310 |