comparison utils/seriestables/series.py @ 218:4c137f16b013

Modifications pour stocker des timestamps/cpuclock avec chaque rangée créée, selon suggestion de Yoshua ce matin
author fsavard
date Wed, 10 Mar 2010 20:13:45 -0500
parents a96fa4de06d2
children e172ef73cdc5
comparison
equal deleted inserted replaced
213:a96fa4de06d2 218:4c137f16b013
1 from tables import * 1 from tables import *
2 import numpy 2 import numpy
3 import time
3 4
4 ''' 5 '''
5 The way these "IsDescription constructors" work is simple: write the 6 The way these "IsDescription constructors" work is simple: write the
6 code as if it were in a file, then exec()ute it, leaving us with 7 code as if it were in a file, then exec()ute it, leaving us with
7 a local-scoped LocalDescription which may be used to call createTable. 8 a local-scoped LocalDescription which may be used to call createTable.
9 It's a small hack, but it's necessary as the names of the columns 10 It's a small hack, but it's necessary as the names of the columns
10 are retrieved based on the variable name, which we can't programmatically set 11 are retrieved based on the variable name, which we can't programmatically set
11 otherwise. 12 otherwise.
12 ''' 13 '''
13 14
14 def get_beginning_description_n_ints(int_names, int_width=64): 15 def _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock, pos=0):
16 toexec = ""
17
18 if store_timestamp:
19 toexec += "\ttimestamp = Time32Col(pos="+str(pos)+")\n"
20 pos += 1
21
22 if store_cpuclock:
23 toexec += "\tcpuclock = Float64Col(pos="+str(pos)+")\n"
24 pos += 1
25
26 return toexec, pos
27
28 def _get_description_n_ints(int_names, int_width=64, pos=0):
15 """ 29 """
16 Begins construction of a class inheriting from IsDescription 30 Begins construction of a class inheriting from IsDescription
17 to construct an HDF5 table with index columns named with int_names. 31 to construct an HDF5 table with index columns named with int_names.
18 32
19 See Series().__init__ to see how those are used. 33 See Series().__init__ to see how those are used.
20 """ 34 """
21 int_constructor = "Int64Col" 35 int_constructor = "Int64Col"
22 if int_width == 32: 36 if int_width == 32:
23 int_constructor = "Int32Col" 37 int_constructor = "Int32Col"
24 38
25 toexec = "class LocalDescription(IsDescription):\n" 39 toexec = ""
26
27 pos = 0
28 40
29 for n in int_names: 41 for n in int_names:
30 toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n" 42 toexec += "\t" + n + " = " + int_constructor + "(pos=" + str(pos) + ")\n"
31 43 pos += 1
32 return toexec 44
33 45 return toexec, pos
34 def get_description_with_n_ints_n_floats(int_names, float_names, int_width=64, float_width=32): 46
47 def _get_description_with_n_ints_n_floats(int_names, float_names,
48 int_width=64, float_width=32,
49 store_timestamp=True, store_cpuclock=True):
35 """ 50 """
36 Constructs a class to be used when constructing a table with PyTables. 51 Constructs a class to be used when constructing a table with PyTables.
37 52
38 This is useful to construct a series with an index with multiple levels. 53 This is useful to construct a series with an index with multiple levels.
39 E.g. if you want to index your "validation error" with "epoch" first, then 54 E.g. if you want to index your "validation error" with "epoch" first, then
47 Names of the float (e.g. error) columns 62 Names of the float (e.g. error) columns
48 int_width : {'32', '64'} 63 int_width : {'32', '64'}
49 Type of ints. 64 Type of ints.
50 float_width : {'32', '64'} 65 float_width : {'32', '64'}
51 Type of floats. 66 Type of floats.
67 store_timestamp : bool
68 See __init__ of Series
69 store_cpuclock : bool
70 See __init__ of Series
52 71
53 Returns 72 Returns
54 ------- 73 -------
55 A class object, to pass to createTable() 74 A class object, to pass to createTable()
56 """ 75 """
57 76
58 toexec = get_beginning_description_n_ints(int_names, int_width=int_width) 77 toexec = "class LocalDescription(IsDescription):\n"
78
79 toexec_, pos = _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock)
80 toexec += toexec_
81
82 toexec_, pos = _get_description_n_ints(int_names, int_width=int_width, pos=pos)
83 toexec += toexec_
59 84
60 float_constructor = "Float32Col" 85 float_constructor = "Float32Col"
61 if float_width == 64: 86 if float_width == 64:
62 float_constructor = "Float64Col" 87 float_constructor = "Float64Col"
63
64 pos = len(int_names)
65 88
66 for n in float_names: 89 for n in float_names:
67 toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n" 90 toexec += "\t" + n + " = " + float_constructor + "(pos=" + str(pos) + ")\n"
91 pos += 1
68 92
69 exec(toexec) 93 exec(toexec)
70 94
71 return LocalDescription 95 return LocalDescription
72 96
73 class Series(): 97 class Series():
74 def __init__(self, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'): 98 def __init__(self, table_name, hdf5_file, index_names=('epoch',),
99 title="", hdf5_group='/',
100 store_timestamp=True, store_cpuclock=True):
75 """Basic arguments each Series must get. 101 """Basic arguments each Series must get.
76 102
77 Parameters 103 Parameters
78 ---------- 104 ----------
79 table_name : str 105 table_name : str
84 Columns to use as index for elements in the series, other example would be ('epoch', 'minibatch'). This would then allow you to call append(index, element) with index made of two ints, one for epoch index, one for minibatch index in epoch. 110 Columns to use as index for elements in the series, other example would be ('epoch', 'minibatch'). This would then allow you to call append(index, element) with index made of two ints, one for epoch index, one for minibatch index in epoch.
85 title : str 111 title : str
86 Title to attach to this table as metadata. Can contain spaces and be longer than the table_name. 112 Title to attach to this table as metadata. Can contain spaces and be longer than the table_name.
87 hdf5_group : str 113 hdf5_group : str
88 Path of the group (similar to a directory) in the HDF5 file under which to create the table. 114 Path of the group (similar to a directory) in the HDF5 file under which to create the table.
115 store_timestamp : bool
116 Whether to create a column for timestamps and store them with each record.
117 store_cpuclock : bool
118 Whether to create a column for cpu clock and store it with each record.
89 """ 119 """
90 self.table_name = table_name 120 self.table_name = table_name
91 self.hdf5_file = hdf5_file 121 self.hdf5_file = hdf5_file
92 self.index_names = index_names 122 self.index_names = index_names
93 self.title = title 123 self.title = title
94 124
125 self.store_timestamp = store_timestamp
126 self.store_cpuclock = store_cpuclock
127
95 def append(self, index, element): 128 def append(self, index, element):
96 raise NotImplementedError 129 raise NotImplementedError
130
131 def _timestamp_cpuclock(self, newrow):
132 newrow["timestamp"] = time.time()
133 newrow["cpuclock"] = time.clock()
97 134
98 # To put in a series dictionary instead of a real series, to do nothing 135 # To put in a series dictionary instead of a real series, to do nothing
99 # when we don't want a given series to be saved. 136 # when we don't want a given series to be saved.
100 class DummySeries(): 137 class DummySeries():
101 def append(self, index, element): 138 def append(self, index, element):
102 pass 139 pass
103 140
104 class ErrorSeries(Series): 141 class ErrorSeries(Series):
105 def __init__(self, error_name, table_name, hdf5_file, index_names=('epoch',), title="", hdf5_group='/'): 142 def __init__(self, error_name, table_name,
106 Series.__init__(self, table_name, hdf5_file, index_names, title) 143 hdf5_file, index_names=('epoch',),
144 title="", hdf5_group='/',
145 store_timestamp=True, store_cpuclock=True):
146 Series.__init__(self, table_name, hdf5_file, index_names, title, store_timestamp, store_cpuclock)
107 147
108 self.error_name = error_name 148 self.error_name = error_name
109 149
110 table_description = self._get_table_description() 150 table_description = self._get_table_description()
111 151
112 self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title) 152 self._table = hdf5_file.createTable(hdf5_group, self.table_name, table_description, title=title)
113 153
114 def _get_table_description(self): 154 def _get_table_description(self):
115 return get_description_with_n_ints_n_floats(self.index_names, (self.error_name,)) 155 return _get_description_with_n_ints_n_floats(self.index_names, (self.error_name,))
116 156
117 def append(self, index, error): 157 def append(self, index, error):
118 """ 158 """
119 Parameters 159 Parameters
120 ---------- 160 ----------
131 171
132 # Columns for index in table are based on index_names 172 # Columns for index in table are based on index_names
133 for col_name, value in zip(self.index_names, index): 173 for col_name, value in zip(self.index_names, index):
134 newrow[col_name] = value 174 newrow[col_name] = value
135 newrow[self.error_name] = error 175 newrow[self.error_name] = error
176
177 self._timestamp_cpuclock(newrow)
136 178
137 newrow.append() 179 newrow.append()
138 180
139 self.hdf5_file.flush() 181 self.hdf5_file.flush()
140 182
185 # were appended, which should be a red flag. 227 # were appended, which should be a red flag.
186 assert len(self._buffer) < self.reduce_every 228 assert len(self._buffer) < self.reduce_every
187 229
188 # Outside of class to fix an issue with exec in Python 2.6. 230 # Outside of class to fix an issue with exec in Python 2.6.
189 # My sorries to the God of pretty code. 231 # My sorries to the God of pretty code.
190 def _BasicStatisticsSeries_construct_table_toexec(index_names): 232 def _BasicStatisticsSeries_construct_table_toexec(index_names, store_timestamp, store_cpuclock):
191 toexec = get_beginning_description_n_ints(index_names) 233 toexec = "class LocalDescription(IsDescription):\n"
192 234
193 bpos = len(index_names) 235 toexec_, pos = _get_description_timestamp_cpuclock_columns(store_timestamp, store_cpuclock)
194 toexec += "\tmean = Float32Col(pos=" + str(bpos) + ")\n" 236 toexec += toexec_
195 toexec += "\tmin = Float32Col(pos=" + str(bpos+1) + ")\n" 237
196 toexec += "\tmax = Float32Col(pos=" + str(bpos+2) + ")\n" 238 toexec_, pos = _get_description_n_ints(index_names, pos=pos)
197 toexec += "\tstd = Float32Col(pos=" + str(bpos+3) + ")\n" 239 toexec += toexec_
198 240
241 toexec += "\tmean = Float32Col(pos=" + str(pos) + ")\n"
242 toexec += "\tmin = Float32Col(pos=" + str(pos+1) + ")\n"
243 toexec += "\tmax = Float32Col(pos=" + str(pos+2) + ")\n"
244 toexec += "\tstd = Float32Col(pos=" + str(pos+3) + ")\n"
245
199 # This creates "LocalDescription", which we may then use 246 # This creates "LocalDescription", which we may then use
200 exec(toexec) 247 exec(toexec)
201 248
202 return LocalDescription 249 return LocalDescription
203 250
213 series_name : str 260 series_name : str
214 Not optional here. Will be prepended with "Basic statistics for " 261 Not optional here. Will be prepended with "Basic statistics for "
215 stats_functions : dict, optional 262 stats_functions : dict, optional
216 Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float). 263 Dictionary with a function for each key "mean", "min", "max", "std". The function must take whatever is passed to append(...) and return a single number (float).
217 """ 264 """
218 def __init__(self, table_name, hdf5_file, stats_functions=basic_stats_functions, index_names=('epoch',), title="", hdf5_group='/'): 265 def __init__(self, table_name, hdf5_file,
219 Series.__init__(self, table_name, hdf5_file, index_names, title) 266 stats_functions=basic_stats_functions,
267 index_names=('epoch',), title="", hdf5_group='/',
268 store_timestamp=True, store_cpuclock=True):
269 Series.__init__(self, table_name, hdf5_file, index_names, title, store_timestamp, store_cpuclock)
220 270
221 self.hdf5_group = hdf5_group 271 self.hdf5_group = hdf5_group
222 272
223 self.stats_functions = stats_functions 273 self.stats_functions = stats_functions
224 274
225 self._construct_table() 275 self._construct_table()
226 276
227 def _construct_table(self): 277 def _construct_table(self):
228 table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names) 278 table_description = _BasicStatisticsSeries_construct_table_toexec(self.index_names, self.store_timestamp, self.store_cpuclock)
229 279
230 self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description) 280 self._table = self.hdf5_file.createTable(self.hdf5_group, self.table_name, table_description)
231 281
232 def append(self, index, array): 282 def append(self, index, array):
233 """ 283 """
249 299
250 newrow["mean"] = self.stats_functions['mean'](array) 300 newrow["mean"] = self.stats_functions['mean'](array)
251 newrow["min"] = self.stats_functions['min'](array) 301 newrow["min"] = self.stats_functions['min'](array)
252 newrow["max"] = self.stats_functions['max'](array) 302 newrow["max"] = self.stats_functions['max'](array)
253 newrow["std"] = self.stats_functions['std'](array) 303 newrow["std"] = self.stats_functions['std'](array)
304
305 self._timestamp_cpuclock(newrow)
254 306
255 newrow.append() 307 newrow.append()
256 308
257 self.hdf5_file.flush() 309 self.hdf5_file.flush()
258 310