comparison dataset.py @ 292:174374d59405

merge
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 06 Jun 2008 15:56:18 -0400
parents 9b533cc7874a
children 4bfdda107a17
comparison
equal deleted inserted replaced
291:4e6b550fe131 292:174374d59405
159 """ 159 """
160 160
161 numpy_vstack = lambda fieldname,values: numpy.vstack(values) 161 numpy_vstack = lambda fieldname,values: numpy.vstack(values)
162 numpy_hstack = lambda fieldnames,values: numpy.hstack(values) 162 numpy_hstack = lambda fieldnames,values: numpy.hstack(values)
163 163
164 def __init__(self,description=None,fieldtypes=None): 164 def __init__(self, description=None, fieldnames=None, fieldtypes=None):
165 if description is None: 165 """
166 # by default return "<DataSetType>(<SuperClass1>,<SuperClass2>,...)" 166 @type fieldnames: list of strings
167 description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" 167 @type fieldtypes: list of python types, same length as fieldnames
168 self.description=description 168 @type description: string
169 self.fieldtypes=fieldtypes 169 @param description: description/name for this dataset
170 """
171 def default_desc():
172 return type(self).__name__ \
173 + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )"
174
175 #self.fieldnames = fieldnames
176
177 self.fieldtypes = fieldtypes if fieldtypes is not None \
178 else [None]*1 #len(fieldnames)
179
180 self.description = default_desc() if description is None \
181 else description
170 self._attribute_names = ["description"] 182 self._attribute_names = ["description"]
171 if fieldtypes: 183
172 self._attribute_names.append("fieldtypes") 184 attributeNames = property(lambda self: copy.copy(self._attribute_names))
173 185
174 def attributeNames(self): return self._attribute_names 186 def __contains__(self, fieldname):
187 return (fieldname in self.fieldNames()) \
188 or (fieldname in self.attributeNames())
189
190 def __iter__(self):
191 """Supports the syntax "for i in dataset: ..."
192
193 Using this syntax, "i" will be an Example instance (or equivalent) with
194 all the fields of DataSet self. Every field of "i" will give access to
195 a field of a single example. Fields should be accessible via
196 i["fieldname"] or i[3] (in the order defined by the elements of the
197 Example returned by this iterator), but the derived class is free
198 to accept any type of identifier, and add extra functionality to the iterator.
199
200 The default implementation calls the minibatches iterator and extracts the first example of each field.
201 """
202 return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1))
203
204 def __len__(self):
205 """
206 len(dataset) returns the number of examples in the dataset.
207 By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint).
208 Sub-classes which implement finite-length datasets should redefine this method.
209 Some methods only make sense for finite-length datasets.
210 """
211 return None
212
175 213
176 class MinibatchToSingleExampleIterator(object): 214 class MinibatchToSingleExampleIterator(object):
177 """ 215 """
178 Converts the result of minibatch iterator with minibatch_size==1 into 216 Converts the result of minibatch iterator with minibatch_size==1 into
179 single-example values in the result. Therefore the result of 217 single-example values in the result. Therefore the result of
195 self.minibatch._values = [value[0] for value in size1_minibatch.values()] 233 self.minibatch._values = [value[0] for value in size1_minibatch.values()]
196 return self.minibatch 234 return self.minibatch
197 235
198 def next_index(self): 236 def next_index(self):
199 return self.minibatch_iterator.next_index() 237 return self.minibatch_iterator.next_index()
200
201 def __iter__(self):
202 """Supports the syntax "for i in dataset: ..."
203
204 Using this syntax, "i" will be an Example instance (or equivalent) with
205 all the fields of DataSet self. Every field of "i" will give access to
206 a field of a single example. Fields should be accessible via
207 i["fieldname"] or i[3] (in the order defined by the elements of the
208 Example returned by this iterator), but the derived class is free
209 to accept any type of identifier, and add extra functionality to the iterator.
210
211 The default implementation calls the minibatches iterator and extracts the first example of each field.
212 """
213 return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1))
214
215 def __contains__(self, fieldname):
216 return (fieldname in self.fieldNames()) \
217 or (fieldname in self.attributeNames())
218 238
219 class MinibatchWrapAroundIterator(object): 239 class MinibatchWrapAroundIterator(object):
220 """ 240 """
221 An iterator for minibatches that handles the case where we need to wrap around the 241 An iterator for minibatches that handles the case where we need to wrap around the
222 dataset because n_batches*minibatch_size > len(dataset). It is constructed from 242 dataset because n_batches*minibatch_size > len(dataset). It is constructed from
356 The iterator returned by minibatches_nowrap does not need to implement 376 The iterator returned by minibatches_nowrap does not need to implement
357 a next_index() method either, as this will be provided by MinibatchWrapAroundIterator. 377 a next_index() method either, as this will be provided by MinibatchWrapAroundIterator.
358 """ 378 """
359 raise AbstractFunction() 379 raise AbstractFunction()
360 380
361 def __len__(self):
362 """
363 len(dataset) returns the number of examples in the dataset.
364 By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint).
365 Sub-classes which implement finite-length datasets should redefine this method.
366 Some methods only make sense for finite-length datasets.
367 """
368 return maxint
369
370 def is_unbounded(self): 381 def is_unbounded(self):
371 """ 382 """
372 Tests whether a dataset is unbounded (e.g. a stream). 383 Tests whether a dataset is unbounded (e.g. a stream).
373 """ 384 """
374 return len(self)==maxint 385 return len(self)==maxint