Mercurial > pylearn
comparison dataset.py @ 292:174374d59405
merge
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Fri, 06 Jun 2008 15:56:18 -0400 |
parents | 9b533cc7874a |
children | 4bfdda107a17 |
comparison
equal
deleted
inserted
replaced
291:4e6b550fe131 | 292:174374d59405 |
---|---|
159 """ | 159 """ |
160 | 160 |
161 numpy_vstack = lambda fieldname,values: numpy.vstack(values) | 161 numpy_vstack = lambda fieldname,values: numpy.vstack(values) |
162 numpy_hstack = lambda fieldnames,values: numpy.hstack(values) | 162 numpy_hstack = lambda fieldnames,values: numpy.hstack(values) |
163 | 163 |
164 def __init__(self,description=None,fieldtypes=None): | 164 def __init__(self, description=None, fieldnames=None, fieldtypes=None): |
165 if description is None: | 165 """ |
166 # by default return "<DataSetType>(<SuperClass1>,<SuperClass2>,...)" | 166 @type fieldnames: list of strings |
167 description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" | 167 @type fieldtypes: list of python types, same length as fieldnames |
168 self.description=description | 168 @type description: string |
169 self.fieldtypes=fieldtypes | 169 @param description: description/name for this dataset |
170 """ | |
171 def default_desc(): | |
172 return type(self).__name__ \ | |
173 + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" | |
174 | |
175 #self.fieldnames = fieldnames | |
176 | |
177 self.fieldtypes = fieldtypes if fieldtypes is not None \ | |
178 else [None]*1 #len(fieldnames) | |
179 | |
180 self.description = default_desc() if description is None \ | |
181 else description | |
170 self._attribute_names = ["description"] | 182 self._attribute_names = ["description"] |
171 if fieldtypes: | 183 |
172 self._attribute_names.append("fieldtypes") | 184 attributeNames = property(lambda self: copy.copy(self._attribute_names)) |
173 | 185 |
174 def attributeNames(self): return self._attribute_names | 186 def __contains__(self, fieldname): |
187 return (fieldname in self.fieldNames()) \ | |
188 or (fieldname in self.attributeNames()) | |
189 | |
190 def __iter__(self): | |
191 """Supports the syntax "for i in dataset: ..." | |
192 | |
193 Using this syntax, "i" will be an Example instance (or equivalent) with | |
194 all the fields of DataSet self. Every field of "i" will give access to | |
195 a field of a single example. Fields should be accessible via | |
196 i["fieldname"] or i[3] (in the order defined by the elements of the | |
197 Example returned by this iterator), but the derived class is free | |
198 to accept any type of identifier, and add extra functionality to the iterator. | |
199 | |
200 The default implementation calls the minibatches iterator and extracts the first example of each field. | |
201 """ | |
202 return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) | |
203 | |
204 def __len__(self): | |
205 """ | |
206 len(dataset) returns the number of examples in the dataset. | |
207 By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). | |
208 Sub-classes which implement finite-length datasets should redefine this method. | |
209 Some methods only make sense for finite-length datasets. | |
210 """ | |
211 return None | |
212 | |
175 | 213 |
176 class MinibatchToSingleExampleIterator(object): | 214 class MinibatchToSingleExampleIterator(object): |
177 """ | 215 """ |
178 Converts the result of minibatch iterator with minibatch_size==1 into | 216 Converts the result of minibatch iterator with minibatch_size==1 into |
179 single-example values in the result. Therefore the result of | 217 single-example values in the result. Therefore the result of |
195 self.minibatch._values = [value[0] for value in size1_minibatch.values()] | 233 self.minibatch._values = [value[0] for value in size1_minibatch.values()] |
196 return self.minibatch | 234 return self.minibatch |
197 | 235 |
198 def next_index(self): | 236 def next_index(self): |
199 return self.minibatch_iterator.next_index() | 237 return self.minibatch_iterator.next_index() |
200 | |
201 def __iter__(self): | |
202 """Supports the syntax "for i in dataset: ..." | |
203 | |
204 Using this syntax, "i" will be an Example instance (or equivalent) with | |
205 all the fields of DataSet self. Every field of "i" will give access to | |
206 a field of a single example. Fields should be accessible via | |
207 i["fieldname"] or i[3] (in the order defined by the elements of the | |
208 Example returned by this iterator), but the derived class is free | |
209 to accept any type of identifier, and add extra functionality to the iterator. | |
210 | |
211 The default implementation calls the minibatches iterator and extracts the first example of each field. | |
212 """ | |
213 return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) | |
214 | |
215 def __contains__(self, fieldname): | |
216 return (fieldname in self.fieldNames()) \ | |
217 or (fieldname in self.attributeNames()) | |
218 | 238 |
219 class MinibatchWrapAroundIterator(object): | 239 class MinibatchWrapAroundIterator(object): |
220 """ | 240 """ |
221 An iterator for minibatches that handles the case where we need to wrap around the | 241 An iterator for minibatches that handles the case where we need to wrap around the |
222 dataset because n_batches*minibatch_size > len(dataset). It is constructed from | 242 dataset because n_batches*minibatch_size > len(dataset). It is constructed from |
356 The iterator returned by minibatches_nowrap does not need to implement | 376 The iterator returned by minibatches_nowrap does not need to implement |
357 a next_index() method either, as this will be provided by MinibatchWrapAroundIterator. | 377 a next_index() method either, as this will be provided by MinibatchWrapAroundIterator. |
358 """ | 378 """ |
359 raise AbstractFunction() | 379 raise AbstractFunction() |
360 | 380 |
361 def __len__(self): | |
362 """ | |
363 len(dataset) returns the number of examples in the dataset. | |
364 By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). | |
365 Sub-classes which implement finite-length datasets should redefine this method. | |
366 Some methods only make sense for finite-length datasets. | |
367 """ | |
368 return maxint | |
369 | |
370 def is_unbounded(self): | 381 def is_unbounded(self): |
371 """ | 382 """ |
372 Tests whether a dataset is unbounded (e.g. a stream). | 383 Tests whether a dataset is unbounded (e.g. a stream). |
373 """ | 384 """ |
374 return len(self)==maxint | 385 return len(self)==maxint |