Mercurial > pylearn
diff dataset.py @ 292:174374d59405
merge
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Fri, 06 Jun 2008 15:56:18 -0400 |
parents | 9b533cc7874a |
children | 4bfdda107a17 |
line wrap: on
line diff
--- a/dataset.py Thu Jun 05 18:43:16 2008 -0400 +++ b/dataset.py Fri Jun 06 15:56:18 2008 -0400 @@ -161,17 +161,55 @@ numpy_vstack = lambda fieldname,values: numpy.vstack(values) numpy_hstack = lambda fieldnames,values: numpy.hstack(values) - def __init__(self,description=None,fieldtypes=None): - if description is None: - # by default return "<DataSetType>(<SuperClass1>,<SuperClass2>,...)" - description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" - self.description=description - self.fieldtypes=fieldtypes + def __init__(self, description=None, fieldnames=None, fieldtypes=None): + """ + @type fieldnames: list of strings + @type fieldtypes: list of python types, same length as fieldnames + @type description: string + @param description: description/name for this dataset + """ + def default_desc(): + return type(self).__name__ \ + + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" + + #self.fieldnames = fieldnames + + self.fieldtypes = fieldtypes if fieldtypes is not None \ + else [None]*1 #len(fieldnames) + + self.description = default_desc() if description is None \ + else description self._attribute_names = ["description"] - if fieldtypes: - self._attribute_names.append("fieldtypes") + + attributeNames = property(lambda self: copy.copy(self._attribute_names)) + + def __contains__(self, fieldname): + return (fieldname in self.fieldNames()) \ + or (fieldname in self.attributeNames()) + + def __iter__(self): + """Supports the syntax "for i in dataset: ..." - def attributeNames(self): return self._attribute_names + Using this syntax, "i" will be an Example instance (or equivalent) with + all the fields of DataSet self. Every field of "i" will give access to + a field of a single example. Fields should be accessible via + i["fielname"] or i[3] (in the order defined by the elements of the + Example returned by this iterator), but the derived class is free + to accept any type of identifier, and add extra functionality to the iterator. + + The default implementation calls the minibatches iterator and extracts the first example of each field. + """ + return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) + + def __len__(self): + """ + len(dataset) returns the number of examples in the dataset. + By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). + Sub-classes which implement finite-length datasets should redefine this method. + Some methods only make sense for finite-length datasets. + """ + return None + class MinibatchToSingleExampleIterator(object): """ @@ -198,24 +236,6 @@ def next_index(self): return self.minibatch_iterator.next_index() - def __iter__(self): - """Supports the syntax "for i in dataset: ..." - - Using this syntax, "i" will be an Example instance (or equivalent) with - all the fields of DataSet self. Every field of "i" will give access to - a field of a single example. Fields should be accessible via - i["fielname"] or i[3] (in the order defined by the elements of the - Example returned by this iterator), but the derived class is free - to accept any type of identifier, and add extra functionality to the iterator. - - The default implementation calls the minibatches iterator and extracts the first example of each field. - """ - return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1)) - - def __contains__(self, fieldname): - return (fieldname in self.fieldNames()) \ - or (fieldname in self.attributeNames()) - class MinibatchWrapAroundIterator(object): """ An iterator for minibatches that handles the case where we need to wrap around the @@ -358,15 +378,6 @@ """ raise AbstractFunction() - def __len__(self): - """ - len(dataset) returns the number of examples in the dataset. - By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint). - Sub-classes which implement finite-length datasets should redefine this method. - Some methods only make sense for finite-length datasets. - """ - return maxint - def is_unbounded(self): """ Tests whether a dataset is unbounded (e.g. a stream).