pylearn: dataset.py comparison

comparison dataset.py @ 292:174374d59405

merge

author	James Bergstra <bergstrj@iro.umontreal.ca>
date	Fri, 06 Jun 2008 15:56:18 -0400
parents	9b533cc7874a
children	4bfdda107a17

comparison

equal deleted inserted replaced

-:4e6b550fe131
+:174374d59405
 """
 numpy_vstack = lambda fieldname,values: numpy.vstack(values)
 numpy_hstack = lambda fieldnames,values: numpy.hstack(values)
-def __init__(self,description=None,fieldtypes=None):
+def __init__(self, description=None, fieldnames=None, fieldtypes=None):
-if description is None:
+"""
-# by default return "<DataSetType>(<SuperClass1>,<SuperClass2>,...)"
+@type fieldnames: list of strings
-description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )"
+@type fieldtypes: list of python types, same length as fieldnames
-self.description=description
+@type description: string
-self.fieldtypes=fieldtypes
+@param description: description/name for this dataset
+"""
+def default_desc():
+return type(self).__name__ \
++ " ( " + join([x.__name__ for x in type(self).__bases__]) + " )"
+#self.fieldnames = fieldnames
+self.fieldtypes = fieldtypes if fieldtypes is not None \
+else [None]*1 #len(fieldnames)
+self.description =  default_desc() if description is None \
+else description
 self._attribute_names = ["description"]
-if fieldtypes:
-self._attribute_names.append("fieldtypes")
+attributeNames = property(lambda self: copy.copy(self._attribute_names))
-def attributeNames(self): return self._attribute_names
+def __contains__(self, fieldname):
+return (fieldname in self.fieldNames()) \
+or (fieldname in self.attributeNames())
+def __iter__(self):
+"""Supports the syntax "for i in dataset: ..."
+Using this syntax, "i" will be an Example instance (or equivalent) with
+all the fields of DataSet self.  Every field of "i" will give access to
+a field of a single example.  Fields should be accessible via
+i["fieldname"] or i[3] (in the order defined by the elements of the
+Example returned by this iterator), but the derived class is free
+to accept any type of identifier, and add extra functionality to the iterator.
+The default implementation calls the minibatches iterator and extracts the first example of each field.
+"""
+return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1))
+def __len__(self):
+"""
+len(dataset) returns the number of examples in the dataset.
+By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint).
+Sub-classes which implement finite-length datasets should redefine this method.
+Some methods only make sense for finite-length datasets.
+"""
+return None
 class MinibatchToSingleExampleIterator(object):
 """
 Converts the result of minibatch iterator with minibatch_size==1 into
 single-example values in the result. Therefore the result of
 self.minibatch._values = [value[0] for value in size1_minibatch.values()]
 return self.minibatch
 def next_index(self):
 return self.minibatch_iterator.next_index()
-def __iter__(self):
-"""Supports the syntax "for i in dataset: ..."
-Using this syntax, "i" will be an Example instance (or equivalent) with
-all the fields of DataSet self.  Every field of "i" will give access to
-a field of a single example.  Fields should be accessible via
-i["fieldname"] or i[3] (in the order defined by the elements of the
-Example returned by this iterator), but the derived class is free
-to accept any type of identifier, and add extra functionality to the iterator.
-The default implementation calls the minibatches iterator and extracts the first example of each field.
-"""
-return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1))
-def __contains__(self, fieldname):
-return (fieldname in self.fieldNames()) \
-or (fieldname in self.attributeNames())
 class MinibatchWrapAroundIterator(object):
 """
 An iterator for minibatches that handles the case where we need to wrap around the
 dataset because n_batches*minibatch_size > len(dataset). It is constructed from
 The iterator returned by minibatches_nowrap does not need to implement
 a next_index() method either, as this will be provided by MinibatchWrapAroundIterator.
 """
 raise AbstractFunction()
-def __len__(self):
-"""
-len(dataset) returns the number of examples in the dataset.
-By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint).
-Sub-classes which implement finite-length datasets should redefine this method.
-Some methods only make sense for finite-length datasets.
-"""
-return maxint
 def is_unbounded(self):
 """
 Tests whether a dataset is unbounded (e.g. a stream).
 """
 return len(self)==maxint

Mercurial > pylearn

comparison dataset.py @ 292:174374d59405