diff dataset.py @ 292:174374d59405

merge
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 06 Jun 2008 15:56:18 -0400
parents 9b533cc7874a
children 4bfdda107a17
line wrap: on
line diff
--- a/dataset.py	Thu Jun 05 18:43:16 2008 -0400
+++ b/dataset.py	Fri Jun 06 15:56:18 2008 -0400
@@ -161,17 +161,55 @@
     numpy_vstack = lambda fieldname,values: numpy.vstack(values)
     numpy_hstack = lambda fieldnames,values: numpy.hstack(values)
         
-    def __init__(self,description=None,fieldtypes=None):
-        if description is None:
-            # by default return "<DataSetType>(<SuperClass1>,<SuperClass2>,...)"
-            description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )"
-        self.description=description
-        self.fieldtypes=fieldtypes
+    def __init__(self, description=None, fieldnames=None, fieldtypes=None):
+        """Record the description and field types of this dataset.
+        @type fieldnames: list of strings (accepted, but not stored for now)
+        @type fieldtypes: list of python types, same length as fieldnames; [None] if omitted
+        @type description: string 
+        @param description: description/name for this dataset; built from the class and its bases if omitted
+        """
+        def default_desc():
+            return type(self).__name__ \
+                    + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )"
+
+        #self.fieldnames = fieldnames
+
+        self.fieldtypes = fieldtypes if fieldtypes is not None \
+                else [None]*1 #len(fieldnames)
+
+        self.description =  default_desc() if description is None \
+                else description
+        self._attribute_names = ["description"]
-        if fieldtypes:
-            self._attribute_names.append("fieldtypes")
+
+    attributeNames = property(lambda self: copy.copy(self._attribute_names))
+
+    def __contains__(self, fieldname):
+        """True if fieldname is a field name or an attribute name (attributeNames is a property)."""
+        return (fieldname in self.fieldNames()) or (fieldname in self.attributeNames)
+
+    def __iter__(self):
+        """Supports the syntax "for i in dataset: ..."
 
-    def attributeNames(self): return self._attribute_names
+        Using this syntax, "i" will be an Example instance (or equivalent) with
+        all the fields of DataSet self.  Every field of "i" will give access to
+        a field of a single example.  Fields should be accessible via
+        i["fieldname"] or i[3] (in the order defined by the elements of the
+        Example returned by this iterator), but the derived class is free
+        to accept any type of identifier, and add extra functionality to the iterator.
+
+        The default implementation calls the minibatches iterator and extracts the first example of each field.
+        """
+        return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1))
+
+    def __len__(self):
+        """
+        len(dataset) returns the number of examples in the dataset.
+        By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint).
+        Sub-classes which implement finite-length datasets should redefine this method.
+        Some methods only make sense for finite-length datasets.
+        """
+        return maxint
+
 
     class MinibatchToSingleExampleIterator(object):
         """
@@ -198,24 +236,6 @@
         def next_index(self):
             return self.minibatch_iterator.next_index()
 
-    def __iter__(self):
-        """Supports the syntax "for i in dataset: ..."
-
-        Using this syntax, "i" will be an Example instance (or equivalent) with
-        all the fields of DataSet self.  Every field of "i" will give access to
-        a field of a single example.  Fields should be accessible via
-        i["fielname"] or i[3] (in the order defined by the elements of the
-        Example returned by this iterator), but the derived class is free
-        to accept any type of identifier, and add extra functionality to the iterator.
-
-        The default implementation calls the minibatches iterator and extracts the first example of each field.
-        """
-        return DataSet.MinibatchToSingleExampleIterator(self.minibatches(None, minibatch_size = 1))
-
-    def __contains__(self, fieldname):
-        return (fieldname in self.fieldNames()) \
-                or (fieldname in self.attributeNames())
-
     class MinibatchWrapAroundIterator(object):
         """
         An iterator for minibatches that handles the case where we need to wrap around the
@@ -358,15 +378,6 @@
         """
         raise AbstractFunction()
 
-    def __len__(self):
-        """
-        len(dataset) returns the number of examples in the dataset.
-        By default, a DataSet is a 'stream', i.e. it has an unbounded length (sys.maxint).
-        Sub-classes which implement finite-length datasets should redefine this method.
-        Some methods only make sense for finite-length datasets.
-        """
-        return maxint
-
     def is_unbounded(self):
         """
         Tests whether a dataset is unbounded (e.g. a stream).