diff dataset.py @ 78:3499918faa9d

In the middle of designing TLearner
author bengioy@bengiomac.local
date Mon, 05 May 2008 09:35:30 -0400
parents 1e2bb5bad636
children 158653a9bc7c
line wrap: on
line diff
--- a/dataset.py	Sun May 04 15:09:22 2008 -0400
+++ b/dataset.py	Mon May 05 09:35:30 2008 -0400
@@ -88,6 +88,9 @@
      the name <property>. The following properties should be supported:
           - 'description': a textual description or name for the dataset
           - 'fieldtypes': a list of types (one per field)
+    A DataSet may have other attributes that it makes visible to other objects. These are
+    used to store information that is not example-wise but global to the dataset.
+    The list of names of these attributes is given by the attributeNames() method.
 
     Datasets can be concatenated either vertically (increasing the length) or
     horizontally (augmenting the set of fields), if they are compatible, using
@@ -114,7 +117,7 @@
     or other properties of the dataset or associated with the dataset or the result
     of a computation stored in a dataset. These can be accessed through the [key] syntax
     when key is a string (or more specifically, neither an integer, a slice, nor a list).
-    
+
     A DataSet sub-class should always redefine the following methods:
        - __len__ if it is not a stream
        - fieldNames
@@ -125,6 +128,11 @@
        - hasFields
        - __getitem__ may not be feasible with some streams
        - __iter__
+    A sub-class should also append attributes to self._attribute_names
+    (the default value returned by attributeNames()).
+    By convention, attributes not in attributeNames() should have a name
+    starting with an underscore.
+    @todo enforce/test that convention!
     """
 
     numpy_vstack = lambda fieldname,values: numpy.vstack(values)  # ignores fieldname; a lambda body must be an expression, so no 'return'
@@ -136,6 +144,15 @@
             description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )"
         self.description=description
         self.fieldtypes=fieldtypes
+        self._attribute_names = ["description"]
+        if fieldtypes:
+            self._attribute_names.append("fieldtypes")
+
+    def attributeNames(self): return self._attribute_names  # returns the list object itself (not a copy), so appending to it changes the dataset's list
+
+    def setAttributes(self,attribute_names,attribute_values):  # set each named attribute on self to the value at the same position
+        for name,value in zip(attribute_names,attribute_values):  # zip pairs positionally and stops at the shorter sequence
+            self.__setattr__(name,value)  # NOTE(review): name is not appended to self._attribute_names here -- confirm callers register it
     
     class MinibatchToSingleExampleIterator(object):
         """