Mercurial > pylearn
diff dataset.py @ 78:3499918faa9d
In the middle of designing TLearner
author | bengioy@bengiomac.local |
---|---|
date | Mon, 05 May 2008 09:35:30 -0400 |
parents | 1e2bb5bad636 |
children | 158653a9bc7c |
line wrap: on
line diff
--- a/dataset.py Sun May 04 15:09:22 2008 -0400 +++ b/dataset.py Mon May 05 09:35:30 2008 -0400 @@ -88,6 +88,9 @@ the name <property>. The following properties should be supported: - 'description': a textual description or name for the dataset - 'fieldtypes': a list of types (one per field) + A DataSet may have other attributes that it makes visible to other objects. These are + used to store information that is not example-wise but global to the dataset. + The list of names of these attributes is given by the attributeNames() method. Datasets can be concatenated either vertically (increasing the length) or horizontally (augmenting the set of fields), if they are compatible, using @@ -114,7 +117,7 @@ or other properties of the dataset or associated with the dataset or the result of a computation stored in a dataset. These can be accessed through the [key] syntax when key is a string (or more specifically, neither an integer, a slice, nor a list). - + A DataSet sub-class should always redefine the following methods: - __len__ if it is not a stream - fieldNames @@ -125,6 +128,11 @@ - hasFields - __getitem__ may not be feasible with some streams - __iter__ + A sub-class should also append attributes to self._attribute_names + (the default value returned by attributeNames()). + By convention, attributes not in attributeNames() should have a name + starting with an underscore. + @todo enforce/test that convention! 
""" numpy_vstack = lambda fieldname,values: return numpy.vstack(values) @@ -136,6 +144,15 @@ description = type(self).__name__ + " ( " + join([x.__name__ for x in type(self).__bases__]) + " )" self.description=description self.fieldtypes=fieldtypes + self._attribute_names = ["description"] + if fieldtypes: + self._attribute_names.append("fieldtypes") + + def attributeNames(self): return self._attribute_names + + def setAttributes(self,attribute_names,attribute_values): + for name,value in zip(attribute_names,attribute_values): + self.__setattr__(name,value) class MinibatchToSingleExampleIterator(object): """