Mercurial > pylearn
view dataset.py @ 2:3fddb1c8f955
Rewrote DataSet interface and created FiniteDataSet interface.
author | bengioy@bengiomac.local |
---|---|
date | Sat, 22 Mar 2008 22:21:59 -0400 |
parents | 2cd82666b9a7 |
children | 378b68d5c4ad |
line wrap: on
line source
class DataSet(object):
    """
    Virtual base class (interface) for datasets.

    A dataset is basically an iterator over examples. It does not
    necessarily have a fixed length (this is useful for 'streams' which feed
    on-line learning). Datasets with fixed and known length are
    FiniteDataSet, a subclass of DataSet.

    Examples and datasets have named fields. One can obtain a sub-dataset by
    taking dataset.field or dataset(field1, field2, field3, ...). Fields are
    not mutually exclusive, i.e. two fields can overlap in their actual
    content. The content of a field can be of any type, but often will be a
    numpy tensor.

    Subclasses are expected to override next(), __call__() and fieldNames().
    """

    def __init__(self):
        pass

    def __iter__(self):
        """Return the dataset itself: a dataset is its own iterator."""
        return self

    def next(self):
        """Return the next example in the dataset."""
        raise NotImplementedError

    def __next__(self):
        """Python 3 iterator hook; delegates to next() so subclasses only override next()."""
        return self.next()

    def __getattr__(self, fieldname):
        """
        Return a sub-dataset containing only the given fieldname as field.

        Only invoked when normal attribute lookup fails. Raises
        AttributeError for dunder names and otherwise whatever
        __call__ raises.
        """
        # Protocol probes (copy, pickle, hasattr(obj, '__x__'), ...) must see
        # AttributeError for missing special names, not a sub-dataset or a
        # NotImplementedError coming from __call__.
        if fieldname.startswith("__") and fieldname.endswith("__"):
            raise AttributeError(fieldname)
        return self(fieldname)

    def __call__(self, *fieldnames):
        """Return a sub-dataset containing only the given fieldnames as fields."""
        raise NotImplementedError

    def fieldNames(self):
        """Return the list of field names supported by attribute access and by calling the dataset."""
        raise NotImplementedError


class FiniteDataSet(DataSet):
    """
    Virtual interface, a subclass of DataSet for datasets which have a
    finite, known length.

    Examples are indexed by an integer between 0 and len(self)-1, and a
    subdataset can be obtained by slicing.
    """

    def __init__(self):
        pass

    def __len__(self):
        """len(dataset) returns the number of examples in the dataset."""
        raise NotImplementedError

    def __getitem__(self, i):
        """dataset[i] returns the (i+1)-th example of the dataset."""
        raise NotImplementedError

    def __getslice__(self, *slice_args):
        """
        dataset[i:j] returns the subdataset with examples i,i+1,...,j-1.

        NOTE: __getslice__ is the Python 2 slicing hook; Python 3 passes a
        slice object to __getitem__ instead.
        """
        raise NotImplementedError