Mercurial > pylearn
changeset 1157:9686c0d9689d
Quick implementation of the Dataset Api we propose.
author | Arnaud Bergeron <abergeron@gmail.com> |
---|---|
date | Fri, 17 Sep 2010 12:01:12 -0400 |
parents | 0b666177f725 |
children | aea510b71386 |
files | doc/v2_planning/dataset.py |
diffstat | 1 files changed, 77 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Reconstructed from the collapsed diff of doc/v2_planning/dataset.py
# (--- /dev/null  +++ b/doc/v2_planning/dataset.py  @@ -0,0 +1,77 @@).
"""Quick implementation of the proposed Dataset API.

A ``DataColumn`` wraps a *loader*: any object exposing ``getitem(i, j)``
(return the items in the half-open range ``[i, j)``) and ``length()``.
``DsetLoader`` is a loader that exposes a window of another sliceable
object, ``DsetIter`` walks a column batch by batch, and ``Dataset`` groups
several columns together.
"""


class DataColumn(object):
    """A single column of data backed by a loader, with free-form metadata."""

    def __init__(self, loader, **metadata):
        """Store the loader and metadata; batching is off until ``group()``.

        :param loader: object providing ``getitem(i, j)`` and ``length()``.
        :param metadata: arbitrary keyword metadata, kept as a dict.
        """
        self.loader = loader
        # With a batch size of 0 iteration yields nothing; call group() first.
        self.batch_size = 0
        self.modulo = None
        self.metadata = metadata

    def __getitem__(self, idx):
        """Return the items selected by ``idx``.

        A slice returns ``loader.getitem(start, stop)``; the slice step is
        ignored. An integer returns the one-item range ``[idx, idx + 1)``,
        i.e. a batch of one item rather than a bare element.
        """
        if isinstance(idx, slice):
            # Open-ended bounds would otherwise reach the loader as None and
            # break its integer arithmetic.
            start = 0 if idx.start is None else idx.start
            stop = self.length() if idx.stop is None else idx.stop
            return self.loader.getitem(start, stop)
        return self.loader.getitem(idx, idx + 1)

    def group(self, size, modulo):
        """Configure iteration to yield batches of ``size`` items.

        :param size: number of items per batch.
        :param modulo: if true, a final batch shorter than ``size`` is
            yielded; otherwise it is dropped.
        """
        self.batch_size = size
        self.modulo = modulo

    def __iter__(self):
        """Return a fresh batch iterator using the current ``group()`` settings."""
        return DsetIter(self, self.batch_size, self.modulo)

    def get_sub_datacolumn(self, i, j):
        """Return a new column viewing items ``[i, j)`` with the same metadata."""
        return DataColumn(DsetLoader(self, i, j), **self.metadata)

    def length(self):
        """Return the number of items, as reported by the loader."""
        return self.loader.length()


class DsetIter(object):
    """Iterator yielding consecutive fixed-size batches sliced from ``dset``."""

    def __init__(self, dset, size, modulo):
        """
        :param dset: sliceable object whose slices support ``len()``.
        :param size: number of items per batch.
        :param modulo: if true, yield a trailing short batch; if false or
            ``None``, stop instead of yielding it.
        """
        self.dset = dset
        self.size = size
        self.modulo = modulo
        self.pos = 0  # index of the next batch, not of the next item

    def __iter__(self):
        return self

    def next(self):
        """Return the next batch, or raise ``StopIteration`` when exhausted."""
        lo = self.pos * self.size
        batch = self.dset[lo:lo + self.size]
        # len() rather than truthiness: batches may be array-like objects
        # whose truth value is not defined.
        if len(batch) == 0:
            raise StopIteration
        if len(batch) != self.size and not self.modulo:
            # Trailing partial batch and the caller asked to drop it.
            raise StopIteration
        self.pos += 1
        return batch

    # Python 3 spelling of the iterator protocol.
    __next__ = next


class DsetLoader(object):
    """Loader exposing the window ``[start, stop)`` of a sliceable ``dset``."""

    def __init__(self, dset, start, stop):
        """
        :param dset: sliceable object; must provide ``length()`` when
            ``stop`` is ``None``.
        :param start: first index of the window, or ``None`` for 0.
        :param stop: end index of the window (exclusive), or ``None`` for
            ``dset.length()``.
        """
        self.dset = dset
        self.start = 0 if start is None else start
        # May remain None when dset.length() itself reports None.
        self.stop = dset.length() if stop is None else stop

    def getitem(self, i, j):
        """Return window items ``[i, j)``, clamped to the window's end.

        Indices are relative to ``start``. ``None`` for ``i`` means the
        beginning of the window and ``None`` for ``j`` means its end.
        """
        lo = self.start + (0 if i is None else i)
        if self.stop is None:
            # Unbounded window: nothing to clamp against.
            hi = None if j is None else self.start + j
            return self.dset[lo:hi]
        hi = self.stop if j is None else min(self.start + j, self.stop)
        return self.dset[min(lo, self.stop):hi]

    def length(self):
        """Return the window size, or ``None`` when the window is unbounded."""
        if self.stop is None:
            return None
        return self.stop - self.start


class Dataset(object):
    """A collection of ``DataColumn`` objects."""

    def __init__(self, cols):
        """:param cols: sequence of columns making up the dataset."""
        self.cols = cols

    def get_sub_dataset(self, start, stop):
        """Return a new ``Dataset`` restricted to items ``[start, stop)``."""
        return Dataset([c.get_sub_datacolumn(start, stop) for c in self.cols])

    # we could have the __getitem__ interface and the iterator interface also