# HG changeset patch
# User Arnaud Bergeron
# Date 1284739272 14400
# Node ID 9686c0d9689de73538d69c29d6092b1ce01c6fa2
# Parent  0b666177f7253ade53f52c42d0307b1344149976
# Quick implementation of the Dataset Api we propose.
#
# File added by this changeset: doc/v2_planning/dataset.py
"""Quick implementation of the proposed Dataset API.

A ``Dataset`` is a list of ``DataColumn`` objects.  Each column delegates
element access to a *loader*, i.e. any object that provides
``getitem(start, stop)`` and ``length()``.  ``DsetLoader`` is a loader that
exposes a ``[start, stop)`` window of another column, and ``DsetIter``
walks a column in fixed-size batches.
"""


class DataColumn(object):
    """A column of data backed by a loader object.

    ``loader`` must provide ``getitem(start, stop)`` and ``length()``.
    Any extra keyword arguments are stored verbatim in ``self.metadata``.
    """

    def __init__(self, loader, **metadata):
        self.loader = loader
        # Batching is configured later through group(); 0 means "not set".
        self.batch_size = 0
        self.modulo = None
        self.metadata = metadata

    def __getitem__(self, idx):
        """Return the elements selected by ``idx``.

        A slice is forwarded to the loader as ``(start, stop)``; its step
        is ignored and ``None`` bounds are passed through unchanged.  Any
        other index is treated as an integer ``i`` and fetched as the
        one-element range ``(i, i + 1)``.
        """
        if isinstance(idx, slice):
            return self.loader.getitem(idx.start, idx.stop)
        return self.loader.getitem(idx, idx + 1)

    def group(self, size, modulo):
        """Configure how iteration batches this column.

        ``size`` is the number of elements per batch.  ``modulo`` controls
        the final, shorter batch: a true value yields it, a false value
        drops it.
        """
        self.batch_size = size
        self.modulo = modulo

    def __iter__(self):
        """Return a batch iterator using the settings made by group()."""
        return DsetIter(self, self.batch_size, self.modulo)

    def get_sub_datacolumn(self, i, j):
        """Return a new column viewing elements ``[i, j)`` of this one.

        The metadata of this column is passed on to the new column.
        """
        return DataColumn(DsetLoader(self, i, j), **self.metadata)

    def length(self):
        """Return whatever the loader reports as its length."""
        return self.loader.length()


class DsetIter(object):
    """Iterate over ``dset`` in consecutive batches of ``size`` elements.

    ``dset`` only needs to support slicing, and the slices must support
    ``len()``.  When the last batch is shorter than ``size`` it is
    returned if ``modulo`` is true and dropped otherwise.  A non-positive
    ``size`` produces no batches at all.
    """

    def __init__(self, dset, size, modulo):
        self.dset = dset
        self.size = size
        self.modulo = modulo
        # Index of the next batch to return (not an element offset).
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        """Return the next batch or raise ``StopIteration``."""
        if self.size <= 0:
            raise StopIteration
        begin = self.pos * self.size
        res = self.dset[begin:begin + self.size]
        # len() rather than truthiness: batches may be array-like objects
        # whose truth value is not defined.
        if len(res) == 0:
            raise StopIteration
        if len(res) != self.size and not self.modulo:
            # Incomplete trailing batch that the caller asked to drop.
            raise StopIteration
        self.pos += 1
        return res

    # Python 3 spelling of the iterator protocol.
    __next__ = next


class DsetLoader(object):
    """Loader exposing the window ``[start, stop)`` of another column.

    ``start`` defaults to 0 when ``None``.  When ``stop`` is ``None`` it is
    taken from ``dset.length()``; that call may itself return ``None``, in
    which case the window has no upper bound.
    """

    def __init__(self, dset, start, stop):
        self.dset = dset
        self.start = start
        self.stop = stop
        if self.start is None:
            self.start = 0
        if self.stop is None:
            self.stop = self.dset.length()

    def getitem(self, i, j):
        """Return elements ``[i, j)`` of the window, clamped to its end.

        ``i`` and ``j`` are offsets relative to the start of the window.
        ``None`` means "from the start of the window" for ``i`` and "to
        the end of the window" for ``j``.
        """
        lo = self.start if i is None else self.start + i
        hi = None if j is None else self.start + j
        if self.stop is not None:
            # Never read past the end of the window.
            lo = min(lo, self.stop)
            hi = self.stop if hi is None else min(hi, self.stop)
        return self.dset[lo:hi]

    def length(self):
        """Return the window size, or ``None`` if it has no upper bound."""
        if self.stop is None:
            return None
        return self.stop - self.start


class Dataset(object):
    """A collection of ``DataColumn`` objects stored in ``self.cols``."""

    def __init__(self, cols):
        self.cols = cols

    def get_sub_dataset(self, start, stop):
        """Return a ``Dataset`` whose columns view ``[start, stop)``."""
        return Dataset([c.get_sub_datacolumn(start, stop) for c in self.cols])

    # we could have the __getitem__ interface and the iterator interface also