Mercurial > pylearn
view doc/v2_planning/dataset.py @ 1517:a6e634b83d88
allow to read filetensor compressed with bz2
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Wed, 09 May 2012 11:56:28 -0400 |
parents | 9686c0d9689d |
children |
line wrap: on
line source
class DataColumn(object):
    """One column of a dataset, backed by a loader object.

    The only loader methods used here are ``getitem(i, j)`` and ``length()``.
    ``DsetLoader`` below implements both; ``getitem(i, j)`` is expected to
    return the items in the range ``[i, j)``.
    """

    def __init__(self, loader, **metadata):
        """Store the loader and any free-form keyword metadata.

        Batching is disabled until ``group`` is called (``batch_size`` is 0,
        so iterating yields nothing).
        """
        self.loader = loader
        self.batch_size = 0
        self.modulo = None
        self.metadata = metadata

    def __getitem__(self, idx):
        """Return the items selected by ``idx`` through the loader.

        A slice is forwarded as ``getitem(start, stop)`` (the step is
        ignored); a single index ``k`` is forwarded as ``getitem(k, k + 1)``,
        so the result is whatever the loader returns for a one-item range.
        """
        if isinstance(idx, slice):
            return self.loader.getitem(idx.start, idx.stop)
        return self.loader.getitem(idx, idx + 1)

    def group(self, size, modulo):
        """Configure how iteration batches this column.

        size   -- number of items per batch.
        modulo -- if true, a final batch shorter than ``size`` is still
                  yielded; otherwise it is dropped.
        """
        self.batch_size = size
        self.modulo = modulo

    def __iter__(self):
        """Return an iterator over batches as configured by ``group``."""
        return DsetIter(self, self.batch_size, self.modulo)

    def get_sub_datacolumn(self, i, j):
        """Return a new column restricted to items ``[i, j)`` of this one.

        The metadata is carried over; the batching settings are not.
        """
        return DataColumn(DsetLoader(self, i, j), **self.metadata)

    def length(self):
        """Return the length reported by the loader."""
        return self.loader.length()


class DsetIter(object):
    """Iterator yielding consecutive batches of ``size`` items from ``dset``."""

    def __init__(self, dset, size, modulo):
        self.dset = dset
        self.size = size
        self.modulo = modulo
        # Index of the next batch to return (in batches, not in items).
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        """Return the next batch, or raise StopIteration when exhausted.

        A short final batch is returned only when ``modulo`` is true;
        otherwise iteration ends before it.
        """
        begin = self.pos * self.size
        res = self.dset[begin:begin + self.size]
        # Use len() rather than truthiness: the loader may return array-like
        # objects whose truth value is not defined.
        if len(res) == 0:
            raise StopIteration
        if len(res) != self.size and not self.modulo:
            raise StopIteration
        self.pos += 1
        return res

    # Python 3 spelling of the iterator protocol.
    __next__ = next


class DsetLoader(object):
    """Loader exposing the window ``[start, stop)`` of a sliceable ``dset``."""

    def __init__(self, dset, start, stop):
        """Remember the window; ``None`` bounds default to the whole ``dset``.

        When ``stop`` is None it is taken from ``dset.length()``, which may
        itself return None (unknown length).
        """
        self.dset = dset
        self.start = start
        self.stop = stop
        if self.start is None:
            self.start = 0
        if self.stop is None:
            self.stop = self.dset.length()

    def getitem(self, i, j):
        """Return items ``[i, j)`` of the window, clipped to its end.

        ``i`` and ``j`` are offsets relative to ``start``.  ``None`` for
        ``i`` means the beginning of the window and ``None`` for ``j`` means
        its end, so open-ended slices work.
        """
        lo = self.start + (0 if i is None else i)
        if self.stop is None:
            # Unknown end: nothing to clip against.
            hi = None if j is None else self.start + j
            return self.dset[lo:hi]
        hi = self.stop if j is None else min(self.start + j, self.stop)
        return self.dset[min(lo, self.stop):hi]

    def length(self):
        """Return the window size, or None when the end is unknown."""
        if self.stop is None:
            return None
        return self.stop - self.start


class Dataset(object):
    """A collection of columns that are sliced together."""

    def __init__(self, cols):
        self.cols = cols

    def get_sub_dataset(self, start, stop):
        """Return a Dataset whose columns are each restricted to [start, stop)."""
        return Dataset([c.get_sub_datacolumn(start, stop) for c in self.cols])

    # we could have the __getitem__ interface and the iterator interface also