changeset 1157:9686c0d9689d

Quick implementation of the Dataset Api we propose.
author Arnaud Bergeron <abergeron@gmail.com>
date Fri, 17 Sep 2010 12:01:12 -0400
parents 0b666177f725
children aea510b71386
files doc/v2_planning/dataset.py
diffstat 1 files changed, 77 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/dataset.py	Fri Sep 17 12:01:12 2010 -0400
@@ -0,0 +1,77 @@
+class DataColumn(object):
+    """One column of a dataset, read through a loader object.
+
+    The loader must provide ``getitem(start, stop)`` and ``length()``;
+    everything this class returns comes from those two calls.
+    """
+
+    def __init__(self, loader, **metadata):
+        """Wrap ``loader`` and keep ``metadata`` as a dict on the column."""
+        self.loader = loader
+        # Batching parameters consumed by __iter__; change them with group().
+        self.batch_size = 0
+        self.modulo = None
+        self.metadata = metadata
+
+    def __getitem__(self, idx):
+        """Return ``loader.getitem(start, stop)`` for a slice or an index.
+
+        A plain index ``i`` is forwarded as the range ``(i, i + 1)``, so the
+        result is whatever the loader returns for a one-element range.
+        The ``step`` of a slice is ignored.
+        """
+        if isinstance(idx, slice):
+            return self.loader.getitem(idx.start, idx.stop)
+        return self.loader.getitem(idx, idx + 1)
+
+    def group(self, size, modulo):
+        """Set the batch size and the ``modulo`` flag used when iterating."""
+        self.batch_size = size
+        self.modulo = modulo
+
+    def __iter__(self):
+        """Return a ``DsetIter`` over this column using the group() settings."""
+        return DsetIter(self, self.batch_size, self.modulo)
+
+    def get_sub_datacolumn(self, i, j):
+        """Return a new column backed by ``DsetLoader(self, i, j)``.
+
+        The new column receives the same metadata keywords as this one.
+        """
+        return DataColumn(DsetLoader(self, i, j), **self.metadata)
+
+    def length(self):
+        """Return ``loader.length()``."""
+        return self.loader.length()
+
+class DsetIter(object):
+    """Iterator yielding consecutive batches of ``size`` items from ``dset``.
+
+    ``dset`` only needs to support slicing, and ``len()`` on the slices.
+    """
+
+    def __init__(self, dset, size, modulo):
+        """Start iterating ``dset`` from its first batch.
+
+        ``modulo`` decides what happens to a final batch shorter than
+        ``size``: a true value yields it, a false value (False or None)
+        drops it.
+        """
+        self.dset = dset
+        self.size = size
+        self.modulo = modulo
+        # Index of the next batch, counted in batches rather than in items.
+        self.pos = 0
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """Return the next batch; raise StopIteration when none is left."""
+        begin = self.pos * self.size
+        res = self.dset[begin:begin + self.size]
+        # len() rather than truthiness: only len() is required of the slice.
+        if len(res) == 0:
+            raise StopIteration
+        # A short batch can only be the last one; drop it unless asked for.
+        if len(res) != self.size and not self.modulo:
+            raise StopIteration
+        self.pos += 1
+        return res
+
+    # Python 3 spelling of the iterator protocol.
+    __next__ = next
+
+class DsetLoader(object):
+    """Loader exposing the window ``[start, stop)`` of a sliceable ``dset``."""
+
+    def __init__(self, dset, start, stop):
+        """Remember the window bounds.
+
+        A ``start`` of None becomes 0.  A ``stop`` of None is replaced by
+        ``dset.length()``.
+        """
+        self.dset = dset
+        self.start = 0 if start is None else start
+        # Still None afterwards if dset.length() itself returns None.
+        self.stop = dset.length() if stop is None else stop
+
+    def getitem(self, i, j):
+        """Return ``dset[start + i:start + j]``, clamped to ``stop``.
+
+        ``i`` and ``j`` are offsets relative to the window start.  None is
+        accepted for either one, as produced by open-ended slices: a None
+        ``i`` means the window start, a None ``j`` means the window end.
+        """
+        lower = self.start if i is None else self.start + i
+        upper = None if j is None else self.start + j
+        if self.stop is None:
+            # No known end: pass the bounds through unclamped.
+            return self.dset[lower:upper]
+        if upper is None:
+            upper = self.stop
+        return self.dset[min(lower, self.stop):min(upper, self.stop)]
+
+    def length(self):
+        """Return ``stop - start``, or None when ``stop`` is None."""
+        if self.stop is None:
+            return None
+        return self.stop - self.start
+
+class Dataset(object):
+    """A group of columns stored in ``cols``."""
+
+    def __init__(self, cols):
+        """Keep ``cols`` as given; no copy is made."""
+        self.cols = cols
+
+    def get_sub_dataset(self, start, stop):
+        """Return a new Dataset built from each column's
+        ``get_sub_datacolumn(start, stop)``, in the same order."""
+        sub_columns = []
+        for column in self.cols:
+            sub_columns.append(column.get_sub_datacolumn(start, stop))
+        return Dataset(sub_columns)
+
+    # The __getitem__ and iterator interfaces could be provided here as well.