comparison doc/v2_planning/dataset.py @ 1157:9686c0d9689d

Quick implementation of the Dataset Api we propose.
author Arnaud Bergeron <abergeron@gmail.com>
date Fri, 17 Sep 2010 12:01:12 -0400
parents
children
comparison
equal deleted inserted replaced
1099:0b666177f725 1157:9686c0d9689d
class DataColumn(object):
    """One column of a dataset, backed by a loader object.

    The loader is expected to provide ``getitem(start, stop)`` and
    ``length()``; every data access on the column is delegated to it.
    Extra keyword arguments are stored verbatim as ``metadata`` and are
    forwarded to any sub-column created from this one.
    """

    def __init__(self, loader, **metadata):
        """Wrap ``loader`` and remember ``metadata``.

        Batching starts out unconfigured (``batch_size`` 0, ``modulo``
        None); call :meth:`group` before iterating.
        """
        self.loader = loader
        self.batch_size = 0
        self.modulo = None
        self.metadata = metadata

    def __getitem__(self, idx):
        """Return the loader's data for ``idx``.

        A slice is forwarded as ``(start, stop)``; its step is not used.
        Any other index is forwarded as the one-element range
        ``(idx, idx + 1)``, so the result is whatever the loader returns
        for a range of length one.
        """
        if isinstance(idx, slice):
            return self.loader.getitem(idx.start, idx.stop)
        return self.loader.getitem(idx, idx + 1)

    def group(self, size, modulo):
        """Configure the batching used by :meth:`__iter__`.

        ``size`` is the batch size and ``modulo`` is the flag handed to
        the iterator to decide what happens with a short final batch.
        """
        self.batch_size = size
        self.modulo = modulo

    def __iter__(self):
        """Return a batch iterator using the settings made by :meth:`group`."""
        return DsetIter(self, self.batch_size, self.modulo)

    def get_sub_datacolumn(self, i, j):
        """Return a new column restricted to the window ``[i, j)`` of this one.

        The new column carries the same metadata as this column.
        """
        return DataColumn(DsetLoader(self, i, j), **self.metadata)

    def length(self):
        """Return whatever the loader reports as its length."""
        return self.loader.length()
26
class DsetIter(object):
    """Iterator that yields consecutive batches of ``size`` items from ``dset``.

    ``dset`` only needs to support slicing (``dset[a:b]``) and the slices
    need to support ``len()``.  ``modulo`` controls the final batch when
    the data does not divide evenly: a true value returns the short
    batch, any false value (including None) drops it.
    """

    def __init__(self, dset, size, modulo):
        """Start iterating over ``dset`` from the first batch."""
        self.dset = dset
        self.size = size
        self.modulo = modulo
        self.pos = 0  # index of the next batch to return

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def next(self):
        """Return the next batch.

        Raises:
            StopIteration: when no data is left, or when only a short
                batch is left and ``modulo`` is false.
        """
        begin = self.pos * self.size
        res = self.dset[begin:begin + self.size]
        # len() rather than truthiness: the slice may be an array-like
        # object whose boolean value is not defined.
        if len(res) == 0:
            raise StopIteration
        if len(res) != self.size and not self.modulo:
            raise StopIteration
        self.pos += 1
        return res

    # Python 3 spelling of the iterator protocol; ``next`` is kept for
    # existing Python 2 style callers.
    __next__ = next
46
class DsetLoader(object):
    """Loader exposing the window ``[start, stop)`` of another dataset.

    ``dset`` must support slicing; it must also provide ``length()`` when
    ``stop`` is not given.  Indices passed to :meth:`getitem` are relative
    to ``start``.
    """

    def __init__(self, dset, start, stop):
        """Record the window, filling in defaults for missing bounds.

        A ``start`` of None becomes 0.  A ``stop`` of None becomes
        ``dset.length()``, which may itself be None; in that case the
        window has no upper bound.
        """
        self.dset = dset
        self.start = start
        self.stop = stop
        if self.start is None:
            self.start = 0
        if self.stop is None:
            self.stop = self.dset.length()

    def getitem(self, i, j):
        """Return ``dset`` items ``i`` to ``j`` (exclusive) of the window.

        ``i`` of None means the beginning of the window and ``j`` of None
        means its end, so open-ended slices work.  When the window has an
        upper bound, both ends are clamped so that nothing at or past
        ``stop`` is returned.
        """
        lo = self.start if i is None else self.start + i
        if self.stop is None:
            # Unbounded window: nothing to clamp against.
            hi = None if j is None else self.start + j
            return self.dset[lo:hi]
        hi = self.stop if j is None else min(self.start + j, self.stop)
        return self.dset[min(lo, self.stop):hi]

    def length(self):
        """Return the window size, or None when the window is unbounded."""
        if self.stop is None:
            return None
        return self.stop - self.start
69
class Dataset(object):
    """A collection of columns that are windowed together."""

    def __init__(self, cols):
        """Store ``cols``, the sequence of column objects, as given."""
        self.cols = cols

    def get_sub_dataset(self, start, stop):
        """Return a new Dataset whose columns are each restricted to
        ``[start, stop)`` via their ``get_sub_datacolumn`` method.
        """
        sub_cols = []
        for col in self.cols:
            sub_cols.append(col.get_sub_datacolumn(start, stop))
        return Dataset(sub_cols)


# NOTE: this class could also offer the __getitem__ and iterator interfaces.