Mercurial > pylearn
comparison doc/v2_planning/dataset.py @ 1157:9686c0d9689d
Quick implementation of the Dataset API we propose.
author | Arnaud Bergeron <abergeron@gmail.com> |
---|---|
date | Fri, 17 Sep 2010 12:01:12 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1099:0b666177f725 | 1157:9686c0d9689d |
---|---|
class DataColumn(object):
    """A single column of a dataset, backed by a loader object.

    The loader must provide ``getitem(start, stop)`` and ``length()``.
    Arbitrary keyword arguments are kept as column metadata and are
    propagated to sub-columns created by ``get_sub_datacolumn``.
    """

    def __init__(self, loader, **metadata):
        self.loader = loader
        # Batch size used by __iter__; 0 until group() is called.
        self.batch_size = 0
        # Passed to DsetIter as its ``modulo`` argument; set by group().
        self.modulo = None
        self.metadata = metadata

    def __getitem__(self, idx):
        """Return the items selected by ``idx`` from the loader.

        A slice is forwarded as ``(start, stop)``; the slice step is not
        used. Any other index ``idx`` is forwarded as the range
        ``(idx, idx + 1)``.
        """
        if isinstance(idx, slice):
            return self.loader.getitem(idx.start, idx.stop)
        return self.loader.getitem(idx, idx + 1)

    def group(self, size, modulo):
        """Configure the batch size and modulo flag used when iterating."""
        self.batch_size = size
        self.modulo = modulo

    def __iter__(self):
        """Return a DsetIter over this column using the group() settings."""
        return DsetIter(self, self.batch_size, self.modulo)

    def get_sub_datacolumn(self, i, j):
        """Return a new DataColumn restricted to ``[i, j)`` of this one.

        The new column reads through this column via a DsetLoader and
        carries the same metadata.
        """
        return DataColumn(DsetLoader(self, i, j), **self.metadata)

    def length(self):
        """Return whatever the loader reports as its length."""
        return self.loader.length()
26 | |
class DsetIter(object):
    """Iterate over a sliceable dataset in consecutive batches.

    ``dset`` must support slicing and the result must support ``len``.
    ``size`` is the number of items per batch. When ``modulo`` is true,
    a final batch shorter than ``size`` is still returned; otherwise
    (including ``None``) the short batch is dropped and iteration ends.
    """

    def __init__(self, dset, size, modulo):
        self.dset = dset
        self.size = size
        self.modulo = modulo
        # Index of the next batch to return (not an item offset).
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        """Return the next batch, or raise StopIteration when exhausted."""
        start = self.pos * self.size
        res = self.dset[start:start + self.size]
        if len(res) == 0:
            raise StopIteration
        # A short batch can only be the last one; keep it only on request.
        if len(res) != self.size and not self.modulo:
            raise StopIteration
        self.pos += 1
        return res

    # Python 3 spelling of the iterator protocol; same method.
    __next__ = next
46 | |
class DsetLoader(object):
    """Loader exposing the window ``[start, stop)`` of another dataset.

    ``dset`` must support slicing and provide ``length()``. A ``start`` of
    ``None`` means 0; a ``stop`` of ``None`` means ``dset.length()``, which
    may itself be ``None`` when the underlying length is not known.
    """

    def __init__(self, dset, start, stop):
        self.dset = dset
        self.start = 0 if start is None else start
        self.stop = dset.length() if stop is None else stop

    def getitem(self, i, j):
        """Return items ``[i, j)`` of the window, relative to ``start``.

        ``None`` for ``i`` means the beginning of the window and ``None``
        for ``j`` means its end, so open-ended slices work. When ``stop``
        is known, both bounds are clamped so that nothing past the window
        is returned.
        """
        lower = self.start if i is None else self.start + i
        if self.stop is None:
            upper = None if j is None else self.start + j
            return self.dset[lower:upper]
        upper = self.stop if j is None else min(self.start + j, self.stop)
        return self.dset[min(lower, self.stop):upper]

    def length(self):
        """Return the window size, or ``None`` when ``stop`` is unknown."""
        if self.stop is None:
            return None
        return self.stop - self.start
69 | |
class Dataset(object):
    """A collection of columns that can be sub-ranged together."""

    def __init__(self, cols):
        self.cols = cols

    def get_sub_dataset(self, start, stop):
        """Return a new Dataset whose columns are each restricted to
        ``[start, stop)`` via their ``get_sub_datacolumn`` method."""
        sub_columns = []
        for column in self.cols:
            sub_columns.append(column.get_sub_datacolumn(start, stop))
        return Dataset(sub_columns)
76 | |
77 # TODO: Dataset could also expose the __getitem__ and iterator interfaces, as DataColumn does. |