annotate doc/v2_planning/dataset.py @ 1286:8905186b176c

test_mcRBM - added code to iterate over tinyimages
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 23 Sep 2010 19:12:52 -0400
parents 9686c0d9689d
children
rev   line source
1157
9686c0d9689d Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
class DataColumn(object):
    """One column of a dataset, served by a loader object.

    The loader is any object exposing ``getitem(start, stop)`` and
    ``length()``.  Extra keyword arguments are kept verbatim in
    ``self.metadata`` and forwarded to sub-columns.
    """

    def __init__(self, loader, **metadata):
        """Wrap *loader*; batching is disabled until ``group`` is called."""
        self.loader = loader
        self.batch_size = 0
        self.modulo = None
        self.metadata = metadata

    def __getitem__(self, idx):
        """Return ``loader.getitem(start, stop)`` for a slice or an index.

        A slice forwards its ``start`` and ``stop`` (the step is ignored);
        any other index ``k`` is forwarded as the range ``(k, k + 1)``.
        """
        if isinstance(idx, slice):
            return self.loader.getitem(idx.start, idx.stop)
        return self.loader.getitem(idx, idx + 1)

    def group(self, size, modulo):
        """Configure iteration: batches of *size*, with the given *modulo* flag.

        The values are stored on the column and handed to ``DsetIter`` by
        ``__iter__``.
        """
        # The original omitted ``self`` and stored the flag under the
        # misspelled attribute ``module``, so ``modulo`` was never updated.
        self.batch_size = size
        self.modulo = modulo

    def __iter__(self):
        """Return a ``DsetIter`` over this column using the grouping settings."""
        # ``batch_size`` is the attribute set in ``__init__``/``group``; the
        # original read a non-existent ``self.size``.
        return DsetIter(self, self.batch_size, self.modulo)

    def get_sub_datacolumn(self, i, j):
        """Return a new ``DataColumn`` restricted to rows ``[i, j)`` of this one.

        The new column reads through a ``DsetLoader`` wrapping ``self`` and
        carries the same metadata.
        """
        return DataColumn(DsetLoader(self, i, j), **self.metadata)

    def length(self):
        """Return whatever the loader reports as its length."""
        return self.loader.length()
9686c0d9689d Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
26
9686c0d9689d Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
class DsetIter(object):
    """Iterate over a sliceable dataset in consecutive batches.

    ``dset`` must support slicing (``dset[a:b]``) and the result must
    support ``len``.  ``size`` is the batch size.  ``modulo`` decides what
    happens to a trailing batch shorter than ``size``: a truthy value yields
    it, a falsy value (``False`` or ``None``) drops it.
    """

    def __init__(self, dset, size, modulo):
        self.dset = dset
        self.size = size
        self.modulo = modulo
        self.pos = 0  # index of the next batch to return

    def __iter__(self):
        return self

    def next(self):
        """Return the next batch, or raise ``StopIteration`` when exhausted."""
        # Batch number ``pos`` covers rows [pos*size, (pos+1)*size).  The
        # original upper bound was (pos+size)*size, which over-reads.
        start = self.pos * self.size
        res = self.dset[start:start + self.size]
        if len(res) == 0:
            raise StopIteration
        # A short batch is the tail of the data; only emit it when asked to.
        # (The original compared against an undefined bare name ``modulo``.)
        if len(res) != self.size and not self.modulo:
            raise StopIteration
        # Advance so the following call moves on; the original never did,
        # and also fell off the end returning None for full batches.
        self.pos += 1
        return res

    # Python 3 spelling of the iterator protocol; ``next`` is kept for
    # Python 2 callers.
    __next__ = next
9686c0d9689d Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
46
9686c0d9689d Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
class DsetLoader(object):
    """Loader exposing the window ``[start, stop)`` of another dataset.

    ``dset`` must support slicing and provide ``length()``.  A ``start`` of
    ``None`` means 0; a ``stop`` of ``None`` means ``dset.length()`` (which
    may itself be ``None`` when the underlying length is unknown).
    """

    def __init__(self, dset, start, stop):
        self.dset = dset
        self.start = 0 if start is None else start
        self.stop = dset.length() if stop is None else stop

    def getitem(self, i, j):
        """Return rows ``[i, j)`` of the window, clipped to the window's end.

        ``i`` and ``j`` are offsets relative to ``start``.  ``None`` is
        accepted for either bound (as produced by open-ended slices such as
        ``col[:5]``): ``i=None`` means the window's first row and ``j=None``
        means the window's end.  The original raised ``TypeError`` on these.
        """
        lo = self.start if i is None else self.start + i
        if self.stop is None:
            # Unknown length: nothing to clip against.
            hi = None if j is None else self.start + j
            return self.dset[lo:hi]
        hi = self.stop if j is None else min(self.start + j, self.stop)
        return self.dset[min(lo, self.stop):hi]

    def length(self):
        """Return the window size, or ``None`` when ``stop`` is unknown."""
        if self.stop is None:
            return None
        return self.stop - self.start
9686c0d9689d Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
69
9686c0d9689d Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
class Dataset(object):
    """A dataset made of a list of columns.

    Each column is expected to provide ``get_sub_datacolumn(start, stop)``.
    """

    def __init__(self, cols):
        self.cols = cols

    def get_sub_dataset(self, start, stop):
        """Return a new ``Dataset`` whose columns are each restricted to
        the range ``(start, stop)`` via ``get_sub_datacolumn``."""
        return Dataset([c.get_sub_datacolumn(start, stop) for c in self.cols])

    # we could have the __getitem__ interface and the iterator interface also