Mercurial > pylearn
annotate doc/v2_planning/dataset.py @ 1201:46527ae6db53
architecture: Clarified what I meant about saving the model
author | Olivier Delalleau <delallea@iro> |
---|---|
date | Mon, 20 Sep 2010 17:05:15 -0400 |
parents | 9686c0d9689d |
children |
rev | line source |
---|---|
1157
9686c0d9689d
Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
class DataColumn(object):
    """A single column of a dataset, backed by a loader object.

    The loader is duck-typed: it must provide ``getitem(i, j)`` (used for
    indexing and slicing) and ``length()``.  Any extra keyword arguments
    given at construction are kept verbatim in ``self.metadata`` and are
    propagated to sub-columns created by ``get_sub_datacolumn``.
    """

    def __init__(self, loader, **metadata):
        """Store the loader and metadata; batching starts unconfigured.

        :param loader: object providing ``getitem(i, j)`` and ``length()``.
        :param metadata: arbitrary keyword metadata attached to the column.
        """
        self.loader = loader
        # Batching parameters handed to the iterator; set via ``group``.
        self.batch_size = 0
        self.modulo = None
        self.metadata = metadata

    def __getitem__(self, idx):
        """Return ``loader.getitem(start, stop)`` for an index or a slice.

        An integer ``idx`` is turned into the one-element range
        ``[idx, idx + 1)``.  For a slice only ``start`` and ``stop`` are
        forwarded (unchanged, so they may be ``None``); the step is ignored.
        """
        if isinstance(idx, slice):
            return self.loader.getitem(idx.start, idx.stop)
        return self.loader.getitem(idx, idx + 1)

    def group(self, size, modulo):
        """Configure the batching used by ``__iter__``.

        :param size: number of items per batch.
        :param modulo: flag passed on to the iterator together with ``size``.
        """
        self.batch_size = size
        self.modulo = modulo

    def __iter__(self):
        """Return a ``DsetIter`` over this column using the ``group`` settings."""
        return DsetIter(self, self.batch_size, self.modulo)

    def get_sub_datacolumn(self, i, j):
        """Return a new ``DataColumn`` viewing items ``[i, j)`` of this one.

        The new column wraps a ``DsetLoader`` over ``self`` and receives the
        same metadata keywords.
        """
        return DataColumn(DsetLoader(self, i, j), **self.metadata)

    def length(self):
        """Return whatever the underlying loader reports as its length."""
        return self.loader.length()
9686c0d9689d
Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
26 |
9686c0d9689d
Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
class DsetIter(object):
    """Iterator yielding consecutive batches of ``size`` items from ``dset``.

    ``dset`` must support slicing (``dset[a:b]``) and the slices must
    support ``len()``.
    """

    def __init__(self, dset, size, modulo):
        """Remember the source and batching parameters; start at batch 0.

        :param dset: sliceable source of items.
        :param size: number of items per batch.
        :param modulo: if true, a final batch shorter than ``size`` is
            returned; otherwise (``False`` or ``None``) it is dropped.
        """
        self.dset = dset
        self.size = size
        self.modulo = modulo
        # Index of the next batch to return (counted in batches, not items).
        self.pos = 0

    def __iter__(self):
        """Return ``self``; the object is its own iterator."""
        return self

    def next(self):
        """Return the next batch.

        :raises StopIteration: when the source is exhausted, or when the
            remaining items do not fill a batch and ``modulo`` is not true.
        """
        first = self.pos * self.size
        res = self.dset[first:first + self.size]
        if len(res) == 0:
            raise StopIteration
        # A short batch can only be the trailing one; keep it only on request.
        if len(res) != self.size and not self.modulo:
            raise StopIteration
        self.pos += 1
        return res

    # Python 3 spelling of the iterator protocol method.
    __next__ = next
9686c0d9689d
Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
46 |
9686c0d9689d
Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
class DsetLoader(object):
    """Loader exposing the window ``[start, stop)`` of another dataset.

    ``dset`` must support slicing and provide ``length()``.
    """

    def __init__(self, dset, start, stop):
        """Record the window, filling in missing bounds.

        :param dset: underlying sliceable object with a ``length()`` method.
        :param start: first index of the window; ``None`` means 0.
        :param stop: end index of the window (exclusive); ``None`` means
            ``dset.length()``, which may itself be ``None``.
        """
        self.dset = dset
        self.start = 0 if start is None else start
        self.stop = self.dset.length() if stop is None else stop

    def getitem(self, i, j):
        """Return items ``[i, j)`` of the window, clipped to its end.

        ``i`` and ``j`` are offsets relative to ``self.start``.  ``None`` is
        accepted for either (as produced by an open-ended slice such as
        ``col[:5]``): ``i=None`` means the window start and ``j=None`` means
        the window end.
        """
        lo = self.start if i is None else self.start + i
        hi = self.stop if j is None else self.start + j
        if self.stop is not None:
            # Never read past the end of the window.  ``hi`` cannot be None
            # here because a missing ``j`` was replaced by ``self.stop``.
            lo = min(lo, self.stop)
            hi = min(hi, self.stop)
        return self.dset[lo:hi]

    def length(self):
        """Return the window size, or ``None`` when the end is unknown."""
        if self.stop is None:
            return None
        return self.stop - self.start
9686c0d9689d
Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
69 |
9686c0d9689d
Quick implementation of the Dataset Api we propose.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
class Dataset(object):
    """A dataset represented as a collection of columns."""

    def __init__(self, cols):
        """Keep a reference to the given columns in ``self.cols``."""
        self.cols = cols

    def get_sub_dataset(self, start, stop):
        """Return a new ``Dataset`` restricted to ``[start, stop)``.

        Each column is asked for its own sub-column through
        ``get_sub_datacolumn(start, stop)``; the results are gathered, in
        order, into a list that backs the new dataset.
        """
        sub_columns = []
        for column in self.cols:
            sub_columns.append(column.get_sub_datacolumn(start, stop))
        return Dataset(sub_columns)

    # Design note: this class could also offer the __getitem__ interface
    # and the iterator interface.