Mercurial > ift6266
annotate datasets/gzpklfile.py @ 595:da46a62ce402
submitted JMLR pdf
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Tue, 05 Oct 2010 15:07:33 -0400 |
parents | c2fae7b96769 |
children |
rev | line source |
---|---|
222
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
1 import gzip |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
2 try: |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
3 import cPickle as pickle |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
4 except ImportError: |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
5 import pickle |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
6 |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
7 from dataset import DataSet |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
8 from dsetiter import DataIterator |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
9 from itertools import izip |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
10 |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
class ArrayFile(object):
    """Minimal file-like reader over a sliceable sequence.

    Wraps ``ary`` and hands it out in consecutive chunks through
    :meth:`read`, tracking the current offset in ``pos``.
    """

    def __init__(self, ary):
        """Wrap ``ary`` and start reading from offset 0.

        ``ary`` only needs to support slicing (``ary[a:b]``).
        """
        self.ary = ary
        self.pos = 0

    def read(self, num):
        """Return the next ``num`` items and advance the read position.

        The result is the slice ``ary[pos:pos + num]``. For sequences with
        standard slice semantics (lists, tuples, strings) it is shorter than
        ``num`` -- possibly empty -- once the end of the data is reached.
        ``pos`` is always advanced by ``num``, even past the end.
        """
        start = self.pos
        self.pos = start + num
        return self.ary[start:self.pos]
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
20 |
4cfd0eb438af
Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
class GzpklDataSet(DataSet):
    """Dataset stored as a gzip-compressed pickle, loaded on first use.

    The unpickled object is kept in ``self.datas`` and is indexed as
    ``datas[id][0]`` and ``datas[id][1]``, where ``id`` is one of the
    indices stored in ``_train``, ``_valid`` and ``_test``.
    """

    def __init__(self, fname, maxsize):
        """Remember the file name and size cap; nothing is read from disk yet.

        fname   -- path of the gzip-compressed pickle file.
        maxsize -- upper bound used to slice each array (``[:maxsize]``)
                   before it is iterated over.
        """
        self._fname = fname
        self.maxsize = maxsize
        # Indices into the unpickled object for each split.
        # NOTE(review): presumably consumed by the DataSet base class, which
        # passes them to _return_it as ``id`` -- confirm against dataset.py.
        self._train = 0
        self._valid = 1
        self._test = 2

    def _load(self):
        """Unpickle the whole file into ``self.datas``."""
        f = gzip.open(self._fname, 'rb')
        try:
            # NOTE(review): pickle.load can execute arbitrary code embedded
            # in the file; only point this class at trusted dataset files.
            self.datas = pickle.load(f)
        finally:
            # Explicit close rather than ``with``: the file targets Python 2
            # (izip, cPickle).
            f.close()

    def _return_it(self, batchsz, bufsz, id):
        """Return an iterator of paired batches for the split ``id``.

        batchsz, bufsz -- forwarded unchanged to ``DataIterator``.
        id             -- index into ``self.datas`` selecting the split.

        Two ``DataIterator`` objects are built, one over ``datas[id][0]`` and
        one over ``datas[id][1]`` (presumably inputs and targets -- verify
        against the pickle layout), each truncated to ``self.maxsize`` items,
        and zipped together lazily.
        """
        # The presence of the ``datas`` attribute marks the file as loaded.
        if not hasattr(self, 'datas'):
            self._load()
        return izip(DataIterator([ArrayFile(self.datas[id][0][:self.maxsize])], batchsz, bufsz),
                    DataIterator([ArrayFile(self.datas[id][1][:self.maxsize])], batchsz, bufsz))