annotate datasets/gzpklfile.py @ 595:da46a62ce402

submitted JMLR pdf
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Tue, 05 Oct 2010 15:07:33 -0400
parents c2fae7b96769
children
rev   line source
222
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
1 import gzip
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
2 try:
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
3 import cPickle as pickle
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
4 except ImportError:
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
5 import pickle
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
6
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
7 from dataset import DataSet
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
8 from dsetiter import DataIterator
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
9 from itertools import izip
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
10
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
class ArrayFile(object):
    """Minimal file-like reader over a sliceable sequence.

    Wraps ``ary`` and hands it out in consecutive chunks through
    ``read``, keeping track of the current offset in ``pos``.
    """

    def __init__(self, ary):
        """Start reading ``ary`` from its first element.

        ary -- any object supporting slice indexing whose slices
               support ``len()`` (e.g. a list or an array).
        """
        self.ary = ary
        self.pos = 0

    def read(self, num):
        """Return the next ``num`` items and advance past them.

        num -- number of items requested; expected to be non-negative.

        Returns the slice ``ary[pos:pos + num]``.  Near the end of the
        data the slice is shorter than ``num``, and it is empty once
        everything has been consumed.
        """
        res = self.ary[self.pos:self.pos + num]
        # Advance by what was actually delivered rather than by what was
        # requested, so ``pos`` never runs past the end of the data and
        # always reflects the true number of items consumed.
        self.pos += len(res)
        return res
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
20
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
class GzpklDataSet(DataSet):
    """Dataset read lazily from a gzip-compressed pickle file.

    The file is only opened and unpickled the first time an iterator
    is requested; the unpickled object is then kept in ``self.datas``.
    """

    def __init__(self, fname, maxsize):
        """Remember where the data lives without loading it.

        fname   -- path of the gzipped pickle file.
        maxsize -- upper bound used to slice each array before it is
                   iterated (applied as ``[:maxsize]``).
        """
        self._fname = fname
        self.maxsize = maxsize
        # Identifiers of the three splits; presumably handed back to
        # _return_it as ``id`` by the DataSet base class -- verify there.
        self._train, self._valid, self._test = 0, 1, 2

    def _load(self):
        """Unpickle the whole file into ``self.datas``."""
        # NOTE(review): pickle.load runs arbitrary code from the file;
        # only use this on trusted dataset files.
        handle = gzip.open(self._fname, 'rb')
        try:
            self.datas = pickle.load(handle)
        finally:
            # Always release the file, even if unpickling fails.
            handle.close()

    def _return_it(self, batchsz, bufsz, id):
        """Build a paired iterator over the split selected by ``id``.

        batchsz -- forwarded to DataIterator as its second argument.
        bufsz   -- forwarded to DataIterator as its third argument.
        id      -- index into ``self.datas`` selecting the split.

        Returns an ``izip`` of two DataIterators, one over element 0
        and one over element 1 of the selected split, each truncated
        to ``self.maxsize`` entries.
        """
        # Lazy loading: hit the disk only on first use.
        if not hasattr(self, 'datas'):
            self._load()

        split = self.datas[id]
        limit = self.maxsize
        # Element 0 first, then element 1 -- same order as the pair
        # yielded by the resulting izip.
        streams = [
            DataIterator([ArrayFile(split[part][:limit])], batchsz, bufsz)
            for part in (0, 1)
        ]
        return izip(*streams)