Mercurial > pylearn
annotate datasets/MNIST.py @ 518:4aa7f74ea93f
init dataset
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 12 Nov 2008 12:36:09 -0500 |
parents | 58810b63292b |
children |
rev | line source |
---|---|
470
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 """ |
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 Various routines to load/access MNIST data. |
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 """ |
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 from __future__ import absolute_import |
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 |
505
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
504
diff
changeset
|
6 import os |
470
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 import numpy |
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 |
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
9 from ..amat import AMat |
505
74b3e65f5f24
added smallNorb dataset, switched to PYLEARN_DATA_ROOT
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
504
diff
changeset
|
10 from .config import data_root |
470
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
11 |
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def head(n=10, path=None):
    """Load the first MNIST examples.

    Returns two arrays: x, y.  x has N rows of 784 columns.  Each row of x represents the
    28x28 grey-scale pixels in raster order.  y is a vector of N integers.  Each element y[i]
    is the label of the i'th row of x.

    :param n: number of leading examples to load.  None requests every example in the
        file (this is how `all()` calls this function); it is forwarded unchanged as
        AMat's `head` argument -- presumably AMat reads the whole file in that case,
        TODO confirm against AMat.
    :param path: location of the .amat file.  When None, the file
        'mnist/mnist_with_header.amat' under data_root() is used.

    :raises Exception: with message "failed to read MNIST data" and a (dat, cause) pair
        when the loaded data is missing or does not have the expected number of rows.
    """
    if path is None:
        path = os.path.join(data_root(), 'mnist', 'mnist_with_header.amat')

    dat = AMat(path=path, head=n)

    try:
        n_inputs = dat.input.shape[0]
        n_targets = dat.target.shape[0]
        if n is None:
            # No specific count was requested, so the only thing that can be checked is
            # that inputs and targets agree with each other.
            if n_inputs != n_targets:
                raise ValueError("input/target row counts differ", (n_inputs, n_targets))
        elif n_inputs != n or n_targets != n:
            # Explicit check instead of `assert` so it still runs under `python -O`.
            raise ValueError("expected %s rows" % n, (n_inputs, n_targets))
    except Exception as e:
        # Keep the original generic exception type and message for existing callers.
        raise Exception("failed to read MNIST data", (dat, e))

    # Labels are returned as a flat int64 vector with one entry per example.
    return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(n_targets)
bd937e845bbb
new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
31 |
475
11e0357f06f4
typo in MNIST.train_valid_test
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
471
diff
changeset
|
def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None):
    """Load MNIST and cut it into consecutive train / valid / test subsets.

    The first ntrain+nvalid+ntest examples are loaded with `head`.  The first ntrain
    of them form the training set, the next nvalid the validation set and the next
    ntest the test set.

    Returns three pairs: train, valid, test.  Each pair is (x, y) as returned by `head`,
    restricted to the corresponding range of rows.
    """
    # Row indices at which each subset ends (and the next one starts).
    train_stop = ntrain
    valid_stop = ntrain + nvalid
    test_stop = ntrain + nvalid + ntest

    inputs, labels = head(test_stop, path=path)

    train = (inputs[:train_stop], labels[:train_stop])
    valid = (inputs[train_stop:valid_stop], labels[train_stop:valid_stop])
    test = (inputs[valid_stop:test_stop], labels[valid_stop:test_stop])

    return train, valid, test
45b3eb429c15
added train_valid_test
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
470
diff
changeset
|
40 |
475
11e0357f06f4
typo in MNIST.train_valid_test
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
471
diff
changeset
|
def all(path=None):
    """Load MNIST examples without a row limit.

    Delegates to `head` with n=None and returns whatever `head` returns (the x, y pair).
    `path` is forwarded unchanged.
    """
    everything = head(n=None, path=path)
    return everything
11e0357f06f4
typo in MNIST.train_valid_test
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
471
diff
changeset
|
43 |
11e0357f06f4
typo in MNIST.train_valid_test
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
471
diff
changeset
|
44 |