comparison pylearn/datasets/MNIST.py @ 794:951272679910

Get the MNIST data from the pmat file instead of the amat file.
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Wed, 15 Jul 2009 13:18:55 -0400
parents 6d927441a38f
children f30bb746f279
comparison
equal deleted inserted replaced
793:4e70f509ec01 794:951272679910
4 from __future__ import absolute_import 4 from __future__ import absolute_import
5 5
6 import os 6 import os
7 import numpy 7 import numpy
8 8
9 from ..io.amat import AMat 9 from ..io.pmat import PMat
10 from .config import data_root # config 10 from .config import data_root # config
11 from .dataset import Dataset 11 from .dataset import Dataset
12 12
13 def head(n=10, path=None): 13 def head(n=10, path=None):
14 """Load the first MNIST examples. 14 """Load the first MNIST examples.
16 Returns two matrices: x, y. x has N rows of 784 columns. Each row of x represents the 16 Returns two matrices: x, y. x has N rows of 784 columns. Each row of x represents the
17 28x28 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] 17 28x28 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i]
18 is the label of the i'th row of x. 18 is the label of the i'th row of x.
19 19
20 """ 20 """
21 path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path 21 path = os.path.join(data_root(), 'mnist','mnist_all.pmat') if path is None else path
22 22
23 dat = AMat(path=path, head=n) 23 dat = PMat(fname=path)
24 24
25 try: 25 rows=dat.getRows(0,n)
26 assert dat.input.shape[0] == n
27 assert dat.target.shape[0] == n
28 except Exception , e:
29 raise Exception("failed to read MNIST data", (dat, e))
30 26
31 return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) 27 return rows[:,0:-1], numpy.asarray(rows[:,-1], dtype='int64')
32 28
33 def all(path=None): 29 def all(path=None):
34 return head(n=None, path=path) 30 return head(n=None, path=path)
35 31
36 def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): 32 def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None):