Mercurial > pylearn
view pylearn/datasets/MNIST.py @ 658:6d927441a38f
Added pylearn.datasets.MNIST.first_10 and pylearn.datasets.MNIST.first_100. They are useful for testing with a small dataset.
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Tue, 03 Mar 2009 11:37:56 -0500 |
parents | d3d8f5a17909 |
children | 951272679910 a7dc8b28f4bc |
line wrap: on
line source
""" Various routines to load/access MNIST data. """ from __future__ import absolute_import import os import numpy from ..io.amat import AMat from .config import data_root # config from .dataset import Dataset def head(n=10, path=None): """Load the first MNIST examples. Returns two matrices: x, y. x has N rows of 784 columns. Each row of x represents the 28x28 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] is the label of the i'th row of x. """ path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path dat = AMat(path=path, head=n) try: assert dat.input.shape[0] == n assert dat.target.shape[0] == n except Exception , e: raise Exception("failed to read MNIST data", (dat, e)) return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) def all(path=None): return head(n=None, path=path) def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): all_x, all_targ = head(ntrain+nvalid+ntest, path=path) rval = Dataset() rval.train = Dataset.Obj(x=all_x[0:ntrain], y=all_targ[0:ntrain]) rval.valid = Dataset.Obj(x=all_x[ntrain:ntrain+nvalid], y=all_targ[ntrain:ntrain+nvalid]) rval.test = Dataset.Obj(x=all_x[ntrain+nvalid:ntrain+nvalid+ntest], y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest]) rval.n_classes = 10 rval.img_shape = (28,28) return rval def full(): return train_valid_test() #usefull for test, keep it def first_10(): return train_valid_test(ntrain=10, nvalid=10, ntest=10) #usefull for test, keep it def first_100(): return train_valid_test(ntrain=100, nvalid=100, ntest=100) def first_1k(): return train_valid_test(ntrain=1000, nvalid=200, ntest=200) def first_10k(): return train_valid_test(ntrain=10000, nvalid=2000, ntest=2000) #old method from factory idea days... 
# delete when ready -JB20090119
def mnist_factory(variant="", ntrain=None, nvalid=None, ntest=None):
    """Legacy factory-style entry point that selects an MNIST split by name.

    :param variant: ``""`` for the full split, ``"1k"`` or ``"10k"`` for the
        reduced splits, or ``"custom"`` to forward `ntrain`, `nvalid` and
        `ntest` to ``train_valid_test``.
    :param ntrain: train size, only used when ``variant == "custom"``.
    :param nvalid: validation size, only used when ``variant == "custom"``.
    :param ntest: test size, only used when ``variant == "custom"``.
    :raises Exception: ``('Unknown MNIST variant', variant)`` for any other
        value of `variant`.
    """
    if variant == "":
        return full()
    if variant == "1k":
        return first_1k()
    if variant == "10k":
        return first_10k()
    if variant == "custom":
        return train_valid_test(ntrain=ntrain, nvalid=nvalid, ntest=ntest)
    # None of the known names matched.
    raise Exception('Unknown MNIST variant', variant)