Mercurial > pylearn
diff pylearn/datasets/MNIST.py @ 537:b054271b2504
new file structure layout, factories, etc.
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 12 Nov 2008 21:57:54 -0500 |
parents | datasets/MNIST.py@58810b63292b |
children | 16f91ca016b1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/MNIST.py Wed Nov 12 21:57:54 2008 -0500 @@ -0,0 +1,64 @@ +""" +Various routines to load/access MNIST data. +""" +from __future__ import absolute_import + +import os +import numpy + +from ..io.amat import AMat +from .config import data_root +from .dataset import dataset_factory, Dataset + +def head(n=10, path=None): + """Load the first MNIST examples. + + Returns two matrices: x, y. x has N rows of 784 columns. Each row of x represents the + 28x28 grey-scale pixels in raster order. y is a vector of N integers. Each element y[i] + is the label of the i'th row of x. + + """ + path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path + + dat = AMat(path=path, head=n) + + try: + assert dat.input.shape[0] == n + assert dat.target.shape[0] == n + except Exception , e: + raise Exception("failed to read MNIST data", (dat, e)) + + return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) + +def all(path=None): + return head(n=None, path=path) + +def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): + all_x, all_targ = head(ntrain+nvalid+ntest, path=path) + + rval = Dataset() + + rval.train = Dataset.Obj(x=all_x[0:ntrain], + y=all_targ[0:ntrain]) + rval.valid = Dataset.Obj(x=all_x[ntrain:ntrain+nvalid], + y=all_targ[ntrain:ntrain+nvalid]) + rval.test = Dataset.Obj(x=all_x[ntrain+nvalid:ntrain+nvalid+ntest], + y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest]) + + rval.n_classes = 10 + return rval + + + +@dataset_factory('MNIST') +def mnist_factory(variant="", ntrain=None, nvalid=None, ntest=None): + if variant=="": + return train_valid_test() + elif variant=="1k": + return train_valid_test(ntrain=1000, nvalid=200, ntest=200) + elif variant=="10k": + return train_valid_test(ntrain=10000, nvalid=2000, ntest=2000) + elif variant=="custom": + return train_valid_test(ntrain=ntrain, nvalid=nvalid, ntest=ntest) + else: + raise Exception('Unknown MNIST variant', variant)