view datasets/MNIST.py @ 504:19ab9ce916e3

slightly more sophisticated system for finding the mnist data
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 29 Oct 2008 11:38:49 -0400
parents 11e0357f06f4
children 74b3e65f5f24
line wrap: on
line source

"""
Various routines to load/access MNIST data.
"""
from __future__ import absolute_import

import numpy

from ..amat import AMat

from .config import MNIST_amat

def head(n=10, path=None):
    """Load the first MNIST examples.

    Returns two matrices: x, y.  x has N rows of 784 columns.  Each row of x represents the
    28x28 grey-scale pixels in raster order.  y is a vector of N integers.  Each element y[i]
    is the label of the i'th row of x.

    :param n: number of leading examples requested from the AMat reader.  None is forwarded
        unchanged (this is how `all` calls this function); in that case there is no expected
        count, so only the agreement between input and target row counts is checked.
    :param path: location of the MNIST amat file.  Defaults to `MNIST_amat` from the config
        module when None.

    :raises Exception: with args ("failed to read MNIST data", (dat, detail)) when the loaded
        object does not expose `input`/`target` arrays with a leading dimension, or when the
        number of rows read does not match what was expected.

    """
    path = MNIST_amat if path is None else path

    dat = AMat(path=path, head=n)

    # Only the attribute access is guarded: anything going wrong here means the reader did
    # not produce the arrays we need.
    try:
        n_inputs = dat.input.shape[0]
        n_targets = dat.target.shape[0]
    except Exception as e:
        raise Exception("failed to read MNIST data", (dat, e))

    # Explicit comparison rather than `assert`, so the check survives `python -O`.
    # With n=None the caller gave no count, so fall back to requiring that inputs and
    # targets agree with each other.
    expected = n_inputs if n is None else n
    if n_inputs != expected or n_targets != expected:
        detail = ValueError("expected %s rows, got %s inputs and %s targets"
                            % (expected, n_inputs, n_targets))
        raise Exception("failed to read MNIST data", (dat, detail))

    # Flatten the targets into a 1-d integer label vector.
    return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(n_targets)

def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None):
    """Split the leading MNIST examples into train / validation / test sets.

    The first ntrain+nvalid+ntest examples are fetched with a single call to `head` and then
    cut into three consecutive, non-overlapping ranges, in that order.

    Returns three (x, y) pairs: train, valid, test.

    """
    # Cut points between the three consecutive ranges.
    valid_start = ntrain
    test_start = valid_start + nvalid
    stop = test_start + ntest

    inputs, labels = head(stop, path=path)

    def window(lo, hi):
        # Matching slice of the examples and of their labels.
        return inputs[lo:hi], labels[lo:hi]

    return window(0, valid_start), window(valid_start, test_start), window(test_start, stop)

def all(path=None):
    """Load MNIST through `head` without giving an example count.

    Forwards n=None to `head` and returns its (x, y) result unchanged.

    NOTE(review): this function's name shadows the builtin `all` inside this module.

    """
    whole_set = head(path=path, n=None)
    return whole_set