view pylearn/datasets/MNIST.py @ 627:ec27e19bb6eb

moving away from mnist_factory
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 19 Jan 2009 18:10:48 -0500
parents 16f91ca016b1
children d3d8f5a17909
line wrap: on
line source

"""
Various routines to load/access MNIST data.
"""
from __future__ import absolute_import

import os
import numpy

from ..io.amat import AMat
from .config import data_root
from .dataset import Dataset

def head(n=10, path=None):
    """Load the first MNIST examples.

    Returns two matrices: x, y.  x has N rows of 784 columns.  Each row of x represents the
    28x28 grey-scale pixels in raster order.  y is a vector of N integers.  Each element y[i]
    is the label of the i'th row of x.

    n is the number of leading examples to read and is forwarded to AMat as `head`.  n may be
    None (this is how `all` calls this function); in that case the number of rows is not
    compared against n, only inputs and targets are required to agree with each other.
    NOTE(review): presumably AMat treats head=None as "no limit" -- confirm against AMat.

    path is the .amat file to read.  When None, it defaults to
    <data_root()>/mnist/mnist_with_header.amat.

    Raises Exception("failed to read MNIST data", (dat, cause)) when the loaded data cannot
    be inspected or does not have the expected number of rows.

    """
    if path is None:
        path = os.path.join(data_root(), 'mnist', 'mnist_with_header.amat')

    dat = AMat(path=path, head=n)

    try:
        n_input = dat.input.shape[0]
        n_target = dat.target.shape[0]
        # Raise explicitly rather than assert: asserts are removed under `python -O`, which
        # would silently disable this validation.
        # The comparison against n is skipped for n=None, otherwise `all` could never succeed.
        if n_input != n_target or (n is not None and n_input != n):
            raise ValueError('row count mismatch: requested=%s, inputs=%s, targets=%s'
                    % (n, n_input, n_target))
    except Exception as e:
        # Same outer exception type and message as before, so existing callers are unaffected.
        raise Exception("failed to read MNIST data", (dat, e))

    return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(n_target)

def all(path=None):
    """Load MNIST examples through `head` without an example limit (n=None).

    Returns whatever `head` returns for n=None and the given path.

    NOTE(review): this relies on `head` accepting n=None; confirm that `head` does not
    compare the number of loaded rows against n in that case.
    The name shadows the builtin `all` inside this module; it is kept as-is because it is
    part of the module's public interface.
    """
    return head(None, path)

def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None):
    """Build a Dataset with consecutive train / valid / test splits of MNIST.

    The first ntrain+nvalid+ntest examples are read with `head` (from `path`, or from the
    default location chosen by `head` when path is None) and cut, in order, into:

      rval.train -- the first ntrain examples
      rval.valid -- the next nvalid examples
      rval.test  -- the next ntest examples

    Each split is a Dataset.Obj with attributes x (inputs) and y (labels).  The returned
    Dataset also carries n_classes = 10 and img_shape = (28, 28).
    """
    # Boundaries of the three consecutive slices.
    valid_start = ntrain
    test_start = valid_start + nvalid
    test_stop = test_start + ntest

    inputs, labels = head(test_stop, path=path)

    dataset = Dataset()
    splits = (
        ('train', 0, valid_start),
        ('valid', valid_start, test_start),
        ('test', test_start, test_stop),
    )
    for split_name, lo, hi in splits:
        setattr(dataset, split_name, Dataset.Obj(x=inputs[lo:hi], y=labels[lo:hi]))

    dataset.n_classes = 10
    dataset.img_shape = (28, 28)
    return dataset


def full():
    """Return the Dataset built by `train_valid_test` with all of its default arguments."""
    dataset = train_valid_test()
    return dataset

def first_1k():
    """Return a small MNIST Dataset: 1000 train, 200 valid and 200 test examples.

    Thin wrapper around `train_valid_test`; the data file is the one it uses by default.
    """
    return train_valid_test(1000, 200, 200)

def first_10k():
    """Return a medium MNIST Dataset: 10000 train, 2000 valid and 2000 test examples.

    Thin wrapper around `train_valid_test`; the data file is the one it uses by default.
    """
    return train_valid_test(10000, 2000, 2000)

#old method from factory idea days... delete when ready -JB20090119
def mnist_factory(variant="", ntrain=None, nvalid=None, ntest=None):
    """Legacy entry point: build an MNIST Dataset selected by name.

    variant selects the dataset:
      ""       -- full()
      "1k"     -- first_1k()
      "10k"    -- first_10k()
      "custom" -- train_valid_test() with the given ntrain / nvalid / ntest

    ntrain, nvalid and ntest are only used by the "custom" variant.  A size left as None is
    not forwarded, so the corresponding default of train_valid_test applies.

    Raises Exception('Unknown MNIST variant', variant) for any other variant.
    """
    if variant == "":
        return full()
    if variant == "1k":
        return first_1k()
    if variant == "10k":
        return first_10k()
    if variant == "custom":
        # Forward only the sizes the caller actually gave.  Passing None through would make
        # train_valid_test fail with a TypeError while adding the sizes together.
        requested = (('ntrain', ntrain), ('nvalid', nvalid), ('ntest', ntest))
        sizes = dict((key, value) for key, value in requested if value is not None)
        return train_valid_test(**sizes)
    raise Exception('Unknown MNIST variant', variant)