view pylearn/datasets/shapeset1.py @ 617:5120bf7c4694

More complete version of shapeset1 dataset.
author lamblinp@ip03.m
date Sat, 17 Jan 2009 19:06:21 -0500
parents 16f91ca016b1
children 8aef46b42cb5
line wrap: on
line source

"""
Routines to load/access Shapeset1
"""

from __future__ import absolute_import

import os
import numpy

from ..io.amat import AMat
from .config import data_root
from .dataset import Dataset

def _head(path, n):
    """Read the first `n` examples of the AMat file at `path`.

    Returns a pair (input, target):
    `input` is the `input` attribute of the loaded AMat, returned as-is;
    `target` is the AMat's `target` converted to int64 and reshaped to a
    1-d vector with one entry per row.

    Raises Exception if the loaded input or target does not have exactly
    `n` rows (for instance because the file holds fewer than `n` examples).
    """
    dat = AMat(path=path, head=n)

    # Explicit comparisons instead of `assert`: assertions are stripped when
    # Python runs with -O, which would let a short read go unnoticed.
    try:
        complete = (dat.input.shape[0] == n and dat.target.shape[0] == n)
    except Exception:
        # Any failure while inspecting the shapes is reported as a failed
        # read, exactly as the assertion-based check did.
        complete = False

    if not complete:
        raise Exception("failed to read %i lines from file %s" % (n, path))

    return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0])


def head_train(n=10000):
    """Return the first `n` examples of the Shapeset1 training set.

    The result is a pair (x, y).
    x is a matrix with `n` rows and 1024 columns; each row holds the
    32x32 grey-scale pixels of one image in raster order.
    y is a vector of `n` integer labels between 0 and 2, where y[i] is
    the label of row i of x.
    """
    amat_name = 'shapeset1_1cspo_2_3.10000.train.shape.amat'
    return _head(os.path.join(data_root(), 'shapeset1', amat_name), n)

def head_valid(n=5000):
    """Return the first `n` examples of the Shapeset1 validation set.

    The result is a pair (x, y).
    x is a matrix with `n` rows and 1024 columns; each row holds the
    32x32 grey-scale pixels of one image in raster order.
    y is a vector of `n` integer labels between 0 and 2, where y[i] is
    the label of row i of x.
    """
    amat_name = 'shapeset1_1cspo_2_3.5000.valid.shape.amat'
    return _head(os.path.join(data_root(), 'shapeset1', amat_name), n)

def head_test(n=5000):
    """Return the first `n` examples of the Shapeset1 test set.

    The result is a pair (x, y).
    x is a matrix with `n` rows and 1024 columns; each row holds the
    32x32 grey-scale pixels of one image in raster order.
    y is a vector of `n` integer labels between 0 and 2, where y[i] is
    the label of row i of x.
    """
    amat_name = 'shapeset1_1cspo_2_3.5000.test.shape.amat'
    return _head(os.path.join(data_root(), 'shapeset1', amat_name), n)

def train_valid_test(ntrain=10000, nvalid=5000, ntest=5000):
    """Load Shapeset1 and bundle its three splits into one Dataset.

    ntrain, nvalid and ntest are the numbers of leading examples to read
    from the training, validation and test files respectively.

    Returns a Dataset whose `train`, `valid` and `test` attributes are
    Dataset.Obj instances carrying `x` (inputs) and `y` (labels), and
    which also records `n_classes` (3) and `img_shape` ((32, 32)).
    """
    train_x, train_y = head_train(n=ntrain)
    valid_x, valid_y = head_valid(n=nvalid)
    # Use the `ntest` argument here; the bare name `test` is not defined
    # and raised NameError on every call.
    test_x,  test_y  = head_test(n=ntest)

    rval = Dataset()
    rval.train = Dataset.Obj(x = train_x, y = train_y)
    rval.valid = Dataset.Obj(x = valid_x, y = valid_y)
    rval.test  = Dataset.Obj(x = test_x,  y = test_y)

    rval.n_classes = 3
    rval.img_shape = (32, 32)

    return rval