Mercurial > pylearn
changeset 1465:490616262500
Adding datasets used in Hugo's NADE paper. Datasets have been converted from
.libsvm format and stored under $PYLEARN_DATA_ROOT/larocheh, using numpy.save in
uint8 format (data is all binary-valued).
author | gdesjardins |
---|---|
date | Wed, 20 Apr 2011 16:30:48 -0400 |
parents | c9179b0ed002 |
children | 4d6d6d4eab9e |
files | pylearn/datasets/nade.py |
diffstat | 1 files changed, 59 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# pylearn/datasets/nade.py
import os
import numpy

from pylearn.io.pmat import PMat
from pylearn.datasets.config import data_root # config
from pylearn.datasets.dataset import Dataset

# Dataset names accepted by load_dataset(); each one is a sub-directory of
# <data_root()>/larocheh.
_DATASET_NAMES = ('adult', 'binarized_mnist', 'mnist', 'connect4', 'dna',
                  'mushrooms', 'nips', 'ocr_letters', 'rcv1', 'web')


def _load_split(path, split):
    """
    Load one split ('train', 'valid' or 'test') from a dataset directory.

    :param path: directory containing the '<split>_data.npy' files
    :param split: prefix of the files to load, e.g. 'train'
    :return: Dataset.Obj whose `x` is the array stored in '<split>_data.npy'
             and whose `y` is the array stored in '<split>_labels.npy', or
             None when that labels file does not exist.
    """
    x = numpy.load(os.path.join(path, '%s_data.npy' % split))

    # Labels are optional: the labels file is only read when it is present.
    y_fname = os.path.join(path, '%s_labels.npy' % split)
    y = numpy.load(y_fname) if os.path.exists(y_fname) else None

    return Dataset.Obj(x=x, y=y)


def load_dataset(name):
    """
    Various datasets which were used in the following paper.
    The Neural Autoregressive Distribution Estimator
    Hugo Larochelle and Iain Murray, AISTATS 2011

    :param name: string specifying which dataset to load; must be one of
                 'adult', 'binarized_mnist', 'mnist', 'connect4', 'dna',
                 'mushrooms', 'nips', 'ocr_letters', 'rcv1', 'web'
    :return: Dataset object
        dataset.train.x: matrix of training data of shape (num_examples, ndim)
        dataset.train.y: vector of training labels of length num_examples.
                         Labels are integer valued and represent the class
                         each example belongs to. None if the dataset
                         directory has no labels file for this split.
        dataset.valid.x: idem for validation data
        dataset.valid.y: idem for validation data
        dataset.test.x: idem for test data
        dataset.test.y: idem for test data
    :raise AssertionError: if `name` is not one of the known dataset names

    WARNING: class labels are integer-valued instead of 1-of-n encoding !
    """
    assert name in _DATASET_NAMES, (
        "unknown dataset %r; expected one of %s"
        % (name, ', '.join(_DATASET_NAMES)))

    path = os.path.join(data_root(), 'larocheh', name)

    rval = Dataset()
    rval.train = _load_split(path, 'train')
    rval.valid = _load_split(path, 'valid')
    rval.test = _load_split(path, 'test')

    return rval