# HG changeset patch
# User gdesjardins
# Date 1303331448 14400
# Node ID 4906162625000017c09454c66f30e63651da9b5a
# Parent  c9179b0ed002ad7682de4ec6dfd0db0cd7ce6913
#
# Adding datasets used in Hugo's NADE paper.
#
# Datasets have been converted from .libsvm format and stored under
# $PYLEARN_DATA_ROOT/larocheh, using numpy.save in uint8 format (data is all
# binary-valued).
#
# diff -r c9179b0ed002 -r 490616262500 pylearn/datasets/nade.py
# (new file: pylearn/datasets/nade.py, Wed Apr 20 16:30:48 2011 -0400)
import os
import numpy

from pylearn.io.pmat import PMat
from pylearn.datasets.config import data_root # config
from pylearn.datasets.dataset import Dataset

# Names of the dataset directories that load_dataset() accepts.
_DATASET_NAMES = ('adult', 'binarized_mnist', 'mnist', 'connect4', 'dna',
                  'mushrooms', 'nips', 'ocr_letters', 'rcv1', 'web')


def _load_split(path, split):
    """
    Load one split ('train', 'valid' or 'test') from the directory `path`.

    :param path: directory containing the '<split>_data.npy' file and,
                 optionally, the '<split>_labels.npy' file
    :param split: prefix of the files to load
    :return: Dataset.Obj whose `x` is the loaded data array and whose `y` is
             the loaded labels array, or None if there is no labels file
    """
    x = numpy.load(os.path.join(path, '%s_data.npy' % split))

    # Labels are optional: a missing labels file yields y = None.
    y_fname = os.path.join(path, '%s_labels.npy' % split)
    if os.path.exists(y_fname):
        y = numpy.load(y_fname)
    else:
        y = None

    return Dataset.Obj(x=x, y=y)


def load_dataset(name):
    """
    Various datasets which were used in the following paper.
    The Neural Autoregressive Distribution Estimator
    Hugo Larochelle and Iain Murray, AISTATS 2011

    The files are read from the 'larocheh/<name>' directory under data_root().

    :param name: string specifying which dataset to load. Must be one of
                 'adult', 'binarized_mnist', 'mnist', 'connect4', 'dna',
                 'mushrooms', 'nips', 'ocr_letters', 'rcv1', 'web'.
    :return: Dataset object
        dataset.train.x: matrix of training data of shape (num_examples, ndim)
        dataset.train.y: vector of training labels of length num_examples.
                         Labels are integer valued and represent the class
                         the example belongs to. None if the dataset
                         directory has no labels file for this split.
        dataset.valid.x: idem for validation data
        dataset.valid.y: idem for validation data
        dataset.test.x: idem for test data
        dataset.test.y: idem for test data
    :raises AssertionError: if `name` is not one of the known datasets

    WARNING: class labels are integer-valued instead of 1-of-n encoding !
    """
    # Raised explicitly rather than through an `assert` statement so that the
    # check is not stripped under `python -O`. The exception type is kept as
    # AssertionError for backward compatibility with existing callers.
    if name not in _DATASET_NAMES:
        raise AssertionError('unknown dataset %r; expected one of: %s'
                             % (name, ', '.join(_DATASET_NAMES)))

    path = os.path.join(data_root(), 'larocheh', name)

    rval = Dataset()
    rval.train = _load_split(path, 'train')
    rval.valid = _load_split(path, 'valid')
    rval.test = _load_split(path, 'test')

    return rval