Mercurial > pylearn
changeset 1408:2993b2a5c1af
Allow loading of UTLC transfer label data.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Fri, 28 Jan 2011 11:00:11 -0500 |
parents | f467c5457eff |
children | cedb48a300fc |
files | pylearn/datasets/utlc.py |
diffstat | 1 files changed, 35 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/pylearn/datasets/utlc.py Fri Jan 28 10:58:45 2011 -0500
+++ b/pylearn/datasets/utlc.py Fri Jan 28 11:00:11 2011 -0500
@@ -16,7 +16,14 @@
 import pylearn.io.filetensor as ft
 import config
 
-def load_ndarray_dataset(name, normalize=True):
+def load_ndarray_dataset(name, normalize=True, transfer=False):
+    """ Load the train,valid,test data for the dataset `name`
+        and return it in ndarray format.
+
+    :param normalize: If True, we normalize the train dataset
+                      before returning it
+    :param transfer: If True also return the transfer label
+    """
     assert name in ['avicenna','harry','rita','sylvester','ule']
     trname,vname,tename = [os.path.join(config.data_root(),
                                         'UTLC','filetensor',
@@ -59,9 +66,20 @@
             test = (test) / max
         else:
             raise Exception("This dataset don't have its normalization defined")
-    return train, valid, test
+    if transfer:
+        transfer = load_filetensor(os.path.join(config.data_root(),"UTLC","filetensor",name+"_transfer.ft"))
+        return train, valid, test, transfer
+    else:
+        return train, valid, test
 
-def load_sparse_dataset(name, normalize=True):
+def load_sparse_dataset(name, normalize=True, transfer=False):
+    """ Load the train,valid,test data for the dataset `name`
+        and return it in sparse format.
+
+    :param normalize: If True, we normalize the train dataset
+                      before returning it
+    :param transfer: If True also return the transfer label
+    """
     assert name in ['harry','terry','ule']
     trname,vname,tename = [os.path.join(config.data_root(),
                                         'UTLC','sparse',
@@ -92,7 +110,11 @@
             test = (test) / 300
         else:
             raise Exception("This dataset don't have its normalization defined")
-    return train, valid, test
+    if transfer:
+        transfer = load_sparse(os.path.join(config.data_root(),"UTLC","sparse",name+"_transfer.npy"))
+        return train, valid, test, transfer
+    else:
+        return train, valid, test
 
 def load_filetensor(fname):
     f = None
@@ -128,6 +150,11 @@
 if __name__ == '__main__':
     import numpy
     import scipy.sparse
+
+    # Test loading of transfer data
+    train, valid, test, transfer = load_ndarray_dataset("ule", normalize=True, transfer=True)
+    assert train.shape[0]==transfer.shape[0]
+
     for name in ['avicenna','harry','rita','sylvester','ule']:
         train, valid, test = load_ndarray_dataset(name, normalize=True)
         print name,"dtype, max, min, mean, std"
@@ -137,6 +164,10 @@
         assert isinstance(test, numpy.ndarray)
         assert train.shape[1]==test.shape[1]==valid.shape[1]
 
+    # Test loading of transfer data
+    train, valid, test, transfer = load_sparse_dataset("ule", normalize=True, transfer=True)
+    assert train.shape[0]==transfer.shape[0]
+
     for name in ['harry','terry','ule']:
         train, valid, test = load_sparse_dataset(name, normalize=True)
         nb_elem = numpy.prod(train.shape)
```