# HG changeset patch
# User Frederic Bastien
# Date 1296230411 18000
# Node ID 2993b2a5c1afb1cdb52d377a130c6728e4c7f05f
# Parent f467c5457eff55c72d1e6c61a4fdecdb04bff3ac
allow to load UTLC transfer label data.

diff -r f467c5457eff -r 2993b2a5c1af pylearn/datasets/utlc.py
--- a/pylearn/datasets/utlc.py	Fri Jan 28 10:58:45 2011 -0500
+++ b/pylearn/datasets/utlc.py	Fri Jan 28 11:00:11 2011 -0500
@@ -16,7 +16,14 @@
 import pylearn.io.filetensor as ft
 import config
 
-def load_ndarray_dataset(name, normalize=True):
+def load_ndarray_dataset(name, normalize=True, transfer=False):
+    """ Load the train, valid, test data for the dataset `name`
+    and return it in ndarray format.
+
+    :param normalize: If True, we normalize the train dataset
+                      before returning it
+    :param transfer: If True, also return the transfer labels
+    """
     assert name in ['avicenna','harry','rita','sylvester','ule']
     trname,vname,tename = [os.path.join(config.data_root(),
                                         'UTLC','filetensor',
@@ -59,9 +66,20 @@
         test = (test) / max
     else:
         raise Exception("This dataset don't have its normalization defined")
-    return train, valid, test
+    if transfer:
+        transfer = load_filetensor(os.path.join(config.data_root(),"UTLC","filetensor",name+"_transfer.ft"))
+        return train, valid, test, transfer
+    else:
+        return train, valid, test
 
-def load_sparse_dataset(name, normalize=True):
+def load_sparse_dataset(name, normalize=True, transfer=False):
+    """ Load the train, valid, test data for the dataset `name`
+    and return it in sparse format.
+
+    :param normalize: If True, we normalize the train dataset
+                      before returning it
+    :param transfer: If True, also return the transfer labels
+    """
     assert name in ['harry','terry','ule']
     trname,vname,tename = [os.path.join(config.data_root(),
                                         'UTLC','sparse',
@@ -92,7 +110,11 @@
         test = (test) / 300
     else:
         raise Exception("This dataset don't have its normalization defined")
-    return train, valid, test
+    if transfer:
+        transfer = load_sparse(os.path.join(config.data_root(),"UTLC","sparse",name+"_transfer.npy"))
+        return train, valid, test, transfer
+    else:
+        return train, valid, test
 
 def load_filetensor(fname):
     f = None
@@ -128,6 +150,11 @@
 if __name__ == '__main__':
     import numpy
     import scipy.sparse
+
+    # Test loading of transfer data
+    train, valid, test, transfer = load_ndarray_dataset("ule", normalize=True, transfer=True)
+    assert train.shape[0]==transfer.shape[0]
+
     for name in ['avicenna','harry','rita','sylvester','ule']:
         train, valid, test = load_ndarray_dataset(name, normalize=True)
         print name,"dtype, max, min, mean, std"
@@ -137,6 +164,10 @@
         assert isinstance(test, numpy.ndarray)
         assert train.shape[1]==test.shape[1]==valid.shape[1]
 
+    # Test loading of transfer data
+    train, valid, test, transfer = load_sparse_dataset("ule", normalize=True, transfer=True)
+    assert train.shape[0]==transfer.shape[0]
+
     for name in ['harry','terry','ule']:
         train, valid, test = load_sparse_dataset(name, normalize=True)
         nb_elem = numpy.prod(train.shape)