changeset 1408:2993b2a5c1af

allow loading of UTLC transfer label data.
author Frederic Bastien <nouiz@nouiz.org>
date Fri, 28 Jan 2011 11:00:11 -0500
parents f467c5457eff
children cedb48a300fc
files pylearn/datasets/utlc.py
diffstat 1 file changed, 35 insertions(+), 4 deletions(-)
--- a/pylearn/datasets/utlc.py	Fri Jan 28 10:58:45 2011 -0500
+++ b/pylearn/datasets/utlc.py	Fri Jan 28 11:00:11 2011 -0500
@@ -16,7 +16,14 @@
 import pylearn.io.filetensor as ft
 import config
 
-def load_ndarray_dataset(name, normalize=True):
+def load_ndarray_dataset(name, normalize=True, transfer=False):
+    """ Load the train,valid,test data for the dataset `name`
+        and return it in ndarray format.
+    
+    :param normalize: If True, we normalize the train dataset
+                      before returning it
+    :param transfer: If True also return the transfer label
+    """
     assert name in ['avicenna','harry','rita','sylvester','ule']
     trname,vname,tename = [os.path.join(config.data_root(),
                                         'UTLC','filetensor',
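As documented in the new docstring, the transfer flag only changes the arity of the return value. A minimal usage sketch, assuming the UTLC "ule" files are installed under config.data_root(); the dataset name and variable names are illustrative only:

    from pylearn.datasets.utlc import load_ndarray_dataset

    # Old behaviour is unchanged: a 3-tuple of ndarrays.
    train, valid, test = load_ndarray_dataset("ule", normalize=True)

    # With transfer=True a fourth array of transfer labels is returned; the
    # test added below only checks that it has as many rows as train.
    train, valid, test, transfer = load_ndarray_dataset("ule", normalize=True,
                                                        transfer=True)
    assert train.shape[0] == transfer.shape[0]
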
@@ -59,9 +66,20 @@
             test = (test) / max  
         else:
             raise Exception("This dataset don't have its normalization defined")
-    return train, valid, test
+    if transfer:
+        transfer_labels = load_filetensor(os.path.join(config.data_root(),"UTLC","filetensor",name+"_transfer.ft"))
+        return train, valid, test, transfer_labels
+    else:
+        return train, valid, test
 
-def load_sparse_dataset(name, normalize=True):
+def load_sparse_dataset(name, normalize=True, transfer=False):
+    """ Load the train,valid,test data for the dataset `name`
+        and return it in sparse format.
+    
+    :param normalize: If True, we normalize the train dataset
+                      before returning it
+    :param transfer: If True also return the transfer label
+    """
     assert name in ['harry','terry','ule']
     trname,vname,tename = [os.path.join(config.data_root(),
                                         'UTLC','sparse',
@@ -92,7 +110,11 @@
             test = (test) / 300
         else:
             raise Exception("This dataset don't have its normalization defined")
-    return train, valid, test
+    if transfer:
+        transfer_labels = load_sparse(os.path.join(config.data_root(),"UTLC","sparse",name+"_transfer.npy"))
+        return train, valid, test, transfer_labels
+    else:
+        return train, valid, test
     
 def load_filetensor(fname):
     f = None
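The sparse loader keeps the same calling convention as the ndarray one: a 3-tuple when transfer is False, a 4-tuple when it is True. Callers that prefer a single return shape can wrap it; the helper below is a hypothetical sketch, not part of pylearn:

    from pylearn.datasets.utlc import load_sparse_dataset

    def load_sparse_with_optional_transfer(name, normalize=True, transfer=False):
        # Hypothetical convenience wrapper: always return a 4-tuple so callers
        # do not have to branch on tuple length; the last slot is None when
        # transfer labels were not requested.
        if transfer:
            return load_sparse_dataset(name, normalize=normalize, transfer=True)
        train, valid, test = load_sparse_dataset(name, normalize=normalize)
        return train, valid, test, None
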
@@ -128,6 +150,11 @@
 if __name__ == '__main__':
     import numpy
     import scipy.sparse
+
+    # Test loading of transfer data
+    train, valid, test, transfer = load_ndarray_dataset("ule", normalize=True, transfer=True)
+    assert train.shape[0]==transfer.shape[0]
+
     for name in ['avicenna','harry','rita','sylvester','ule']:
         train, valid, test = load_ndarray_dataset(name, normalize=True)
         print name,"dtype, max, min, mean, std"
@@ -137,6 +164,10 @@
         assert isinstance(test, numpy.ndarray)
         assert train.shape[1]==test.shape[1]==valid.shape[1]
 
+    # Test loading of transfer data
+    train, valid, test, transfer = load_sparse_dataset("ule", normalize=True, transfer=True)
+    assert train.shape[0]==transfer.shape[0]
+
     for name in ['harry','terry','ule']:
         train, valid, test = load_sparse_dataset(name, normalize=True)
         nb_elem = numpy.prod(train.shape)
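
The transfer files the new code reads sit next to the existing UTLC data. A quick presence check, assuming config resolves to pylearn.datasets.config (the module imported at the top of utlc.py); whether every dataset actually ships a transfer file is not stated in the diff:

    import os
    from pylearn.datasets import config  # assumed location of the config module

    name = "ule"  # example dataset name from the lists above
    paths = [os.path.join(config.data_root(), "UTLC", "filetensor",
                          name + "_transfer.ft"),
             os.path.join(config.data_root(), "UTLC", "sparse",
                          name + "_transfer.npy")]
    for p in paths:
        print p, os.path.exists(p)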