changeset 1430:931a19eeab5a

'allow to randomize the sparse valid/test utlc dataset at load time'
author Frederic Bastien <nouiz@nouiz.org>
date Tue, 08 Feb 2011 16:19:18 -0500
parents b0141efbf6a2
children dce602150b5f
files pylearn/datasets/utlc.py
diffstat 1 files changed, 18 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/datasets/utlc.py	Tue Feb 08 16:17:56 2011 -0500
+++ b/pylearn/datasets/utlc.py	Tue Feb 08 16:19:18 2011 -0500
@@ -107,13 +107,18 @@
     else:
         return train, valid, test
 
-def load_sparse_dataset(name, normalize=True, transfer=False):
+def load_sparse_dataset(name, normalize=True, transfer=False,
+                        randomize_valid=False,
+                        randomize_test=False):
     """ Load the train,valid,test data for the dataset `name`
         and return it in sparse format.
 
     :param normalize: If True, we normalize the train dataset
                       before returning it
     :param transfer: If True also return the transfer label
+    :param randomize_valid: see the same option of load_ndarray_dataset
+    :param randomize_test: see the same option of load_ndarray_dataset
+
     """
     assert name in ['harry','terry','ule']
     common = os.path.join('UTLC','sparse',name+'_')
@@ -123,6 +128,18 @@
     train = load_sparse(trname)
     valid = load_sparse(vname)
     test = load_sparse(tename)
+
+    # The data should already be in CSR format, which supports
+    # this type of indexing.
+    if randomize_valid:
+        rng = numpy.random.RandomState([1,2,3,4])
+        perm = rng.permutation(valid.shape[0])
+        valid = valid[perm]
+    if randomize_test:
+        rng = numpy.random.RandomState([1,2,3,4])
+        perm = rng.permutation(test.shape[0])
+        test = test[perm]
+
     if normalize:
         if name == "ule":
             train = train.astype(theano.config.floatX) / 255