changeset 1428:3823dbfff6cf

add parameter to randomize the valid and test data.
author Frederic Bastien <nouiz@nouiz.org>
date Tue, 08 Feb 2011 12:57:15 -0500
parents a36d3a406c59
children b0141efbf6a2
files pylearn/datasets/utlc.py
diffstat 1 files changed, 18 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/datasets/utlc.py	Tue Feb 08 12:34:07 2011 -0500
+++ b/pylearn/datasets/utlc.py	Tue Feb 08 12:57:15 2011 -0500
@@ -16,13 +16,15 @@
 import pylearn.io.filetensor as ft
 import config
 
-def load_ndarray_dataset(name, normalize=True, transfer=False, normalize_on_the_fly=False):
+def load_ndarray_dataset(name, normalize=True, transfer=False,
+                         normalize_on_the_fly=False, randomize_valid=False,
+                         randomize_test=False):
     """ Load the train,valid,test data for the dataset `name`
         and return it in ndarray format.
 
     :param normalize: If True, we normalize the train dataset
                       before returning it
-    :param transfer: If True also return the transfer label
+    :param transfer: If True also return the transfer label (currently only available for ule)
     :param normalize_on_the_fly: If True, we return a Theano Variable that will give
                                  as output the normalized value. If the user only
                                  take a subtensor of that variable, Theano optimization
@@ -33,6 +35,12 @@
                                  This is usefull to have the original data in its original
                                  dtype in memory to same memory. Especialy usefull to
                                  be able to use rita and harry with 1G per jobs.
+    :param randomize_valid: If True, randomize the order of the valid set.
+                            The same fixed random order is used on every call.
+                            If False, return in the same order as downloaded on the web.
+    :param randomize_test: If True, randomize the order of the test set.
+                           The same fixed random order is used on every call.
+                           If False, return in the same order as downloaded on the web.
     """
     assert not (normalize and normalize_on_the_fly), "Can't normalize in 2 way at the same time!"
 
@@ -45,6 +53,14 @@
     train = load_filetensor(trname)
     valid = load_filetensor(vname)
     test = load_filetensor(tename)
+    if randomize_valid:
+        rng = numpy.random.RandomState([1,2,3,4])
+        perm = rng.permutation(valid.shape[0])
+        valid = valid[perm]
+    if randomize_test:
+        rng = numpy.random.RandomState([1,2,3,4])
+        perm = rng.permutation(test.shape[0])
+        test = test[perm]
 
     if normalize or normalize_on_the_fly:
         if normalize_on_the_fly: