pylearn: pylearn/datasets/utlc.py comparison

comparison pylearn/datasets/utlc.py @ 1428:3823dbfff6cf

add parameter to randomize the valid and test data.

author	Frederic Bastien <nouiz@nouiz.org>
date	Tue, 08 Feb 2011 12:57:15 -0500
parents	a36d3a406c59
children	b0141efbf6a2

comparison

equal deleted inserted replaced

-:a36d3a406c59
+:3823dbfff6cf
 import theano
 import pylearn.io.filetensor as ft
 import config
-def load_ndarray_dataset(name, normalize=True, transfer=False, normalize_on_the_fly=False):
+def load_ndarray_dataset(name, normalize=True, transfer=False,
+normalize_on_the_fly=False, randomize_valid=False,
+randomize_test=False):
 """ Load the train,valid,test data for the dataset `name`
 and return it in ndarray format.
 :param normalize: If True, we normalize the train dataset
 before returning it
-:param transfer: If True also return the transfer label
+:param transfer: If True, also return the transfer label (currently only available for ule)
 :param normalize_on_the_fly: If True, we return a Theano Variable that will give
 as output the normalized value. If the user only
 takes a subtensor of that variable, Theano optimization
 should ensure that we only have in memory the subtensor
 portion that is computed in normalized form. We store
 the original data in shared memory in its original dtype.
 This is useful to have the original data in its original
 dtype in memory to save memory. Especially useful to
 be able to use rita and harry with 1G per job.
+:param randomize_valid: Do we randomize the order of the valid set?
+We always use the same random order.
+If False, return in the same order as downloaded from the web.
+:param randomize_test: Do we randomize the order of the test set?
+We always use the same random order.
+If False, return in the same order as downloaded from the web.
 """
 assert not (normalize and normalize_on_the_fly), "Can't normalize in 2 way at the same time!"
 assert name in ['avicenna','harry','rita','sylvester','ule']
 common = os.path.join('UTLC','filetensor',name+'_')
 for subset in ['train','valid','test']]
 train = load_filetensor(trname)
 valid = load_filetensor(vname)
 test = load_filetensor(tename)
+if randomize_valid:
+rng = numpy.random.RandomState([1,2,3,4])
+perm = rng.permutation(valid.shape[0])
+valid = valid[perm]
+if randomize_test:
+rng = numpy.random.RandomState([1,2,3,4])
+perm = rng.permutation(test.shape[0])
+test = test[perm]
 if normalize or normalize_on_the_fly:
 if normalize_on_the_fly:
 train = theano.shared(train, borrow=True, name=name+"_train")
 valid = theano.shared(valid, borrow=True, name=name+"_valid")

Mercurial > pylearn

comparison pylearn/datasets/utlc.py @ 1428:3823dbfff6cf