# HG changeset patch # User Frederic Bastien # Date 1297199958 18000 # Node ID 931a19eeab5af6492a13648b16544386074558b5 # Parent b0141efbf6a21c36b49f95bd2cfbe17b86971290 'allow to randomize the sparse valid/test utlc dataset at load time' diff -r b0141efbf6a2 -r 931a19eeab5a pylearn/datasets/utlc.py --- a/pylearn/datasets/utlc.py Tue Feb 08 16:17:56 2011 -0500 +++ b/pylearn/datasets/utlc.py Tue Feb 08 16:19:18 2011 -0500 @@ -107,13 +107,18 @@ else: return train, valid, test -def load_sparse_dataset(name, normalize=True, transfer=False): +def load_sparse_dataset(name, normalize=True, transfer=False, + randomize_valid=False, + randomize_test=False): """ Load the train,valid,test data for the dataset `name` and return it in sparse format. :param normalize: If True, we normalize the train dataset before returning it :param transfer: If True also return the transfer label + :param randomize_valid: see same option for load_ndarray_dataset + :param randomize_test: see same option for load_ndarray_dataset + """ assert name in ['harry','terry','ule'] common = os.path.join('UTLC','sparse',name+'_') @@ -123,6 +128,18 @@ train = load_sparse(trname) valid = load_sparse(vname) test = load_sparse(tename) + + # Data should already be in csr format that support + # this type of indexing. + if randomize_valid: + rng = numpy.random.RandomState([1,2,3,4]) + perm = rng.permutation(valid.shape[0]) + valid = valid[perm] + if randomize_test: + rng = numpy.random.RandomState([1,2,3,4]) + perm = rng.permutation(test.shape[0]) + test = test[perm] + if normalize: if name == "ule": train = train.astype(theano.config.floatX) / 255