Mercurial > pylearn
changeset 1430:931a19eeab5a
'Allow randomizing the sparse valid/test UTLC datasets at load time'
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Tue, 08 Feb 2011 16:19:18 -0500 |
parents | b0141efbf6a2 |
children | dce602150b5f |
files | pylearn/datasets/utlc.py |
diffstat | 1 files changed, 18 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/pylearn/datasets/utlc.py Tue Feb 08 16:17:56 2011 -0500
+++ b/pylearn/datasets/utlc.py Tue Feb 08 16:19:18 2011 -0500
@@ -107,13 +107,18 @@
     else:
         return train, valid, test
 
-def load_sparse_dataset(name, normalize=True, transfer=False):
+def load_sparse_dataset(name, normalize=True, transfer=False,
+                        randomize_valid=False,
+                        randomize_test=False):
     """ Load the train,valid,test data for the dataset `name`
         and return it in sparse format.
 
     :param normalize: If True, we normalize the train dataset
                       before returning it
     :param transfer: If True also return the transfer label
+    :param randomize_valid: see same option for load_ndarray_dataset
+    :param randomize_test: see same option for load_ndarray_dataset
+
     """
     assert name in ['harry','terry','ule']
     common = os.path.join('UTLC','sparse',name+'_')
@@ -123,6 +128,18 @@
     train = load_sparse(trname)
     valid = load_sparse(vname)
     test = load_sparse(tename)
+
+    # Data should already be in csr format that support
+    # this type of indexing.
+    if randomize_valid:
+        rng = numpy.random.RandomState([1,2,3,4])
+        perm = rng.permutation(valid.shape[0])
+        valid = valid[perm]
+    if randomize_test:
+        rng = numpy.random.RandomState([1,2,3,4])
+        perm = rng.permutation(test.shape[0])
+        test = test[perm]
+
     if normalize:
         if name == "ule":
             train = train.astype(theano.config.floatX) / 255