Mercurial > pylearn
comparison pylearn/datasets/utlc.py @ 1428:3823dbfff6cf
add parameter to randomize the valid and test data.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Tue, 08 Feb 2011 12:57:15 -0500 |
parents | a36d3a406c59 |
children | b0141efbf6a2 |
comparison
equal
deleted
inserted
replaced
1427:a36d3a406c59 | 1428:3823dbfff6cf |
---|---|
14 import theano | 14 import theano |
15 | 15 |
16 import pylearn.io.filetensor as ft | 16 import pylearn.io.filetensor as ft |
17 import config | 17 import config |
18 | 18 |
19 def load_ndarray_dataset(name, normalize=True, transfer=False, normalize_on_the_fly=False): | 19 def load_ndarray_dataset(name, normalize=True, transfer=False, |
20 normalize_on_the_fly=False, randomize_valid=False, | |
21 randomize_test=False): | |
20 """ Load the train,valid,test data for the dataset `name` | 22 """ Load the train,valid,test data for the dataset `name` |
21 and return it in ndarray format. | 23 and return it in ndarray format. |
22 | 24 |
23 :param normalize: If True, we normalize the train dataset | 25 :param normalize: If True, we normalize the train dataset |
24 before returning it | 26 before returning it |
25 :param transfer: If True also return the transfer label | 27 :param transfer: If True also return the transfer label (currently only available for ule) |
26 :param normalize_on_the_fly: If True, we return a Theano Variable that will give | 28 :param normalize_on_the_fly: If True, we return a Theano Variable that will give |
27 as output the normalized value. If the user only | 29 as output the normalized value. If the user only |
28 takes a subtensor of that variable, Theano optimization | 30 takes a subtensor of that variable, Theano optimization |
29 should ensure that we will only have in memory the subtensor | 31 should ensure that we will only have in memory the subtensor |
30 portion that is computed in normalized form. We store | 32 portion that is computed in normalized form. We store |
31 the original data in shared memory in its original dtype. | 33 the original data in shared memory in its original dtype. |
32 | 34 |
33 This is useful to have the original data in its original | 35 This is useful to have the original data in its original |
34 dtype in memory to save memory. Especially useful to | 36 dtype in memory to save memory. Especially useful to |
35 be able to use rita and harry with 1G per jobs. | 37 be able to use rita and harry with 1G per jobs. |
38 :param randomize_valid: Do we randomize the order of the valid set? | |
39 We always use the same random order (fixed seed). | |
40 If False, return in the same order as downloaded on the web. | |
41 :param randomize_test: Do we randomize the order of the test set? | |
42 We always use the same random order (fixed seed). | |
43 If False, return in the same order as downloaded on the web. | |
36 """ | 44 """ |
37 assert not (normalize and normalize_on_the_fly), "Can't normalize in 2 way at the same time!" | 45 assert not (normalize and normalize_on_the_fly), "Can't normalize in 2 way at the same time!" |
38 | 46 |
39 assert name in ['avicenna','harry','rita','sylvester','ule'] | 47 assert name in ['avicenna','harry','rita','sylvester','ule'] |
40 common = os.path.join('UTLC','filetensor',name+'_') | 48 common = os.path.join('UTLC','filetensor',name+'_') |
43 for subset in ['train','valid','test']] | 51 for subset in ['train','valid','test']] |
44 | 52 |
45 train = load_filetensor(trname) | 53 train = load_filetensor(trname) |
46 valid = load_filetensor(vname) | 54 valid = load_filetensor(vname) |
47 test = load_filetensor(tename) | 55 test = load_filetensor(tename) |
56 if randomize_valid: | |
57 rng = numpy.random.RandomState([1,2,3,4]) | |
58 perm = rng.permutation(valid.shape[0]) | |
59 valid = valid[perm] | |
60 if randomize_test: | |
61 rng = numpy.random.RandomState([1,2,3,4]) | |
62 perm = rng.permutation(test.shape[0]) | |
63 test = test[perm] | |
48 | 64 |
49 if normalize or normalize_on_the_fly: | 65 if normalize or normalize_on_the_fly: |
50 if normalize_on_the_fly: | 66 if normalize_on_the_fly: |
51 train = theano.shared(train, borrow=True, name=name+"_train") | 67 train = theano.shared(train, borrow=True, name=name+"_train") |
52 valid = theano.shared(valid, borrow=True, name=name+"_valid") | 68 valid = theano.shared(valid, borrow=True, name=name+"_valid") |