comparison pylearn/datasets/utlc.py @ 1406:6003f733a994

added the normalization of the last UTLC dataset
author Frederic Bastien <nouiz@nouiz.org>
date Tue, 25 Jan 2011 04:16:33 -0500
parents 89017617ab36
children 2993b2a5c1af
diff -r 89017617ab36 -r 6003f733a994 pylearn/datasets/utlc.py
--- a/pylearn/datasets/utlc.py
+++ b/pylearn/datasets/utlc.py
@@ -81,19 +81,17 @@
         test = test.astype(theano.config.floatX)
         std = 0.69336046033925791#train.std()slow to compute
         train = (train) / std
         valid = (valid) / std
         test = (test) / std
-    #elif name == "terry":
-    #    import pdb;pdb.set_trace()
-    #    train = train.astype(theano.config.floatX)
-    #    valid = valid.astype(theano.config.floatX)
-    #    test = test.astype(theano.config.floatX)
-        #max = max(train.data.max(),0)
-        #train = (train) / max
-        #valid = (valid) / max
-        #test = (test) / max
+    elif name == "terry":
+        train = train.astype(theano.config.floatX)
+        valid = valid.astype(theano.config.floatX)
+        test = test.astype(theano.config.floatX)
+        train = (train) / 300
+        valid = (valid) / 300
+        test = (test) / 300
     else:
         raise Exception("This dataset don't have its normalization defined")
     return train, valid, test
 
 def load_filetensor(fname):
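Note on the hunk above: the previously commented-out "terry" branch is now enabled, so each sparse UTLC dataset gets a normalization (the "terry" splits are cast to floatX and divided by the constant 300, rather than the max-based scaling sketched in the old comments). A minimal usage sketch follows, assuming the pylearn package is importable and the UTLC filetensor files are installed locally; the function name and the normalize flag are taken from the test code in the next hunk, and the rest is illustrative (Python 2, like the module itself):

    # Hypothetical usage sketch, not part of the changeset.
    from pylearn.datasets import utlc

    # Load the sparse "terry" dataset with the normalization added here:
    # each split is cast to theano.config.floatX and divided by 300.
    train, valid, test = utlc.load_sparse_dataset("terry", normalize=True)
    print train.dtype, train.shape
    print train.data.max(), train.data.min()  # stats of the stored (non-zero) entries after scaling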
@@ -137,22 +135,23 @@
         assert isinstance(train, numpy.ndarray)
         assert isinstance(valid, numpy.ndarray)
         assert isinstance(test, numpy.ndarray)
         assert train.shape[1]==test.shape[1]==valid.shape[1]
 
-    for name in ['harry','ule','ule']:
-        train, valid, test = load_sparse_dataset(name)
+    for name in ['harry','terry','ule']:
+        train, valid, test = load_sparse_dataset(name, normalize=True)
         nb_elem = numpy.prod(train.shape)
         mi = train.data.min()
         ma = train.data.max()
         mi = min(0, mi)
         ma = max(0, ma)
         su = train.data.sum()
         mean = float(su)/nb_elem
         print name,"dtype, max, min, mean, nb non-zero, nb element, %sparse"
         print train.dtype, ma, mi, mean, train.nnz, nb_elem, (nb_elem-float(train.nnz))/nb_elem
-
+        print name,"max, min, mean, std (all stats on non-zero element)"
+        print train.data.max(), train.data.min(), train.data.mean(), train.data.std()
         assert scipy.sparse.issparse(train)
         assert scipy.sparse.issparse(valid)
         assert scipy.sparse.issparse(test)
         assert train.shape[1]==test.shape[1]==valid.shape[1]
 
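Note on the hunk above: the sparse test loop now covers "terry" as well (the old list repeated 'ule' twice), requests the normalization, and prints a second line of statistics computed only over the stored non-zero entries (train.data), alongside the existing statistics that average over every element of the matrix. The standalone sketch below, an illustration rather than code from the changeset, shows why the two differ for a scipy.sparse matrix:

    # Illustration only: stats over all elements vs. stats over stored entries.
    import numpy
    import scipy.sparse

    # Small example matrix: 3 stored (non-zero) entries out of 6 elements.
    x = scipy.sparse.csr_matrix(numpy.array([[0., 2., 0.],
                                             [3., 0., 6.]]))
    nb_elem = numpy.prod(x.shape)
    mean_all = float(x.data.sum()) / nb_elem       # zeros included in the denominator
    mean_nonzero = x.data.mean()                   # stored entries only
    sparsity = (nb_elem - float(x.nnz)) / nb_elem  # fraction of zero entries
    print mean_all, mean_nonzero, sparsity         # 1.8333..., 3.6666..., 0.5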