Mercurial > pylearn
comparison pylearn/datasets/utlc.py @ 1406:6003f733a994
added the normalization of the last UTLC dataset
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Tue, 25 Jan 2011 04:16:33 -0500 |
parents | 89017617ab36 |
children | 2993b2a5c1af |
comparison legend: equal | deleted | inserted | replaced
1404:89017617ab36 | 1406:6003f733a994 |
---|---|
81 test = test.astype(theano.config.floatX) | 81 test = test.astype(theano.config.floatX) |
82 std = 0.69336046033925791#train.std()slow to compute | 82 std = 0.69336046033925791#train.std()slow to compute |
83 train = (train) / std | 83 train = (train) / std |
84 valid = (valid) / std | 84 valid = (valid) / std |
85 test = (test) / std | 85 test = (test) / std |
86 #elif name == "terry": | 86 elif name == "terry": |
87 # import pdb;pdb.set_trace() | 87 train = train.astype(theano.config.floatX) |
88 # train = train.astype(theano.config.floatX) | 88 valid = valid.astype(theano.config.floatX) |
89 # valid = valid.astype(theano.config.floatX) | 89 test = test.astype(theano.config.floatX) |
90 # test = test.astype(theano.config.floatX) | 90 train = (train) / 300 |
91 #max = max(train.data.max(),0) | 91 valid = (valid) / 300 |
92 #train = (train) / max | 92 test = (test) / 300 |
93 #valid = (valid) / max | |
94 #test = (test) / max | |
95 else: | 93 else: |
96 raise Exception("This dataset don't have its normalization defined") | 94 raise Exception("This dataset don't have its normalization defined") |
97 return train, valid, test | 95 return train, valid, test |
98 | 96 |
99 def load_filetensor(fname): | 97 def load_filetensor(fname): |
137 assert isinstance(train, numpy.ndarray) | 135 assert isinstance(train, numpy.ndarray) |
138 assert isinstance(valid, numpy.ndarray) | 136 assert isinstance(valid, numpy.ndarray) |
139 assert isinstance(test, numpy.ndarray) | 137 assert isinstance(test, numpy.ndarray) |
140 assert train.shape[1]==test.shape[1]==valid.shape[1] | 138 assert train.shape[1]==test.shape[1]==valid.shape[1] |
141 | 139 |
142 for name in ['harry','ule','ule']: | 140 for name in ['harry','terry','ule']: |
143 train, valid, test = load_sparse_dataset(name) | 141 train, valid, test = load_sparse_dataset(name, normalize=True) |
144 nb_elem = numpy.prod(train.shape) | 142 nb_elem = numpy.prod(train.shape) |
145 mi = train.data.min() | 143 mi = train.data.min() |
146 ma = train.data.max() | 144 ma = train.data.max() |
147 mi = min(0, mi) | 145 mi = min(0, mi) |
148 ma = max(0, ma) | 146 ma = max(0, ma) |
149 su = train.data.sum() | 147 su = train.data.sum() |
150 mean = float(su)/nb_elem | 148 mean = float(su)/nb_elem |
151 print name,"dtype, max, min, mean, nb non-zero, nb element, %sparse" | 149 print name,"dtype, max, min, mean, nb non-zero, nb element, %sparse" |
152 print train.dtype, ma, mi, mean, train.nnz, nb_elem, (nb_elem-float(train.nnz))/nb_elem | 150 print train.dtype, ma, mi, mean, train.nnz, nb_elem, (nb_elem-float(train.nnz))/nb_elem |
153 | 151 print name,"max, min, mean, std (all stats on non-zero element)" |
| 152 print train.data.max(), train.data.min(), train.data.mean(), train.data.std() |
154 assert scipy.sparse.issparse(train) | 153 assert scipy.sparse.issparse(train) |
155 assert scipy.sparse.issparse(valid) | 154 assert scipy.sparse.issparse(valid) |
156 assert scipy.sparse.issparse(test) | 155 assert scipy.sparse.issparse(test) |
157 assert train.shape[1]==test.shape[1]==valid.shape[1] | 156 assert train.shape[1]==test.shape[1]==valid.shape[1] |
158 | 157 |