# HG changeset patch
# User Frederic Bastien
# Date 1295946993 18000
# Node ID 6003f733a994caa94b0bbb44d250775760af4834
# Parent 89017617ab36b027f2728cae75d26150c6984bc1
added the normalization of the last UTLC dataset

diff -r 89017617ab36 -r 6003f733a994 pylearn/datasets/utlc.py
--- a/pylearn/datasets/utlc.py	Mon Jan 24 13:18:43 2011 -0500
+++ b/pylearn/datasets/utlc.py	Tue Jan 25 04:16:33 2011 -0500
@@ -83,15 +83,13 @@
         train = (train) / std
         valid = (valid) / std
         test = (test) / std
-    #elif name == "terry":
-    #    import pdb;pdb.set_trace()
-    #    train = train.astype(theano.config.floatX)
-    #    valid = valid.astype(theano.config.floatX)
-    #    test = test.astype(theano.config.floatX)
-    #max = max(train.data.max(),0)
-    #train = (train) / max
-    #valid = (valid) / max
-    #test = (test) / max
+    elif name == "terry":
+        train = train.astype(theano.config.floatX)
+        valid = valid.astype(theano.config.floatX)
+        test = test.astype(theano.config.floatX)
+        train = (train) / 300
+        valid = (valid) / 300
+        test = (test) / 300
     else:
         raise Exception("This dataset don't have its normalization defined")
     return train, valid, test
@@ -139,8 +137,8 @@
         assert isinstance(test, numpy.ndarray)
         assert train.shape[1]==test.shape[1]==valid.shape[1]
 
-    for name in ['harry','ule','ule']:
-        train, valid, test = load_sparse_dataset(name)
+    for name in ['harry','terry','ule']:
+        train, valid, test = load_sparse_dataset(name, normalize=True)
         nb_elem = numpy.prod(train.shape)
         mi = train.data.min()
         ma = train.data.max()
@@ -150,7 +148,8 @@
         mean = float(su)/nb_elem
         print name,"dtype, max, min, mean, nb non-zero, nb element, %sparse"
        print train.dtype, ma, mi, mean, train.nnz, nb_elem, (nb_elem-float(train.nnz))/nb_elem
-
+        print name,"max, min, mean, std (all stats on non-zero element)"
+        print train.data.max(), train.data.min(), train.data.mean(), train.data.std()
         assert scipy.sparse.issparse(train)
         assert scipy.sparse.issparse(valid)
         assert scipy.sparse.issparse(test)
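
For readers of this patch, a minimal sketch of what the new "terry" branch does: the cast to theano.config.floatX and the division by the constant 300 are taken directly from the diff above, while the helper name normalize_terry and the usage example are made up for illustration and are not part of pylearn.

    import theano
    import scipy.sparse

    def normalize_terry(train, valid, test):
        # Cast each sparse split to the configured float type, then rescale
        # all non-zero entries by the fixed constant 300 (as in the patch).
        out = []
        for split in (train, valid, test):
            split = split.astype(theano.config.floatX)
            out.append(split / 300)
        return tuple(out)

    # Illustration only: apply the same scaling to a random sparse matrix.
    x = scipy.sparse.rand(5, 10, density=0.3, format='csr')
    tr, va, te = normalize_terry(x, x, x)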