changeset 1406:6003f733a994

added the normalization of the last UTLC dataset
author Frederic Bastien <nouiz@nouiz.org>
date Tue, 25 Jan 2011 04:16:33 -0500
parents 89017617ab36
children f467c5457eff
files pylearn/datasets/utlc.py
diffstat 1 files changed, 11 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/datasets/utlc.py	Mon Jan 24 13:18:43 2011 -0500
+++ b/pylearn/datasets/utlc.py	Tue Jan 25 04:16:33 2011 -0500
@@ -83,15 +83,13 @@
             train = (train) / std
             valid = (valid) / std
             test = (test) / std  
-        #elif name == "terry":
-        #    import pdb;pdb.set_trace()
-        #    train = train.astype(theano.config.floatX)
-        #    valid = valid.astype(theano.config.floatX)
-        #    test = test.astype(theano.config.floatX)
-            #max = max(train.data.max(),0)
-            #train = (train) / max
-            #valid = (valid) / max
-            #test = (test) / max  
+        elif name == "terry":
+            train = train.astype(theano.config.floatX)
+            valid = valid.astype(theano.config.floatX)
+            test = test.astype(theano.config.floatX)
+            train = (train) / 300
+            valid = (valid) / 300
+            test = (test) / 300
         else:
             raise Exception("This dataset don't have its normalization defined")
     return train, valid, test
@@ -139,8 +137,8 @@
         assert isinstance(test, numpy.ndarray)
         assert train.shape[1]==test.shape[1]==valid.shape[1]
 
-    for name in ['harry','ule','ule']:
-        train, valid, test = load_sparse_dataset(name)
+    for name in ['harry','terry','ule']:
+        train, valid, test = load_sparse_dataset(name, normalize=True)
         nb_elem = numpy.prod(train.shape)
         mi = train.data.min()
         ma = train.data.max()
@@ -150,7 +148,8 @@
         mean = float(su)/nb_elem
         print name,"dtype, max, min, mean, nb non-zero, nb element, %sparse"
         print train.dtype, ma, mi, mean, train.nnz, nb_elem, (nb_elem-float(train.nnz))/nb_elem
-        
+        print name,"max, min, mean, std (all stats on non-zero element)"
+        print train.data.max(), train.data.min(), train.data.mean(), train.data.std()
         assert scipy.sparse.issparse(train)
         assert scipy.sparse.issparse(valid)
         assert scipy.sparse.issparse(test)