changeset 1333:c7b2da4e2df6

Modifications to test_mcRBM to reproduce mcRBM classification results
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 18 Oct 2010 14:58:39 -0400
parents d6726417cf57
children 6fd2610c1706
files pylearn/algorithms/tests/test_mcRBM.py pylearn/dataset_ops/tinyimages.py
diffstat 2 files changed, 67 insertions(+), 33 deletions(-)
--- a/pylearn/algorithms/tests/test_mcRBM.py	Sun Oct 10 14:51:02 2010 -0400
+++ b/pylearn/algorithms/tests/test_mcRBM.py	Mon Oct 18 14:58:39 2010 -0400
@@ -296,8 +296,8 @@
 
 
     # extract convolutional features from the CIFAR10 data
+    feat_filename = 'mcrbm_features.npy'
     feat_filename = 'cifar10.features.46000.npy'
-    feat_filename = 'mcrbm_features.npy'
     try:
         open(feat_filename).close()
         load_features = True
@@ -373,69 +373,102 @@
         # nothing to load
         pass
     else:
-        batchsize=10
+        batchsize=100
+
+        if feat_filename.startswith('cifar'):
+            learnrate = 0.002
+            l1_regularization = 0.004
+            anneal_epoch=100
+            n_epochs = 500
+        else:
+            learnrate = 0.005
+            l1_regularization = 0.004
+            n_epochs = 100
+            anneal_epoch=20
+
         x_i = tensor.matrix()
         y_i = tensor.ivector()
         lr = tensor.scalar()
-        l1_regularization = float(sys.argv[1]) #1.e-3
-        l2_regularization = float(sys.argv[2]) #1.e-3*0
+        #l1_regularization = float(sys.argv[1]) #1.e-3
+        #l2_regularization = float(sys.argv[2]) #1.e-3*0
 
         feature_logreg = LogisticRegression.new(x_i, 
                 n_in = 11025, n_out=10,
                 dtype=x_i.dtype)
 
+        # initialize the weights with small random values, as Marc'Aurelio does
+        feature_logreg.w.value = numpy.random.RandomState(44).randn(11025,10)*.02
+
         traincost = feature_logreg.nll(y_i).sum()
         traincost = traincost + abs(feature_logreg.w).sum() * l1_regularization
-        traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization
+        #traincost = traincost + (feature_logreg.w**2).sum() * l2_regularization
         train_logreg_fn = function([x_i, y_i, lr], 
                 [feature_logreg.nll(y_i).mean(),
                     feature_logreg.errors(y_i).mean()],
                 updates=pylearn.gd.sgd.sgd_updates(
                     params=feature_logreg.params,
                     grads=tensor.grad(traincost, feature_logreg.params),
-                    stepsizes=[lr,lr]))
+                    stepsizes=[lr,lr/10.]))
 
         all_labels = pylearn.dataset_ops.cifar10.all_data_labels('uint8')[1]
         pylearn.dataset_ops.cifar10.all_data_labels.forget() # clear memo cache
         assert len(all_labels)==60000
-        train_labels = all_labels[:40000]
-        valid_labels = all_labels[40000:50000]
-        test_labels = all_labels[50000:60000]
-        train_features = all_features[:40000]
-        valid_features = all_features[40000:50000]
-        test_features = all_features[50000:60000]
+        if 0:
+            print "Using validation set"
+            train_labels = all_labels[:40000]
+            valid_labels = all_labels[40000:50000]
+            test_labels = all_labels[50000:60000]
+            train_features = all_features[:40000]
+            valid_features = all_features[40000:50000]
+            test_features = all_features[50000:60000]
+        else:
+            print "NOT USING validation set"
+            train_labels = all_labels[:50000]
+            valid_labels = None
+            test_labels = all_labels[50000:60000]
+            train_features = all_features[:50000]
+            valid_features = None
+            test_features = all_features[50000:60000]
 
-        print "Computing mean and std.dev"
-        train_mean = train_features.mean(axis=0)
-        train_std = train_features.std(axis=0)+1e-4
+        if 1:
+            print "Computing mean and std.dev"
+            train_mean = train_features.mean(axis=0)
+            train_std = train_features.std(axis=0)+1e-4
+            preproc = lambda x: (x-train_mean)/(0.1+train_std)
+        else:
+            print "Not centering data"
+            preproc = lambda x:x
 
-
-        for epoch in xrange(20):
+        for epoch in xrange(n_epochs):
             print 'epoch', epoch
             # validate
+            # anneal the learning rate following Marc'Aurelio's schedule;
+            # divide by batchsize because traincost sums over the batch
+            e_lr = learnrate / (batchsize*max(1.0, numpy.floor(max(1., epoch/float(anneal_epoch))-2)))
 
-            l01s = []
-            nlls = []
-            for i in xrange(10000/batchsize):
-                x_i = valid_features[i*batchsize:(i+1)*batchsize]
-                y_i = valid_labels[i*batchsize:(i+1)*batchsize]
+            if valid_features is not None:
+                l01s = []
+                nlls = []
+                for i in xrange(10000/batchsize):
+                    x_i = valid_features[i*batchsize:(i+1)*batchsize]
+                    y_i = valid_labels[i*batchsize:(i+1)*batchsize]
 
-                #lr=0.0 -> no learning, safe for validation set
-                nll, l01 = train_logreg_fn((x_i-train_mean)/train_std, y_i, 0.0) 
-                nlls.append(nll)
-                l01s.append(l01)
-            print 'validate log_reg', numpy.mean(nlls), numpy.mean(l01s)
+                    #lr=0.0 -> no learning, safe for validation set
+                    nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0) 
+                    nlls.append(nll)
+                    l01s.append(l01)
+                print 'validate log_reg', numpy.mean(nlls), numpy.mean(l01s)
 
             # test
 
             l01s = []
             nlls = []
-            for i in xrange(10000/batchsize):
+            for i in xrange(len(test_features)//batchsize):
                 x_i = test_features[i*batchsize:(i+1)*batchsize]
                 y_i = test_labels[i*batchsize:(i+1)*batchsize]
 
                #lr=0.0 -> no learning, safe for test set
-                nll, l01 = train_logreg_fn((x_i-train_mean)/train_std, y_i, 0.0) 
+                nll, l01 = train_logreg_fn(preproc(x_i), y_i, 0.0) 
                 nlls.append(nll)
                 l01s.append(l01)
             print 'test log_reg', numpy.mean(nlls), numpy.mean(l01s)
@@ -443,10 +476,10 @@
             #train
             l01s = []
             nlls = []
-            for i in xrange(40000/batchsize):
+            for i in xrange(len(train_features)//batchsize):
                 x_i = train_features[i*batchsize:(i+1)*batchsize]
                 y_i = train_labels[i*batchsize:(i+1)*batchsize]
-                nll, l01 = train_logreg_fn((x_i-train_mean)/train_std, y_i, 0.00003)
+                nll, l01 = train_logreg_fn(preproc(x_i), y_i, e_lr)
                 nlls.append(nll)
                 l01s.append(l01)
             print 'train log_reg', numpy.mean(nlls), numpy.mean(l01s)
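
A minimal standalone sketch (not part of the changeset) of the annealed learning-rate schedule introduced above. The constants are the non-CIFAR defaults from the diff; the schedule holds the effective rate at learnrate/batchsize until epoch reaches 4*anneal_epoch, then divides it by 2, 3, ... every anneal_epoch epochs:

import numpy

learnrate = 0.005      # defaults from the non-CIFAR branch above
batchsize = 100
anneal_epoch = 20
n_epochs = 100

for epoch in xrange(n_epochs):
    # traincost sums the per-example nll over the batch, so the
    # stepsize is divided by batchsize to act like a mean
    e_lr = learnrate / (batchsize * max(1.0, numpy.floor(
        max(1.0, epoch / float(anneal_epoch)) - 2)))
    if epoch % anneal_epoch == 0:
        print 'epoch %i: effective lr %.2e' % (epoch, e_lr)

In the diff itself this rate then feeds sgd_updates with stepsizes=[lr, lr/10.], so the second parameter of the logistic regression (presumably the bias) steps ten times slower than the first.
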
--- a/pylearn/dataset_ops/tinyimages.py	Sun Oct 10 14:51:02 2010 -0400
+++ b/pylearn/dataset_ops/tinyimages.py	Mon Oct 18 14:58:39 2010 -0400
@@ -247,7 +247,7 @@
     return x
 
 
-def save_filters(X, fname):
+def save_filters(X, fname, tile_shape=None):
     dct = load_pca_dct()
     eigs = dct['eig_vals'], dct['eig_vecs']
     mean = dct['mean']
@@ -255,7 +255,8 @@
     _img = image_tiling.tile_raster_images(
             (rasterized[:,::3], rasterized[:,1::3], rasterized[:,2::3], None),
             img_shape=(8,8),
-            min_dynamic_range=1e-6)
+            min_dynamic_range=1e-6,
+            tile_shape=tile_shape)
     image_tiling.save_tiled_raster_images(_img, fname)
 
 def glviewer(split='train'):
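
For the tinyimages change, a hypothetical usage sketch (not part of the changeset): only the save_filters(X, fname, tile_shape=None) signature comes from the diff; the filter array and its width are assumptions made here for illustration.

import numpy
from pylearn.dataset_ops.tinyimages import save_filters

N_PCA = 192   # assumption: must match the PCA basis from load_pca_dct()

# stand-in "filters", one per row, in the tiny-images PCA space
X = numpy.random.RandomState(0).randn(400, N_PCA) * 0.02

# tile_shape=None keeps the old behaviour (tile_raster_images picks
# the grid); an explicit (rows, cols) layout is now forwarded
save_filters(X, 'filters_20x20.png', tile_shape=(20, 20))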