diff deep/convolutional_dae/scdae.py @ 277:20ebc1f2a9fe

Use softmax for the output layer and rework the dset iterator handling.
author Arnaud Bergeron <abergeron@gmail.com>
date Mon, 22 Mar 2010 16:37:34 -0400
parents 727ed56fad12
children 80ee63c3e749
--- a/deep/convolutional_dae/scdae.py	Mon Mar 22 13:33:29 2010 -0400
+++ b/deep/convolutional_dae/scdae.py	Mon Mar 22 16:37:34 2010 -0400
@@ -60,7 +60,7 @@
     rl2 = ReshapeLayer((None, outs))
     layer_sizes = [outs]+layer_sizes
     ls2 = mlp(layer_sizes, dtype)
-    lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.sigmoid)
+    lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax)
     return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll)
 
 def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs,
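
Note: the sigmoid-to-softmax switch above matters because the net is built with error=errors.nll. Softmax normalizes the output layer into a probability distribution over the out_size classes, which is what a negative log-likelihood cost expects; a sigmoid output gives independent per-unit probabilities instead. A minimal numpy sketch of the pairing (softmax/nll below are illustrative stand-ins, not the nlins.softmax and errors.nll ops used above):

    import numpy

    def softmax(a):
        # shift by the row max so exp() cannot overflow
        e = numpy.exp(a - a.max(axis=1)[:, None])
        return e / e.sum(axis=1)[:, None]

    def nll(p, y):
        # mean negative log-probability assigned to the correct class
        return -numpy.mean(numpy.log(p[numpy.arange(len(y)), y]))

    p = softmax(numpy.array([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]]))
    print nll(p, numpy.array([2, 0]))
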
@@ -89,7 +89,7 @@
     pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers]
     trainf_reg = trainfunc(n, 0.1)
     evalf_reg = theano.function([x, y], errors.class_error(n.output, y))
-    
+
     def select_f(f1, f2, bsize):
         def f(x):
             if x.shape[0] == bsize:
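
The hunk above only strips trailing whitespace, but it cuts off select_f mid-definition. The visible lines suggest select_f dispatches between two compiled functions based on batch size; a sketch of that idiom, with the else branch assumed since it is not shown here:

    def select_f(f1, f2, bsize):
        # use the function compiled for full-size batches when the
        # batch matches, otherwise the flexible one (assumed branch)
        def f(x):
            if x.shape[0] == bsize:
                return f1(x)
            return f2(x)
        return f
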
@@ -117,25 +117,25 @@
         for i in xrange(pretrain_epochs):
             f()
 
-def massage_funcs(batch_size, dset, pretrain_funcs, trainf, evalf):
+def massage_funcs(train_it, dset, batch_size, pretrain_funcs, trainf, evalf):
     def pretrain_f(f):
         def res():
-            for x, y in dset.train(batch_size):
-                print "pretrain:", f(x)
-        return res
+            for x, y in train_it:
+                yield f(x)
+        it = res()
+        return lambda: it.next()
 
     pretrain_fs = map(pretrain_f, pretrain_funcs)
 
-    def train_f(f, dsetf):
+    def train_f(f):
         def dset_it():
-            while True:
-                for x, y in dsetf(batch_size):
-                    yield f(x, y)
+            for x, y in train_it:
+                yield f(x, y)
         it = dset_it()
         return lambda: it.next()
-
-    train = train_f(trainf, dset.train)
-
+    
+    train = train_f(trainf)
+    
     def eval_f(f, dsetf):
         def res():
             c = 0
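
The reworked pretrain_f and train_f share one idiom: wrap the compiled step function in a generator over the shared train_it iterator, then expose it as a zero-argument callable that advances exactly one minibatch per call. A self-contained sketch of that idiom (make_stepper is a hypothetical name, and a toy step function stands in for the compiled Theano one):

    def make_stepper(step, train_it):
        # run `step` on successive minibatches from the iterator
        def gen():
            for x, y in train_it:
                yield step(x, y)
        it = gen()
        # each call trains on one minibatch and returns its cost
        return lambda: it.next()   # next(it) under Python 3

    train = make_stepper(lambda x, y: x + y,
                         iter([(1, 2), (3, 4), (5, 6)]))
    print train(), train()   # 3 7
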
@@ -151,6 +151,11 @@
 
     return pretrain_fs, train, valid, test
 
+def repeat_itf(itf, *args, **kwargs):
+    while True:
+        for e in itf(*args, **kwargs):
+            yield e
+
 def run_exp(state, channel):
     from ift6266 import datasets
     from sgd_opt import sgd_opt
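
repeat_itf turns a finite epoch iterator factory into an endless stream by re-invoking the factory each time it is exhausted; this is what lets train_f above drop its old `while True` loop. For example (epochs is a hypothetical stand-in for dset.train):

    def repeat_itf(itf, *args, **kwargs):
        # chain successive epochs from the factory, forever
        while True:
            for e in itf(*args, **kwargs):
                yield e

    def epochs():
        return iter([1, 2, 3])

    it = repeat_itf(epochs)
    print [it.next() for _ in xrange(7)]   # [1, 2, 3, 1, 2, 3, 1]
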
@@ -205,25 +210,28 @@
     import sys, time
     
     batch_size = 100
-    dset = datasets.mnist(200)
+    dset = datasets.mnist()
 
     pretrain_funcs, trainf, evalf = build_funcs(
         img_size = (28, 28),
-        batch_size=batch_size, filter_sizes=[(5,5), (5,5)],
-        num_filters=[4, 3], subs=[(2,2), (2,2)], noise=[0.2, 0.2],
+        batch_size=batch_size, filter_sizes=[(5,5), (3,3)],
+        num_filters=[4, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2],
         mlp_sizes=[500], out_size=10, dtype=numpy.float32,
         pretrain_lr=0.01, train_lr=0.1)
-
+    
     pretrain_fs, train, valid, test = massage_funcs(
-        batch_size, dset, pretrain_funcs, trainf, evalf)
+        repeat_itf(dset.train, batch_size),
+        dset, batch_size,
+        pretrain_funcs, trainf, evalf)
 
     print "pretraining ...",
     sys.stdout.flush()
     start = time.time()
-    do_pretrain(pretrain_fs, 0)
+    do_pretrain(pretrain_fs, 2500)
     end = time.time()
     print "done (in", end-start, "s)"
     
-    sgd_opt(train, valid, test, training_epochs=1000, patience=1000,
+    sgd_opt(train, valid, test, training_epochs=10000, patience=1000,
             patience_increase=2., improvement_threshold=0.995,
-            validation_frequency=500)
+            validation_frequency=250)
+
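
The new sgd_opt arguments follow the usual patience-based early-stopping recipe: train for up to training_epochs minibatch steps, validate every validation_frequency steps, and extend the step budget by patience_increase whenever the validation error improves by more than improvement_threshold. A schematic of that loop, assuming sgd_opt works like the classic Theano-tutorial optimizer (a sketch, not the code in sgd_opt.py):

    def sgd_opt_sketch(train, valid, test, training_epochs, patience,
                       patience_increase, improvement_threshold,
                       validation_frequency):
        best = float('inf')
        for step in xrange(1, training_epochs + 1):
            train()                        # one minibatch of training
            if step % validation_frequency == 0:
                err = valid()
                if err < best * improvement_threshold:
                    # significant improvement: extend the budget
                    patience = max(patience, step * patience_increase)
                best = min(best, err)
            if step >= patience:           # budget exhausted, stop
                break
        return best, test()
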