diff deep/convolutional_dae/stacked_convolutional_dae.py @ 249:1bf046c0c84a

Fixed a bug with image_shape.
author humel
date Tue, 16 Mar 2010 20:19:13 -0400
parents 7e6fecabb656
children 0c0f0b3f6a93
line wrap: on
line diff
--- a/deep/convolutional_dae/stacked_convolutional_dae.py	Tue Mar 16 14:46:25 2010 -0400
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py	Tue Mar 16 20:19:13 2010 -0400
@@ -21,7 +21,7 @@
     def __init__(self, rng, input, n_in, n_out):
 
         self.input = input
- 
+
         W_values = numpy.asarray( rng.uniform( \
               low = -numpy.sqrt(6./(n_in+n_out)), \
               high = numpy.sqrt(6./(n_in+n_out)), \
@@ -37,8 +37,7 @@
 class dA_conv(object):
  
   def __init__(self, input, filter_shape, corruption_level = 0.1, 
-               shared_W = None, shared_b = None, image_shape = None, 
-               poolsize = (2,2)):
+               shared_W = None, shared_b = None, image_shape = None):
 
     theano_rng = RandomStreams()
     
@@ -70,7 +69,9 @@
     self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x
 
     conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
-                            image_shape=image_shape, unroll_kern=4,unroll_batch=4, border_mode='valid')
+                            image_shape=image_shape,
+                            unroll_kern=4,unroll_batch=4, 
+                            border_mode='valid')
 
     
     self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
@@ -78,7 +79,8 @@
     
     da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
                        filter_shape[3] ]
-    da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
+    da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
+    #import pdb; pdb.set_trace()
     initial_W_prime =  numpy.asarray( numpy.random.uniform( \
               low = -numpy.sqrt(6./(fan_in+fan_out)), \
               high = numpy.sqrt(6./(fan_in+fan_out)), \
@@ -113,7 +115,8 @@
         self.b = theano.shared(value=b_values)
  
         conv_out = conv.conv2d(input, self.W,
-                filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4,unroll_batch=4)
+                filter_shape=filter_shape, image_shape=image_shape,
+                               unroll_kern=4,unroll_batch=4)
  
 
         fan_in = numpy.prod(filter_shape[1:])
@@ -151,7 +154,7 @@
             max_poolsize=conv_hidden_layers_sizes[i][2]
                 
             if i == 0 :
-                layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32))
+                layer_input=self.x.reshape((batch_size, 1, 32, 32))
             else:
                 layer_input=self.layers[-1].output
             
@@ -167,7 +170,7 @@
             da_layer = dA_conv(corruption_level = corruption_levels[0],
                                input = layer_input,
                                shared_W = layer.W, shared_b = layer.b,
-                               filter_shape = filter_shape,
+                               filter_shape=filter_shape,
                                image_shape = image_shape )
             
             gparams = T.grad(da_layer.cost, da_layer.params)
@@ -218,9 +221,9 @@
         
         self.errors = self.logLayer.errors(self.y)
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 0, \
                             pretrain_lr = 0.1, training_epochs = 1000, \
-                            kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500], \
+                            kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \
                             corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \
                             max_pool_layers = [ [2,2] , [2,2] ], \
                             dataset=datasets.nist_digits):
@@ -232,7 +235,7 @@
     y = T.ivector('y') # the labels are presented as 1d vector of
     # [int] labels
 
-    layer0_input = x.reshape((x.shape[0],1,32,32))
+    layer0_input = x.reshape((batch_size,1,32,32))
     
     rng = numpy.random.RandomState(1234)
     conv_layers=[]
@@ -241,14 +244,15 @@
                     max_pool_layers[0] ]
     conv_layers.append(init_layer)
 
-    conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0]
-
+    conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0])
+    print init_layer[1]
+    
     for i in range(1,len(kernels)):    
         layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\
-                  [ batch_size, kernels[i-1][0], conv_n_out,conv_n_out ],    
+                  [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ],    
                    max_pool_layers[i] ]
         conv_layers.append(layer)
-        conv_n_out =  (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
+        conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0])
         print layer [1]
     network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
@@ -263,7 +267,9 @@
     for i in xrange(len(network.layers)-len(mlp_layers)):
         for epoch in xrange(pretraining_epochs):
             for x, y in dataset.train(batch_size):
-                c = network.pretrain_functions[i](x)
+                if x.shape[0] == batch_size:
+                    c = network.pretrain_functions[i](x)
+
             print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c
 
     patience = 10000 # look as this many examples regardless
@@ -285,12 +291,16 @@
     while (epoch < training_epochs) and (not done_looping):
       epoch = epoch + 1
       for x, y in dataset.train(batch_size):
- 
+        if x.shape[0] != batch_size:
+            continue
         cost_ij = network.finetune(x, y)
         iter += 1
         
         if iter % validation_frequency == 0:
-            validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)]
+            validation_losses = []
+            for xv, yv in dataset.valid(batch_size):
+                if xv.shape[0] == batch_size:
+                    validation_losses.append(test_model(xv, yv))
             this_validation_loss = numpy.mean(validation_losses)
             print('epoch %i, iter %i, validation error %f %%' % \
                    (epoch, iter, this_validation_loss*100.))
@@ -308,7 +318,10 @@
                 best_iter = iter
                 
                 # test it on the test set
-                test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)]
+                test_losses=[]
+                for xt, yt in dataset.test(batch_size):
+                    if xt.shape[0] == batch_size:
+                        test_losses.append(test_model(xt, yt))
                 test_score = numpy.mean(test_losses)
                 print((' epoch %i, iter %i, test error of best '
                       'model %f %%') %