diff deep/convolutional_dae/stacked_convolutional_dae.py @ 259:3919c71e3091

Make the image shape a parameter (img_shape), and remove the passing of the image size to the ConvOp. The image size will have to be passed back in later somehow.
author Arnaud Bergeron <abergeron@gmail.com>
date Wed, 17 Mar 2010 15:24:25 -0400
parents 7e6fecabb656
children 0c0f0b3f6a93
line wrap: on
line diff
--- a/deep/convolutional_dae/stacked_convolutional_dae.py	Tue Mar 16 19:05:59 2010 -0400
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py	Wed Mar 17 15:24:25 2010 -0400
@@ -4,19 +4,16 @@
 import sys
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
-import theano.sandbox.softsign
+#import theano.sandbox.softsign
 
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv 
 
-sys.path.append('../../../')
-
 from ift6266 import datasets
 from ift6266.baseline.log_reg.log_reg import LogisticRegression
 
 batch_size = 100
 
-
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
 
@@ -70,15 +67,12 @@
     self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x
 
     conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
-                            image_shape=image_shape, unroll_kern=4,unroll_batch=4, border_mode='valid')
-
+                            image_shape=image_shape, border_mode='valid')
     
     self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
-
     
-    da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
-                       filter_shape[3] ]
-    da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
+    da_filter_shape = [ filter_shape[1], filter_shape[0], 
+                        filter_shape[2], filter_shape[3] ]
     initial_W_prime =  numpy.asarray( numpy.random.uniform( \
               low = -numpy.sqrt(6./(fan_in+fan_out)), \
               high = numpy.sqrt(6./(fan_in+fan_out)), \
@@ -86,9 +80,7 @@
     self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
 
     conv2_out = conv.conv2d(self.y, self.W_prime,
-                            filter_shape = da_filter_shape,\
-                            image_shape = da_image_shape, \
-                            unroll_kern=4,unroll_batch=4, \
+                            filter_shape = da_filter_shape,
                             border_mode='full')
 
     self.z =  (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
@@ -113,7 +105,7 @@
         self.b = theano.shared(value=b_values)
  
         conv_out = conv.conv2d(input, self.W,
-                filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4,unroll_batch=4)
+                filter_shape=filter_shape, image_shape=image_shape)
  
 
         fan_in = numpy.prod(filter_shape[1:])
@@ -134,7 +126,7 @@
 class SdA():
     def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes,
                  mlp_hidden_layers_sizes, corruption_levels, rng, n_out, 
-                 pretrain_lr, finetune_lr):
+                 pretrain_lr, finetune_lr, img_shape):
         
         self.layers = []
         self.pretrain_functions = []
@@ -151,7 +143,7 @@
             max_poolsize=conv_hidden_layers_sizes[i][2]
                 
             if i == 0 :
-                layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32))
+                layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape)
             else:
                 layer_input=self.layers[-1].output
             
@@ -218,13 +210,13 @@
         
         self.errors = self.logLayer.errors(self.y)
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \
-                            pretrain_lr = 0.1, training_epochs = 1000, \
-                            kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500], \
-                            corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \
-                            max_pool_layers = [ [2,2] , [2,2] ], \
-                            dataset=datasets.nist_digits):
-    
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1,
+                           pretrain_lr = 0.1, training_epochs = 1000,
+                           kernels = [[4,5,5], [4,3,3]], mlp_layers=[500],
+                           corruption_levels = [0.2, 0.2, 0.2], 
+                           batch_size = batch_size, img_shape=(28, 28),
+                           max_pool_layers = [[2,2], [2,2]],
+                           dataset=datasets.mnist(5000)):
  
     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
@@ -232,30 +224,32 @@
     y = T.ivector('y') # the labels are presented as 1d vector of
     # [int] labels
 
-    layer0_input = x.reshape((x.shape[0],1,32,32))
+    layer0_input = x.reshape((x.shape[0],1)+img_shape)
     
     rng = numpy.random.RandomState(1234)
     conv_layers=[]
-    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\
-                   [ batch_size , 1, 32, 32 ],    
-                    max_pool_layers[0] ]
+    init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]],
+                  None, # do not specify the batch size since it can 
+                        # change for the last one and then theano will 
+                        # crash.
+                  max_pool_layers[0]]
     conv_layers.append(init_layer)
 
-    conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0]
+    conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0]
 
     for i in range(1,len(kernels)):    
-        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\
-                  [ batch_size, kernels[i-1][0], conv_n_out,conv_n_out ],    
-                   max_pool_layers[i] ]
+        layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]],
+                 None, # same comment as for init_layer
+                 max_pool_layers[i] ]
         conv_layers.append(layer)
         conv_n_out =  (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
-        print layer [1]
+
     network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
-                  corruption_levels = corruption_levels , n_out = 62,
-                  rng = rng , pretrain_lr = pretrain_lr ,
-                  finetune_lr = learning_rate )
+                  corruption_levels = corruption_levels, n_out = 62,
+                  rng = rng , pretrain_lr = pretrain_lr,
+                  finetune_lr = learning_rate, img_shape=img_shape)
 
     test_model = theano.function([network.x, network.y], network.errors)