Mercurial > ift6266
diff deep/convolutional_dae/stacked_convolutional_dae.py @ 259:3919c71e3091
Make img_size a parameter, and remove the passing of the image size to the ConvOp. This will have to get back in later somehow.
author:   Arnaud Bergeron <abergeron@gmail.com>
date:     Wed, 17 Mar 2010 15:24:25 -0400
parents:  7e6fecabb656
children: 0c0f0b3f6a93
line wrap: on
line diff
--- a/deep/convolutional_dae/stacked_convolutional_dae.py Tue Mar 16 19:05:59 2010 -0400 +++ b/deep/convolutional_dae/stacked_convolutional_dae.py Wed Mar 17 15:24:25 2010 -0400 @@ -4,19 +4,16 @@ import sys import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams -import theano.sandbox.softsign +#import theano.sandbox.softsign from theano.tensor.signal import downsample from theano.tensor.nnet import conv -sys.path.append('../../../') - from ift6266 import datasets from ift6266.baseline.log_reg.log_reg import LogisticRegression batch_size = 100 - class SigmoidalLayer(object): def __init__(self, rng, input, n_in, n_out): @@ -70,15 +67,12 @@ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, - image_shape=image_shape, unroll_kern=4,unroll_batch=4, border_mode='valid') - + image_shape=image_shape, border_mode='valid') self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) - - da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ - filter_shape[3] ] - da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ] + da_filter_shape = [ filter_shape[1], filter_shape[0], + filter_shape[2], filter_shape[3] ] initial_W_prime = numpy.asarray( numpy.random.uniform( \ low = -numpy.sqrt(6./(fan_in+fan_out)), \ high = numpy.sqrt(6./(fan_in+fan_out)), \ @@ -86,9 +80,7 @@ self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") conv2_out = conv.conv2d(self.y, self.W_prime, - filter_shape = da_filter_shape,\ - image_shape = da_image_shape, \ - unroll_kern=4,unroll_batch=4, \ + filter_shape = da_filter_shape, border_mode='full') self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale @@ -113,7 +105,7 @@ self.b = theano.shared(value=b_values) conv_out = conv.conv2d(input, self.W, - 
filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4,unroll_batch=4) + filter_shape=filter_shape, image_shape=image_shape) fan_in = numpy.prod(filter_shape[1:]) @@ -134,7 +126,7 @@ class SdA(): def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, rng, n_out, - pretrain_lr, finetune_lr): + pretrain_lr, finetune_lr, img_shape): self.layers = [] self.pretrain_functions = [] @@ -151,7 +143,7 @@ max_poolsize=conv_hidden_layers_sizes[i][2] if i == 0 : - layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) + layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape) else: layer_input=self.layers[-1].output @@ -218,13 +210,13 @@ self.errors = self.logLayer.errors(self.y) -def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \ - pretrain_lr = 0.1, training_epochs = 1000, \ - kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500], \ - corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \ - max_pool_layers = [ [2,2] , [2,2] ], \ - dataset=datasets.nist_digits): - +def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1, + pretrain_lr = 0.1, training_epochs = 1000, + kernels = [[4,5,5], [4,3,3]], mlp_layers=[500], + corruption_levels = [0.2, 0.2, 0.2], + batch_size = batch_size, img_shape=(28, 28), + max_pool_layers = [[2,2], [2,2]], + dataset=datasets.mnist(5000)): # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -232,30 +224,32 @@ y = T.ivector('y') # the labels are presented as 1d vector of # [int] labels - layer0_input = x.reshape((x.shape[0],1,32,32)) + layer0_input = x.reshape((x.shape[0],1)+img_shape) rng = numpy.random.RandomState(1234) conv_layers=[] - init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\ - [ batch_size , 1, 32, 32 ], - max_pool_layers[0] ] + init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]], + None, # do not specify the batch size since it can + # change for the last one and then 
theano will + # crash. + max_pool_layers[0]] conv_layers.append(init_layer) - conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0] + conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0] for i in range(1,len(kernels)): - layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\ - [ batch_size, kernels[i-1][0], conv_n_out,conv_n_out ], - max_pool_layers[i] ] + layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]], + None, # same comment as for init_layer + max_pool_layers[i] ] conv_layers.append(layer) conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0] - print layer [1] + network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, conv_hidden_layers_sizes = conv_layers, mlp_hidden_layers_sizes = mlp_layers, - corruption_levels = corruption_levels , n_out = 62, - rng = rng , pretrain_lr = pretrain_lr , - finetune_lr = learning_rate ) + corruption_levels = corruption_levels, n_out = 62, + rng = rng , pretrain_lr = pretrain_lr, + finetune_lr = learning_rate, img_shape=img_shape) test_model = theano.function([network.x, network.y], network.errors)