diff deep/convolutional_dae/stacked_convolutional_dae.py @ 266:1e4e60ddadb1
Merge. Ah, and in the last commit I forgot to mention that I added code to manage the isolation of different clones, so experiments can run while the code is being modified at the same time.
| author | fsavard |
|---|---|
| date | Fri, 19 Mar 2010 10:56:16 -0400 |
| parents | 0c0f0b3f6a93 |
| children | |
```diff
--- a/deep/convolutional_dae/stacked_convolutional_dae.py	Fri Mar 19 10:54:39 2010 -0400
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py	Fri Mar 19 10:56:16 2010 -0400
@@ -1,17 +1,19 @@
 import numpy
 import theano
 import time
+import sys
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
-import theano.sandbox.softsign
+#import theano.sandbox.softsign
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
 from ift6266 import datasets
+from ift6266.baseline.log_reg.log_reg import LogisticRegression
 
-from ift6266.baseline.log_reg.log_reg import LogisticRegression
-
+batch_size = 100
+
 class SigmoidalLayer(object):
 
     def __init__(self, rng, input, n_in, n_out):
@@ -57,8 +59,6 @@
         initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX)
 
-
-
         self.W_prime=T.dtensor4('W_prime')
 
         self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
@@ -68,13 +68,11 @@
         conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
                                 image_shape=image_shape, border_mode='valid')
-
         self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
-
-        da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
-                           filter_shape[3] ]
+        da_filter_shape = [ filter_shape[1], filter_shape[0],
+                            filter_shape[2], filter_shape[3] ]
 
         initial_W_prime =  numpy.asarray( numpy.random.uniform( \
               low = -numpy.sqrt(6./(fan_in+fan_out)), \
               high = numpy.sqrt(6./(fan_in+fan_out)), \
@@ -96,6 +94,7 @@
         self.params = [ self.W, self.b, self.b_prime ]
 
 class LeNetConvPoolLayer(object):
+
     def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)):
         self.input = input
@@ -127,7 +126,7 @@
 class SdA():
     def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes,
                  mlp_hidden_layers_sizes, corruption_levels, rng, n_out,
-                 pretrain_lr, finetune_lr):
+                 pretrain_lr, finetune_lr, img_shape):
 
         self.layers = []
         self.pretrain_functions = []
@@ -144,7 +143,7 @@
             max_poolsize=conv_hidden_layers_sizes[i][2]
 
             if i == 0 :
-                layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32))
+                layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape)
             else:
                 layer_input=self.layers[-1].output
@@ -211,38 +210,46 @@
         self.errors = self.logLayer.errors(self.y)
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
-                            pretrain_lr = 0.01, training_epochs = 1000, \
-                            dataset=datasets.nist_digits):
-
-    batch_size = 500 # size of the minibatch
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1,
+                           pretrain_lr = 0.1, training_epochs = 1000,
+                           kernels = [[4,5,5], [4,3,3]], mlp_layers=[500],
+                           corruption_levels = [0.2, 0.2, 0.2],
+                           batch_size = batch_size, img_shape=(28, 28),
+                           max_pool_layers = [[2,2], [2,2]],
+                           dataset=datasets.mnist(5000)):
 
     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x') # the data is presented as rasterized images
     y = T.ivector('y') # the labels are presented as 1d vector of
                        # [int] labels
-    layer0_input = x.reshape((x.shape[0],1,32,32))
+
+    layer0_input = x.reshape((x.shape[0],1)+img_shape)
 
-
-    # Setup the convolutional layers with their DAs(add as many as you want)
-    corruption_levels = [ 0.2, 0.2, 0.2]
     rng = numpy.random.RandomState(1234)
-    ker1=2
-    ker2=2
     conv_layers=[]
-    conv_layers.append([[ker1,1,5,5], None, [2,2] ])
-    conv_layers.append([[ker2,ker1,5,5], None, [2,2] ])
+    init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]],
+                  None, # do not specify the batch size since it can
+                        # change for the last one and then theano will
+                        # crash.
+                  max_pool_layers[0]]
+    conv_layers.append(init_layer)
+
+    conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0]
 
-    # Setup the MLP layers of the network
-    mlp_layers=[500]
-
-    network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4,
+    for i in range(1,len(kernels)):
+        layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]],
+                 None, # same comment as for init_layer
+                 max_pool_layers[i] ]
+        conv_layers.append(layer)
+        conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
+
+    network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
-                  corruption_levels = corruption_levels , n_out = 10,
-                  rng = rng , pretrain_lr = pretrain_lr ,
-                  finetune_lr = learning_rate )
+                  corruption_levels = corruption_levels, n_out = 62,
+                  rng = rng , pretrain_lr = pretrain_lr,
+                  finetune_lr = learning_rate, img_shape=img_shape)
 
     test_model = theano.function([network.x, network.y], network.errors)
```
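For reference, the `n_ins_mlp` value passed to `SdA` in the new code follows from the per-layer shrinkage tracked by `conv_n_out`: each `'valid'` convolution reduces the map to `n - kernel + 1`, and max-pooling then divides it by the pool size. Below is a minimal sketch of that arithmetic, assuming square images and the patch's new defaults (28×28 input, `kernels = [[4,5,5],[4,3,3]]`, 2×2 pooling); the helper name `mlp_input_size` is hypothetical and not part of the repository.

```python
# Hypothetical helper (not in the repository) mirroring the patch's
# conv_n_out / n_ins_mlp computation.
def mlp_input_size(img_shape=(28, 28),
                   kernels=[[4, 5, 5], [4, 3, 3]],
                   max_pool_layers=[[2, 2], [2, 2]]):
    n_out = img_shape[0]                    # assumes square images
    for kern, pool in zip(kernels, max_pool_layers):
        # 'valid' convolution shrinks the map, pooling divides it
        n_out = (n_out - kern[2] + 1) // pool[0]
    return kernels[-1][0] * n_out ** 2      # feature maps * height * width

print(mlp_input_size())  # (28-5+1)/2 = 12, (12-3+1)/2 = 5  ->  4*5*5 = 100
```

Note that the patched script relies on Python 2 integer division (`/`); the sketch uses `//` to make the truncation explicit.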