ift6266: diff deep/convolutional_dae/stacked_convolutional_dae.py @ 247:4d109b648c31
Fixed dataset import. Removed unnecessary code from da_conv. Key parameters are now passed as arguments.
author | humel
date | Tue, 16 Mar 2010 13:16:28 -0400
parents | 334d2444000d
children | 7e6fecabb656
--- a/deep/convolutional_dae/stacked_convolutional_dae.py	Tue Mar 16 12:14:10 2010 -0400
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py	Tue Mar 16 13:16:28 2010 -0400
@@ -1,6 +1,7 @@
 import numpy
 import theano
 import time
+import sys
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
 import theano.sandbox.softsign
@@ -8,10 +9,11 @@
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv

-from ift6266 import datasets
+sys.path.append('../../../')
+from ift6266 import datasets
 from ift6266.baseline.log_reg.log_reg import LogisticRegression
-
+
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
@@ -57,8 +59,6 @@
     initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX)
-
-
     self.W_prime=T.dtensor4('W_prime')

     self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
@@ -96,6 +96,7 @@
     self.params = [ self.W, self.b, self.b_prime ]

 class LeNetConvPoolLayer(object):
+
     def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)):
         self.input = input
@@ -211,36 +212,38 @@
         self.errors = self.logLayer.errors(self.y)

-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
-                            pretrain_lr = 0.01, training_epochs = 1000, \
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \
+                            pretrain_lr = 0.1, training_epochs = 1000, \
+                            kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \
+                            corruption_levels = [ 0.2, 0.2, 0.2], \
+                            max_pool_layers = [ [2,2] , [2,2] ], \
                             dataset=datasets.nist_digits):

-    batch_size = 500 # size of the minibatch
+    batch_size = 100 # size of the minibatch

     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x')   # the data is presented as rasterized images
     y = T.ivector('y')  # the labels are presented as 1d vector of
                         # [int] labels
+
     layer0_input = x.reshape((x.shape[0],1,32,32))
-
-    # Setup the convolutional layers with their DAs(add as many as you want)
-    corruption_levels = [ 0.2, 0.2, 0.2]

     rng = numpy.random.RandomState(1234)
-    ker1=2
-    ker2=2
     conv_layers=[]
-    conv_layers.append([[ker1,1,5,5], None, [2,2] ])
-    conv_layers.append([[ker2,ker1,5,5], None, [2,2] ])
+    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ], None, max_pool_layers[0] ]
+    conv_layers.append(init_layer)
+    conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0]

-    # Setup the MLP layers of the network
-    mlp_layers=[500]
-
-    network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4,
+    for i in range(1,len(kernels)):
+        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ], None, max_pool_layers[i] ]
+        conv_layers.append(layer)
+        conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
+
+    network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
-                  corruption_levels = corruption_levels , n_out = 10,
+                  corruption_levels = corruption_levels , n_out = 62,
                   rng = rng , pretrain_lr = pretrain_lr ,
                   finetune_lr = learning_rate )
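
For reference, a minimal standalone sketch of the size bookkeeping the patch introduces: it reproduces the conv_n_out arithmetic from the new sgd_optimization_mnist using the default kernels and pooling factors, assuming 32x32 input images as in the diff. The script below is written for illustration and is not part of the changeset; it uses // where the patch relies on Python 2 integer division with /.

# Sketch of the patched conv_n_out / n_ins_mlp computation (illustrative only).
kernels = [[2, 5, 5], [2, 3, 3]]      # [n_filters, kernel_h, kernel_w] per conv layer
max_pool_layers = [[2, 2], [2, 2]]    # pooling factor per layer

# First conv/pool stage on a 32x32 image: valid convolution then pooling.
conv_n_out = (32 - kernels[0][2] + 1) // max_pool_layers[0][0]              # (32-5+1)/2 = 14
# Remaining stages shrink the feature maps the same way.
for i in range(1, len(kernels)):
    conv_n_out = (conv_n_out - kernels[i][2] + 1) // max_pool_layers[i][0]  # (14-3+1)/2 = 6

# Flattened input size of the first MLP layer, as passed to SdA via n_ins_mlp.
n_ins_mlp = kernels[-1][0] * conv_n_out ** 2                                # 2*6*6 = 72

With the defaults, the last conv stage therefore produces 2 feature maps of 6x6, so the 500-unit MLP layer receives 72 inputs, matching the n_ins_mlp expression in the patch. Since the key parameters are now function arguments, a caller can override them, e.g. sgd_optimization_mnist(kernels=[[4,5,5],[4,3,3]], max_pool_layers=[[2,2],[2,2]], mlp_layers=[500]) (a hypothetical call, not taken from the changeset).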