comparison deep/convolutional_dae/stacked_convolutional_dae.py @ 247:4d109b648c31
Fixed dataset import. Removed unused code from da_conv. Key parameters are now passed as arguments.
author:   humel
date:     Tue, 16 Mar 2010 13:16:28 -0400
parents:  334d2444000d
children: 7e6fecabb656
comparison of parent 246:2024368a8d3d with 247:4d109b648c31
@@ -1,19 +1,21 @@
 import numpy
 import theano
 import time
+import sys
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
 import theano.sandbox.softsign

 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv

+sys.path.append('../../../')
+
 from ift6266 import datasets
-
 from ift6266.baseline.log_reg.log_reg import LogisticRegression

 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):

         self.input = input

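The "fixed dataset import" in this first hunk is a search-path issue rather than a code bug: the script lives three directories below the directory that contains the ift6266 package, so `from ift6266 import datasets` only resolves once that directory is on sys.path. A minimal sketch of the same pattern, assuming (as the relative path itself does) that the interpreter is started from the script's own directory:

    import sys

    # stacked_convolutional_dae.py lives in <root>/deep/convolutional_dae/,
    # and <root> is itself the ift6266 package, so the directory that
    # contains <root> must be on the module search path.  Note that a
    # relative sys.path entry is resolved against the current working
    # directory, so this only works when the script is run from its own
    # directory.
    sys.path.append('../../../')

    from ift6266 import datasets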
@@ -55,12 +57,10 @@
         self.W = theano.shared(value = initial_W, name = "W")
         self.b = theano.shared(value = initial_b, name = "b")


         initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX)
-
-        self.W_prime=T.dtensor4('W_prime')

         self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")

         self.x = input

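The second hunk deletes `self.W_prime = T.dtensor4('W_prime')`, the "unused code" of the commit message: the variable was a free symbolic tensor that appeared neither in the reconstruction cost nor in the parameter list `[ self.W, self.b, self.b_prime ]`, so nothing could ever read or train it. A small self-contained illustration of why such a declaration is inert (toy names, nothing here is from the repository):

    import theano
    import theano.tensor as T

    W_prime = T.dtensor4('W_prime')   # free symbolic variable, like the deleted line

    # A toy cost over an unrelated shared parameter:
    w = theano.shared(2.0, name='w')
    cost = (w - 5.0) ** 2

    # Gradients and updates touch only the declared parameters, so a
    # symbolic variable outside both the cost and the parameter list
    # never influences training.
    gw = T.grad(cost, [w])
    train = theano.function([], cost, updates=[(w, w - 0.1 * gw[0])])
    train()   # runs fine; W_prime participates in nothing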
@@ -94,10 +94,11 @@
         self.cost = T.mean(self.L)

         self.params = [ self.W, self.b, self.b_prime ]

 class LeNetConvPoolLayer(object):
+
     def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)):
         self.input = input

         W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
         self.W = theano.shared(value=W_values)
@@ -209,40 +210,42 @@

         self.finetune = theano.function([self.x, self.y], cost, updates = updates)

         self.errors = self.logLayer.errors(self.y)

-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
-                            pretrain_lr = 0.01, training_epochs = 1000, \
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \
+                            pretrain_lr = 0.1, training_epochs = 1000, \
+                            kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \
+                            corruption_levels = [ 0.2, 0.2, 0.2], \
+                            max_pool_layers = [ [2,2] , [2,2] ], \
                             dataset=datasets.nist_digits):

-    batch_size = 500    # size of the minibatch
+    batch_size = 100    # size of the minibatch

     # allocate symbolic variables for the data
     index = T.lscalar()    # index to a [mini]batch
     x = T.matrix('x')      # the data is presented as rasterized images
     y = T.ivector('y')     # the labels are presented as 1d vector of
                            # [int] labels
+
     layer0_input = x.reshape((x.shape[0],1,32,32))

-
-    # Setup the convolutional layers with their DAs (add as many as you want)
-    corruption_levels = [ 0.2, 0.2, 0.2]
     rng = numpy.random.RandomState(1234)
-    ker1=2
-    ker2=2
     conv_layers=[]
-    conv_layers.append([[ker1,1,5,5], None, [2,2] ])
-    conv_layers.append([[ker2,ker1,5,5], None, [2,2] ])
-
-    # Setup the MLP layers of the network
-    mlp_layers=[500]
-
-    network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4,
+    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ], None, max_pool_layers[0] ]
+    conv_layers.append(init_layer)
+    conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0]
+
+    for i in range(1,len(kernels)):
+        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ], None, max_pool_layers[i] ]
+        conv_layers.append(layer)
+        conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
+
+    network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
-                  corruption_levels = corruption_levels , n_out = 10,
+                  corruption_levels = corruption_levels , n_out = 62,
                   rng = rng , pretrain_lr = pretrain_lr ,
                   finetune_lr = learning_rate )

     test_model = theano.function([network.x, network.y], network.errors)

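The last hunk is the "key parameters" change: the hard-coded ker1/ker2 kernels, pool shapes, corruption levels, and MLP sizes become keyword arguments, and the MLP input size is derived instead of hard-coded as ker2*4*4. The bookkeeping is easy to check by hand with the new defaults: a 32x32 input gives (32-5+1)/2 = 14 after the first 5x5 conv and 2x2 pool stage, then (14-3+1)/2 = 6 after the second, so n_ins_mlp = kernels[-1][0]*conv_n_out**2 = 2*6*6 = 72. (The n_out change from 10 to 62 presumably targets the full NIST character set, 10 digits plus 52 letters.) A hedged sketch of the same computation, with // making the Python 2 integer division explicit:

    def mlp_input_size(kernels, max_pool_layers, image_size=32):
        # Mirrors the conv_layers loop added in this revision: each stage
        # shrinks the feature map to (size - kernel + 1) // pool.
        size = image_size
        for (n_kern, kh, kw), (pool, _) in zip(kernels, max_pool_layers):
            size = (size - kw + 1) // pool
        return kernels[-1][0] * size ** 2

    # Defaults from the new signature:
    print mlp_input_size([ [2,5,5], [2,3,3] ], [ [2,2], [2,2] ])   # -> 72

With the refactoring, these settings can now be varied from the call site instead of by editing the function body; the keyword names below are from the new signature, while the kernel counts of 4 are made-up example values:

    sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs=1,
                           kernels=[ [4,5,5], [4,3,3] ],
                           mlp_layers=[500],
                           corruption_levels=[0.2, 0.2, 0.2],
                           max_pool_layers=[ [2,2], [2,2] ])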