ift6266: comparison of deep/convolutional_dae/stacked_convolutional_dae.py @ 266:1e4e60ddadb1
Merge. Oh, and in the last commit I forgot to mention that I added code to handle isolating different clones, so that experiments can be run while the code is being modified at the same time.
author   | fsavard
date     | Fri, 19 Mar 2010 10:56:16 -0400
parents  | 0c0f0b3f6a93
children |
265:c8fe09a65039 | 266:1e4e60ddadb1 |
1 import numpy | 1 import numpy |
2 import theano | 2 import theano |
3 import time | 3 import time |
| 4 import sys |
4 import theano.tensor as T | 5 import theano.tensor as T |
5 from theano.tensor.shared_randomstreams import RandomStreams | 6 from theano.tensor.shared_randomstreams import RandomStreams |
6 import theano.sandbox.softsign | 7 #import theano.sandbox.softsign |
7 | 8 |
8 from theano.tensor.signal import downsample | 9 from theano.tensor.signal import downsample |
9 from theano.tensor.nnet import conv | 10 from theano.tensor.nnet import conv |
10 | 11 |
11 from ift6266 import datasets | 12 from ift6266 import datasets |
12 | |
13 from ift6266.baseline.log_reg.log_reg import LogisticRegression | 13 from ift6266.baseline.log_reg.log_reg import LogisticRegression |
14 | 14 |
| 15 batch_size = 100 |
| 16 |
15 class SigmoidalLayer(object): | 17 class SigmoidalLayer(object): |
16 def __init__(self, rng, input, n_in, n_out): | 18 def __init__(self, rng, input, n_in, n_out): |
17 | 19 |
18 self.input = input | 20 self.input = input |
19 | 21 |
55 self.W = theano.shared(value = initial_W, name = "W") | 57 self.W = theano.shared(value = initial_W, name = "W") |
56 self.b = theano.shared(value = initial_b, name = "b") | 58 self.b = theano.shared(value = initial_b, name = "b") |
57 | 59 |
58 | 60 |
59 initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX) | 61 initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX) |
60 | |
61 self.W_prime=T.dtensor4('W_prime') | |
62 | 62 |
63 self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime") | 63 self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime") |
64 | 64 |
65 self.x = input | 65 self.x = input |
66 | 66 |
67 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x | 67 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x |
68 | 68 |
69 conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, | 69 conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, |
70 image_shape=image_shape, border_mode='valid') | 70 image_shape=image_shape, border_mode='valid') |
71 | |
72 | 71 |
73 self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) | 72 self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) |
74 | 73 |
75 | 74 da_filter_shape = [ filter_shape[1], filter_shape[0], |
76 da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ | 75 filter_shape[2], filter_shape[3] ] |
77 filter_shape[3] ] | |
78 initial_W_prime = numpy.asarray( numpy.random.uniform( \ | 76 initial_W_prime = numpy.asarray( numpy.random.uniform( \ |
79 low = -numpy.sqrt(6./(fan_in+fan_out)), \ | 77 low = -numpy.sqrt(6./(fan_in+fan_out)), \ |
80 high = numpy.sqrt(6./(fan_in+fan_out)), \ | 78 high = numpy.sqrt(6./(fan_in+fan_out)), \ |
81 size = da_filter_shape), dtype = theano.config.floatX) | 79 size = da_filter_shape), dtype = theano.config.floatX) |
82 self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") | 80 self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") |
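Note: da_filter_shape above builds the decoder filters by swapping the first two dimensions of the encoder's filter_shape, since the decoder convolution maps the hidden feature maps back to the input maps. A minimal sketch of that bookkeeping, with illustrative shapes that are not taken from the diff:

    # encoder filters: (n_output_maps, n_input_maps, kernel_h, kernel_w)
    filter_shape = (4, 1, 5, 5)
    # decoder filters go the other way: 4 feature maps back to 1 input map
    da_filter_shape = (filter_shape[1], filter_shape[0],
                       filter_shape[2], filter_shape[3])   # (1, 4, 5, 5)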
94 self.cost = T.mean(self.L) | 92 self.cost = T.mean(self.L) |
95 | 93 |
96 self.params = [ self.W, self.b, self.b_prime ] | 94 self.params = [ self.W, self.b, self.b_prime ] |
97 | 95 |
98 class LeNetConvPoolLayer(object): | 96 class LeNetConvPoolLayer(object): |
| 97 |
99 def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)): | 98 def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)): |
100 self.input = input | 99 self.input = input |
101 | 100 |
102 W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX) | 101 W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX) |
103 self.W = theano.shared(value=W_values) | 102 self.W = theano.shared(value=W_values) |
125 | 124 |
126 | 125 |
127 class SdA(): | 126 class SdA(): |
128 def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, | 127 def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, |
129 mlp_hidden_layers_sizes, corruption_levels, rng, n_out, | 128 mlp_hidden_layers_sizes, corruption_levels, rng, n_out, |
130 pretrain_lr, finetune_lr): | 129 pretrain_lr, finetune_lr, img_shape): |
131 | 130 |
132 self.layers = [] | 131 self.layers = [] |
133 self.pretrain_functions = [] | 132 self.pretrain_functions = [] |
134 self.params = [] | 133 self.params = [] |
135 self.conv_n_layers = len(conv_hidden_layers_sizes) | 134 self.conv_n_layers = len(conv_hidden_layers_sizes) |
142 filter_shape=conv_hidden_layers_sizes[i][0] | 141 filter_shape=conv_hidden_layers_sizes[i][0] |
143 image_shape=conv_hidden_layers_sizes[i][1] | 142 image_shape=conv_hidden_layers_sizes[i][1] |
144 max_poolsize=conv_hidden_layers_sizes[i][2] | 143 max_poolsize=conv_hidden_layers_sizes[i][2] |
145 | 144 |
146 if i == 0 : | 145 if i == 0 : |
147 layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) | 146 layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape) |
148 else: | 147 else: |
149 layer_input=self.layers[-1].output | 148 layer_input=self.layers[-1].output |
150 | 149 |
151 layer = LeNetConvPoolLayer(rng, input=layer_input, | 150 layer = LeNetConvPoolLayer(rng, input=layer_input, |
152 image_shape=image_shape, | 151 image_shape=image_shape, |
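Note: the hard-coded 32x32 reshape is replaced by the img_shape argument, so the same class works for other image sizes. A quick illustration of the resulting 4D shape (the batch size of 100 is only an example, not a value from the diff):

    img_shape = (28, 28)
    # (n_examples, n_channels) + img_shape gives the shape expected by conv2d
    print (100, 1) + img_shape    # (100, 1, 28, 28)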
209 | 208 |
210 self.finetune = theano.function([self.x, self.y], cost, updates = updates) | 209 self.finetune = theano.function([self.x, self.y], cost, updates = updates) |
211 | 210 |
212 self.errors = self.logLayer.errors(self.y) | 211 self.errors = self.logLayer.errors(self.y) |
213 | 212 |
214 def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \ | 213 def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1, |
215 pretrain_lr = 0.01, training_epochs = 1000, \ | 214 pretrain_lr = 0.1, training_epochs = 1000, |
216 dataset=datasets.nist_digits): | 215 kernels = [[4,5,5], [4,3,3]], mlp_layers=[500], |
217 | 216 corruption_levels = [0.2, 0.2, 0.2], |
218 batch_size = 500 # size of the minibatch | 217 batch_size = batch_size, img_shape=(28, 28), |
| 218 max_pool_layers = [[2,2], [2,2]], |
| 219 dataset=datasets.mnist(5000)): |
219 | 220 |
220 # allocate symbolic variables for the data | 221 # allocate symbolic variables for the data |
221 index = T.lscalar() # index to a [mini]batch | 222 index = T.lscalar() # index to a [mini]batch |
222 x = T.matrix('x') # the data is presented as rasterized images | 223 x = T.matrix('x') # the data is presented as rasterized images |
223 y = T.ivector('y') # the labels are presented as 1d vector of | 224 y = T.ivector('y') # the labels are presented as 1d vector of |
224 # [int] labels | 225 # [int] labels |
225 layer0_input = x.reshape((x.shape[0],1,32,32)) | 226 |
226 | 227 layer0_input = x.reshape((x.shape[0],1)+img_shape) |
227 | 228 |
228 # Setup the convolutional layers with their DAs(add as many as you want) | |
229 corruption_levels = [ 0.2, 0.2, 0.2] | |
230 rng = numpy.random.RandomState(1234) | 229 rng = numpy.random.RandomState(1234) |
231 ker1=2 | |
232 ker2=2 | |
233 conv_layers=[] | 230 conv_layers=[] |
234 conv_layers.append([[ker1,1,5,5], None, [2,2] ]) | 231 init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]], |
235 conv_layers.append([[ker2,ker1,5,5], None, [2,2] ]) | 232 None, # do not specify the batch size since it can |
236 | 233 # change for the last one and then theano will |
237 # Setup the MLP layers of the network | 234 # crash. |
238 mlp_layers=[500] | 235 max_pool_layers[0]] |
239 | 236 conv_layers.append(init_layer) |
240 network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4, | 237 |
| 238 conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0] |
| 239 |
| 240 for i in range(1,len(kernels)): |
| 241 layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]], |
| 242 None, # same comment as for init_layer |
| 243 max_pool_layers[i] ] |
| 244 conv_layers.append(layer) |
| 245 conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0] |
| 246 |
| 247 network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, |
241 conv_hidden_layers_sizes = conv_layers, | 248 conv_hidden_layers_sizes = conv_layers, |
242 mlp_hidden_layers_sizes = mlp_layers, | 249 mlp_hidden_layers_sizes = mlp_layers, |
243 corruption_levels = corruption_levels , n_out = 10, | 250 corruption_levels = corruption_levels, n_out = 62, |
244 rng = rng , pretrain_lr = pretrain_lr , | 251 rng = rng , pretrain_lr = pretrain_lr, |
245 finetune_lr = learning_rate ) | 252 finetune_lr = learning_rate, img_shape=img_shape) |
246 | 253 |
247 test_model = theano.function([network.x, network.y], network.errors) | 254 test_model = theano.function([network.x, network.y], network.errors) |
248 | 255 |
249 start_time = time.clock() | 256 start_time = time.clock() |
250 for i in xrange(len(network.layers)-len(mlp_layers)): | 257 for i in xrange(len(network.layers)-len(mlp_layers)): |
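For reference, the output-size arithmetic introduced in sgd_optimization_mnist can be checked by hand with the new default arguments (img_shape=(28, 28), kernels=[[4,5,5], [4,3,3]], max_pool_layers=[[2,2], [2,2]]); this is just the formula from the diff evaluated on those defaults:

    img_shape = (28, 28)
    kernels = [[4, 5, 5], [4, 3, 3]]
    max_pool_layers = [[2, 2], [2, 2]]

    # first 'valid' convolution followed by 2x2 max pooling
    conv_n_out = (img_shape[0] - kernels[0][2] + 1) / max_pool_layers[0][0]  # (28-5+1)/2 = 12
    # second convolution + pooling
    conv_n_out = (conv_n_out - kernels[1][2] + 1) / max_pool_layers[1][0]    # (12-3+1)/2 = 5
    # flattened size fed to the first MLP layer
    n_ins_mlp = kernels[-1][0] * conv_n_out ** 2                             # 4 * 5 * 5 = 100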