Mercurial > ift6266
comparison deep/convolutional_dae/stacked_convolutional_dae.py @ 262:716c99f4eb3a
merge
author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
---|---|
date | Wed, 17 Mar 2010 16:41:51 -0400 |
parents | 0c0f0b3f6a93 |
children |
comparison
equal
deleted
inserted
replaced
261:6d16a2bf142b | 262:716c99f4eb3a |
---|---|
2 import theano | 2 import theano |
3 import time | 3 import time |
4 import sys | 4 import sys |
5 import theano.tensor as T | 5 import theano.tensor as T |
6 from theano.tensor.shared_randomstreams import RandomStreams | 6 from theano.tensor.shared_randomstreams import RandomStreams |
7 import theano.sandbox.softsign | 7 #import theano.sandbox.softsign |
8 | 8 |
9 from theano.tensor.signal import downsample | 9 from theano.tensor.signal import downsample |
10 from theano.tensor.nnet import conv | 10 from theano.tensor.nnet import conv |
11 | 11 |
12 sys.path.append('../../../') | |
13 | |
14 from ift6266 import datasets | 12 from ift6266 import datasets |
15 from ift6266.baseline.log_reg.log_reg import LogisticRegression | 13 from ift6266.baseline.log_reg.log_reg import LogisticRegression |
16 | 14 |
17 batch_size = 100 | 15 batch_size = 100 |
18 | 16 |
19 | |
20 class SigmoidalLayer(object): | 17 class SigmoidalLayer(object): |
21 def __init__(self, rng, input, n_in, n_out): | 18 def __init__(self, rng, input, n_in, n_out): |
22 | 19 |
23 self.input = input | 20 self.input = input |
24 | 21 |
25 W_values = numpy.asarray( rng.uniform( \ | 22 W_values = numpy.asarray( rng.uniform( \ |
26 low = -numpy.sqrt(6./(n_in+n_out)), \ | 23 low = -numpy.sqrt(6./(n_in+n_out)), \ |
27 high = numpy.sqrt(6./(n_in+n_out)), \ | 24 high = numpy.sqrt(6./(n_in+n_out)), \ |
28 size = (n_in, n_out)), dtype = theano.config.floatX) | 25 size = (n_in, n_out)), dtype = theano.config.floatX) |
29 self.W = theano.shared(value = W_values) | 26 self.W = theano.shared(value = W_values) |
35 self.params = [self.W, self.b] | 32 self.params = [self.W, self.b] |
36 | 33 |
37 class dA_conv(object): | 34 class dA_conv(object): |
38 | 35 |
39 def __init__(self, input, filter_shape, corruption_level = 0.1, | 36 def __init__(self, input, filter_shape, corruption_level = 0.1, |
40 shared_W = None, shared_b = None, image_shape = None): | 37 shared_W = None, shared_b = None, image_shape = None, |
| | 38 poolsize = (2,2)): |
41 | 39 |
42 theano_rng = RandomStreams() | 40 theano_rng = RandomStreams() |
43 | 41 |
44 fan_in = numpy.prod(filter_shape[1:]) | 42 fan_in = numpy.prod(filter_shape[1:]) |
45 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) | 43 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) |
67 self.x = input | 65 self.x = input |
68 | 66 |
69 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x | 67 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x |
70 | 68 |
71 conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, | 69 conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, |
72 image_shape=image_shape, | 70 image_shape=image_shape, border_mode='valid') |
73 unroll_kern=4,unroll_batch=4, | |
74 border_mode='valid') | |
75 | |
76 | 71 |
77 self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) | 72 self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) |
78 | 73 |
79 | 74 da_filter_shape = [ filter_shape[1], filter_shape[0], |
80 da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ | 75 filter_shape[2], filter_shape[3] ] |
81 filter_shape[3] ] | |
82 da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ] | |
83 #import pdb; pdb.set_trace() | |
84 initial_W_prime = numpy.asarray( numpy.random.uniform( \ | 76 initial_W_prime = numpy.asarray( numpy.random.uniform( \ |
85 low = -numpy.sqrt(6./(fan_in+fan_out)), \ | 77 low = -numpy.sqrt(6./(fan_in+fan_out)), \ |
86 high = numpy.sqrt(6./(fan_in+fan_out)), \ | 78 high = numpy.sqrt(6./(fan_in+fan_out)), \ |
87 size = da_filter_shape), dtype = theano.config.floatX) | 79 size = da_filter_shape), dtype = theano.config.floatX) |
88 self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") | 80 self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") |
89 | 81 |
90 conv2_out = conv.conv2d(self.y, self.W_prime, | 82 conv2_out = conv.conv2d(self.y, self.W_prime, |
91 filter_shape = da_filter_shape,\ | 83 filter_shape = da_filter_shape, |
92 image_shape = da_image_shape, \ | |
93 unroll_kern=4,unroll_batch=4, \ | |
94 border_mode='full') | 84 border_mode='full') |
95 | 85 |
96 self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale | 86 self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale |
97 | 87 |
98 scaled_x = (self.x + center) / scale | 88 scaled_x = (self.x + center) / scale |
113 | 103 |
114 b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) | 104 b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) |
115 self.b = theano.shared(value=b_values) | 105 self.b = theano.shared(value=b_values) |
116 | 106 |
117 conv_out = conv.conv2d(input, self.W, | 107 conv_out = conv.conv2d(input, self.W, |
118 filter_shape=filter_shape, image_shape=image_shape, | 108 filter_shape=filter_shape, image_shape=image_shape) |
119 unroll_kern=4,unroll_batch=4) | |
120 | 109 |
121 | 110 |
122 fan_in = numpy.prod(filter_shape[1:]) | 111 fan_in = numpy.prod(filter_shape[1:]) |
123 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize) | 112 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize) |
124 | 113 |
135 | 124 |
136 | 125 |
137 class SdA(): | 126 class SdA(): |
138 def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, | 127 def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes, |
139 mlp_hidden_layers_sizes, corruption_levels, rng, n_out, | 128 mlp_hidden_layers_sizes, corruption_levels, rng, n_out, |
140 pretrain_lr, finetune_lr): | 129 pretrain_lr, finetune_lr, img_shape): |
141 | 130 |
142 self.layers = [] | 131 self.layers = [] |
143 self.pretrain_functions = [] | 132 self.pretrain_functions = [] |
144 self.params = [] | 133 self.params = [] |
145 self.conv_n_layers = len(conv_hidden_layers_sizes) | 134 self.conv_n_layers = len(conv_hidden_layers_sizes) |
152 filter_shape=conv_hidden_layers_sizes[i][0] | 141 filter_shape=conv_hidden_layers_sizes[i][0] |
153 image_shape=conv_hidden_layers_sizes[i][1] | 142 image_shape=conv_hidden_layers_sizes[i][1] |
154 max_poolsize=conv_hidden_layers_sizes[i][2] | 143 max_poolsize=conv_hidden_layers_sizes[i][2] |
155 | 144 |
156 if i == 0 : | 145 if i == 0 : |
157 layer_input=self.x.reshape((batch_size, 1, 32, 32)) | 146 layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape) |
158 else: | 147 else: |
159 layer_input=self.layers[-1].output | 148 layer_input=self.layers[-1].output |
160 | 149 |
161 layer = LeNetConvPoolLayer(rng, input=layer_input, | 150 layer = LeNetConvPoolLayer(rng, input=layer_input, |
162 image_shape=image_shape, | 151 image_shape=image_shape, |
168 self.params += layer.params | 157 self.params += layer.params |
169 | 158 |
170 da_layer = dA_conv(corruption_level = corruption_levels[0], | 159 da_layer = dA_conv(corruption_level = corruption_levels[0], |
171 input = layer_input, | 160 input = layer_input, |
172 shared_W = layer.W, shared_b = layer.b, | 161 shared_W = layer.W, shared_b = layer.b, |
173 filter_shape=filter_shape, | 162 filter_shape = filter_shape, |
174 image_shape = image_shape ) | 163 image_shape = image_shape ) |
175 | 164 |
176 gparams = T.grad(da_layer.cost, da_layer.params) | 165 gparams = T.grad(da_layer.cost, da_layer.params) |
177 | 166 |
178 updates = {} | 167 updates = {} |
219 | 208 |
220 self.finetune = theano.function([self.x, self.y], cost, updates = updates) | 209 self.finetune = theano.function([self.x, self.y], cost, updates = updates) |
221 | 210 |
222 self.errors = self.logLayer.errors(self.y) | 211 self.errors = self.logLayer.errors(self.y) |
223 | 212 |
224 def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 0, \ | 213 def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1, |
225 pretrain_lr = 0.1, training_epochs = 1000, \ | 214 pretrain_lr = 0.1, training_epochs = 1000, |
226 kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \ | 215 kernels = [[4,5,5], [4,3,3]], mlp_layers=[500], |
227 corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \ | 216 corruption_levels = [0.2, 0.2, 0.2], |
228 max_pool_layers = [ [2,2] , [2,2] ], \ | 217 batch_size = batch_size, img_shape=(28, 28), |
229 dataset=datasets.nist_digits): | 218 max_pool_layers = [[2,2], [2,2]], |
230 | 219 dataset=datasets.mnist(5000)): |
231 | 220 |
232 # allocate symbolic variables for the data | 221 # allocate symbolic variables for the data |
233 index = T.lscalar() # index to a [mini]batch | 222 index = T.lscalar() # index to a [mini]batch |
234 x = T.matrix('x') # the data is presented as rasterized images | 223 x = T.matrix('x') # the data is presented as rasterized images |
235 y = T.ivector('y') # the labels are presented as 1d vector of | 224 y = T.ivector('y') # the labels are presented as 1d vector of |
236 # [int] labels | 225 # [int] labels |
237 | 226 |
238 layer0_input = x.reshape((batch_size,1,32,32)) | 227 layer0_input = x.reshape((x.shape[0],1)+img_shape) |
239 | 228 |
240 rng = numpy.random.RandomState(1234) | 229 rng = numpy.random.RandomState(1234) |
241 conv_layers=[] | 230 conv_layers=[] |
242 init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\ | 231 init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]], |
243 [ batch_size , 1, 32, 32 ], | 232 None, # do not specify the batch size since it can |
244 max_pool_layers[0] ] | 233 # change for the last one and then theano will |
| | 234 # crash. |
| | 235 max_pool_layers[0]] |
245 conv_layers.append(init_layer) | 236 conv_layers.append(init_layer) |
246 | 237 |
247 conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0]) | 238 conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0] |
248 print init_layer[1] | 239 |
249 | |
250 for i in range(1,len(kernels)): | 240 for i in range(1,len(kernels)): |
251 layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\ | 241 layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]], |
252 [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ], | 242 None, # same comment as for init_layer |
253 max_pool_layers[i] ] | 243 max_pool_layers[i] ] |
254 conv_layers.append(layer) | 244 conv_layers.append(layer) |
255 conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]) | 245 conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0] |
256 print layer [1] | 246 |
257 network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, | 247 network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, |
258 conv_hidden_layers_sizes = conv_layers, | 248 conv_hidden_layers_sizes = conv_layers, |
259 mlp_hidden_layers_sizes = mlp_layers, | 249 mlp_hidden_layers_sizes = mlp_layers, |
260 corruption_levels = corruption_levels , n_out = 62, | 250 corruption_levels = corruption_levels, n_out = 62, |
261 rng = rng , pretrain_lr = pretrain_lr , | 251 rng = rng , pretrain_lr = pretrain_lr, |
262 finetune_lr = learning_rate ) | 252 finetune_lr = learning_rate, img_shape=img_shape) |
263 | 253 |
264 test_model = theano.function([network.x, network.y], network.errors) | 254 test_model = theano.function([network.x, network.y], network.errors) |
265 | 255 |
266 start_time = time.clock() | 256 start_time = time.clock() |
267 for i in xrange(len(network.layers)-len(mlp_layers)): | 257 for i in xrange(len(network.layers)-len(mlp_layers)): |
268 for epoch in xrange(pretraining_epochs): | 258 for epoch in xrange(pretraining_epochs): |
269 for x, y in dataset.train(batch_size): | 259 for x, y in dataset.train(batch_size): |
270 if x.shape[0] == batch_size: | 260 c = network.pretrain_functions[i](x) |
271 c = network.pretrain_functions[i](x) | |
272 | |
273 print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c | 261 print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c |
274 | 262 |
275 patience = 10000 # look as this many examples regardless | 263 patience = 10000 # look as this many examples regardless |
276 patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS | 264 patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS |
277 # FOUND | 265 # FOUND |
289 iter = 0 | 277 iter = 0 |
290 | 278 |
291 while (epoch < training_epochs) and (not done_looping): | 279 while (epoch < training_epochs) and (not done_looping): |
292 epoch = epoch + 1 | 280 epoch = epoch + 1 |
293 for x, y in dataset.train(batch_size): | 281 for x, y in dataset.train(batch_size): |
294 if x.shape[0] != batch_size: | 282 |
295 continue | |
296 cost_ij = network.finetune(x, y) | 283 cost_ij = network.finetune(x, y) |
297 iter += 1 | 284 iter += 1 |
298 | 285 |
299 if iter % validation_frequency == 0: | 286 if iter % validation_frequency == 0: |
300 validation_losses = [] | 287 validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)] |
301 for xv, yv in dataset.valid(batch_size): | |
302 if xv.shape[0] == batch_size: | |
303 validation_losses.append(test_model(xv, yv)) | |
304 this_validation_loss = numpy.mean(validation_losses) | 288 this_validation_loss = numpy.mean(validation_losses) |
305 print('epoch %i, iter %i, validation error %f %%' % \ | 289 print('epoch %i, iter %i, validation error %f %%' % \ |
306 (epoch, iter, this_validation_loss*100.)) | 290 (epoch, iter, this_validation_loss*100.)) |
307 | 291 |
308 # if we got the best validation score until now | 292 # if we got the best validation score until now |
316 # save best validation score and iteration number | 300 # save best validation score and iteration number |
317 best_validation_loss = this_validation_loss | 301 best_validation_loss = this_validation_loss |
318 best_iter = iter | 302 best_iter = iter |
319 | 303 |
320 # test it on the test set | 304 # test it on the test set |
321 test_losses=[] | 305 test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] |
322 for xt, yt in dataset.test(batch_size): | |
323 if xt.shape[0] == batch_size: | |
324 test_losses.append(test_model(xt, yt)) | |
325 test_score = numpy.mean(test_losses) | 306 test_score = numpy.mean(test_losses) |
326 print((' epoch %i, iter %i, test error of best ' | 307 print((' epoch %i, iter %i, test error of best ' |
327 'model %f %%') % | 308 'model %f %%') % |
328 (epoch, iter, test_score*100.)) | 309 (epoch, iter, test_score*100.)) |
329 | 310 |