ift6266: comparison deep/convolutional_dae/stacked_convolutional_dae.py @ 249:1bf046c0c84a
Fixed a bug with image_shape.
author | humel |
date | Tue, 16 Mar 2010 20:19:13 -0400 |
parents | 7e6fecabb656 |
children | 0c0f0b3f6a93 |
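The fix itself: Theano's `conv.conv2d` takes `image_shape` as a compile-time hint that must consist of plain Python integers (the `unroll_kern`/`unroll_batch` optimizations depend on it), so building it from the symbolic `x.shape[0]`, as the parent revision did, broke that assumption. This revision substitutes the constant `batch_size` wherever an `image_shape` is constructed and, further down, skips any minibatch smaller than `batch_size`. A minimal pure-Python sketch of the shape bookkeeping involved (the sizes are illustrative, not taken from a run):

    batch_size = 100                        # fixed when the graph is compiled
    image_shape = [batch_size, 1, 32, 32]   # every entry must be a plain int
    filter_shape = [2, 1, 5, 5]             # [n_kernels, n_channels, h, w]

    # a 'valid' convolution shrinks each spatial dim by (filter size - 1)
    da_image_shape = [batch_size, filter_shape[0],
                      image_shape[2] - filter_shape[2] + 1,
                      image_shape[3] - filter_shape[3] + 1]
    print da_image_shape                    # [100, 2, 28, 28]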
248:7e6fecabb656 | 249:1bf046c0c84a |
---|---|
19 | 19 |
20 class SigmoidalLayer(object): | 20 class SigmoidalLayer(object): |
21 def __init__(self, rng, input, n_in, n_out): | 21 def __init__(self, rng, input, n_in, n_out): |
22 | 22 |
23 self.input = input | 23 self.input = input |
24 | 24 |
25 W_values = numpy.asarray( rng.uniform( \ | 25 W_values = numpy.asarray( rng.uniform( \ |
26 low = -numpy.sqrt(6./(n_in+n_out)), \ | 26 low = -numpy.sqrt(6./(n_in+n_out)), \ |
27 high = numpy.sqrt(6./(n_in+n_out)), \ | 27 high = numpy.sqrt(6./(n_in+n_out)), \ |
28 size = (n_in, n_out)), dtype = theano.config.floatX) | 28 size = (n_in, n_out)), dtype = theano.config.floatX) |
29 self.W = theano.shared(value = W_values) | 29 self.W = theano.shared(value = W_values) |
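The bounds in this initialization are the sqrt(6 / (n_in + n_out)) heuristic for uniform weight draws, which keeps activation variances roughly constant across tanh/sigmoid layers. A standalone numpy sketch of the same recipe (the layer sizes are illustrative):

    import numpy

    rng = numpy.random.RandomState(1234)
    n_in, n_out = 1024, 500
    bound = numpy.sqrt(6. / (n_in + n_out))
    W_values = numpy.asarray(rng.uniform(low=-bound, high=bound,
                                         size=(n_in, n_out)),
                             dtype='float32')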
35 self.params = [self.W, self.b] | 35 self.params = [self.W, self.b] |
36 | 36 |
37 class dA_conv(object): | 37 class dA_conv(object): |
38 | 38 |
39 def __init__(self, input, filter_shape, corruption_level = 0.1, | 39 def __init__(self, input, filter_shape, corruption_level = 0.1, |
40 shared_W = None, shared_b = None, image_shape = None, | 40 shared_W = None, shared_b = None, image_shape = None): |
41 poolsize = (2,2)): | |
42 | 41 |
43 theano_rng = RandomStreams() | 42 theano_rng = RandomStreams() |
44 | 43 |
45 fan_in = numpy.prod(filter_shape[1:]) | 44 fan_in = numpy.prod(filter_shape[1:]) |
46 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) | 45 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) |
68 self.x = input | 67 self.x = input |
69 | 68 |
70 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x | 69 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x |
71 | 70 |
72 conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, | 71 conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, |
73 image_shape=image_shape, unroll_kern=4,unroll_batch=4, border_mode='valid') | 72 image_shape=image_shape, |
| 73 unroll_kern=4,unroll_batch=4, |
| 74 border_mode='valid') |
74 | 75 |
75 | 76 |
76 self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) | 77 self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) |
77 | 78 |
78 | 79 |
79 da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ | 80 da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ |
80 filter_shape[3] ] | 81 filter_shape[3] ] |
81 da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ] | 82 da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ] |
| 83 #import pdb; pdb.set_trace() |
82 initial_W_prime = numpy.asarray( numpy.random.uniform( \ | 84 initial_W_prime = numpy.asarray( numpy.random.uniform( \ |
83 low = -numpy.sqrt(6./(fan_in+fan_out)), \ | 85 low = -numpy.sqrt(6./(fan_in+fan_out)), \ |
84 high = numpy.sqrt(6./(fan_in+fan_out)), \ | 86 high = numpy.sqrt(6./(fan_in+fan_out)), \ |
85 size = da_filter_shape), dtype = theano.config.floatX) | 87 size = da_filter_shape), dtype = theano.config.floatX) |
86 self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") | 88 self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime") |
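Two things happen in the hunk above: the input is corrupted with masking noise (each value survives with probability 1 - corruption_level), and the decoder weights `W_prime` get a filter shape whose first two entries are swapped, so the reconstruction convolution maps hidden feature maps back to input channels; `da_image_shape`, whose leading entry this changeset fixes to `batch_size`, is the shape of those hidden maps. A numpy sketch of the masking noise alone, with a dense array standing in for the symbolic `self.x`:

    import numpy

    rng = numpy.random.RandomState(0)
    corruption_level = 0.1
    x = rng.rand(100, 1, 32, 32).astype('float32')

    # keep each entry with probability 1 - corruption_level, zero it otherwise
    mask = rng.binomial(n=1, p=1. - corruption_level, size=x.shape)
    tilde_x = mask.astype('float32') * x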
111 | 113 |
112 b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) | 114 b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) |
113 self.b = theano.shared(value=b_values) | 115 self.b = theano.shared(value=b_values) |
114 | 116 |
115 conv_out = conv.conv2d(input, self.W, | 117 conv_out = conv.conv2d(input, self.W, |
116 filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4,unroll_batch=4) | 118 filter_shape=filter_shape, image_shape=image_shape, |
| 119 unroll_kern=4,unroll_batch=4) |
117 | 120 |
118 | 121 |
119 fan_in = numpy.prod(filter_shape[1:]) | 122 fan_in = numpy.prod(filter_shape[1:]) |
120 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize) | 123 fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize) |
121 | 124 |
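In the pooled layer, `fan_in` counts the inputs feeding each hidden unit and `fan_out` the connections each unit projects upward, with max-pooling dividing the latter; both feed the same sqrt(6 / (fan_in + fan_out)) bound used earlier. Worked numbers for the first layer's default configuration:

    import numpy

    filter_shape = (2, 1, 5, 5)   # (n_kernels, n_channels, h, w)
    poolsize = (2, 2)

    fan_in = numpy.prod(filter_shape[1:])                     # 1*5*5 = 25
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:])
               / numpy.prod(poolsize))                        # 2*25/4 = 12
    print fan_in, fan_out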
149 filter_shape=conv_hidden_layers_sizes[i][0] | 152 filter_shape=conv_hidden_layers_sizes[i][0] |
150 image_shape=conv_hidden_layers_sizes[i][1] | 153 image_shape=conv_hidden_layers_sizes[i][1] |
151 max_poolsize=conv_hidden_layers_sizes[i][2] | 154 max_poolsize=conv_hidden_layers_sizes[i][2] |
152 | 155 |
153 if i == 0 : | 156 if i == 0 : |
154 layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) | 157 layer_input=self.x.reshape((batch_size, 1, 32, 32)) |
155 else: | 158 else: |
156 layer_input=self.layers[-1].output | 159 layer_input=self.layers[-1].output |
157 | 160 |
158 layer = LeNetConvPoolLayer(rng, input=layer_input, | 161 layer = LeNetConvPoolLayer(rng, input=layer_input, |
159 image_shape=image_shape, | 162 image_shape=image_shape, |
165 self.params += layer.params | 168 self.params += layer.params |
166 | 169 |
167 da_layer = dA_conv(corruption_level = corruption_levels[0], | 170 da_layer = dA_conv(corruption_level = corruption_levels[0], |
168 input = layer_input, | 171 input = layer_input, |
169 shared_W = layer.W, shared_b = layer.b, | 172 shared_W = layer.W, shared_b = layer.b, |
170 filter_shape = filter_shape, | 173 filter_shape=filter_shape, |
171 image_shape = image_shape ) | 174 image_shape = image_shape ) |
172 | 175 |
173 gparams = T.grad(da_layer.cost, da_layer.params) | 176 gparams = T.grad(da_layer.cost, da_layer.params) |
174 | 177 |
175 updates = {} | 178 updates = {} |
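Pretraining each layer follows the standard Theano recipe: differentiate the layer's reconstruction cost with respect to its shared parameters, then map every parameter to an SGD step in `updates`. A self-contained sketch of that pattern under a Theano 0.x environment (the cost is a toy stand-in, and `0.1` plays the role of `pretrain_lr`):

    import numpy
    import theano
    import theano.tensor as T

    x = T.matrix('x')
    W = theano.shared(numpy.zeros((32, 32), dtype=theano.config.floatX))
    cost = T.sum((T.dot(x, W) - x) ** 2)        # toy reconstruction cost

    gparams = T.grad(cost, [W])
    updates = {}
    for param, gparam in zip([W], gparams):
        updates[param] = param - 0.1 * gparam   # plain SGD step

    pretrain_fn = theano.function([x], cost, updates=updates)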
216 | 219 |
217 self.finetune = theano.function([self.x, self.y], cost, updates = updates) | 220 self.finetune = theano.function([self.x, self.y], cost, updates = updates) |
218 | 221 |
219 self.errors = self.logLayer.errors(self.y) | 222 self.errors = self.logLayer.errors(self.y) |
220 | 223 |
221 def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \ | 224 def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 0, \ |
222 pretrain_lr = 0.1, training_epochs = 1000, \ | 225 pretrain_lr = 0.1, training_epochs = 1000, \ |
223 kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500], \ | 226 kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \ |
224 corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \ | 227 corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \ |
225 max_pool_layers = [ [2,2] , [2,2] ], \ | 228 max_pool_layers = [ [2,2] , [2,2] ], \ |
226 dataset=datasets.nist_digits): | 229 dataset=datasets.nist_digits): |
227 | 230 |
228 | 231 |
230 index = T.lscalar() # index to a [mini]batch | 233 index = T.lscalar() # index to a [mini]batch |
231 x = T.matrix('x') # the data is presented as rasterized images | 234 x = T.matrix('x') # the data is presented as rasterized images |
232 y = T.ivector('y') # the labels are presented as 1d vector of | 235 y = T.ivector('y') # the labels are presented as 1d vector of |
233 # [int] labels | 236 # [int] labels |
234 | 237 |
235 layer0_input = x.reshape((x.shape[0],1,32,32)) | 238 layer0_input = x.reshape((batch_size,1,32,32)) |
236 | 239 |
237 rng = numpy.random.RandomState(1234) | 240 rng = numpy.random.RandomState(1234) |
238 conv_layers=[] | 241 conv_layers=[] |
239 init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\ | 242 init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\ |
240 [ batch_size , 1, 32, 32 ], | 243 [ batch_size , 1, 32, 32 ], |
241 max_pool_layers[0] ] | 244 max_pool_layers[0] ] |
242 conv_layers.append(init_layer) | 245 conv_layers.append(init_layer) |
243 | 246 |
244 conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0] | 247 conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0]) |
245 | 248 print init_layer[1] |
| 249 |
246 for i in range(1,len(kernels)): | 250 for i in range(1,len(kernels)): |
247 layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\ | 251 layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\ |
248 [ batch_size, kernels[i-1][0], conv_n_out,conv_n_out ], | 252 [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ], |
249 max_pool_layers[i] ] | 253 max_pool_layers[i] ] |
250 conv_layers.append(layer) | 254 conv_layers.append(layer) |
251 conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0] | 255 conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]) |
252 print layer [1] | 256 print layer [1] |
253 network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, | 257 network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, |
254 conv_hidden_layers_sizes = conv_layers, | 258 conv_hidden_layers_sizes = conv_layers, |
255 mlp_hidden_layers_sizes = mlp_layers, | 259 mlp_hidden_layers_sizes = mlp_layers, |
256 corruption_levels = corruption_levels , n_out = 62, | 260 corruption_levels = corruption_levels , n_out = 62, |
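The loop above threads the spatial size through the stack: a 'valid' convolution with a k x k kernel leaves n - k + 1 pixels per side, max-pooling then divides by the pool width, and the final size fixes the MLP input dimension `n_ins_mlp`. (The `batch_size = batch_size` default in the signature only resolves because a module-level `batch_size` exists outside this excerpt.) Worked out for the default kernels:

    kernels = [[2, 5, 5], [2, 3, 3]]      # [n_kernels, h, w] per layer
    max_pool_layers = [[2, 2], [2, 2]]

    n = 32
    for kern, pool in zip(kernels, max_pool_layers):
        n = (n - kern[2] + 1) / pool[0]   # valid conv, then pooling (int division)
    print n, kernels[-1][0] * n ** 2      # 6 and 72: spatial size, n_ins_mlp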
261 | 265 |
262 start_time = time.clock() | 266 start_time = time.clock() |
263 for i in xrange(len(network.layers)-len(mlp_layers)): | 267 for i in xrange(len(network.layers)-len(mlp_layers)): |
264 for epoch in xrange(pretraining_epochs): | 268 for epoch in xrange(pretraining_epochs): |
265 for x, y in dataset.train(batch_size): | 269 for x, y in dataset.train(batch_size): |
266 c = network.pretrain_functions[i](x) | 270 if x.shape[0] == batch_size: |
| 271 c = network.pretrain_functions[i](x) |
| 272 |
267 print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c | 273 print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c |
268 | 274 |
269 patience = 10000 # look as this many examples regardless | 275 patience = 10000 # look as this many examples regardless |
270 patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS | 276 patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS |
271 # FOUND | 277 # FOUND |
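The new guard drops the last minibatch of an epoch whenever it comes up short, since the compiled pretraining functions assume a fixed leading dimension of `batch_size`. The same filter written once as a hypothetical reusable helper:

    def full_batches(batches, batch_size):
        """Yield only minibatches whose leading dimension is exactly batch_size."""
        for x, y in batches:
            if x.shape[0] == batch_size:
                yield x, y

    # usage sketch: for x, y in full_batches(dataset.train(batch_size), batch_size): ...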
283 iter = 0 | 289 iter = 0 |
284 | 290 |
285 while (epoch < training_epochs) and (not done_looping): | 291 while (epoch < training_epochs) and (not done_looping): |
286 epoch = epoch + 1 | 292 epoch = epoch + 1 |
287 for x, y in dataset.train(batch_size): | 293 for x, y in dataset.train(batch_size): |
288 | 294 if x.shape[0] != batch_size: |
| 295 continue |
289 cost_ij = network.finetune(x, y) | 296 cost_ij = network.finetune(x, y) |
290 iter += 1 | 297 iter += 1 |
291 | 298 |
292 if iter % validation_frequency == 0: | 299 if iter % validation_frequency == 0: |
293 validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)] | 300 validation_losses = [] |
| 301 for xv, yv in dataset.valid(batch_size): |
| 302 if xv.shape[0] == batch_size: |
| 303 validation_losses.append(test_model(xv, yv)) |
294 this_validation_loss = numpy.mean(validation_losses) | 304 this_validation_loss = numpy.mean(validation_losses) |
295 print('epoch %i, iter %i, validation error %f %%' % \ | 305 print('epoch %i, iter %i, validation error %f %%' % \ |
296 (epoch, iter, this_validation_loss*100.)) | 306 (epoch, iter, this_validation_loss*100.)) |
297 | 307 |
298 # if we got the best validation score until now | 308 # if we got the best validation score until now |
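Validation losses are likewise averaged only over full-size batches, inside a patience-driven early-stopping loop. The improvement test itself falls outside the excerpted hunks, so the following is an assumed sketch of the usual recipe (the 0.995 threshold is an assumption, not shown in this diff):

    def early_stop_update(iter, loss, best, patience,
                          patience_increase=2., threshold=0.995):
        """Assumed bookkeeping for one validation check; returns new state."""
        if loss < best * threshold:                # significant improvement:
            patience = max(patience, iter * patience_increase)  # wait longer
        best = min(best, loss)
        done_looping = patience <= iter
        return best, patience, done_looping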
306 # save best validation score and iteration number | 316 # save best validation score and iteration number |
307 best_validation_loss = this_validation_loss | 317 best_validation_loss = this_validation_loss |
308 best_iter = iter | 318 best_iter = iter |
309 | 319 |
310 # test it on the test set | 320 # test it on the test set |
311 test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] | 321 test_losses=[] |
| 322 for xt, yt in dataset.test(batch_size): |
| 323 if xt.shape[0] == batch_size: |
| 324 test_losses.append(test_model(xt, yt)) |
312 test_score = numpy.mean(test_losses) | 325 test_score = numpy.mean(test_losses) |
313 print((' epoch %i, iter %i, test error of best ' | 326 print((' epoch %i, iter %i, test error of best ' |
314 'model %f %%') % | 327 'model %f %%') % |
315 (epoch, iter, test_score*100.)) | 328 (epoch, iter, test_score*100.)) |
316 | 329 |