comparison deep/convolutional_dae/stacked_convolutional_dae.py @ 249:1bf046c0c84a

Fixed a bug with image_shape.
author humel
date Tue, 16 Mar 2010 20:19:13 -0400
parents 7e6fecabb656
children 0c0f0b3f6a93
--- a/deep/convolutional_dae/stacked_convolutional_dae.py	248:7e6fecabb656
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py	249:1bf046c0c84a
@@ -19,11 +19,11 @@
 
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
 
         self.input = input
 
         W_values = numpy.asarray( rng.uniform( \
              low = -numpy.sqrt(6./(n_in+n_out)), \
              high = numpy.sqrt(6./(n_in+n_out)), \
              size = (n_in, n_out)), dtype = theano.config.floatX)
         self.W = theano.shared(value = W_values)
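The +/- sqrt(6/(n_in+n_out)) bounds above are the usual fan-in/fan-out scaled uniform initialization for tanh/sigmoid layers. A standalone NumPy sketch of the same recipe (the function name and example sizes are illustrative, not from this file):

import numpy

def init_layer_weights(rng, n_in, n_out):
    # Same bound as the W_values initialization above:
    # uniform in [-sqrt(6/(n_in+n_out)), +sqrt(6/(n_in+n_out))].
    bound = numpy.sqrt(6. / (n_in + n_out))
    return numpy.asarray(rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                         dtype='float32')

# e.g. init_layer_weights(numpy.random.RandomState(1234), 32 * 32, 500)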
@@ -35,12 +35,11 @@
         self.params = [self.W, self.b]
 
 class dA_conv(object):
 
     def __init__(self, input, filter_shape, corruption_level = 0.1,
-                 shared_W = None, shared_b = None, image_shape = None,
-                 poolsize = (2,2)):
+                 shared_W = None, shared_b = None, image_shape = None):
 
         theano_rng = RandomStreams()
 
         fan_in = numpy.prod(filter_shape[1:])
         fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
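fan_in and fan_out are read off the 4-D filter shape, whose layout here is (n_filters, n_input_maps, filter_rows, filter_cols). A worked example using the first-layer default kernel introduced later in this patch (values are illustrative):

import numpy

filter_shape = (2, 1, 5, 5)                               # (n_filters, n_input_maps, rows, cols)
fan_in = numpy.prod(filter_shape[1:])                     # 1 * 5 * 5 = 25 inputs per unit
fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])  # 2 * 5 * 5 = 50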
@@ -68,19 +67,22 @@
         self.x = input
 
         self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x
 
         conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
-                                image_shape=image_shape, unroll_kern=4,unroll_batch=4, border_mode='valid')
+                                image_shape=image_shape,
+                                unroll_kern=4,unroll_batch=4,
+                                border_mode='valid')
 
 
         self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
 
         da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
                             filter_shape[3] ]
-        da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
+        da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
+        #import pdb; pdb.set_trace()
         initial_W_prime = numpy.asarray( numpy.random.uniform( \
              low = -numpy.sqrt(6./(fan_in+fan_out)), \
              high = numpy.sqrt(6./(fan_in+fan_out)), \
              size = da_filter_shape), dtype = theano.config.floatX)
         self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
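da_image_shape describes the hidden representation fed to the reconstruction pass: a 'valid' convolution shrinks each spatial dimension by filter_size - 1, and its leading entry is now the concrete batch_size rather than image_shape[0], presumably so the declared shape matches the minibatches actually fed in (the training loops below skip partial batches for the same reason). A shape-only sketch of that bookkeeping (helper name and example numbers are illustrative):

def valid_conv_output_shape(image_shape, filter_shape):
    # Shape arithmetic only, no convolution: 'valid' mode gives
    # rows - filter_rows + 1 by cols - filter_cols + 1 per output map.
    batch, _, rows, cols = image_shape
    n_filters, _, f_rows, f_cols = filter_shape
    return [batch, n_filters, rows - f_rows + 1, cols - f_cols + 1]

# e.g. valid_conv_output_shape([100, 1, 32, 32], [2, 1, 5, 5]) -> [100, 2, 28, 28]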
@@ -111,11 +113,12 @@
 
         b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
         self.b = theano.shared(value=b_values)
 
         conv_out = conv.conv2d(input, self.W,
-                filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4,unroll_batch=4)
+                filter_shape=filter_shape, image_shape=image_shape,
+                unroll_kern=4,unroll_batch=4)
 
 
         fan_in = numpy.prod(filter_shape[1:])
         fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
 
@@ -149,11 +152,11 @@
             filter_shape=conv_hidden_layers_sizes[i][0]
             image_shape=conv_hidden_layers_sizes[i][1]
             max_poolsize=conv_hidden_layers_sizes[i][2]
 
             if i == 0 :
-                layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32))
+                layer_input=self.x.reshape((batch_size, 1, 32, 32))
             else:
                 layer_input=self.layers[-1].output
 
             layer = LeNetConvPoolLayer(rng, input=layer_input,
                                        image_shape=image_shape,
@@ -165,11 +168,11 @@
             self.params += layer.params
 
             da_layer = dA_conv(corruption_level = corruption_levels[0],
                                input = layer_input,
                                shared_W = layer.W, shared_b = layer.b,
-                               filter_shape = filter_shape,
+                               filter_shape=filter_shape,
                                image_shape = image_shape )
 
             gparams = T.grad(da_layer.cost, da_layer.params)
 
             updates = {}
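gparams holds one gradient per parameter of the layer-wise denoising cost, and the updates dictionary (filled in lines not shown in this hunk) presumably pairs each shared parameter with the usual SGD step param - pretrain_lr * gradient. A plain-number illustration of that rule (all values made up):

pretrain_lr = 0.1
params = [2.0, -1.0]          # stand-ins for the layer's shared parameters
gparams = [0.5, -0.25]        # stand-ins for T.grad(cost, params)
params = [p - pretrain_lr * g for p, g in zip(params, gparams)]
# params is now [1.95, -0.975]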
@@ -216,13 +219,13 @@
 
         self.finetune = theano.function([self.x, self.y], cost, updates = updates)
 
         self.errors = self.logLayer.errors(self.y)
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 0, \
                             pretrain_lr = 0.1, training_epochs = 1000, \
-                            kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500], \
+                            kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \
                             corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \
                             max_pool_layers = [ [2,2] , [2,2] ], \
                             dataset=datasets.nist_digits):
 
 
@@ -230,27 +233,28 @@
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x')   # the data is presented as rasterized images
     y = T.ivector('y')  # the labels are presented as 1d vector of
                         # [int] labels
 
-    layer0_input = x.reshape((x.shape[0],1,32,32))
+    layer0_input = x.reshape((batch_size,1,32,32))
 
     rng = numpy.random.RandomState(1234)
     conv_layers=[]
     init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\
                    [ batch_size , 1, 32, 32 ],
                    max_pool_layers[0] ]
     conv_layers.append(init_layer)
 
-    conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0]
+    conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0])
+    print init_layer[1]
 
     for i in range(1,len(kernels)):
         layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\
-                  [ batch_size, kernels[i-1][0], conv_n_out,conv_n_out ],
+                  [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ],
                   max_pool_layers[i] ]
         conv_layers.append(layer)
-        conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
+        conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0])
         print layer [1]
     network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
                   corruption_levels = corruption_levels , n_out = 62,
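The new int() wrappers keep conv_n_out a plain integer so it can sit inside the next layer's image_shape. Worked through with the new default kernels, 2x2 pooling and 32x32 inputs (arithmetic only, not code from the file):

conv_n_out = int((32 - 5 + 1) / 2)           # layer 0: (32 - kernels[0][2] + 1) / 2 = 14
conv_n_out = int((conv_n_out - 3 + 1) / 2)   # layer 1: (14 - 3 + 1) / 2 = 6
n_ins_mlp = 2 * conv_n_out ** 2              # kernels[-1][0] * conv_n_out**2 = 72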
@@ -261,11 +265,13 @@
 
     start_time = time.clock()
     for i in xrange(len(network.layers)-len(mlp_layers)):
         for epoch in xrange(pretraining_epochs):
             for x, y in dataset.train(batch_size):
-                c = network.pretrain_functions[i](x)
+                if x.shape[0] == batch_size:
+                    c = network.pretrain_functions[i](x)
+
             print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c
 
     patience = 10000 # look as this many examples regardless
     patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS
                            # FOUND
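The new guard exists because every compiled function bakes the fixed batch_size into its image_shape, so a smaller final minibatch from the iterator would not match the declared shape. A minimal, self-contained sketch of the same filtering idea (the helper and its arguments are hypothetical):

def full_batches(batches, batch_size):
    # Yield only (x, y) pairs whose leading dimension equals batch_size,
    # mirroring the `if x.shape[0] == batch_size:` guard above.
    for x, y in batches:
        if x.shape[0] == batch_size:
            yield x, y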
@@ -283,16 +289,20 @@
     iter = 0
 
     while (epoch < training_epochs) and (not done_looping):
         epoch = epoch + 1
         for x, y in dataset.train(batch_size):
-
+            if x.shape[0] != batch_size:
+                continue
             cost_ij = network.finetune(x, y)
             iter += 1
 
             if iter % validation_frequency == 0:
-                validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)]
+                validation_losses = []
+                for xv, yv in dataset.valid(batch_size):
+                    if xv.shape[0] == batch_size:
+                        validation_losses.append(test_model(xv, yv))
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, iter %i, validation error %f %%' % \
                        (epoch, iter, this_validation_loss*100.))
 
                 # if we got the best validation score until now
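The rewritten validation loop is the old list comprehension with a full-batch filter added. A hypothetical helper expressing the same computation (loss_fn stands in for test_model):

def filtered_losses(pairs, batch_size, loss_fn):
    # Evaluate loss_fn only on full minibatches, as the loop above does.
    return [loss_fn(xv, yv) for xv, yv in pairs if xv.shape[0] == batch_size]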
@@ -306,11 +316,14 @@
                     # save best validation score and iteration number
                     best_validation_loss = this_validation_loss
                     best_iter = iter
 
                     # test it on the test set
-                    test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)]
+                    test_losses=[]
+                    for xt, yt in dataset.test(batch_size):
+                        if xt.shape[0] == batch_size:
+                            test_losses.append(test_model(xt, yt))
                     test_score = numpy.mean(test_losses)
                     print((' epoch %i, iter %i, test error of best '
                            'model %f %%') %
                            (epoch, iter, test_score*100.))
 