comparison deep/convolutional_dae/stacked_convolutional_dae.py @ 262:716c99f4eb3a

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 17 Mar 2010 16:41:51 -0400
parents 0c0f0b3f6a93
children
diff -r 6d16a2bf142b -r 716c99f4eb3a deep/convolutional_dae/stacked_convolutional_dae.py
--- a/deep/convolutional_dae/stacked_convolutional_dae.py
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py
@@ -2,28 +2,25 @@
 import theano
 import time
 import sys
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
-import theano.sandbox.softsign
+#import theano.sandbox.softsign
 
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
-sys.path.append('../../../')
-
 from ift6266 import datasets
 from ift6266.baseline.log_reg.log_reg import LogisticRegression
 
 batch_size = 100
 
-
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
 
         self.input = input
 
         W_values = numpy.asarray( rng.uniform( \
              low = -numpy.sqrt(6./(n_in+n_out)), \
              high = numpy.sqrt(6./(n_in+n_out)), \
              size = (n_in, n_out)), dtype = theano.config.floatX)
         self.W = theano.shared(value = W_values)
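The W_values draw kept as context above is the scaled uniform initialization of Glorot & Bengio (2010). A standalone NumPy sketch of the rule, separate from the diff (function name, sizes, and seed are illustrative):

    import numpy

    def glorot_uniform(rng, n_in, n_out):
        # sample uniformly in [-sqrt(6/(n_in+n_out)), sqrt(6/(n_in+n_out))],
        # which keeps activation variances roughly equal across layers
        bound = numpy.sqrt(6. / (n_in + n_out))
        return numpy.asarray(rng.uniform(low=-bound, high=bound,
                                         size=(n_in, n_out)),
                             dtype='float32')

    rng = numpy.random.RandomState(1234)
    W = glorot_uniform(rng, 32*32, 500)  # e.g. rasterized 32x32 images -> 500 units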
@@ -35,11 +32,12 @@
         self.params = [self.W, self.b]
 
 class dA_conv(object):
 
     def __init__(self, input, filter_shape, corruption_level = 0.1,
-                 shared_W = None, shared_b = None, image_shape = None):
+                 shared_W = None, shared_b = None, image_shape = None,
+                 poolsize = (2,2)):
 
         theano_rng = RandomStreams()
 
         fan_in = numpy.prod(filter_shape[1:])
         fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
@@ -67,32 +65,24 @@
         self.x = input
 
         self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x
 
         conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
-                                image_shape=image_shape,
-                                unroll_kern=4,unroll_batch=4,
-                                border_mode='valid')
-
+                                image_shape=image_shape, border_mode='valid')
 
         self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
-
-        da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
-                            filter_shape[3] ]
-        da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
-        #import pdb; pdb.set_trace()
+        da_filter_shape = [ filter_shape[1], filter_shape[0],
+                            filter_shape[2], filter_shape[3] ]
         initial_W_prime = numpy.asarray( numpy.random.uniform( \
              low = -numpy.sqrt(6./(fan_in+fan_out)), \
              high = numpy.sqrt(6./(fan_in+fan_out)), \
              size = da_filter_shape), dtype = theano.config.floatX)
         self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
 
         conv2_out = conv.conv2d(self.y, self.W_prime,
-                                filter_shape = da_filter_shape,\
-                                image_shape = da_image_shape, \
-                                unroll_kern=4,unroll_batch=4, \
+                                filter_shape = da_filter_shape,
                                 border_mode='full')
 
         self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
 
         scaled_x = (self.x + center) / scale
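The self.tilde_x line kept as context above applies masking noise: each input unit survives with probability 1 - corruption_level and is zeroed otherwise. A standalone NumPy sketch of the same corruption, outside the Theano graph (shapes are illustrative):

    import numpy

    def corrupt(x, corruption_level, rng):
        # binary mask: 1 with probability (1 - corruption_level), else 0;
        # multiplying by the mask zeroes out the "destroyed" inputs
        mask = rng.binomial(n=1, p=1. - corruption_level, size=x.shape)
        return mask * x

    rng = numpy.random.RandomState(0)
    x = rng.rand(100, 1, 32, 32).astype('float32')  # a toy batch of images
    tilde_x = corrupt(x, 0.1, rng)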
@@ -113,12 +103,11 @@
 
         b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
         self.b = theano.shared(value=b_values)
 
         conv_out = conv.conv2d(input, self.W,
-                filter_shape=filter_shape, image_shape=image_shape,
-                unroll_kern=4,unroll_batch=4)
+                filter_shape=filter_shape, image_shape=image_shape)
 
 
         fan_in = numpy.prod(filter_shape[1:])
         fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
 
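The fan_out formula above divides by the pool area because max-pooling cuts the number of outgoing connections per hidden unit. Worked through for the first default kernel (the concrete values are assumptions taken from the defaults later in this file):

    import numpy
    filter_shape = (4, 1, 5, 5)  # (n_filters, n_input_maps, filter_h, filter_w)
    poolsize = (2, 2)
    fan_in = numpy.prod(filter_shape[1:])   # 1*5*5 = 25 inputs per unit
    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
    # 4*5*5 / 4 = 25 outgoing connections remain after 2x2 pooling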
@@ -135,11 +124,11 @@
 
 
 class SdA():
     def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes,
                  mlp_hidden_layers_sizes, corruption_levels, rng, n_out,
-                 pretrain_lr, finetune_lr):
+                 pretrain_lr, finetune_lr, img_shape):
 
         self.layers = []
         self.pretrain_functions = []
         self.params = []
         self.conv_n_layers = len(conv_hidden_layers_sizes)
@@ -152,11 +141,11 @@
             filter_shape=conv_hidden_layers_sizes[i][0]
             image_shape=conv_hidden_layers_sizes[i][1]
             max_poolsize=conv_hidden_layers_sizes[i][2]
 
             if i == 0 :
-                layer_input=self.x.reshape((batch_size, 1, 32, 32))
+                layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape)
             else:
                 layer_input=self.layers[-1].output
 
             layer = LeNetConvPoolLayer(rng, input=layer_input,
                                        image_shape=image_shape,
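The replacement reshape reads the batch dimension from the symbolic shape of the data rather than from the fixed batch_size, so a final, smaller minibatch no longer breaks the graph. A standalone illustration (the 28x28 image size is an assumption matching img_shape elsewhere in this diff):

    import numpy
    import theano
    import theano.tensor as T

    x = T.matrix('x')                          # one rasterized image per row
    img = x.reshape((x.shape[0], 1, 28, 28))   # batch dim taken from the data
    f = theano.function([x], img.shape)

    print f(numpy.zeros((100, 784), dtype=theano.config.floatX))  # [100 1 28 28]
    print f(numpy.zeros((37, 784), dtype=theano.config.floatX))   # short batch works too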
@@ -168,11 +157,11 @@
             self.params += layer.params
 
             da_layer = dA_conv(corruption_level = corruption_levels[0],
                                input = layer_input,
                                shared_W = layer.W, shared_b = layer.b,
-                               filter_shape=filter_shape,
+                               filter_shape = filter_shape,
                                image_shape = image_shape )
 
             gparams = T.grad(da_layer.cost, da_layer.params)
 
             updates = {}
@@ -219,59 +208,58 @@
 
         self.finetune = theano.function([self.x, self.y], cost, updates = updates)
 
         self.errors = self.logLayer.errors(self.y)
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 0, \
-                            pretrain_lr = 0.1, training_epochs = 1000, \
-                            kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \
-                            corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \
-                            max_pool_layers = [ [2,2] , [2,2] ], \
-                            dataset=datasets.nist_digits):
-
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1,
+                           pretrain_lr = 0.1, training_epochs = 1000,
+                           kernels = [[4,5,5], [4,3,3]], mlp_layers=[500],
+                           corruption_levels = [0.2, 0.2, 0.2],
+                           batch_size = batch_size, img_shape=(28, 28),
+                           max_pool_layers = [[2,2], [2,2]],
+                           dataset=datasets.mnist(5000)):
 
     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x')   # the data is presented as rasterized images
     y = T.ivector('y')  # the labels are presented as 1d vector of
                         # [int] labels
 
-    layer0_input = x.reshape((batch_size,1,32,32))
+    layer0_input = x.reshape((x.shape[0],1)+img_shape)
 
     rng = numpy.random.RandomState(1234)
     conv_layers=[]
-    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\
-                   [ batch_size , 1, 32, 32 ],
-                   max_pool_layers[0] ]
+    init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]],
+                  None, # do not specify the batch size since it can
+                        # change for the last one and then theano will
+                        # crash.
+                  max_pool_layers[0]]
     conv_layers.append(init_layer)
 
-    conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0])
-    print init_layer[1]
-
+    conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0]
+
     for i in range(1,len(kernels)):
-        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\
-                  [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ],
+        layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]],
+                 None, # same comment as for init_layer
                  max_pool_layers[i] ]
         conv_layers.append(layer)
-        conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0])
-        print layer [1]
+        conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
+
     network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
-                  corruption_levels = corruption_levels , n_out = 62,
-                  rng = rng , pretrain_lr = pretrain_lr ,
-                  finetune_lr = learning_rate )
+                  corruption_levels = corruption_levels, n_out = 62,
+                  rng = rng , pretrain_lr = pretrain_lr,
+                  finetune_lr = learning_rate, img_shape=img_shape)
 
     test_model = theano.function([network.x, network.y], network.errors)
 
     start_time = time.clock()
     for i in xrange(len(network.layers)-len(mlp_layers)):
         for epoch in xrange(pretraining_epochs):
             for x, y in dataset.train(batch_size):
-                if x.shape[0] == batch_size:
-                    c = network.pretrain_functions[i](x)
-
+                c = network.pretrain_functions[i](x)
             print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c
 
     patience = 10000 # look as this many examples regardless
     patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS
                            # FOUND
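With the new defaults (img_shape=(28, 28), kernels=[[4,5,5],[4,3,3]], 2x2 pooling) the conv_n_out recursion above ends at a 5x5 map, which is where the n_ins_mlp value comes from. A quick check of the arithmetic (Python 2 integer division, as in the script):

    conv_n_out = (28 - 5 + 1) / 2            # = 12: 'valid' conv, then 2x2 max-pool
    conv_n_out = (conv_n_out - 3 + 1) / 2    # = 5
    n_ins_mlp = 4 * conv_n_out ** 2          # = 100 inputs to the MLP layers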
@@ -289,20 +277,16 @@
     iter = 0
 
     while (epoch < training_epochs) and (not done_looping):
         epoch = epoch + 1
         for x, y in dataset.train(batch_size):
-            if x.shape[0] != batch_size:
-                continue
+
             cost_ij = network.finetune(x, y)
             iter += 1
 
             if iter % validation_frequency == 0:
-                validation_losses = []
-                for xv, yv in dataset.valid(batch_size):
-                    if xv.shape[0] == batch_size:
-                        validation_losses.append(test_model(xv, yv))
+                validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)]
                 this_validation_loss = numpy.mean(validation_losses)
                 print('epoch %i, iter %i, validation error %f %%' % \
                       (epoch, iter, this_validation_loss*100.))
 
                 # if we got the best validation score until now
@@ -316,14 +300,11 @@
                     # save best validation score and iteration number
                     best_validation_loss = this_validation_loss
                     best_iter = iter
 
                     # test it on the test set
-                    test_losses=[]
-                    for xt, yt in dataset.test(batch_size):
-                        if xt.shape[0] == batch_size:
-                            test_losses.append(test_model(xt, yt))
+                    test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)]
                     test_score = numpy.mean(test_losses)
                     print((' epoch %i, iter %i, test error of best '
                            'model %f %%') %
                           (epoch, iter, test_score*100.))
 
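For reference, a minimal sketch of the patience schedule that the finetuning loop above implements; the improvement_threshold value and the patience update rule are assumed from the DeepLearningTutorials pattern this script follows and are not shown in the diff itself:

    patience = 10000               # look at this many minibatches regardless
    patience_increase = 2.         # wait this much longer when a new best is found
    improvement_threshold = 0.995  # relative improvement considered significant
    best_validation_loss = float('inf')

    validation_curve = [0.9, 0.5, 0.4, 0.39, 0.41]  # toy validation losses
    for iter, this_validation_loss in enumerate(validation_curve):
        if this_validation_loss < best_validation_loss:
            if this_validation_loss < best_validation_loss * improvement_threshold:
                patience = max(patience, iter * patience_increase)
            best_validation_loss = this_validation_loss
        if iter >= patience:
            break                  # corresponds to done_looping above
    print 'best validation loss:', best_validation_loss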