deep/convolutional_dae/stacked_convolutional_dae.py @ 248:7e6fecabb656

Optimized the ConvOp calls by specifying additional parameters (unroll_kern=4, unroll_batch=4). Specified the image shape of the da_conv layer.

author   humel
date     Tue, 16 Mar 2010 14:46:25 -0400
parents  4d109b648c31
children 1bf046c0c84a 3919c71e3091
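For context: Theano's ConvOp can unroll its inner C loops when both the image and filter shapes are known at graph-construction time; the unroll_kern and unroll_batch keyword arguments used throughout this changeset are forwarded to ConvOp by conv.conv2d. Below is a minimal sketch of the optimized call pattern, not code from this file; the shapes are illustrative, matching the defaults used here.

    import numpy
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv

    x = T.tensor4('x')

    # Illustrative shapes matching this file's defaults:
    # 100-image minibatch, 1 input channel, 32x32 images, 4 kernels of 5x5.
    image_shape  = (100, 1, 32, 32)
    filter_shape = (4, 1, 5, 5)

    W = theano.shared(numpy.zeros(filter_shape, dtype=theano.config.floatX))

    # With both shapes given statically, unroll_batch/unroll_kern let ConvOp
    # unroll its loops. The unroll factors are expected to divide the batch
    # size and the kernel count (100 % 4 == 0 and 4 % 4 == 0 here), which is
    # presumably why this changeset bumps the kernel counts from 2 to 4 and
    # fixes batch_size at a module-level 100.
    out = conv.conv2d(x, W, filter_shape=filter_shape, image_shape=image_shape,
                      unroll_kern=4, unroll_batch=4, border_mode='valid')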
--- deep/convolutional_dae/stacked_convolutional_dae.py	247:4d109b648c31
+++ deep/convolutional_dae/stacked_convolutional_dae.py	248:7e6fecabb656
@@ -11,10 +11,13 @@
 
 sys.path.append('../../../')
 
 from ift6266 import datasets
 from ift6266.baseline.log_reg.log_reg import LogisticRegression
+
+batch_size = 100
+
 
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
 
         self.input = input
@@ -65,26 +68,29 @@
         self.x = input
 
         self.tilde_x = theano_rng.binomial(self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x
 
         conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
-                                image_shape=image_shape, border_mode='valid')
+                                image_shape=image_shape, unroll_kern=4, unroll_batch=4, border_mode='valid')
 
 
         self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
 
         da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],
                             filter_shape[3] ]
+        da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
         initial_W_prime = numpy.asarray( numpy.random.uniform(
                               low = -numpy.sqrt(6./(fan_in+fan_out)),
                               high = numpy.sqrt(6./(fan_in+fan_out)),
                               size = da_filter_shape), dtype = theano.config.floatX)
         self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
 
         conv2_out = conv.conv2d(self.y, self.W_prime,
                                 filter_shape = da_filter_shape,
+                                image_shape = da_image_shape,
+                                unroll_kern=4, unroll_batch=4,
                                 border_mode='full')
 
         self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x')) + center) / scale
 
         scaled_x = (self.x + center) / scale
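A note on the shape bookkeeping introduced in this hunk: with border_mode='valid' each spatial dimension shrinks by filter size minus one, which is exactly what the new da_image_shape computes, and the border_mode='full' reconstruction grows it back by the same amount, restoring the input shape. A quick standalone check of that arithmetic (plain Python; valid_out and full_out are hypothetical helpers, not from this file):

    def valid_out(image_shape, filter_shape):
        # 'valid' convolution: channels become the kernel count and each
        # spatial dimension shrinks by (filter - 1).
        return [image_shape[0], filter_shape[0],
                image_shape[2] - filter_shape[2] + 1,
                image_shape[3] - filter_shape[3] + 1]

    def full_out(image_shape, filter_shape):
        # 'full' convolution: each spatial dimension grows by (filter - 1).
        return [image_shape[0], filter_shape[0],
                image_shape[2] + filter_shape[2] - 1,
                image_shape[3] + filter_shape[3] - 1]

    image_shape     = [100, 1, 32, 32]  # [batch, channels, rows, cols]
    filter_shape    = [4, 1, 5, 5]      # [nkern, channels, rows, cols]
    da_filter_shape = [1, 4, 5, 5]      # in/out channels flipped, as above

    hidden = valid_out(image_shape, filter_shape)  # [100, 4, 28, 28] == da_image_shape
    recon  = full_out(hidden, da_filter_shape)     # [100, 1, 32, 32]
    assert recon == image_shape                    # the decoder restores the input shape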
@@ -105,11 +111,11 @@
 
         b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
         self.b = theano.shared(value=b_values)
 
         conv_out = conv.conv2d(input, self.W,
-                               filter_shape=filter_shape, image_shape=image_shape)
+                               filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4, unroll_batch=4)
 
 
         fan_in = numpy.prod(filter_shape[1:])
         fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
 
@@ -212,16 +218,15 @@
 
         self.errors = self.logLayer.errors(self.y)
 
 def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1,
                             pretrain_lr = 0.1, training_epochs = 1000,
-                            kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500],
-                            corruption_levels = [ 0.2, 0.2, 0.2],
+                            kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500],
+                            corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size,
                             max_pool_layers = [ [2,2] , [2,2] ],
                             dataset=datasets.nist_digits):
 
-    batch_size = 100 # size of the minibatch
 
     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x')   # the data is presented as rasterized images
     y = T.ivector('y')  # the labels are presented as 1d vector of
@@ -229,19 +234,24 @@
 
     layer0_input = x.reshape((x.shape[0],1,32,32))
 
     rng = numpy.random.RandomState(1234)
     conv_layers=[]
-    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ], None, max_pool_layers[0] ]
+    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],
+                   [ batch_size, 1, 32, 32 ],
+                   max_pool_layers[0] ]
     conv_layers.append(init_layer)
+
     conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0]
 
     for i in range(1,len(kernels)):
-        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ], None, max_pool_layers[i] ]
+        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],
+                  [ batch_size, kernels[i-1][0], conv_n_out, conv_n_out ],
+                  max_pool_layers[i] ]
         conv_layers.append(layer)
         conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
-
+    print layer[1]
     network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
                   corruption_levels = corruption_levels , n_out = 62,
                   rng = rng , pretrain_lr = pretrain_lr ,
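For reference, with the new defaults (kernels = [[4,5,5], [4,3,3]], 2x2 max-pooling, 32x32 inputs) the conv_n_out recurrence in the last hunk works out to a 144-unit input for the MLP. A quick standalone check of the arithmetic (plain Python 2, mirroring the code above):

    kernels = [[4, 5, 5], [4, 3, 3]]
    max_pool_layers = [[2, 2], [2, 2]]

    conv_n_out = (32 - kernels[0][2] + 1) / max_pool_layers[0][0]              # (32-5+1)/2 = 14
    for i in range(1, len(kernels)):
        conv_n_out = (conv_n_out - kernels[i][2] + 1) / max_pool_layers[i][0]  # (14-3+1)/2 = 6

    n_ins_mlp = kernels[-1][0] * conv_n_out ** 2  # 4 * 6 * 6 = 144
    print n_ins_mlp                               # feeds the first 500-unit MLP layer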