ift6266: comparison of deep/convolutional_dae/stacked_convolutional_dae.py @ 248:7e6fecabb656
Optimized the call of ConvOp by specifying additional parameters. Specified image shape of the da_conv layer.
| author   | humel                           |
|----------|---------------------------------|
| date     | Tue, 16 Mar 2010 14:46:25 -0400 |
| parents  | 4d109b648c31                    |
| children | 1bf046c0c84a 3919c71e3091       |
This compares parent revision 247:4d109b648c31 (lines marked `-`) against revision 248:7e6fecabb656 (lines marked `+`):
```diff
@@ -11,10 +11,13 @@
 
 sys.path.append('../../../')
 
 from ift6266 import datasets
 from ift6266.baseline.log_reg.log_reg import LogisticRegression
+
+batch_size = 100
+
 
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
 
         self.input = input
```
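Hoisting `batch_size` to module scope lets it serve both as the default value of the new `batch_size` keyword of `sgd_optimization_mnist` and as the leading dimension of the static image shapes built further down.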
```diff
@@ -65,26 +68,29 @@
         self.x = input
 
         self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x
 
         conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
-                                image_shape=image_shape, border_mode='valid')
+                                image_shape=image_shape, unroll_kern=4,unroll_batch=4, border_mode='valid')
 
 
         self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
 
         da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
                             filter_shape[3] ]
+        da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ]
         initial_W_prime = numpy.asarray( numpy.random.uniform( \
               low = -numpy.sqrt(6./(fan_in+fan_out)), \
               high = numpy.sqrt(6./(fan_in+fan_out)), \
               size = da_filter_shape), dtype = theano.config.floatX)
         self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
 
         conv2_out = conv.conv2d(self.y, self.W_prime,
-                                filter_shape = da_filter_shape,
+                                filter_shape = da_filter_shape,\
+                                image_shape = da_image_shape, \
+                                unroll_kern=4,unroll_batch=4, \
                                 border_mode='full')
 
         self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
 
         scaled_x = (self.x + center) / scale
```
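The inserted `da_image_shape` is simply the shape of `self.y`, the output of the 'valid' convolution above, so the reconstruction's 'full' convolution can be given a static shape as well. A minimal sketch of the arithmetic, assuming the first-layer defaults used later in this file (batch 100, one input channel, 32x32 images, 4 filters of 5x5; the numbers are illustrative, not taken from the commit):

```python
image_shape  = (100, 1, 32, 32)  # (batch, input channels, rows, cols)
filter_shape = (4, 1, 5, 5)      # (output channels, input channels, rows, cols)

# a 'valid' convolution shrinks each spatial dimension by (filter size - 1)
da_image_shape = (image_shape[0],                        # batch: 100
                  filter_shape[0],                       # one map per filter: 4
                  image_shape[2] - filter_shape[2] + 1,  # rows: 32 - 5 + 1 = 28
                  image_shape[3] - filter_shape[3] + 1)  # cols: 32 - 5 + 1 = 28
assert da_image_shape == (100, 4, 28, 28)
```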
```diff
@@ -105,11 +111,11 @@
 
         b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
         self.b = theano.shared(value=b_values)
 
         conv_out = conv.conv2d(input, self.W,
-                filter_shape=filter_shape, image_shape=image_shape)
+                filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4,unroll_batch=4)
 
 
         fan_in = numpy.prod(filter_shape[1:])
         fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
 
```
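`unroll_batch` and `unroll_kern` tell Theano's ConvOp to generate C code with the batch and kernel loops unrolled by the given factor; as used here, the factors are meant to divide the batch size and the number of kernels, which is presumably why the default kernels grow from 2 to 4 feature maps in the hunk below. A sketch of the call pattern (old `theano.tensor.nnet.conv` API; variable names and shapes are illustrative):

```python
import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import conv

x = T.tensor4('x')  # (batch, channels, rows, cols)
W = theano.shared(numpy.zeros((4, 1, 5, 5), dtype=theano.config.floatX))

# image_shape and filter_shape are static hints that let ConvOp specialize its
# C implementation; unroll_kern/unroll_batch unroll the kernel and batch loops
# by 4, so 4 should divide the kernel count (4) and the batch size (100).
out = conv.conv2d(x, W,
                  filter_shape=(4, 1, 5, 5),
                  image_shape=(100, 1, 32, 32),
                  unroll_kern=4, unroll_batch=4,
                  border_mode='valid')
```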
```diff
@@ -212,16 +218,15 @@
 
         self.errors = self.logLayer.errors(self.y)
 
 def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \
                             pretrain_lr = 0.1, training_epochs = 1000, \
-                            kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \
-                            corruption_levels = [ 0.2, 0.2, 0.2], \
+                            kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500], \
+                            corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \
                             max_pool_layers = [ [2,2] , [2,2] ], \
                             dataset=datasets.nist_digits):
 
-    batch_size = 100 # size of the minibatch
 
     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x') # the data is presented as rasterized images
     y = T.ivector('y') # the labels are presented as 1d vector of
@@ -229,19 +234,24 @@
 
     layer0_input = x.reshape((x.shape[0],1,32,32))
 
     rng = numpy.random.RandomState(1234)
     conv_layers=[]
-    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ], None, max_pool_layers[0] ]
+    init_layer = [ [ kernels[0][0],1,kernels[0][1],kernels[0][2] ],\
+                   [ batch_size , 1, 32, 32 ],
+                   max_pool_layers[0] ]
     conv_layers.append(init_layer)
+
    conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0]
 
     for i in range(1,len(kernels)):
-        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ], None, max_pool_layers[i] ]
+        layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\
+                  [ batch_size, kernels[i-1][0], conv_n_out,conv_n_out ],
+                  max_pool_layers[i] ]
         conv_layers.append(layer)
         conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
-
+        print layer [1]
     network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
                   corruption_levels = corruption_levels , n_out = 62,
                   rng = rng , pretrain_lr = pretrain_lr ,
```
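With static image shapes now threaded through every layer, the sizes must stay consistent end to end. A quick check of the bookkeeping above under the new defaults (`kernels = [[4,5,5],[4,3,3]]`, 2x2 max pooling, 32x32 inputs); Python 2's integer `/` is written `//` here so the snippet runs unchanged on Python 3:

```python
batch_size = 100
kernels = [[4, 5, 5], [4, 3, 3]]
max_pool_layers = [[2, 2], [2, 2]]

# the first conv layer sees the raw 32x32 images
conv_n_out = (32 - kernels[0][2] + 1) // max_pool_layers[0][0]  # (32-5+1)//2 = 14

for i in range(1, len(kernels)):
    # static input shape handed to layer i, as built in the loop above
    image_shape = [batch_size, kernels[i-1][0], conv_n_out, conv_n_out]  # [100, 4, 14, 14]
    conv_n_out = (conv_n_out - kernels[i][2] + 1) // max_pool_layers[i][0]  # (14-3+1)//2 = 6

n_ins_mlp = kernels[-1][0] * conv_n_out ** 2  # 4 * 6 * 6 = 144 inputs to the MLP
assert n_ins_mlp == 144
```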