comparison deep/convolutional_dae/stacked_convolutional_dae.py @ 266:1e4e60ddadb1

Merge. Ah, and in the last commit I forgot to mention that I added code to handle isolating different clones, so experiments can run while the code is being modified at the same time.
author fsavard
date Fri, 19 Mar 2010 10:56:16 -0400
parents 0c0f0b3f6a93
children
comparing 265:c8fe09a65039 with 266:1e4e60ddadb1
@@ -1,19 +1,21 @@
 import numpy
 import theano
 import time
+import sys
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
-import theano.sandbox.softsign
+#import theano.sandbox.softsign
 
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
 from ift6266 import datasets
-
 from ift6266.baseline.log_reg.log_reg import LogisticRegression
 
+batch_size = 100
+
 class SigmoidalLayer(object):
     def __init__(self, rng, input, n_in, n_out):
 
         self.input = input
 
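A note on this hunk: the new module-level batch_size = 100 is later read as the default
of the batch_size parameter of sgd_optimization_mnist (last hunk below). Python evaluates
default arguments once, when the def statement runs, so the default is frozen at whatever
the global holds at import time; the same goes for the list defaults and the
dataset=datasets.mnist(5000) call in that signature. A minimal sketch of the behaviour,
with a hypothetical run() standing in for the real function:

    batch_size = 100

    def run(batch_size=batch_size):
        # the default was bound to 100 when "def" executed
        return batch_size

    batch_size = 500   # rebinding the global afterwards...
    print(run())       # ...still prints 100
    print(run(250))    # an explicit argument always overrides the default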
@@ -55,28 +57,24 @@
         self.W = theano.shared(value = initial_W, name = "W")
         self.b = theano.shared(value = initial_b, name = "b")
 
 
         initial_b_prime= numpy.zeros((filter_shape[1],),dtype=theano.config.floatX)
-
-        self.W_prime=T.dtensor4('W_prime')
 
         self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
 
         self.x = input
 
         self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x
 
         conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape,
                                 image_shape=image_shape, border_mode='valid')
-
 
         self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
 
-
-        da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
-                            filter_shape[3] ]
+        da_filter_shape = [ filter_shape[1], filter_shape[0],
+                            filter_shape[2], filter_shape[3] ]
         initial_W_prime = numpy.asarray( numpy.random.uniform( \
             low = -numpy.sqrt(6./(fan_in+fan_out)), \
             high = numpy.sqrt(6./(fan_in+fan_out)), \
             size = da_filter_shape), dtype = theano.config.floatX)
         self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
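This hunk touches the two defining pieces of the denoising autoencoder. tilde_x corrupts
the input with masking noise: a binomial mask keeps each input value with probability
1 - corruption_level and zeroes it otherwise. And W_prime, previously a free symbolic
dtensor4, becomes a proper shared variable holding the decoder filters, drawn from the
same uniform +/-sqrt(6/(fan_in+fan_out)) range as W. A plain NumPy sketch of both
operations, with placeholder values for fan_in, fan_out and the shapes (the class
computes the real ones from filter_shape and the pool size):

    import numpy

    rng = numpy.random.RandomState(1234)
    corruption_level = 0.2
    fan_in, fan_out = 25, 100    # placeholders, not the class's actual values

    # masking noise: each input unit is kept with probability 1 - corruption_level
    x = rng.uniform(size=(3, 1, 28, 28))
    mask = rng.binomial(n=1, p=1 - corruption_level, size=x.shape)
    tilde_x = mask * x

    # decoder filters drawn uniformly from +/- sqrt(6 / (fan_in + fan_out))
    bound = numpy.sqrt(6. / (fan_in + fan_out))
    W_prime = rng.uniform(low=-bound, high=bound, size=(1, 4, 5, 5))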
@@ -94,10 +92,11 @@
         self.cost = T.mean(self.L)
 
         self.params = [ self.W, self.b, self.b_prime ]
 
 class LeNetConvPoolLayer(object):
+
     def __init__(self, rng, input, filter_shape, image_shape=None, poolsize=(2,2)):
         self.input = input
 
         W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
         self.W = theano.shared(value=W_values)
@@ -125,11 +124,11 @@
 
 
 class SdA():
     def __init__(self, input, n_ins_mlp, conv_hidden_layers_sizes,
                  mlp_hidden_layers_sizes, corruption_levels, rng, n_out,
-                 pretrain_lr, finetune_lr):
+                 pretrain_lr, finetune_lr, img_shape):
 
         self.layers = []
         self.pretrain_functions = []
         self.params = []
         self.conv_n_layers = len(conv_hidden_layers_sizes)
@@ -142,11 +141,11 @@
             filter_shape=conv_hidden_layers_sizes[i][0]
             image_shape=conv_hidden_layers_sizes[i][1]
             max_poolsize=conv_hidden_layers_sizes[i][2]
 
             if i == 0 :
-                layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32))
+                layer_input=self.x.reshape((self.x.shape[0], 1) + img_shape)
             else:
                 layer_input=self.layers[-1].output
 
             layer = LeNetConvPoolLayer(rng, input=layer_input,
                                        image_shape=image_shape,
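The reshape change above is what lets the hardcoded 32x32 go away: img_shape is a plain
Python tuple, so (self.x.shape[0], 1) + img_shape is tuple concatenation, producing the
4D (batch, channel, height, width) shape that conv2d expects. The same trick in plain
NumPy, assuming 28x28 inputs:

    import numpy

    img_shape = (28, 28)
    x = numpy.zeros((100, 28 * 28))                # rasterized images, one per row
    x4d = x.reshape((x.shape[0], 1) + img_shape)   # tuple concatenation
    print(x4d.shape)                               # (100, 1, 28, 28)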
@@ -209,42 +208,50 @@
 
         self.finetune = theano.function([self.x, self.y], cost, updates = updates)
 
         self.errors = self.logLayer.errors(self.y)
 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
-                    pretrain_lr = 0.01, training_epochs = 1000, \
-                    dataset=datasets.nist_digits):
-
-    batch_size = 500 # size of the minibatch
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 1,
+                           pretrain_lr = 0.1, training_epochs = 1000,
+                           kernels = [[4,5,5], [4,3,3]], mlp_layers=[500],
+                           corruption_levels = [0.2, 0.2, 0.2],
+                           batch_size = batch_size, img_shape=(28, 28),
+                           max_pool_layers = [[2,2], [2,2]],
+                           dataset=datasets.mnist(5000)):
 
     # allocate symbolic variables for the data
     index = T.lscalar() # index to a [mini]batch
     x = T.matrix('x') # the data is presented as rasterized images
     y = T.ivector('y') # the labels are presented as 1d vector of
                        # [int] labels
-    layer0_input = x.reshape((x.shape[0],1,32,32))
 
+    layer0_input = x.reshape((x.shape[0],1)+img_shape)
 
-    # Setup the convolutional layers with their DAs(add as many as you want)
-    corruption_levels = [ 0.2, 0.2, 0.2]
     rng = numpy.random.RandomState(1234)
-    ker1=2
-    ker2=2
     conv_layers=[]
-    conv_layers.append([[ker1,1,5,5], None, [2,2] ])
-    conv_layers.append([[ker2,ker1,5,5], None, [2,2] ])
-
-    # Setup the MLP layers of the network
-    mlp_layers=[500]
-
-    network = SdA(input = layer0_input, n_ins_mlp = ker2*4*4,
+    init_layer = [[kernels[0][0],1,kernels[0][1],kernels[0][2]],
+                  None, # do not specify the batch size since it can
+                        # change for the last one and then theano will
+                        # crash.
+                  max_pool_layers[0]]
+    conv_layers.append(init_layer)
+
+    conv_n_out = (img_shape[0]-kernels[0][2]+1)/max_pool_layers[0][0]
+
+    for i in range(1,len(kernels)):
+        layer = [[kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2]],
+                 None, # same comment as for init_layer
+                 max_pool_layers[i] ]
+        conv_layers.append(layer)
+        conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]
+
+    network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2,
                   conv_hidden_layers_sizes = conv_layers,
                   mlp_hidden_layers_sizes = mlp_layers,
-                  corruption_levels = corruption_levels , n_out = 10,
-                  rng = rng , pretrain_lr = pretrain_lr ,
-                  finetune_lr = learning_rate )
+                  corruption_levels = corruption_levels, n_out = 62,
+                  rng = rng , pretrain_lr = pretrain_lr,
+                  finetune_lr = learning_rate, img_shape=img_shape)
 
     test_model = theano.function([network.x, network.y], network.errors)
 
     start_time = time.clock()
     for i in xrange(len(network.layers)-len(mlp_layers)):
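The conv_n_out recurrence in the last hunk computes the side length of the feature maps
after each layer: a 'valid' convolution shrinks each side by kernel - 1, and max-pooling
then floor-divides it by the pool width (this is Python 2, so / on ints already floors).
With the defaults, 28 -> (28-5+1)/2 = 12 -> (12-3+1)/2 = 5, hence
n_ins_mlp = 4 * 5**2 = 100. A small standalone helper, hypothetical but equivalent to
the inline arithmetic, assuming square images, filters and pools:

    def mlp_input_size(img_dim, kernels, pools):
        # kernels entries are [n_maps, height, width]; pools are [ph, pw]
        out = img_dim
        for (n_maps, kh, kw), (ph, pw) in zip(kernels, pools):
            out = (out - kw + 1) // pw      # valid conv, then max-pooling
        return kernels[-1][0] * out ** 2    # flattened size fed to the MLP

    print(mlp_input_size(28, [[4, 5, 5], [4, 3, 3]], [[2, 2], [2, 2]]))  # 100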