ift6266: comparison deep/stacked_dae/v_sylvain/stacked_dae.py @ 353:bc4464c0894c
Added a feature to allow a decreasing learning rate during pretraining
author | SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date | Wed, 21 Apr 2010 14:51:14 -0400
parents | 799ad23a161f
children | 14b28e43ce4e
352:cfb79f9fd1a4 | 353:bc4464c0894c |
---|---|
151 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x | 151 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x |
152 # Equation (2) | 152 # Equation (2) |
153 # note : y is stored as an attribute of the class so that it can be | 153 # note : y is stored as an attribute of the class so that it can be |
154 # used later when stacking dAs. | 154 # used later when stacking dAs. |
155 | 155 |
156 self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) | 156 ## self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) |
157 | 157 ## |
158 # Equation (3) | 158 ## # Equation (3) |
159 #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) | 159 ## #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) |
160 # Equation (4) | 160 ## # Equation (4) |
161 # note : we sum over the size of a datapoint; if we are using minibatches, | 161 ## # note : we sum over the size of a datapoint; if we are using minibatches, |
162 # L will be a vector, with one entry per example in minibatch | 162 ## # L will be a vector, with one entry per example in minibatch |
163 #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) | 163 ## #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) |
164 #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) | 164 ## #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) |
165 | 165 ## |
166 # bypassing z to avoid running to log(0) | 166 ## # bypassing z to avoid running to log(0) |
167 z_a = T.dot(self.y, self.W_prime) + self.b_prime | 167 ## z_a = T.dot(self.y, self.W_prime) + self.b_prime |
168 log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a)) | 168 ## log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a)) |
169 # log(1-sigmoid(z_a)) | 169 ## # log(1-sigmoid(z_a)) |
170 log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a)) | 170 ## log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a)) |
171 self.L = -T.sum( self.x * (log_sigmoid) \ | 171 ## self.L = -T.sum( self.x * (log_sigmoid) \ |
172 + (1.0-self.x) * (log_1_sigmoid), axis=1 ) | 172 ## + (1.0-self.x) * (log_1_sigmoid), axis=1 ) |
173 | 173 |
174 # I added this epsilon to avoid getting log(0) and 1/0 in grad | 174 # I added this epsilon to avoid getting log(0) and 1/0 in grad |
175 # This means conceptually that there'd be no probability of 0, but that | 175 # This means conceptually that there'd be no probability of 0, but that |
176 # doesn't seem to me as important (maybe I'm wrong?). | 176 # doesn't seem to me as important (maybe I'm wrong?). |
177 #eps = 0.00000001 | 177 #eps = 0.00000001 |
181 # note : L is now a vector, where each element is the cross-entropy cost | 181 # note : L is now a vector, where each element is the cross-entropy cost |
182 # of the reconstruction of the corresponding example of the | 182 # of the reconstruction of the corresponding example of the |
183 # minibatch. We need to compute the average of all these to get | 183 # minibatch. We need to compute the average of all these to get |
184 # the cost of the minibatch | 184 # the cost of the minibatch |
185 | 185 |
186 ## #Or use a Tanh everything is always between 0 and 1, the range is | 186 #Or use a Tanh everything is always between 0 and 1, the range is |
187 ## #changed so it remain the same as when sigmoid is used | 187 #changed so it remain the same as when sigmoid is used |
188 ## self.y = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0 | 188 self.y = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0 |
189 ## | 189 |
190 ## z_a = T.dot(self.y, self.W_prime) + self.b_prime | 190 z_a = T.dot(self.y, self.W_prime) + self.b_prime |
191 ## self.z = (T.tanh(z_a + self.b_prime)+1.0) / 2.0 | 191 self.z = (T.tanh(z_a + self.b_prime)+1.0) / 2.0 |
192 ## #To ensure to do not have a log(0) operation | 192 #To ensure to do not have a log(0) operation |
193 ## if self.z <= 0: | 193 if self.z <= 0: |
194 ## self.z = 0.000001 | 194 self.z = 0.000001 |
195 ## if self.z >= 1: | 195 if self.z >= 1: |
196 ## self.z = 0.999999 | 196 self.z = 0.999999 |
197 ## | 197 |
198 ## self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) | 198 self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) |
199 | 199 |
200 self.cost = T.mean(self.L) | 200 self.cost = T.mean(self.L) |
201 | 201 |
202 self.params = [ self.W, self.b, self.b_prime ] | 202 self.params = [ self.W, self.b, self.b_prime ] |
203 | 203 |
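The first hunk above comments out the sigmoid encoder/decoder and its log-sigmoid cross-entropy, and rebuilds the reconstruction cost from a rescaled tanh, (tanh(a)+1)/2, so that both the hidden code and the reconstruction stay in (0, 1). The sketch below is a standalone, hedged reconstruction of that cost, not code from the repository: the sizes, the initialization, and the use of T.clip as the log(0) guard (in place of the Python if statements in the revision) are illustrative assumptions.

```python
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Illustrative sizes and names only; W, b, b_prime mirror the dA attributes.
n_visible, n_hidden = 784, 500
corruption_level = 0.1
rng = numpy.random.RandomState(1234)
theano_rng = RandomStreams(rng.randint(2 ** 30))

x = T.matrix('x')
W = theano.shared(numpy.asarray(
        rng.uniform(low=-0.1, high=0.1, size=(n_visible, n_hidden)),
        dtype=theano.config.floatX), name='W')
b = theano.shared(numpy.zeros(n_hidden, dtype=theano.config.floatX), name='b')
b_prime = theano.shared(numpy.zeros(n_visible, dtype=theano.config.floatX),
                        name='b_prime')
W_prime = W.T  # tied weights, as in the stacked dA

# Corrupt the input, as the dA does before encoding.
tilde_x = theano_rng.binomial(x.shape, 1, 1 - corruption_level,
                              dtype=theano.config.floatX) * x

# Rescaled tanh keeps the code and the reconstruction in (0, 1).
y = (T.tanh(T.dot(tilde_x, W) + b) + 1.0) / 2.0
z = (T.tanh(T.dot(y, W_prime) + b_prime) + 1.0) / 2.0

# Clamp away from 0 and 1 so the cross-entropy never evaluates log(0).
# (T.clip is a symbolic stand-in for the Python if statements in the revision.)
z = T.clip(z, 0.000001, 0.999999)

# Per-example cross-entropy, then the minibatch mean, as in the changeset.
L = -T.sum(x * T.log(z) + (1.0 - x) * T.log(1.0 - z), axis=1)
cost = T.mean(L)
```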
239 #index = T.lscalar() # index to a [mini]batch | 239 #index = T.lscalar() # index to a [mini]batch |
240 self.x = T.matrix('x') # the data is presented as rasterized images | 240 self.x = T.matrix('x') # the data is presented as rasterized images |
241 self.y = T.ivector('y') # the labels are presented as 1D vector of | 241 self.y = T.ivector('y') # the labels are presented as 1D vector of |
242 # [int] labels | 242 # [int] labels |
243 self.finetune_lr = T.fscalar('finetune_lr') #To get a dynamic finetune learning rate | 243 self.finetune_lr = T.fscalar('finetune_lr') #To get a dynamic finetune learning rate |
244 self.pretrain_lr = T.fscalar('pretrain_lr') #To get a dynamic pretrain learning rate | |
244 | 245 |
245 for i in xrange( self.n_layers ): | 246 for i in xrange( self.n_layers ): |
246 # construct the sigmoidal layer | 247 # construct the sigmoidal layer |
247 | 248 |
248 # the size of the input is either the number of hidden units of | 249 # the size of the input is either the number of hidden units of |
259 layer_input = self.x | 260 layer_input = self.x |
260 else: | 261 else: |
261 layer_input = self.layers[-1].output | 262 layer_input = self.layers[-1].output |
262 #We have to choose between sigmoidal layer or tanh layer ! | 263 #We have to choose between sigmoidal layer or tanh layer ! |
263 | 264 |
264 layer = SigmoidalLayer(rng, layer_input, input_size, | 265 ## layer = SigmoidalLayer(rng, layer_input, input_size, |
266 ## hidden_layers_sizes[i] ) | |
267 | |
268 layer = TanhLayer(rng, layer_input, input_size, | |
265 hidden_layers_sizes[i] ) | 269 hidden_layers_sizes[i] ) |
266 | |
267 ## layer = TanhLayer(rng, layer_input, input_size, | |
268 ## hidden_layers_sizes[i] ) | |
269 # add the layer to the | 270 # add the layer to the |
270 self.layers += [layer] | 271 self.layers += [layer] |
271 self.params += layer.params | 272 self.params += layer.params |
272 | 273 |
273 # Construct a denoising autoencoder that shared weights with this | 274 # Construct a denoising autoencoder that shared weights with this |
283 # compute gradients of layer parameters | 284 # compute gradients of layer parameters |
284 gparams = T.grad(dA_layer.cost, dA_layer.params) | 285 gparams = T.grad(dA_layer.cost, dA_layer.params) |
285 # compute the list of updates | 286 # compute the list of updates |
286 updates = {} | 287 updates = {} |
287 for param, gparam in zip(dA_layer.params, gparams): | 288 for param, gparam in zip(dA_layer.params, gparams): |
288 updates[param] = param - gparam * pretrain_lr | 289 updates[param] = param - gparam * self.pretrain_lr |
289 | 290 |
290 # create a function that trains the dA | 291 # create a function that trains the dA |
291 update_fn = theano.function([self.x], dA_layer.cost, \ | 292 update_fn = theano.function([self.x, self.pretrain_lr], dA_layer.cost, \ |
292 updates = updates)#, | 293 updates = updates)#, |
293 # givens = { | 294 # givens = { |
294 # self.x : ensemble}) | 295 # self.x : ensemble}) |
295 # collect this function into a list | 296 # collect this function into a list |
296 #update_fn = theano.function([index], dA_layer.cost, \ | 297 #update_fn = theano.function([index], dA_layer.cost, \ |
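The second hunk turns the pretraining learning rate into a symbolic input, self.pretrain_lr, used in the parameter updates and passed as an argument of the compiled function, so its value can change from one call to the next. A possible driver loop is sketched below; pretrain_fns, train_batches, pretraining_epochs, and the inverse-time decay schedule are assumptions for illustration, not code from this changeset.

```python
# Hypothetical placeholders: in the real script these would come from the
# stacked dA object and the dataset iterator.
pretrain_fns = []          # e.g. one compiled update_fn per dA layer
train_batches = []         # e.g. list of (batch_size x n_visible) arrays
pretraining_epochs = 10    # assumed number of passes per layer

# Assumed decreasing schedule for the pretraining learning rate.
initial_lr = 0.01   # assumed starting rate
decay = 1e-3        # assumed decay constant
n_updates = 0

for layer_fn in pretrain_fns:
    for epoch in xrange(pretraining_epochs):
        for x_batch in train_batches:
            # update_fn now takes (minibatch, learning_rate) because
            # self.pretrain_lr is an input of the Theano function.
            lr = initial_lr / (1.0 + decay * n_updates)
            cost = layer_fn(x_batch, lr)
            n_updates += 1
```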