Mercurial > ift6266
comparison deep/stacked_dae/stacked_dae.py @ 186:d364a130b221
Ajout du code de base pour scalar_series. Modifications à stacked_dae : réglé un problème avec les input_divider (cela empêchait une optimisation), et ajouté l'utilisation des séries. Aussi, si je ne l'avais pas déjà commité, j'ai enlevé l'histoire de réutilisation du pretraining : c'était compliqué (error-prone) et ça créait des jobs beaucoup trop longs.
author | fsavard |
---|---|
date | Mon, 01 Mar 2010 11:45:25 -0500 |
parents | b9ea8e2d071a |
children | 3632e6258642 |
comparison
equal
deleted
inserted
replaced
185:b9ea8e2d071a | 186:d364a130b221 |
---|---|
150 update_locals(self, locals()) | 150 update_locals(self, locals()) |
151 | 151 |
152 self.layers = [] | 152 self.layers = [] |
153 self.pretrain_functions = [] | 153 self.pretrain_functions = [] |
154 self.params = [] | 154 self.params = [] |
155 # MODIF: added this so we also get the b_primes | |
156 # (not used for finetuning... still using ".params") | |
157 self.all_params = [] | |
155 self.n_layers = len(hidden_layers_sizes) | 158 self.n_layers = len(hidden_layers_sizes) |
156 | 159 |
157 self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX) | 160 self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) |
158 | 161 |
159 if len(hidden_layers_sizes) < 1 : | 162 if len(hidden_layers_sizes) < 1 : |
160 raiseException (' You must have at least one hidden layer ') | 163 raiseException (' You must have at least one hidden layer ') |
161 | 164 |
162 | 165 |
194 # layer | 197 # layer |
195 dA_layer = dA(input_size, hidden_layers_sizes[i], \ | 198 dA_layer = dA(input_size, hidden_layers_sizes[i], \ |
196 corruption_level = corruption_levels[0],\ | 199 corruption_level = corruption_levels[0],\ |
197 input = layer_input, \ | 200 input = layer_input, \ |
198 shared_W = layer.W, shared_b = layer.b) | 201 shared_W = layer.W, shared_b = layer.b) |
202 | |
203 self.all_params += dA_layer.params | |
199 | 204 |
200 # Construct a function that trains this dA | 205 # Construct a function that trains this dA |
201 # compute gradients of layer parameters | 206 # compute gradients of layer parameters |
202 gparams = T.grad(dA_layer.cost, dA_layer.params) | 207 gparams = T.grad(dA_layer.cost, dA_layer.params) |
203 # compute the list of updates | 208 # compute the list of updates |
207 | 212 |
208 # create a function that trains the dA | 213 # create a function that trains the dA |
209 update_fn = theano.function([index], dA_layer.cost, \ | 214 update_fn = theano.function([index], dA_layer.cost, \ |
210 updates = updates, | 215 updates = updates, |
211 givens = { | 216 givens = { |
212 self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider}) | 217 self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider}) |
213 # collect this function into a list | 218 # collect this function into a list |
214 self.pretrain_functions += [update_fn] | 219 self.pretrain_functions += [update_fn] |
215 | 220 |
216 | 221 |
217 # We now need to add a logistic layer on top of the MLP | 222 # We now need to add a logistic layer on top of the MLP |
218 self.logLayer = LogisticRegression(\ | 223 self.logLayer = LogisticRegression(\ |
219 input = self.layers[-1].output,\ | 224 input = self.layers[-1].output,\ |
220 n_in = hidden_layers_sizes[-1], n_out = n_outs) | 225 n_in = hidden_layers_sizes[-1], n_out = n_outs) |
221 | 226 |
222 self.params += self.logLayer.params | 227 self.params += self.logLayer.params |
228 self.all_params += self.logLayer.params | |
223 # construct a function that implements one step of finetuning | 229 # construct a function that implements one step of finetuning |
224 | 230 |
225 # compute the cost, defined as the negative log likelihood | 231 # compute the cost, defined as the negative log likelihood |
226 cost = self.logLayer.negative_log_likelihood(self.y) | 232 cost = self.logLayer.negative_log_likelihood(self.y) |
227 # compute the gradients with respect to the model parameters | 233 # compute the gradients with respect to the model parameters |
232 updates[param] = param - gparam*finetune_lr | 238 updates[param] = param - gparam*finetune_lr |
233 | 239 |
234 self.finetune = theano.function([index], cost, | 240 self.finetune = theano.function([index], cost, |
235 updates = updates, | 241 updates = updates, |
236 givens = { | 242 givens = { |
237 self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider, | 243 self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, |
238 self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) | 244 self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) |
239 | 245 |
240 # symbolic variable that points to the number of errors made on the | 246 # symbolic variable that points to the number of errors made on the |
241 # minibatch given by self.x and self.y | 247 # minibatch given by self.x and self.y |
242 | 248 |