ift6266: comparison scripts/stacked_dae/stacked_dae.py @ 139:7d8366fb90bf
Added __init__.py files throughout the tree so the scripts can be used with jobman-style paths, and made quite a few changes in stacked_dae to allow reusing the pretraining work across tests where the pretraining is the same.
author | fsavard |
---|---|
date | Mon, 22 Feb 2010 13:38:25 -0500 |
parents | 5c79a2557f2f |
children |
138:128507ac4edf | 139:7d8366fb90bf |
---|---|
4 import numpy | 4 import numpy |
5 import theano | 5 import theano |
6 import time | 6 import time |
7 import theano.tensor as T | 7 import theano.tensor as T |
8 from theano.tensor.shared_randomstreams import RandomStreams | 8 from theano.tensor.shared_randomstreams import RandomStreams |
| 9 import copy |
| 10 |
| 11 from utils import update_locals |
9 | 12 |
10 class LogisticRegression(object): | 13 class LogisticRegression(object): |
11 def __init__(self, input, n_in, n_out): | 14 def __init__(self, input, n_in, n_out): |
12 # initialize with 0 the weights W as a matrix of shape (n_in, n_out) | 15 # initialize with 0 the weights W as a matrix of shape (n_in, n_out) |
13 self.W = theano.shared( value=numpy.zeros((n_in,n_out), | 16 self.W = theano.shared( value=numpy.zeros((n_in,n_out), |
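The newly added `from utils import update_locals` pulls in a helper that is not part of this diff; it is called as `update_locals(self, locals())` at the top of `SdA.__init__` below. A minimal sketch of what such a helper presumably does, hypothetical since `utils.py` is not shown in this changeset:

```python
# Hypothetical reconstruction of utils.update_locals -- not taken from this
# changeset. Copies every constructor argument onto the instance, so that
# attributes like obj.finetune_lr and obj.train_set_x exist after __init__
# without one self.x = x assignment per argument.
def update_locals(obj, local_vars):
    del local_vars['self']  # avoid setting obj.self = obj
    for name, value in local_vars.items():
        setattr(obj, name, value)
```

This is what lets the new `copy_reusing_lower_layers` classmethod further down read the original constructor arguments (`obj.batch_size`, `obj.pretrain_lr`, ...) back off the instance.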
138 | 141 |
139 | 142 |
140 class SdA(object): | 143 class SdA(object): |
141 def __init__(self, train_set_x, train_set_y, batch_size, n_ins, | 144 def __init__(self, train_set_x, train_set_y, batch_size, n_ins, |
142 hidden_layers_sizes, n_outs, | 145 hidden_layers_sizes, n_outs, |
143 corruption_levels, rng, pretrain_lr, finetune_lr): | 146 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0): |
144 | 147 update_locals(self, locals()) |
| 148 |
145 self.layers = [] | 149 self.layers = [] |
146 self.pretrain_functions = [] | 150 self.pretrain_functions = [] |
147 self.params = [] | 151 self.params = [] |
148 self.n_layers = len(hidden_layers_sizes) | 152 self.n_layers = len(hidden_layers_sizes) |
| 153 |
| 154 self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX) |
149 | 155 |
150 if len(hidden_layers_sizes) < 1: | 156 if len(hidden_layers_sizes) < 1: |
151 raise Exception('You must have at least one hidden layer') | 157 raise Exception('You must have at least one hidden layer') |
152 | 158 |
153 | 159 |
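The new `input_divider` argument is stored as a `floatX` scalar and used to divide each minibatch inside the `givens` substitutions of the pretraining and finetuning functions below, so the shared dataset can stay in its raw range on the device instead of keeping a second, rescaled copy in memory. A hedged construction sketch; every hyperparameter value and the `255.` divisor are illustrative assumptions, not values from this changeset:

```python
import numpy
import theano

rng = numpy.random.RandomState(1234)

# Fake dataset standing in for NIST/MNIST-style data: 8-bit pixel values.
raw_x = rng.randint(0, 256, size=(1000, 32 * 32)).astype(theano.config.floatX)
raw_y = rng.randint(0, 10, size=(1000,)).astype('int32')

train_set_x = theano.shared(raw_x)  # stays in [0, 255] on the device
train_set_y = theano.shared(raw_y)

sda = SdA(train_set_x=train_set_x, train_set_y=train_set_y,
          batch_size=20, n_ins=32 * 32,
          hidden_layers_sizes=[1000, 1000], n_outs=10,
          corruption_levels=[0.2, 0.2], rng=rng,
          pretrain_lr=0.01, finetune_lr=0.1,
          input_divider=255.)  # minibatches get divided by 255. in the givens
```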
198 | 204 |
199 # create a function that trains the dA | 205 # create a function that trains the dA |
200 update_fn = theano.function([index], dA_layer.cost, \ | 206 update_fn = theano.function([index], dA_layer.cost, \ |
201 updates = updates, | 207 updates = updates, |
202 givens = { | 208 givens = { |
203 self.x : train_set_x[index*batch_size:(index+1)*batch_size]}) | 209 self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider}) |
204 # collect this function into a list | 210 # collect this function into a list |
205 self.pretrain_functions += [update_fn] | 211 self.pretrain_functions += [update_fn] |
206 | 212 |
207 | 213 |
208 # We now need to add a logistic layer on top of the MLP | 214 # We now need to add a logistic layer on top of the MLP |
223 updates[param] = param - gparam*finetune_lr | 229 updates[param] = param - gparam*finetune_lr |
224 | 230 |
225 self.finetune = theano.function([index], cost, | 231 self.finetune = theano.function([index], cost, |
226 updates = updates, | 232 updates = updates, |
227 givens = { | 233 givens = { |
228 self.x : train_set_x[index*batch_size:(index+1)*batch_size], | 234 self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider, |
229 self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) | 235 self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) |
230 | 236 |
231 # symbolic variable that points to the number of errors made on the | 237 # symbolic variable that points to the number of errors made on the |
232 # minibatch given by self.x and self.y | 238 # minibatch given by self.x and self.y |
233 | 239 |
234 self.errors = self.logLayer.errors(self.y) | 240 self.errors = self.logLayer.errors(self.y) |
| 241 |
| 242 @classmethod |
| 243 def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None): |
| 244 assert num_hidden_layers <= obj.n_layers |
| 245 |
| 246 if not new_finetuning_lr: |
| 247 new_finetuning_lr = obj.finetune_lr |
| 248 |
| 249 new_sda = cls(train_set_x=obj.train_set_x, |
| 250 train_set_y=obj.train_set_y, |
| 251 batch_size=obj.batch_size, |
| 252 n_ins=obj.n_ins, |
| 253 hidden_layers_sizes=obj.hidden_layers_sizes[:num_hidden_layers], |
| 254 n_outs=obj.n_outs, |
| 255 corruption_levels=obj.corruption_levels[:num_hidden_layers], |
| 256 rng=obj.rng, |
| 257 pretrain_lr=obj.pretrain_lr, |
| 258 finetune_lr=new_finetuning_lr, |
| 259 input_divider=obj.input_divider) |
| 260 |
| 261 # note: new_sda.layers contains only the hidden layers (the logistic layer is separate) |
| 262 for i, layer in enumerate(new_sda.layers): |
| 263 original_layer = obj.layers[i] |
| 264 for p1, p2 in zip(layer.params, original_layer.params): |
| 265 p1.value = p2.value.copy() |
| 266 |
| 267 return new_sda |
| 268 |
| 269 def get_params_copy(self): |
| 270 return copy.deepcopy(self.params) |
| 271 |
| 272 def set_params_from_copy(self, params_copy): |
| 273 # Don't replace the shared variables themselves: the compiled |
| 274 # functions hold references to them. Only replace their values. |
| 275 for i, p in enumerate(self.params): |
| 276 p.value = params_copy[i].value |
| 277 |
| 278 def get_params_means(self): |
| 279 s = [] |
| 280 for p in self.params: |
| 281 s.append(numpy.mean(p.value)) |
| 282 return s |
235 | 283 |
236 if __name__ == '__main__': | 284 if __name__ == '__main__': |
237 import sys | 285 import sys |
238 args = sys.argv[1:] | 286 args = sys.argv[1:] |
239 | 287 |
240 if len(args) < 1: | |
241 print "Options: mnist, jobman_add, load_nist" | |
242 sys.exit(0) | |
243 | |
244 if args[0] == "jobman_add": | |
245 jobman_add() | |
246 elif args[0] == "mnist": | |
247 sgd_optimization_mnist(dataset=MNIST_LOCATION) | |
248 elif args[0] == "load_nist": | |
249 load_nist_test() | |
250 elif args[0] == "nist": | |
251 sgd_optimization_nist() | |
252 elif args[0] == "pc": | |
253 test_produit_croise_jobs() | |
254 | |
255 | |
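The point of this changeset is to pretrain a deep model once, then spawn shallower copies that reuse the pretrained lower layers and can be finetuned independently. A hedged usage sketch continuing the construction example above; the loop bounds and the `0.05` learning rate are illustrative assumptions:

```python
n_train_batches = 1000 // 20  # examples / batch_size from the sketch above

# Pretrain each layer of the deep model (one pass per layer here;
# real runs would do several epochs).
for i in xrange(sda.n_layers):
    for batch_index in xrange(n_train_batches):
        sda.pretrain_functions[i](batch_index)

# Reuse the pretrained lower layers in a 2-hidden-layer variant with its
# own finetuning learning rate; weight values are copied, so the two
# models train independently from here on.
shallow = SdA.copy_reusing_lower_layers(sda, 2, new_finetuning_lr=0.05)
for batch_index in xrange(n_train_batches):
    shallow.finetune(batch_index)

# Snapshot / restore parameter values only: the compiled Theano functions
# keep referencing the same shared variables throughout.
snapshot = shallow.get_params_copy()
# ... more finetuning ...
shallow.set_params_from_copy(snapshot)
```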