Mercurial > ift6266
diff scripts/stacked_dae/stacked_dae.py @ 139:7d8366fb90bf
Added __init__.py files throughout the tree so the scripts can be used with jobman-compatible paths, and made quite a few changes to stacked_dae so that work already done can be reused for tests where the pretraining is the same.
author | fsavard |
---|---|
date | Mon, 22 Feb 2010 13:38:25 -0500 |
parents | 5c79a2557f2f |
children |
--- a/scripts/stacked_dae/stacked_dae.py	Sun Feb 21 17:30:38 2010 -0600
+++ b/scripts/stacked_dae/stacked_dae.py	Mon Feb 22 13:38:25 2010 -0500
@@ -6,6 +6,9 @@
 import time
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+from utils import update_locals
 
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
@@ -140,13 +143,16 @@
 class SdA(object):
     def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
                  hidden_layers_sizes, n_outs,
-                 corruption_levels, rng, pretrain_lr, finetune_lr):
-
+                 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
+        update_locals(self, locals())
+
         self.layers             = []
         self.pretrain_functions = []
         self.params             = []
         self.n_layers           = len(hidden_layers_sizes)
 
+        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
+
         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')
 
@@ -200,7 +206,7 @@
             update_fn = theano.function([index], dA_layer.cost, \
                   updates = updates,
                   givens = {
-                     self.x : train_set_x[index*batch_size:(index+1)*batch_size]})
+                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
             # collect this function into a list
             self.pretrain_functions += [update_fn]
 
@@ -225,7 +231,7 @@
         self.finetune = theano.function([index], cost,
                 updates = updates,
                 givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
                   self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
         # symbolic variable that points to the number of errors made on the
@@ -233,23 +239,49 @@
         # minibatch given by self.x and self.y
         self.errors = self.logLayer.errors(self.y)
 
+    @classmethod
+    def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None):
+        assert(num_hidden_layers <= obj.n_layers)
+
+        if not new_finetuning_lr:
+            new_finetuning_lr = obj.finetune_lr
+
+        new_sda = cls(train_set_x= obj.train_set_x, \
+                      train_set_y = obj.train_set_y,\
+                      batch_size = obj.batch_size, \
+                      n_ins= obj.n_ins, \
+                      hidden_layers_sizes = obj.hidden_layers_sizes[:num_hidden_layers], \
+                      n_outs = obj.n_outs, \
+                      corruption_levels = obj.corruption_levels[:num_hidden_layers],\
+                      rng = obj.rng,\
+                      pretrain_lr = obj.pretrain_lr, \
+                      finetune_lr = new_finetuning_lr, \
+                      input_divider = obj.input_divider )
+
+        # new_sda.layers contains only the hidden layers actually
+        for i, layer in enumerate(new_sda.layers):
+            original_layer = obj.layers[i]
+            for p1,p2 in zip(layer.params, original_layer.params):
+                p1.value = p2.value.copy()
+
+        return new_sda
+
+    def get_params_copy(self):
+        return copy.deepcopy(self.params)
+
+    def set_params_from_copy(self, copy):
+        # We don't want to replace the var, as the functions have pointers in there
+        # We only want to replace values.
+        for i, p in enumerate(self.params):
+            p.value = copy[i].value
+
+    def get_params_means(self):
+        s = []
+        for p in self.params:
+            s.append(numpy.mean(p.value))
+        return s
+
 if __name__ == '__main__':
     import sys
     args = sys.argv[1:]
 
-    if len(args) < 1:
-        print "Options: mnist, jobman_add, load_nist"
-        sys.exit(0)
-
-    if args[0] == "jobman_add":
-        jobman_add()
-    elif args[0] == "mnist":
-        sgd_optimization_mnist(dataset=MNIST_LOCATION)
-    elif args[0] == "load_nist":
-        load_nist_test()
-    elif args[0] == "nist":
-        sgd_optimization_nist()
-    elif args[0] == "pc":
-        test_produit_croise_jobs()
-
-
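The point of the new `copy_reusing_lower_layers` classmethod is to let a single pretraining run be reused across experiments that differ only in depth or finetuning rate. Below is a minimal usage sketch, written against the old-style Theano interface this file uses (`.value` on shared variables, Python 2); the random placeholder dataset, the hyperparameter values, and the names `shared_x`/`shared_y` are illustrative assumptions, not taken from the repository.

```python
import numpy
import theano

# Placeholder data standing in for NIST/MNIST; shapes and values are illustrative only.
rng = numpy.random.RandomState(1234)
shared_x = theano.shared(rng.uniform(size=(1000, 32*32)).astype(theano.config.floatX))
shared_y = theano.shared(rng.randint(0, 10, size=1000).astype('int32'))

batch_size = 20
n_train_batches = shared_x.value.shape[0] / batch_size

# Build and pretrain a 3-hidden-layer SdA (SdA is the class defined in this file).
sda3 = SdA(train_set_x=shared_x, train_set_y=shared_y,
           batch_size=batch_size, n_ins=32*32,
           hidden_layers_sizes=[500, 500, 500], n_outs=10,
           corruption_levels=[0.2, 0.2, 0.2], rng=rng,
           pretrain_lr=0.01, finetune_lr=0.1, input_divider=1.0)

for pretrain_fn in sda3.pretrain_functions:   # one function per layer, called with a minibatch index
    for epoch in xrange(2):                   # a couple of epochs, just to exercise the code
        for batch_index in xrange(n_train_batches):
            pretrain_fn(batch_index)

# Reuse the pretrained lower layers in a 2-hidden-layer variant with a new finetuning rate;
# the kept layers' parameters are copied value-by-value into the new model.
sda2 = SdA.copy_reusing_lower_layers(sda3, 2, new_finetuning_lr=0.05)
for batch_index in xrange(n_train_batches):
    sda2.finetune(batch_index)
```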