Mercurial > ift6266
diff deep/stacked_dae/v_sylvain/sgd_optimization.py @ 266:1e4e60ddadb1
Merge. Ah, et dans le dernier commit, j'avais oublié de mentionner que j'ai ajouté du code pour gérer l'isolation de différents clones pour rouler des expériences et modifier le code en même temps.
author | fsavard |
---|---|
date | Fri, 19 Mar 2010 10:56:16 -0400 |
parents | a0264184684e |
children | a8b92a4a708d |
line wrap: on
line diff
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py Fri Mar 19 10:54:39 2010 -0400 +++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py Fri Mar 19 10:56:16 2010 -0400 @@ -9,34 +9,16 @@ import datetime import theano.tensor as T import sys +import pickle from jobman import DD import jobman, jobman.sql +from copy import copy from stacked_dae import SdA from ift6266.utils.seriestables import * -##def shared_dataset(data_xy): -## data_x, data_y = data_xy -## if theano.config.device.startswith("gpu"): -## print "TRANSFERING DATASETS (via shared()) TO GPU" -## shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) -## shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) -## shared_y = T.cast(shared_y, 'int32') -## else: -## print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES" -## shared_x = theano.shared(data_x) -## shared_y = theano.shared(data_y) -## return shared_x, shared_y - - ######Les shared seront remplacees utilisant "given" dans les enonces de fonction plus loin -def shared_dataset(batch_size, n_in): - - shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX)) - shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX)) - return shared_x, shared_y - default_series = { \ 'reconstruction_error' : DummySeries(), 'training_error' : DummySeries(), @@ -45,37 +27,34 @@ 'params' : DummySeries() } +def itermax(iter, max): + for i,it in enumerate(iter): + if i >= max: + break + yield it + class SdaSgdOptimizer: - def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series): + def __init__(self, dataset, hyperparameters, n_ins, n_outs, + examples_per_epoch, series=default_series, max_minibatches=None): self.dataset = dataset self.hp = hyperparameters self.n_ins = n_ins self.n_outs = n_outs - self.input_divider = input_divider + self.parameters_pre=[] + self.max_minibatches = max_minibatches + print "SdaSgdOptimizer, max_minibatches =", max_minibatches + + self.ex_per_epoch = examples_per_epoch + self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size + self.series = series self.rng = numpy.random.RandomState(1234) - self.init_datasets() self.init_classifier() sys.stdout.flush() - - def init_datasets(self): - print "init_datasets" - sys.stdout.flush() - - #train_set, valid_set, test_set = self.dataset - self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins) - - # compute number of minibatches for training, validation and testing - self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size - self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size - # remove last batch in case it's incomplete - self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 def init_classifier(self): print "Constructing classifier" @@ -88,8 +67,6 @@ # construct the stacked denoising autoencoder class self.classifier = SdA( \ - train_set_x= self.train_set_x, \ - train_set_y = self.train_set_y,\ batch_size = self.hp.minibatch_size, \ n_ins= self.n_ins, \ hidden_layers_sizes = layers_sizes, \ @@ -97,8 +74,7 @@ corruption_levels = corruption_levels,\ rng = self.rng,\ pretrain_lr = self.hp.pretraining_lr, \ - finetune_lr = self.hp.finetuning_lr,\ - input_divider = self.input_divider ) + finetune_lr = self.hp.finetuning_lr) #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") @@ -108,7 +84,7 @@ self.pretrain(self.dataset) self.finetune(self.dataset) - def pretrain(self,dataset,reduce): + def pretrain(self,dataset): print "STARTING PRETRAINING, time = ", datetime.datetime.now() sys.stdout.flush() @@ -118,15 +94,19 @@ # go through pretraining epochs for epoch in xrange(self.hp.pretraining_epochs_per_layer): # go through the training set - batch_index=int(0) + batch_index=0 for x,y in dataset.train(self.hp.minibatch_size): - batch_index+=1 - if batch_index > reduce: #If maximum number of mini-batch is used - break c = self.classifier.pretrain_functions[i](x) - self.series["reconstruction_error"].append((epoch, batch_index), c) + batch_index+=1 + + #if batch_index % 100 == 0: + # print "100 batches" + + # useful when doing tests + if self.max_minibatches and batch_index >= self.max_minibatches: + break print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c sys.stdout.flush() @@ -137,33 +117,41 @@ print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) self.hp.update({'pretraining_time': end_time-start_time}) - + sys.stdout.flush() + + #To be able to load them later for tests on finetune + self.parameters_pre=[copy(x.value) for x in self.classifier.params] + f = open('params_pretrain.txt', 'w') + pickle.dump(self.parameters_pre,f) + f.close() - def finetune(self,dataset,reduce): + + def finetune(self,dataset,dataset_test,num_finetune,ind_test): print "STARTING FINETUNING, time = ", datetime.datetime.now() - #index = T.lscalar() # index to a [mini]batch minibatch_size = self.hp.minibatch_size - ensemble_x = T.matrix('ensemble_x') - ensemble_y = T.ivector('ensemble_y') + if ind_test == 0: + nom_test = "NIST" + else: + nom_test = "P07" + # create a function to compute the mistakes that are made by the model # on the validation set, or testing set - shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX)) - test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, - givens = { - #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) - self.classifier.x: ensemble_x, - self.classifier.y: ensemble_y}) + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: ensemble_x, + # self.classifier.y: ensemble_y]}) - validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors, - givens = { - #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) - self.classifier.x: ensemble_x, - self.classifier.y: ensemble_y}) + validate_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: , + # self.classifier.y: ]}) # early-stopping parameters @@ -172,11 +160,13 @@ # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(self.n_train_batches, patience/2) + validation_frequency = min(self.mb_per_epoch, patience/2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch + if self.max_minibatches and validation_frequency > self.max_minibatches: + validation_frequency = self.max_minibatches / 2 best_params = None best_validation_loss = float('inf') @@ -186,37 +176,31 @@ done_looping = False epoch = 0 - while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): + total_mb_index = 0 + + while (epoch < num_finetune) and (not done_looping): epoch = epoch + 1 - minibatch_index=int(0) + minibatch_index = -1 for x,y in dataset.train(minibatch_size): - minibatch_index +=1 - - if minibatch_index > reduce: #If maximum number of mini-batchs is used - break - + minibatch_index += 1 cost_ij = self.classifier.finetune(x,y) - iter = epoch * self.n_train_batches + minibatch_index + total_mb_index += 1 self.series["training_error"].append((epoch, minibatch_index), cost_ij) - if (iter+1) % validation_frequency == 0: + if (total_mb_index+1) % validation_frequency == 0: - #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)] - test_index=int(0) - validation_losses=[] - for x,y in dataset.valid(minibatch_size): - test_index+=1 - if test_index > reduce: - break - validation_losses.append(validate_model(x,y)) + iter = dataset.valid(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + validation_losses = [validate_model(x,y) for x,y in iter] this_validation_loss = numpy.mean(validation_losses) self.series["validation_error"].\ append((epoch, minibatch_index), this_validation_loss*100.) print('epoch %i, minibatch %i, validation error %f %%' % \ - (epoch, minibatch_index, \ + (epoch, minibatch_index+1, \ this_validation_loss*100.)) @@ -226,36 +210,48 @@ #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold : - patience = max(patience, iter * patience_increase) + patience = max(patience, total_mb_index * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss - best_iter = iter + best_iter = total_mb_index # test it on the test set - #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)] - test_losses=[] - i=0 - for x,y in dataset.test(minibatch_size): - i+=1 - if i > reduce: - break - test_losses.append(test_model(x,y)) + iter = dataset.test(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + test_losses = [test_model(x,y) for x,y in iter] test_score = numpy.mean(test_losses) + + #test it on the second test set + iter2 = dataset_test.test(minibatch_size) + if self.max_minibatches: + iter2 = itermax(iter2, self.max_minibatches) + test_losses2 = [test_model(x,y) for x,y in iter2] + test_score2 = numpy.mean(test_losses2) self.series["test_error"].\ append((epoch, minibatch_index), test_score*100.) print((' epoch %i, minibatch %i, test error of best ' 'model %f %%') % - (epoch, minibatch_index, + (epoch, minibatch_index+1, test_score*100.)) + + print((' epoch %i, minibatch %i, test error on dataset %s of best ' + 'model %f %%') % + (epoch, minibatch_index+1,nom_test, + test_score2*100.)) sys.stdout.flush() + # useful when doing tests + if self.max_minibatches and minibatch_index >= self.max_minibatches: + break + self.series['params'].append((epoch,), self.classifier.all_params) - if patience <= iter : + if patience <= total_mb_index: done_looping = True break @@ -268,7 +264,22 @@ print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score*100.)) + print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.)) + print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) + + + #Set parameters like they where right after pre-train + def reload_parameters(self): + + #self.parameters_pre=pickle.load('params_pretrain.txt') + f = open('params_pretrain.txt') + self.parameters_pre=pickle.load(f) + f.close() + for idx,x in enumerate(self.parameters_pre): + self.classifier.params[idx].value=copy(x) + +