ift6266: diff deep/stacked_dae/v_sylvain/sgd_optimization.py @ 260:0c0f0b3f6a93
branch merge.
| author | Arnaud Bergeron <abergeron@gmail.com> |
| --- | --- |
| date | Wed, 17 Mar 2010 15:31:21 -0400 |
| parents | 7dd43ef66d15 |
| children | a0264184684e |
```diff
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py Wed Mar 17 15:24:25 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py Wed Mar 17 15:31:21 2010 -0400
@@ -12,31 +12,12 @@
 from jobman import DD
 import jobman, jobman.sql
+from copy import copy
 
 from stacked_dae import SdA
 
 from ift6266.utils.seriestables import *
 
-##def shared_dataset(data_xy):
-##    data_x, data_y = data_xy
-##    if theano.config.device.startswith("gpu"):
-##        print "TRANSFERING DATASETS (via shared()) TO GPU"
-##        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-##        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-##        shared_y = T.cast(shared_y, 'int32')
-##    else:
-##        print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES"
-##        shared_x = theano.shared(data_x)
-##        shared_y = theano.shared(data_y)
-##    return shared_x, shared_y
-
-
-######Les shared seront remplacees utilisant "given" dans les enonces de fonction plus loin
-def shared_dataset(batch_size, n_in):
-
-    shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX))
-    shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX))
-    return shared_x, shared_y
-
 default_series = { \
     'reconstruction_error' : DummySeries(),
     'training_error' : DummySeries(),
@@ -45,37 +26,34 @@
     'params' : DummySeries()
     }
 
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series):
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                    examples_per_epoch, series=default_series, max_minibatches=None):
         self.dataset = dataset
         self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
-        self.input_divider = input_divider
+        self.parameters_pre=[]
 
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
         self.series = series
 
         self.rng = numpy.random.RandomState(1234)
 
-        self.init_datasets()
         self.init_classifier()
 
         sys.stdout.flush()
-
-    def init_datasets(self):
-        print "init_datasets"
-        sys.stdout.flush()
-
-        #train_set, valid_set, test_set = self.dataset
-        self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-
-        # compute number of minibatches for training, validation and testing
-        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        # remove last batch in case it's incomplete
-        self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1
 
     def init_classifier(self):
        print "Constructing classifier"
@@ -88,8 +66,6 @@
 
        # construct the stacked denoising autoencoder class
        self.classifier = SdA( \
-                     train_set_x= self.train_set_x, \
-                     train_set_y = self.train_set_y,\
                      batch_size = self.hp.minibatch_size, \
                      n_ins= self.n_ins, \
                      hidden_layers_sizes = layers_sizes, \
@@ -97,8 +73,7 @@
                      corruption_levels = corruption_levels,\
                      rng = self.rng,\
                      pretrain_lr = self.hp.pretraining_lr, \
-                     finetune_lr = self.hp.finetuning_lr,\
-                     input_divider = self.input_divider )
+                     finetune_lr = self.hp.finetuning_lr)
 
        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
 
@@ -108,7 +83,7 @@
        self.pretrain(self.dataset)
        self.finetune(self.dataset)
 
-    def pretrain(self,dataset,reduce):
+    def pretrain(self,dataset):
        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
        sys.stdout.flush()
@@ -118,15 +93,19 @@
            # go through pretraining epochs
            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                # go through the training set
-               batch_index=int(0)
+               batch_index=0
                for x,y in dataset.train(self.hp.minibatch_size):
-                   batch_index+=1
-                   if batch_index > reduce:  #If maximum number of mini-batch is used
-                       break
                    c = self.classifier.pretrain_functions[i](x)
-
                    self.series["reconstruction_error"].append((epoch, batch_index), c)
+                   batch_index+=1
+
+                   #if batch_index % 100 == 0:
+                   #    print "100 batches"
+
+                   # useful when doing tests
+                   if self.max_minibatches and batch_index >= self.max_minibatches:
+                       break
 
                print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
                sys.stdout.flush()
@@ -137,33 +116,33 @@
 
        print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
 
        self.hp.update({'pretraining_time': end_time-start_time})
-
+
+       sys.stdout.flush()
+
+       #To be able to load them later for tests on finetune
+       self.parameters_pre=[copy(x.value) for x in self.classifier.params]
 
-    def finetune(self,dataset,reduce):
+
+    def finetune(self,dataset,num_finetune):
        print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
-       #index   = T.lscalar()    # index to a [mini]batch
       minibatch_size = self.hp.minibatch_size
-       ensemble_x = T.matrix('ensemble_x')
-       ensemble_y = T.ivector('ensemble_y')
 
       # create a function to compute the mistakes that are made by the model
       # on the validation set, or testing set
-       shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-       test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-                givens = {
-                  #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                  #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-                  self.classifier.x: ensemble_x,
-                  self.classifier.y: ensemble_y})
+       test_model = \
+           theano.function(
+               [self.classifier.x,self.classifier.y], self.classifier.errors)
+#                givens = {
+#                  self.classifier.x: ensemble_x,
+#                  self.classifier.y: ensemble_y]})
 
-       validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-                givens = {
-                  #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                  #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-                  self.classifier.x: ensemble_x,
-                  self.classifier.y: ensemble_y})
+       validate_model = \
+           theano.function(
+               [self.classifier.x,self.classifier.y], self.classifier.errors)
+#                givens = {
+#                  self.classifier.x: ,
+#                  self.classifier.y: ]})
 
 
       # early-stopping parameters
@@ -172,11 +151,13 @@
                                     # found
       improvement_threshold = 0.995 # a relative improvement of this much is
                                     # considered significant
-       validation_frequency  = min(self.n_train_batches, patience/2)
+       validation_frequency  = min(self.mb_per_epoch, patience/2)
                                     # go through this many
                                     # minibatche before checking the network
                                     # on the validation set; in this case we
                                     # check every epoch
+       if self.max_minibatches and validation_frequency > self.max_minibatches:
+           validation_frequency = self.max_minibatches / 2
 
       best_params          = None
       best_validation_loss = float('inf')
@@ -186,37 +167,31 @@
 
       done_looping = False
       epoch = 0
 
-       while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
+       total_mb_index = 0
+
+       while (epoch < num_finetune) and (not done_looping):
           epoch = epoch + 1
-           minibatch_index=int(0)
+           minibatch_index = -1
           for x,y in dataset.train(minibatch_size):
-               minibatch_index +=1
-
-               if minibatch_index > reduce:  #If maximum number of mini-batchs is used
-                   break
-
+               minibatch_index += 1
               cost_ij = self.classifier.finetune(x,y)
-               iter = epoch * self.n_train_batches + minibatch_index
+               total_mb_index += 1
 
              self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
-               if (iter+1) % validation_frequency == 0:
+               if (total_mb_index+1) % validation_frequency == 0:
 
-                   #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
-                   test_index=int(0)
-                   validation_losses=[]
-                   for x,y in dataset.valid(minibatch_size):
-                       test_index+=1
-                       if test_index > reduce:
-                           break
-                       validation_losses.append(validate_model(x,y))
+                   iter = dataset.valid(minibatch_size)
+                   if self.max_minibatches:
+                       iter = itermax(iter, self.max_minibatches)
+                   validation_losses = [validate_model(x,y) for x,y in iter]
                   this_validation_loss = numpy.mean(validation_losses)
 
                   self.series["validation_error"].\
                       append((epoch, minibatch_index), this_validation_loss*100.)
 
                   print('epoch %i, minibatch %i, validation error %f %%' % \
-                          (epoch, minibatch_index, \
+                          (epoch, minibatch_index+1, \
                            this_validation_loss*100.))
 
 
@@ -226,21 +201,17 @@
                       #improve patience if loss improvement is good enough
                       if this_validation_loss < best_validation_loss *  \
                              improvement_threshold :
-                           patience = max(patience, iter * patience_increase)
+                           patience = max(patience, total_mb_index * patience_increase)
 
                       # save best validation score and iteration number
                       best_validation_loss = this_validation_loss
-                       best_iter = iter
+                       best_iter = total_mb_index
 
                       # test it on the test set
-                       #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
-                       test_losses=[]
-                       i=0
-                       for x,y in dataset.test(minibatch_size):
-                           i+=1
-                           if i > reduce:
-                               break
-                           test_losses.append(test_model(x,y))
+                       iter = dataset.test(minibatch_size)
+                       if self.max_minibatches:
+                           iter = itermax(iter, self.max_minibatches)
+                       test_losses = [test_model(x,y) for x,y in iter]
                       test_score = numpy.mean(test_losses)
 
                       self.series["test_error"].\
@@ -248,14 +219,18 @@
 
                       print((' epoch %i, minibatch %i, test error of best '
                             'model %f %%') %
-                                    (epoch, minibatch_index,
+                                    (epoch, minibatch_index+1,
                                     test_score*100.))
 
                   sys.stdout.flush()
 
+               # useful when doing tests
+               if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                   break
+
           self.series['params'].append((epoch,), self.classifier.all_params)
 
-           if patience <= iter :
+           if patience <= total_mb_index:
               done_looping = True
               break
@@ -269,6 +244,15 @@
                    'with test performance %f %%') %
                                (best_validation_loss * 100., test_score*100.))
       print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+
+
+    #Set parameters like they where right after pre-train
+    def reload_parameters(self):
+
+        for idx,x in enumerate(self.parameters_pre):
+            self.classifier.params[idx].value=copy(x)
+
+
```
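With `init_datasets` and the `shared_dataset` helpers gone, the optimizer no longer copies anything into Theano shared variables up front: it simply iterates `dataset.train(minibatch_size)`, `dataset.valid(minibatch_size)` and `dataset.test(minibatch_size)`, each of which is expected to yield `(x, y)` minibatches. Below is a minimal stand-in dataset that honours that contract; the class name, shapes and values are illustrative only, not taken from the repository.

```python
import numpy

class ToyDataset(object):
    """Stand-in for the dataset object the optimizer iterates over."""
    def __init__(self, n_examples=100, n_ins=16):
        rng = numpy.random.RandomState(0)
        self._x = rng.rand(n_examples, n_ins).astype('float32')
        self._y = rng.randint(0, 10, size=n_examples).astype('int32')

    def _split(self, minibatch_size):
        # Yield (x, y) minibatches, exactly what pretrain/finetune consume.
        for start in range(0, len(self._x), minibatch_size):
            yield (self._x[start:start + minibatch_size],
                   self._y[start:start + minibatch_size])

    # The three streams the optimizer asks for; here they share one toy array.
    train = valid = test = _split

batches = list(ToyDataset().train(minibatch_size=32))
print(len(batches), batches[0][0].shape)   # 4 minibatches of up to 32 examples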
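The new `itermax` helper is what lets `max_minibatches` cap those streams during quick test runs: it forwards at most `max` items from any iterator and then stops. A small, self-contained usage sketch (the toy generator stands in for `dataset.train(minibatch_size)`):

```python
def itermax(iter, max):   # same body as in the patch; note it shadows the builtins
    for i, it in enumerate(iter):
        if i >= max:
            break
        yield it

# Hypothetical quick-test scenario: only look at the first 3 minibatches.
batches = ((n, n % 2) for n in range(10))   # stand-in for a minibatch stream
print(list(itermax(batches, 3)))            # [(0, 0), (1, 1), (2, 0)]
```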
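In `finetune`, `test_model` and `validate_model` now receive each minibatch directly as function inputs instead of reading slices of pre-allocated shared datasets through `givens` (the old `ensemble_x`/`ensemble_y` plus `shared_divider` machinery). The sketch below contrasts the two styles on a toy error expression; it assumes a Theano install of roughly that era, and every name in it is illustrative rather than taken from the repository.

```python
import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.ivector('y')
errors = T.mean(T.neq(T.argmax(x, axis=1), y))   # toy stand-in for classifier.errors

# Old style (sketch): data sits in shared variables and `givens` substitutes a slice.
shared_x = theano.shared(numpy.zeros((6, 4), dtype=theano.config.floatX))
shared_y = theano.shared(numpy.zeros(6, dtype='int32'))
index = T.lscalar('index')
old_model = theano.function([index], errors,
                            givens={x: shared_x[index * 2:(index + 1) * 2],
                                    y: shared_y[index * 2:(index + 1) * 2]})
print(old_model(0))

# New style (as in the patch): each minibatch is passed straight in as an input.
new_model = theano.function([x, y], errors)
xb = numpy.random.rand(2, 4).astype(theano.config.floatX)
yb = numpy.asarray([1, 0], dtype='int32')
print(new_model(xb, yb))
```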
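Early stopping in `finetune` now runs off a single global counter, `total_mb_index`, instead of reconstructing an iteration number from `epoch * self.n_train_batches + minibatch_index` (which no longer exists once the dataset is a stream). Patience, the validation frequency and the stopping test all use that counter. Here is a stripped-down sketch of the control flow; the numeric settings and the random "validation loss" are placeholders, not values from the repository.

```python
import numpy

# Illustrative settings; the real values live elsewhere in the file.
patience = 10000                # minimum number of minibatches to look at
patience_increase = 2.          # grow patience when a clear improvement shows up
improvement_threshold = 0.995   # relative improvement considered significant
mb_per_epoch = 500
validation_frequency = min(mb_per_epoch, patience / 2)

best_validation_loss = float('inf')
total_mb_index = 0
done_looping = False
rng = numpy.random.RandomState(0)

for epoch in range(1, 31):                       # plays the role of num_finetune
    if done_looping:
        break
    for minibatch_index in range(mb_per_epoch):  # one pass over the training stream
        total_mb_index += 1
        if (total_mb_index + 1) % validation_frequency == 0:
            this_validation_loss = rng.rand()    # placeholder for the validate_model() mean
            if this_validation_loss < best_validation_loss:
                if this_validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, total_mb_index * patience_increase)
                best_validation_loss = this_validation_loss
        if patience <= total_mb_index:
            done_looping = True
            break

print('stopped after %d minibatches' % total_mb_index)
```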
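Finally, the snapshot taken at the end of `pretrain` (`self.parameters_pre = [copy(x.value) for x in self.classifier.params]`) is what the new `reload_parameters` restores, so several finetuning runs can start from the same post-pretraining weights. The patch uses the old Theano `.value` attribute on shared variables; the sketch below shows the same save/restore pattern with the later `get_value`/`set_value` API, on two stand-in parameters.

```python
from copy import copy

import numpy
import theano

floatX = theano.config.floatX
# Two stand-in parameters playing the role of classifier.params.
params = [theano.shared(numpy.ones((2, 2), dtype=floatX), name='W'),
          theano.shared(numpy.zeros(2, dtype=floatX), name='b')]

# Snapshot right after pretraining, as the patch does with copy(x.value).
parameters_pre = [copy(p.get_value()) for p in params]

# ... finetuning would update the shared variables here ...
params[0].set_value(params[0].get_value() * 5)

# reload_parameters(): put back the post-pretraining values.
for idx, saved in enumerate(parameters_pre):
    params[idx].set_value(copy(saved))

print(params[0].get_value())   # back to all ones
```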