Mercurial > ift6266
diff deep/stacked_dae/sgd_optimization.py @ 275:7b4507295eba
merge
author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
---|---|
date | Mon, 22 Mar 2010 10:20:10 -0400 |
parents | acb942530923 c8fe09a65039 |
children | 8a3af19ae272 |
line wrap: on
line diff
--- a/deep/stacked_dae/sgd_optimization.py Mon Mar 22 10:19:45 2010 -0400 +++ b/deep/stacked_dae/sgd_optimization.py Mon Mar 22 10:20:10 2010 -0400 @@ -15,53 +15,43 @@ from stacked_dae import SdA -def shared_dataset(data_xy): - data_x, data_y = data_xy - #shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) - #shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) - #shared_y = T.cast(shared_y, 'int32') - shared_x = theano.shared(data_x) - shared_y = theano.shared(data_y) - return shared_x, shared_y +from ift6266.utils.seriestables import * -class DummyMux(): - def append(self, param1, param2): - pass +default_series = { \ + 'reconstruction_error' : DummySeries(), + 'training_error' : DummySeries(), + 'validation_error' : DummySeries(), + 'test_error' : DummySeries(), + 'params' : DummySeries() + } + +def itermax(iter, max): + for i,it in enumerate(iter): + if i >= max: + break + yield it class SdaSgdOptimizer: - def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None): + def __init__(self, dataset, hyperparameters, n_ins, n_outs, + examples_per_epoch, series=default_series, max_minibatches=None): self.dataset = dataset self.hp = hyperparameters self.n_ins = n_ins self.n_outs = n_outs - self.input_divider = input_divider - if not series_mux: - series_mux = DummyMux() - print "No series multiplexer set" - self.series_mux = series_mux + self.max_minibatches = max_minibatches + print "SdaSgdOptimizer, max_minibatches =", max_minibatches + + self.ex_per_epoch = examples_per_epoch + self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size + + self.series = series self.rng = numpy.random.RandomState(1234) - self.init_datasets() self.init_classifier() sys.stdout.flush() - - def init_datasets(self): - print "init_datasets" - sys.stdout.flush() - - train_set, valid_set, test_set = self.dataset - self.test_set_x, self.test_set_y = shared_dataset(test_set) - self.valid_set_x, self.valid_set_y = shared_dataset(valid_set) - self.train_set_x, self.train_set_y = shared_dataset(train_set) - - # compute number of minibatches for training, validation and testing - self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size - self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size - # remove last batch in case it's incomplete - self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1 def init_classifier(self): print "Constructing classifier" @@ -74,8 +64,6 @@ # construct the stacked denoising autoencoder class self.classifier = SdA( \ - train_set_x= self.train_set_x, \ - train_set_y = self.train_set_y,\ batch_size = self.hp.minibatch_size, \ n_ins= self.n_ins, \ hidden_layers_sizes = layers_sizes, \ @@ -83,46 +71,44 @@ corruption_levels = corruption_levels,\ rng = self.rng,\ pretrain_lr = self.hp.pretraining_lr, \ - finetune_lr = self.hp.finetuning_lr,\ - input_divider = self.input_divider ) + finetune_lr = self.hp.finetuning_lr) #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph") sys.stdout.flush() def train(self): - self.pretrain() - self.finetune() + self.pretrain(self.dataset) + self.finetune(self.dataset) - def pretrain(self): + def pretrain(self,dataset): print "STARTING PRETRAINING, time = ", datetime.datetime.now() sys.stdout.flush() - #time_acc_func = 0.0 - #time_acc_total = 0.0 - start_time = time.clock() ## Pre-train layer-wise for i in xrange(self.classifier.n_layers): # go through pretraining epochs for epoch in xrange(self.hp.pretraining_epochs_per_layer): # go through the training set - for batch_index in xrange(self.n_train_batches): - #t1 = time.clock() - c = self.classifier.pretrain_functions[i](batch_index) - #t2 = time.clock() + batch_index=0 + for x,y in dataset.train(self.hp.minibatch_size): + c = self.classifier.pretrain_functions[i](x) + + self.series["reconstruction_error"].append((epoch, batch_index), c) + batch_index+=1 - #time_acc_func += t2 - t1 + #if batch_index % 100 == 0: + # print "100 batches" - #if batch_index % 500 == 0: - # print "acc / total", time_acc_func / (t2 - start_time), time_acc_func - - self.series_mux.append("reconstruction_error", c) + # useful when doing tests + if self.max_minibatches and batch_index >= self.max_minibatches: + break print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c sys.stdout.flush() - self.series_mux.append("params", self.classifier.all_params) + self.series['params'].append((epoch,), self.classifier.all_params) end_time = time.clock() @@ -131,24 +117,26 @@ sys.stdout.flush() - def finetune(self): + def finetune(self,dataset): print "STARTING FINETUNING, time = ", datetime.datetime.now() - index = T.lscalar() # index to a [mini]batch minibatch_size = self.hp.minibatch_size # create a function to compute the mistakes that are made by the model # on the validation set, or testing set - shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX)) - test_model = theano.function([index], self.classifier.errors, - givens = { - self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]}) + test_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: ensemble_x, + # self.classifier.y: ensemble_y]}) - validate_model = theano.function([index], self.classifier.errors, - givens = { - self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider, - self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]}) + validate_model = \ + theano.function( + [self.classifier.x,self.classifier.y], self.classifier.errors) + # givens = { + # self.classifier.x: , + # self.classifier.y: ]}) # early-stopping parameters @@ -157,11 +145,13 @@ # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(self.n_train_batches, patience/2) + validation_frequency = min(self.mb_per_epoch, patience/2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch + if self.max_minibatches and validation_frequency > self.max_minibatches: + validation_frequency = self.max_minibatches / 2 best_params = None best_validation_loss = float('inf') @@ -171,22 +161,31 @@ done_looping = False epoch = 0 + total_mb_index = 0 + while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(self.n_train_batches): + minibatch_index = -1 + for x,y in dataset.train(minibatch_size): + minibatch_index += 1 + cost_ij = self.classifier.finetune(x,y) + total_mb_index += 1 - cost_ij = self.classifier.finetune(minibatch_index) - iter = epoch * self.n_train_batches + minibatch_index - - self.series_mux.append("training_error", cost_ij) + self.series["training_error"].append((epoch, minibatch_index), cost_ij) - if (iter+1) % validation_frequency == 0: + if (total_mb_index+1) % validation_frequency == 0: - validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)] + iter = dataset.valid(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + validation_losses = [validate_model(x,y) for x,y in iter] this_validation_loss = numpy.mean(validation_losses) - self.series_mux.append("validation_error", this_validation_loss) + + self.series["validation_error"].\ + append((epoch, minibatch_index), this_validation_loss*100.) + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ - (epoch, minibatch_index+1, self.n_train_batches, \ + (epoch, minibatch_index+1, self.mb_per_epoch, \ this_validation_loss*100.)) @@ -196,26 +195,36 @@ #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold : - patience = max(patience, iter * patience_increase) + patience = max(patience, total_mb_index * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss - best_iter = iter + best_iter = total_mb_index # test it on the test set - test_losses = [test_model(i) for i in xrange(self.n_test_batches)] + iter = dataset.test(minibatch_size) + if self.max_minibatches: + iter = itermax(iter, self.max_minibatches) + test_losses = [test_model(x,y) for x,y in iter] test_score = numpy.mean(test_losses) - self.series_mux.append("test_error", test_score) + + self.series["test_error"].\ + append((epoch, minibatch_index), test_score*100.) + print((' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % - (epoch, minibatch_index+1, self.n_train_batches, + (epoch, minibatch_index+1, self.mb_per_epoch, test_score*100.)) sys.stdout.flush() - self.series_mux.append("params", self.classifier.all_params) + # useful when doing tests + if self.max_minibatches and minibatch_index >= self.max_minibatches: + break - if patience <= iter : + self.series['params'].append((epoch,), self.classifier.all_params) + + if patience <= total_mb_index: done_looping = True break