ift6266: comparison deep/stacked_dae/sgd_optimization.py @ 185:b9ea8e2d071a
Removed the code for reusing pretraining results (too complicated for too little benefit: it's the finetuning that really takes a long time).
author | fsavard |
---|---|
date | Fri, 26 Feb 2010 17:45:52 -0500 |
parents | 1f5937e9e530 |
children | d364a130b221 |
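
This change drops the job_tree / results_db / experiment machinery from SdaSgdOptimizer: the constructor now only takes an optional series_mux, falling back to a DummyMux that ignores every append(), and train() simply runs pretrain() followed by finetune(). A minimal driver sketch for the simplified class is shown below; the import path, the random data and the hyperparameter values are placeholder assumptions, not taken from the repository.

```python
# Hypothetical driver for the simplified SdaSgdOptimizer; the random data and
# hyperparameter values below are placeholders, not taken from the repository.
import numpy
from jobman import DD
from sgd_optimization import SdaSgdOptimizer  # assumed import path for this file

rng = numpy.random.RandomState(0)

def random_split(n, n_ins, n_outs):
    # Stand-in for a real (x, y) split; shared_dataset() wraps these arrays.
    x = rng.uniform(size=(n, n_ins)).astype('float32')
    y = rng.randint(0, n_outs, size=n)
    return (x, y)

n_ins, n_outs = 32 * 32, 10
dataset = (random_split(1000, n_ins, n_outs),   # train
           random_split(200, n_ins, n_outs),    # valid
           random_split(200, n_ins, n_outs))    # test

hyperparameters = DD({'minibatch_size': 20,
                      'pretraining_lr': 0.001,
                      'pretraining_epochs_per_layer': 2,
                      'finetuning_lr': 0.1,
                      'max_finetuning_epochs': 5,
                      'hidden_layers_sizes': [500, 500, 500],
                      'corruption_levels': [0.1, 0.2, 0.3]})

# With series_mux=None the optimizer falls back to DummyMux, which silently
# drops every append(); pass a real multiplexer to actually record the series.
optimizer = SdaSgdOptimizer(dataset, hyperparameters,
                            n_ins=n_ins, n_outs=n_outs,
                            input_divider=1.0, series_mux=None)
optimizer.train()   # pretrain() followed by finetune()
```
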
172:4d3d3627df3e | 185:b9ea8e2d071a |
---|---|
5 | 5 |
6 import numpy | 6 import numpy |
7 import theano | 7 import theano |
8 import time | 8 import time |
9 import theano.tensor as T | 9 import theano.tensor as T |
10 import copy | |
11 import sys | 10 import sys |
12 | 11 |
13 from jobman import DD | 12 from jobman import DD |
14 import jobman, jobman.sql | 13 import jobman, jobman.sql |
15 | 14 |
22 #shared_y = T.cast(shared_y, 'int32') | 21 #shared_y = T.cast(shared_y, 'int32') |
23 shared_x = theano.shared(data_x) | 22 shared_x = theano.shared(data_x) |
24 shared_y = theano.shared(data_y) | 23 shared_y = theano.shared(data_y) |
25 return shared_x, shared_y | 24 return shared_x, shared_y |
26 | 25 |
| 26 class DummyMux(): |
| 27 def append(self, param1, param2): |
| 28 pass |
| 29 |
27 class SdaSgdOptimizer: | 30 class SdaSgdOptimizer: |
28 def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\ | 31 def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None): |
29 job_tree=False, results_db=None,\ | |
30 experiment="",\ | |
31 num_hidden_layers_to_try=[1,2,3], \ | |
32 finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]): | |
33 | |
34 self.dataset = dataset | 32 self.dataset = dataset |
35 self.hp = copy.copy(hyperparameters) | 33 self.hp = hyperparameters |
36 self.n_ins = n_ins | 34 self.n_ins = n_ins |
37 self.n_outs = n_outs | 35 self.n_outs = n_outs |
38 self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX) | 36 self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX) |
39 | 37 |
40 self.job_tree = job_tree | 38 if not series_mux: |
41 self.results_db = results_db | 39 series_mux = DummyMux() |
42 self.experiment = experiment | 40 print "No series multiplexer set" |
43 if self.job_tree: | 41 self.series_mux = series_mux |
44 assert(not results_db is None) | |
45 # these hp should not be there, so we insert default values | |
46 # we use 3 hidden layers as we'll iterate through 1,2,3 | |
47 self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway | |
48 cl = self.hp.corruption_levels | |
49 nh = self.hp.hidden_layers_sizes | |
50 self.hp.corruption_levels = [cl,cl,cl] | |
51 self.hp.hidden_layers_sizes = [nh,nh,nh] | |
52 | |
53 self.num_hidden_layers_to_try = num_hidden_layers_to_try | |
54 self.finetuning_lr_to_try = finetuning_lr_to_try | |
55 | |
56 self.printout_frequency = 1000 | |
57 | 42 |
58 self.rng = numpy.random.RandomState(1234) | 43 self.rng = numpy.random.RandomState(1234) |
59 | 44 |
60 self.init_datasets() | 45 self.init_datasets() |
61 self.init_classifier() | 46 self.init_classifier() |
| 47 |
| 48 sys.stdout.flush() |
62 | 49 |
63 def init_datasets(self): | 50 def init_datasets(self): |
64 print "init_datasets" | 51 print "init_datasets" |
| 52 sys.stdout.flush() |
| 53 |
65 train_set, valid_set, test_set = self.dataset | 54 train_set, valid_set, test_set = self.dataset |
66 self.test_set_x, self.test_set_y = shared_dataset(test_set) | 55 self.test_set_x, self.test_set_y = shared_dataset(test_set) |
67 self.valid_set_x, self.valid_set_y = shared_dataset(valid_set) | 56 self.valid_set_x, self.valid_set_y = shared_dataset(valid_set) |
68 self.train_set_x, self.train_set_y = shared_dataset(train_set) | 57 self.train_set_x, self.train_set_y = shared_dataset(train_set) |
69 | 58 |
72 self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size | 61 self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size |
73 self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size | 62 self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size |
74 | 63 |
75 def init_classifier(self): | 64 def init_classifier(self): |
76 print "Constructing classifier" | 65 print "Constructing classifier" |
| 66 |
77 # construct the stacked denoising autoencoder class | 67 # construct the stacked denoising autoencoder class |
78 self.classifier = SdA( \ | 68 self.classifier = SdA( \ |
79 train_set_x= self.train_set_x, \ | 69 train_set_x= self.train_set_x, \ |
80 train_set_y = self.train_set_y,\ | 70 train_set_y = self.train_set_y,\ |
81 batch_size = self.hp.minibatch_size, \ | 71 batch_size = self.hp.minibatch_size, \ |
86 rng = self.rng,\ | 76 rng = self.rng,\ |
87 pretrain_lr = self.hp.pretraining_lr, \ | 77 pretrain_lr = self.hp.pretraining_lr, \ |
88 finetune_lr = self.hp.finetuning_lr,\ | 78 finetune_lr = self.hp.finetuning_lr,\ |
89 input_divider = self.input_divider ) | 79 input_divider = self.input_divider ) |
90 | 80 |
| 81 sys.stdout.flush() |
| 82 |
91 def train(self): | 83 def train(self): |
92 self.pretrain() | 84 self.pretrain() |
93 if not self.job_tree: | 85 self.finetune() |
94 # if job_tree is True, finetuning was already performed | |
95 self.finetune() | |
96 | 86 |
97 def pretrain(self): | 87 def pretrain(self): |
98 print "STARTING PRETRAINING" | 88 print "STARTING PRETRAINING" |
99 | 89 sys.stdout.flush() |
100 printout_acc = 0.0 | |
101 last_error = 0.0 | |
102 | 90 |
103 start_time = time.clock() | 91 start_time = time.clock() |
104 ## Pre-train layer-wise | 92 ## Pre-train layer-wise |
105 for i in xrange(self.classifier.n_layers): | 93 for i in xrange(self.classifier.n_layers): |
106 # go through pretraining epochs | 94 # go through pretraining epochs |
107 for epoch in xrange(self.hp.pretraining_epochs_per_layer): | 95 for epoch in xrange(self.hp.pretraining_epochs_per_layer): |
108 # go through the training set | 96 # go through the training set |
109 for batch_index in xrange(self.n_train_batches): | 97 for batch_index in xrange(self.n_train_batches): |
110 c = self.classifier.pretrain_functions[i](batch_index) | 98 c = self.classifier.pretrain_functions[i](batch_index) |
111 | 99 |
112 printout_acc += c / self.printout_frequency | 100 self.series_mux.append("reconstruction_error", c) |
113 if (batch_index+1) % self.printout_frequency == 0: | |
114 print batch_index, "reconstruction cost avg=", printout_acc | |
115 last_error = printout_acc | |
116 printout_acc = 0.0 | |
117 | 101 |
118 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c | 102 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c |
119 | 103 sys.stdout.flush() |
120 self.job_splitter(i+1, time.clock()-start_time, last_error) | |
121 | 104 |
122 end_time = time.clock() | 105 end_time = time.clock() |
123 | 106 |
124 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) | 107 print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) |
125 | 108 self.hp.update({'pretraining_time': end_time-start_time}) |
126 # Save time by reusing intermediate results | 109 |
127 def job_splitter(self, current_pretraining_layer, pretraining_time, last_error): | 110 sys.stdout.flush() |
128 | |
129 state_copy = None | |
130 original_classifier = None | |
131 | |
132 if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try: | |
133 for lr in self.finetuning_lr_to_try: | |
134 sys.stdout.flush() | |
135 sys.stderr.flush() | |
136 | |
137 state_copy = copy.copy(self.hp) | |
138 | |
139 self.hp.update({'num_hidden_layers':current_pretraining_layer, \ | |
140 'finetuning_lr':lr,\ | |
141 'pretraining_time':pretraining_time,\ | |
142 'last_reconstruction_error':last_error}) | |
143 | |
144 original_classifier = self.classifier | |
145 print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means() | |
146 self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr) | |
147 | |
148 self.finetune() | |
149 | |
150 self.insert_finished_job() | |
151 | |
152 print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means() | |
153 print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means() | |
154 self.classifier = original_classifier | |
155 self.hp = state_copy | |
156 | |
157 def insert_finished_job(self): | |
158 job = copy.copy(self.hp) | |
159 job[jobman.sql.STATUS] = jobman.sql.DONE | |
160 job[jobman.sql.EXPERIMENT] = self.experiment | |
161 | |
162 # don,t try to store arrays in db | |
163 job['hidden_layers_sizes'] = job.hidden_layers_sizes[0] | |
164 job['corruption_levels'] = job.corruption_levels[0] | |
165 | |
166 print "Will insert finished job", job | |
167 jobman.sql.insert_dict(jobman.flatten(job), self.results_db) | |
168 | 111 |
169 def finetune(self): | 112 def finetune(self): |
170 print "STARTING FINETUNING" | 113 print "STARTING FINETUNING" |
171 | 114 |
172 index = T.lscalar() # index to a [mini]batch | 115 index = T.lscalar() # index to a [mini]batch |
203 start_time = time.clock() | 146 start_time = time.clock() |
204 | 147 |
205 done_looping = False | 148 done_looping = False |
206 epoch = 0 | 149 epoch = 0 |
207 | 150 |
208 printout_acc = 0.0 | |
209 | |
210 if not self.hp.has_key('max_finetuning_epochs'): | |
211 self.hp.max_finetuning_epochs = 1000 | |
212 | |
213 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): | 151 while (epoch < self.hp.max_finetuning_epochs) and (not done_looping): |
214 epoch = epoch + 1 | 152 epoch = epoch + 1 |
215 for minibatch_index in xrange(self.n_train_batches): | 153 for minibatch_index in xrange(self.n_train_batches): |
216 | 154 |
217 cost_ij = self.classifier.finetune(minibatch_index) | 155 cost_ij = self.classifier.finetune(minibatch_index) |
218 iter = epoch * self.n_train_batches + minibatch_index | 156 iter = epoch * self.n_train_batches + minibatch_index |
219 | 157 |
220 printout_acc += cost_ij / float(self.printout_frequency * minibatch_size) | 158 self.series_mux.append("training_error", cost_ij) |
221 if (iter+1) % self.printout_frequency == 0: | |
222 print iter, "cost avg=", printout_acc | |
223 printout_acc = 0.0 | |
224 | 159 |
225 if (iter+1) % validation_frequency == 0: | 160 if (iter+1) % validation_frequency == 0: |
226 | 161 |
227 validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)] | 162 validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)] |
228 this_validation_loss = numpy.mean(validation_losses) | 163 this_validation_loss = numpy.mean(validation_losses) |
249 print((' epoch %i, minibatch %i/%i, test error of best ' | 184 print((' epoch %i, minibatch %i/%i, test error of best ' |
250 'model %f %%') % | 185 'model %f %%') % |
251 (epoch, minibatch_index+1, self.n_train_batches, | 186 (epoch, minibatch_index+1, self.n_train_batches, |
252 test_score*100.)) | 187 test_score*100.)) |
253 | 188 |
| 189 sys.stdout.flush() |
| 190 |
| 191 self.series_mux.append("params", self.classifier.params) |
254 | 192 |
255 if patience <= iter : | 193 if patience <= iter : |
256 done_looping = True | 194 done_looping = True |
257 break | 195 break |
258 | 196 |
259 end_time = time.clock() | 197 end_time = time.clock() |
260 self.hp.update({'finetuning_time':end_time-start_time,\ | 198 self.hp.update({'finetuning_time':end_time-start_time,\ |
261 'best_validation_error':best_validation_loss,\ | 199 'best_validation_error':best_validation_loss,\ |
262 'test_score':test_score, | 200 'test_score':test_score, |
263 'num_finetuning_epochs':epoch}) | 201 'num_finetuning_epochs':epoch}) |
| 202 |
264 print(('Optimization complete with best validation score of %f %%,' | 203 print(('Optimization complete with best validation score of %f %%,' |
265 'with test performance %f %%') % | 204 'with test performance %f %%') % |
266 (best_validation_loss * 100., test_score*100.)) | 205 (best_validation_loss * 100., test_score*100.)) |
267 print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) | 206 print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.)) |
268 | 207 |
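
The only thing the new right-hand version requires of series_mux is an append(name, value) method, which is exactly the interface DummyMux stubs out. As a purely illustrative stand-in (not the repository's actual series module), an in-memory multiplexer could look like this:

```python
# Illustrative stand-in for a series multiplexer; the repository's real
# series module is not shown in this diff and is likely more elaborate.
from collections import defaultdict

class ListSeriesMux(object):
    """Records every appended value in a per-name list."""

    def __init__(self):
        self.series = defaultdict(list)

    def append(self, name, value):
        # Same signature as DummyMux.append(param1, param2) above.
        self.series[name].append(value)

# Usage sketch:
#   mux = ListSeriesMux()
#   optimizer = SdaSgdOptimizer(dataset, hyperparameters, n_ins, n_outs,
#                               series_mux=mux)
#   optimizer.train()
#   print mux.series["reconstruction_error"][:10]
```
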