diff deep/stacked_dae/sgd_optimization.py @ 185:b9ea8e2d071a

Removed the code for reusing pretraining results (too complicated for too little benefit: finetuning is what really takes long)
author fsavard
date Fri, 26 Feb 2010 17:45:52 -0500
parents 1f5937e9e530
children d364a130b221
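
Note: this refactor replaces the old printout_frequency accumulators and the job_tree machinery with a single series_mux object. The optimizer only requires it to expose append(name, value), which is what the DummyMux fallback added in the diff below stubs out, and the series names it emits are "reconstruction_error", "training_error" and "params". A minimal in-memory multiplexer along those lines (ListSeriesMux is an illustrative name, not something taken from this repository) might be:

    class ListSeriesMux(object):
        # Collect every appended value in a per-name list (illustration only).
        def __init__(self):
            self.series = {}

        def append(self, name, value):
            # Same append(name, value) interface as the DummyMux stub below.
            self.series.setdefault(name, []).append(value)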
line diff
--- a/deep/stacked_dae/sgd_optimization.py	Fri Feb 26 15:25:44 2010 -0500
+++ b/deep/stacked_dae/sgd_optimization.py	Fri Feb 26 17:45:52 2010 -0500
@@ -7,7 +7,6 @@
 import theano
 import time
 import theano.tensor as T
-import copy
 import sys
 
 from jobman import DD
@@ -24,44 +23,34 @@
     shared_y = theano.shared(data_y)
     return shared_x, shared_y
 
+class DummyMux():
+    def append(self, param1, param2):
+        pass
+
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\
-                job_tree=False, results_db=None,\
-                experiment="",\
-                num_hidden_layers_to_try=[1,2,3], \
-                finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]):
-
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series_mux=None):
         self.dataset = dataset
-        self.hp = copy.copy(hyperparameters)
+        self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
         self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
-
-        self.job_tree = job_tree
-        self.results_db = results_db
-        self.experiment = experiment
-        if self.job_tree:
-            assert(not results_db is None)
-            # these hp should not be there, so we insert default values
-            # we use 3 hidden layers as we'll iterate through 1,2,3
-            self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway
-            cl = self.hp.corruption_levels
-            nh = self.hp.hidden_layers_sizes
-            self.hp.corruption_levels = [cl,cl,cl]
-            self.hp.hidden_layers_sizes = [nh,nh,nh]
-            
-        self.num_hidden_layers_to_try = num_hidden_layers_to_try
-        self.finetuning_lr_to_try = finetuning_lr_to_try
-
-        self.printout_frequency = 1000
+   
+        if not series_mux:
+            series_mux = DummyMux()
+            print "No series multiplexer set"
+        self.series_mux = series_mux
 
         self.rng = numpy.random.RandomState(1234)
 
         self.init_datasets()
         self.init_classifier()
+
+        sys.stdout.flush()
      
     def init_datasets(self):
         print "init_datasets"
+        sys.stdout.flush()
+
         train_set, valid_set, test_set = self.dataset
         self.test_set_x, self.test_set_y = shared_dataset(test_set)
         self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
@@ -74,6 +63,7 @@
 
     def init_classifier(self):
         print "Constructing classifier"
+
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
                           train_set_x= self.train_set_x, \
@@ -88,17 +78,15 @@
                           finetune_lr = self.hp.finetuning_lr,\
                           input_divider = self.input_divider )
 
+        sys.stdout.flush()
+
     def train(self):
         self.pretrain()
-        if not self.job_tree:
-            # if job_tree is True, finetuning was already performed
-            self.finetune()
+        self.finetune()
 
     def pretrain(self):
         print "STARTING PRETRAINING"
-
-        printout_acc = 0.0
-        last_error = 0.0
+        sys.stdout.flush()
 
         start_time = time.clock()  
         ## Pre-train layer-wise 
@@ -109,62 +97,17 @@
                 for batch_index in xrange(self.n_train_batches):
                     c = self.classifier.pretrain_functions[i](batch_index)
 
-                    printout_acc += c / self.printout_frequency
-                    if (batch_index+1) % self.printout_frequency == 0:
-                        print batch_index, "reconstruction cost avg=", printout_acc
-                        last_error = printout_acc
-                        printout_acc = 0.0
+                    self.series_mux.append("reconstruction_error", c)
                         
                 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
-
-            self.job_splitter(i+1, time.clock()-start_time, last_error)
+                sys.stdout.flush()
      
         end_time = time.clock()
 
         print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
-
-    # Save time by reusing intermediate results
-    def job_splitter(self, current_pretraining_layer, pretraining_time, last_error):
-
-        state_copy = None
-        original_classifier = None
-
-        if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try:
-            for lr in self.finetuning_lr_to_try:
-                sys.stdout.flush()
-                sys.stderr.flush()
-
-                state_copy = copy.copy(self.hp)
-
-                self.hp.update({'num_hidden_layers':current_pretraining_layer, \
-                            'finetuning_lr':lr,\
-                            'pretraining_time':pretraining_time,\
-                            'last_reconstruction_error':last_error})
+        self.hp.update({'pretraining_time': end_time-start_time})
 
-                original_classifier = self.classifier
-                print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means()
-                self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr)
-                
-                self.finetune()
-            
-                self.insert_finished_job()
-
-                print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means()
-                print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means()
-                self.classifier = original_classifier
-                self.hp = state_copy
-
-    def insert_finished_job(self):
-        job = copy.copy(self.hp)
-        job[jobman.sql.STATUS] = jobman.sql.DONE
-        job[jobman.sql.EXPERIMENT] = self.experiment
-
-        # don,t try to store arrays in db
-        job['hidden_layers_sizes'] = job.hidden_layers_sizes[0]
-        job['corruption_levels'] = job.corruption_levels[0]
-
-        print "Will insert finished job", job
-        jobman.sql.insert_dict(jobman.flatten(job), self.results_db)
+        sys.stdout.flush()
 
     def finetune(self):
         print "STARTING FINETUNING"
@@ -205,11 +148,6 @@
         done_looping = False
         epoch = 0
 
-        printout_acc = 0.0
-
-        if not self.hp.has_key('max_finetuning_epochs'):
-            self.hp.max_finetuning_epochs = 1000
-
         while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
             epoch = epoch + 1
             for minibatch_index in xrange(self.n_train_batches):
@@ -217,10 +155,7 @@
                 cost_ij = self.classifier.finetune(minibatch_index)
                 iter    = epoch * self.n_train_batches + minibatch_index
 
-                printout_acc += cost_ij / float(self.printout_frequency * minibatch_size)
-                if (iter+1) % self.printout_frequency == 0:
-                    print iter, "cost avg=", printout_acc
-                    printout_acc = 0.0
+                self.series_mux.append("training_error", cost_ij)
 
                 if (iter+1) % validation_frequency == 0: 
                     
@@ -251,6 +186,9 @@
                                      (epoch, minibatch_index+1, self.n_train_batches,
                                       test_score*100.))
 
+                    sys.stdout.flush()
+
+            self.series_mux.append("params", self.classifier.params)
 
             if patience <= iter :
                 done_looping = True
@@ -261,6 +199,7 @@
                     'best_validation_error':best_validation_loss,\
                     'test_score':test_score,
                     'num_finetuning_epochs':epoch})
+
         print(('Optimization complete with best validation score of %f %%,'
                'with test performance %f %%') %  
                      (best_validation_loss * 100., test_score*100.))
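
For context, a hedged sketch of how the slimmed-down optimizer is driven after this change. The constructor signature (dataset, hyperparameters, n_ins, n_outs, input_divider, series_mux) and the finetuning_lr, hidden_layers_sizes, corruption_levels and max_finetuning_epochs fields appear in the diff; the toy data, the remaining DD fields and ListSeriesMux (sketched above the diff) are assumptions for illustration only.

    import numpy
    from jobman import DD

    def toy_split(n):
        # Hypothetical (inputs, labels) pair, a stand-in for the real
        # (train, valid, test) splits fed to shared_dataset().
        x = numpy.random.rand(n, 32 * 32).astype('float32')
        y = numpy.random.randint(0, 10, size=n)
        return (x, y)

    dataset = (toy_split(1000), toy_split(200), toy_split(200))

    hp = DD({'finetuning_lr': 0.01,
             'hidden_layers_sizes': [500, 500],
             'corruption_levels': [0.1, 0.2],
             'max_finetuning_epochs': 1000})
    # The full script expects further fields (pretraining epochs and learning
    # rate, minibatch size, ...) that are not visible in this diff.

    optimizer = SdaSgdOptimizer(dataset, hp, n_ins=32 * 32, n_outs=10,
                                series_mux=ListSeriesMux())
    optimizer.train()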