diff deep/stacked_dae/v_sylvain/sgd_optimization.py @ 266:1e4e60ddadb1

Merge. Ah, and in the last commit I forgot to mention that I added code to handle isolating different clones, so experiments can run while the code is modified at the same time.
author fsavard
date Fri, 19 Mar 2010 10:56:16 -0400
parents a0264184684e
children a8b92a4a708d
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Fri Mar 19 10:54:39 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Fri Mar 19 10:56:16 2010 -0400
@@ -9,34 +9,16 @@
 import datetime
 import theano.tensor as T
 import sys
+import pickle
 
 from jobman import DD
 import jobman, jobman.sql
+from copy import copy
 
 from stacked_dae import SdA
 
 from ift6266.utils.seriestables import *
 
-##def shared_dataset(data_xy):
-##    data_x, data_y = data_xy
-##    if theano.config.device.startswith("gpu"):
-##        print "TRANSFERING DATASETS (via shared()) TO GPU"
-##        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-##        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-##        shared_y = T.cast(shared_y, 'int32')
-##    else:
-##        print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES"
-##        shared_x = theano.shared(data_x)
-##        shared_y = theano.shared(data_y)
-##    return shared_x, shared_y
-
-    ######The shared variables will be replaced via "givens" in the function definitions further down
-def shared_dataset(batch_size, n_in):
-    
-    shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX))
-    shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX))
-    return shared_x, shared_y
-
 default_series = { \
         'reconstruction_error' : DummySeries(),
         'training_error' : DummySeries(),
@@ -45,37 +27,34 @@
         'params' : DummySeries()
         }
 
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series):
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                    examples_per_epoch, series=default_series, max_minibatches=None):
         self.dataset = dataset
         self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
-        self.input_divider = input_divider
+        self.parameters_pre=[]
    
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
         self.series = series
 
         self.rng = numpy.random.RandomState(1234)
 
-        self.init_datasets()
         self.init_classifier()
 
         sys.stdout.flush()
-     
-    def init_datasets(self):
-        print "init_datasets"
-        sys.stdout.flush()
-
-        #train_set, valid_set, test_set = self.dataset
-        self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-
-        # compute number of minibatches for training, validation and testing
-        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        # remove last batch in case it's incomplete
-        self.n_test_batches  = (self.test_set_x.value.shape[0]  / self.hp.minibatch_size) - 1
 
     def init_classifier(self):
         print "Constructing classifier"
@@ -88,8 +67,6 @@
 
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
-                          train_set_x= self.train_set_x, \
-                          train_set_y = self.train_set_y,\
                           batch_size = self.hp.minibatch_size, \
                           n_ins= self.n_ins, \
                           hidden_layers_sizes = layers_sizes, \
@@ -97,8 +74,7 @@
                           corruption_levels = corruption_levels,\
                           rng = self.rng,\
                           pretrain_lr = self.hp.pretraining_lr, \
-                          finetune_lr = self.hp.finetuning_lr,\
-                          input_divider = self.input_divider )
+                          finetune_lr = self.hp.finetuning_lr)
 
         #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
 
@@ -108,7 +84,7 @@
         self.pretrain(self.dataset)
         self.finetune(self.dataset)
 
-    def pretrain(self,dataset,reduce):
+    def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
 
@@ -118,15 +94,19 @@
             # go through pretraining epochs 
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
-                batch_index=int(0)
+                batch_index=0
                 for x,y in dataset.train(self.hp.minibatch_size):
-                    batch_index+=1
-                    if batch_index > reduce: #If maximum number of mini-batch is used
-                        break
                     c = self.classifier.pretrain_functions[i](x)
 
-                    
                     self.series["reconstruction_error"].append((epoch, batch_index), c)
+                    batch_index+=1
+
+                    #if batch_index % 100 == 0:
+                    #    print "100 batches"
+
+                    # useful when doing tests
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
+                        break
                         
                 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
                 sys.stdout.flush()
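
The pretraining hunk above now counts minibatches itself and breaks out once self.max_minibatches is reached, which keeps test runs short. A condensed sketch of that control flow, assuming dataset.train yields (x, y) minibatches and pretrain_fns holds one compiled cost function per layer (both names are stand-ins, not the exact SdA API, and the series logging is omitted):

    def pretrain(dataset, pretrain_fns, epochs_per_layer, minibatch_size,
                 max_minibatches=None):
        for i, pretrain_fn in enumerate(pretrain_fns):      # one pass per layer
            for epoch in xrange(epochs_per_layer):
                batch_index = 0
                for x, y in dataset.train(minibatch_size):
                    c = pretrain_fn(x)                      # reconstruction cost
                    batch_index += 1
                    if max_minibatches and batch_index >= max_minibatches:
                        break                               # useful when doing tests
                print 'Pre-training layer %i, epoch %d, cost %f' % (i, epoch, c)
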
@@ -137,33 +117,41 @@
 
         print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
         self.hp.update({'pretraining_time': end_time-start_time})
-
+        
         sys.stdout.flush()
+        
+        #To be able to load them later for tests on finetune
+        self.parameters_pre=[copy(x.value) for x in self.classifier.params]
+        f = open('params_pretrain.txt', 'w')
+        pickle.dump(self.parameters_pre,f)
+        f.close()
 
-    def finetune(self,dataset,reduce):
+
+    def finetune(self,dataset,dataset_test,num_finetune,ind_test):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
-        #index   = T.lscalar()    # index to a [mini]batch 
         minibatch_size = self.hp.minibatch_size
-        ensemble_x = T.matrix('ensemble_x')
-        ensemble_y = T.ivector('ensemble_y')
+        if ind_test == 0:
+            nom_test = "NIST"
+        else:
+            nom_test = "P07"
+
 
         # create a function to compute the mistakes that are made by the model
         # on the validation set, or testing set
-        shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-        test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-                 givens = {
-                   #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-                   self.classifier.x: ensemble_x,
-                   self.classifier.y: ensemble_y})
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #         givens = {
+        #           self.classifier.x: ensemble_x,
+        #           self.classifier.y: ensemble_y]})
 
-        validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-                givens = {
-                   #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-                   self.classifier.x: ensemble_x,
-                   self.classifier.y: ensemble_y})
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #        givens = {
+        #           self.classifier.x: ,
+        #           self.classifier.y: ]})
 
 
         # early-stopping parameters
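
The rewritten test_model and validate_model above take the minibatch arrays directly as function inputs instead of indexing into shared datasets through givens, which is why shared_dataset and input_divider could be dropped. A hedged sketch of the two styles with a toy classifier (shapes, variable names and the 10-example slice are illustrative, not the repo's):

    import numpy
    import theano
    import theano.tensor as T

    x = T.matrix('x')
    y = T.ivector('y')
    W = theano.shared(numpy.zeros((5, 3), dtype=theano.config.floatX), name='W')
    errors = T.mean(T.neq(T.argmax(T.nnet.softmax(T.dot(x, W)), axis=1), y))

    # Style used after this change: feed the minibatch in as arguments.
    test_model = theano.function([x, y], errors)

    # Previous style (sketch): slice a shared dataset through `givens`.
    data_x = theano.shared(numpy.zeros((100, 5), dtype=theano.config.floatX))
    data_y = theano.shared(numpy.zeros(100, dtype='int32'))
    index = T.lscalar('index')
    test_model_givens = theano.function(
        [index], errors,
        givens={x: data_x[index * 10:(index + 1) * 10],
                y: data_y[index * 10:(index + 1) * 10]})
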
@@ -172,11 +160,13 @@
                                       # found
         improvement_threshold = 0.995 # a relative improvement of this much is 
                                       # considered significant
-        validation_frequency  = min(self.n_train_batches, patience/2)
+        validation_frequency  = min(self.mb_per_epoch, patience/2)
                                       # go through this many 
                                       # minibatche before checking the network 
                                       # on the validation set; in this case we 
                                       # check every epoch 
+        if self.max_minibatches and validation_frequency > self.max_minibatches:
+            validation_frequency = self.max_minibatches / 2
 
         best_params          = None
         best_validation_loss = float('inf')
@@ -186,37 +176,31 @@
         done_looping = False
         epoch = 0
 
-        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
+        total_mb_index = 0
+
+        while (epoch < num_finetune) and (not done_looping):
             epoch = epoch + 1
-            minibatch_index=int(0)
+            minibatch_index = -1
             for x,y in dataset.train(minibatch_size):
-                minibatch_index +=1
-                
-                if minibatch_index > reduce:   #If maximum number of mini-batchs is used 
-                    break
-                
+                minibatch_index += 1
                 cost_ij = self.classifier.finetune(x,y)
-                iter    = epoch * self.n_train_batches + minibatch_index
+                total_mb_index += 1
 
                 self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
-                if (iter+1) % validation_frequency == 0: 
+                if (total_mb_index+1) % validation_frequency == 0: 
                     
-                    #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
-                    test_index=int(0)
-                    validation_losses=[]    
-                    for x,y in dataset.valid(minibatch_size):
-                        test_index+=1
-                        if test_index > reduce:
-                            break
-                        validation_losses.append(validate_model(x,y))
+                    iter = dataset.valid(minibatch_size)
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+                    validation_losses = [validate_model(x,y) for x,y in iter]
                     this_validation_loss = numpy.mean(validation_losses)
 
                     self.series["validation_error"].\
                         append((epoch, minibatch_index), this_validation_loss*100.)
 
                     print('epoch %i, minibatch %i, validation error %f %%' % \
-                           (epoch, minibatch_index, \
+                           (epoch, minibatch_index+1, \
                             this_validation_loss*100.))
 
 
@@ -226,36 +210,48 @@
                         #improve patience if loss improvement is good enough
                         if this_validation_loss < best_validation_loss *  \
                                improvement_threshold :
-                            patience = max(patience, iter * patience_increase)
+                            patience = max(patience, total_mb_index * patience_increase)
 
                         # save best validation score and iteration number
                         best_validation_loss = this_validation_loss
-                        best_iter = iter
+                        best_iter = total_mb_index
 
                         # test it on the test set
-                        #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
-                        test_losses=[]
-                        i=0
-                        for x,y in dataset.test(minibatch_size):
-                            i+=1
-                            if i > reduce:
-                                break
-                            test_losses.append(test_model(x,y))
+                        iter = dataset.test(minibatch_size)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = [test_model(x,y) for x,y in iter]
                         test_score = numpy.mean(test_losses)
+                        
+                        #test it on the second test set
+                        iter2 = dataset_test.test(minibatch_size)
+                        if self.max_minibatches:
+                            iter2 = itermax(iter2, self.max_minibatches)
+                        test_losses2 = [test_model(x,y) for x,y in iter2]
+                        test_score2 = numpy.mean(test_losses2)
 
                         self.series["test_error"].\
                             append((epoch, minibatch_index), test_score*100.)
 
                         print(('     epoch %i, minibatch %i, test error of best '
                               'model %f %%') % 
-                                     (epoch, minibatch_index,
+                                     (epoch, minibatch_index+1,
                                       test_score*100.))
+                                    
+                        print(('     epoch %i, minibatch %i, test error on dataset %s of best '
+                              'model %f %%') % 
+                                     (epoch, minibatch_index+1,nom_test,
+                                      test_score2*100.))
 
                     sys.stdout.flush()
 
+                # useful when doing tests
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+
             self.series['params'].append((epoch,), self.classifier.all_params)
 
-            if patience <= iter :
+            if patience <= total_mb_index:
                 done_looping = True
                 break
 
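
The finetuning loop above implements patience-based early stopping keyed on total_mb_index rather than a per-epoch iter counter. A stripped-down sketch of that logic; the initial patience and patience_increase values are not shown in this hunk, so the defaults below are placeholders, validate() stands in for averaging validate_model over the validation iterator, and the max_minibatches cap is omitted:

    def finetune_loop(num_epochs, mb_per_epoch, validate,
                      patience=10000, patience_increase=2.0):
        improvement_threshold = 0.995
        validation_frequency = min(mb_per_epoch, patience / 2)
        best_validation_loss = float('inf')
        total_mb_index = 0
        done_looping = False
        epoch = 0
        while epoch < num_epochs and not done_looping:
            epoch += 1
            for minibatch_index in xrange(mb_per_epoch):
                total_mb_index += 1
                if (total_mb_index + 1) % validation_frequency == 0:
                    this_loss = validate()
                    if this_loss < best_validation_loss:
                        if this_loss < best_validation_loss * improvement_threshold:
                            # still improving clearly: allow more minibatches
                            patience = max(patience, total_mb_index * patience_increase)
                        best_validation_loss = this_loss
            if patience <= total_mb_index:
                done_looping = True
        return best_validation_loss
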
@@ -268,7 +264,22 @@
         print(('Optimization complete with best validation score of %f %%,'
                'with test performance %f %%') %  
                      (best_validation_loss * 100., test_score*100.))
+        print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.))
+        
         print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+        
+
+    #Set parameters like they were right after pre-train
+    def reload_parameters(self):
+        
+        #self.parameters_pre=pickle.load('params_pretrain.txt')
+        f = open('params_pretrain.txt')
+        self.parameters_pre=pickle.load(f)
+        f.close()
+        for idx,x in enumerate(self.parameters_pre):
+            self.classifier.params[idx].value=copy(x)
+
+
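
Finally, the pretrained parameters are snapshotted to 'params_pretrain.txt' with pickle at the end of pretrain() and copied back into the classifier's shared variables by reload_parameters(), so several finetuning variants can start from the same pretrained state. A self-contained sketch of that save/restore cycle in the repo's Python 2 idiom (FakeShared merely mimics a Theano shared variable's .value attribute; under Python 3 the file would have to be opened in binary mode):

    import pickle
    from copy import copy
    import numpy

    class FakeShared(object):
        # Stand-in for a Theano shared variable: exposes only .value.
        def __init__(self, value):
            self.value = value

    params = [FakeShared(numpy.zeros((3, 2))), FakeShared(numpy.ones(2))]

    # End of pretraining: snapshot the raw arrays to disk.
    parameters_pre = [copy(p.value) for p in params]
    f = open('params_pretrain.txt', 'w')
    pickle.dump(parameters_pre, f)
    f.close()

    # Before another finetuning run: reload and copy the arrays back in.
    f = open('params_pretrain.txt')
    for idx, arr in enumerate(pickle.load(f)):
        params[idx].value = copy(arr)
    f.close()
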