diff deep/stacked_dae/v_sylvain/sgd_optimization.py @ 260:0c0f0b3f6a93

branch merge.
author Arnaud Bergeron <abergeron@gmail.com>
date Wed, 17 Mar 2010 15:31:21 -0400
parents 7dd43ef66d15
children a0264184684e
line wrap: on
line diff
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Wed Mar 17 15:24:25 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Wed Mar 17 15:31:21 2010 -0400
@@ -12,31 +12,12 @@
 
 from jobman import DD
 import jobman, jobman.sql
+from copy import copy
 
 from stacked_dae import SdA
 
 from ift6266.utils.seriestables import *
 
-##def shared_dataset(data_xy):
-##    data_x, data_y = data_xy
-##    if theano.config.device.startswith("gpu"):
-##        print "TRANSFERING DATASETS (via shared()) TO GPU"
-##        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-##        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-##        shared_y = T.cast(shared_y, 'int32')
-##    else:
-##        print "WILL RUN ON CPU, NOT GPU, SO DATASETS REMAIN IN BYTES"
-##        shared_x = theano.shared(data_x)
-##        shared_y = theano.shared(data_y)
-##    return shared_x, shared_y
-
-    ######Les shared seront remplacees utilisant "given" dans les enonces de fonction plus loin
-def shared_dataset(batch_size, n_in):
-    
-    shared_x = theano.shared(numpy.asarray(numpy.zeros((batch_size,n_in)), dtype=theano.config.floatX))
-    shared_y = theano.shared(numpy.asarray(numpy.zeros(batch_size), dtype=theano.config.floatX))
-    return shared_x, shared_y
-
 default_series = { \
         'reconstruction_error' : DummySeries(),
         'training_error' : DummySeries(),
@@ -45,37 +26,34 @@
         'params' : DummySeries()
         }
 
+def itermax(iter, max):
+    for i,it in enumerate(iter):
+        if i >= max:
+            break
+        yield it
+
 class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0, series=default_series):
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
+                    examples_per_epoch, series=default_series, max_minibatches=None):
         self.dataset = dataset
         self.hp = hyperparameters
         self.n_ins = n_ins
         self.n_outs = n_outs
-        self.input_divider = input_divider
+        self.parameters_pre=[]
    
+        self.max_minibatches = max_minibatches
+        print "SdaSgdOptimizer, max_minibatches =", max_minibatches
+
+        self.ex_per_epoch = examples_per_epoch
+        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size
+
         self.series = series
 
         self.rng = numpy.random.RandomState(1234)
 
-        self.init_datasets()
         self.init_classifier()
 
         sys.stdout.flush()
-     
-    def init_datasets(self):
-        print "init_datasets"
-        sys.stdout.flush()
-
-        #train_set, valid_set, test_set = self.dataset
-        self.test_set_x, self.test_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.valid_set_x, self.valid_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-        self.train_set_x, self.train_set_y = shared_dataset(self.hp.minibatch_size,self.n_ins)
-
-        # compute number of minibatches for training, validation and testing
-        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        # remove last batch in case it's incomplete
-        self.n_test_batches  = (self.test_set_x.value.shape[0]  / self.hp.minibatch_size) - 1
 
     def init_classifier(self):
         print "Constructing classifier"
@@ -88,8 +66,6 @@
 
         # construct the stacked denoising autoencoder class
         self.classifier = SdA( \
-                          train_set_x= self.train_set_x, \
-                          train_set_y = self.train_set_y,\
                           batch_size = self.hp.minibatch_size, \
                           n_ins= self.n_ins, \
                           hidden_layers_sizes = layers_sizes, \
@@ -97,8 +73,7 @@
                           corruption_levels = corruption_levels,\
                           rng = self.rng,\
                           pretrain_lr = self.hp.pretraining_lr, \
-                          finetune_lr = self.hp.finetuning_lr,\
-                          input_divider = self.input_divider )
+                          finetune_lr = self.hp.finetuning_lr)
 
         #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")
 
@@ -108,7 +83,7 @@
         self.pretrain(self.dataset)
         self.finetune(self.dataset)
 
-    def pretrain(self,dataset,reduce):
+    def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
 
@@ -118,15 +93,19 @@
             # go through pretraining epochs 
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
-                batch_index=int(0)
+                batch_index=0
                 for x,y in dataset.train(self.hp.minibatch_size):
-                    batch_index+=1
-                    if batch_index > reduce: #If maximum number of mini-batch is used
-                        break
                     c = self.classifier.pretrain_functions[i](x)
 
-                    
                     self.series["reconstruction_error"].append((epoch, batch_index), c)
+                    batch_index+=1
+
+                    #if batch_index % 100 == 0:
+                    #    print "100 batches"
+
+                    # stop early once max_minibatches batches were processed (useful when doing tests)
+                    if self.max_minibatches and batch_index >= self.max_minibatches:
+                        break
                         
                 print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
                 sys.stdout.flush()
@@ -137,33 +116,33 @@
 
         print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
         self.hp.update({'pretraining_time': end_time-start_time})
-
+        
         sys.stdout.flush()
+        
+        #Keep a copy of the pretrained parameters so they can be reloaded later when testing finetuning
+        self.parameters_pre=[copy(x.value) for x in self.classifier.params]
 
-    def finetune(self,dataset,reduce):
+
+    def finetune(self,dataset,num_finetune):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
-        #index   = T.lscalar()    # index to a [mini]batch 
         minibatch_size = self.hp.minibatch_size
-        ensemble_x = T.matrix('ensemble_x')
-        ensemble_y = T.ivector('ensemble_y')
 
         # create a function to compute the mistakes that are made by the model
         # on the validation set, or testing set
-        shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-        test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-                 givens = {
-                   #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-                   self.classifier.x: ensemble_x,
-                   self.classifier.y: ensemble_y})
+        test_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #         givens = {
+        #           self.classifier.x: ensemble_x,
+        #           self.classifier.y: ensemble_y]})
 
-        validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
-                givens = {
-                   #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-                   self.classifier.x: ensemble_x,
-                   self.classifier.y: ensemble_y})
+        validate_model = \
+            theano.function(
+                [self.classifier.x,self.classifier.y], self.classifier.errors)
+        #        givens = {
+        #           self.classifier.x: ,
+        #           self.classifier.y: ]})
 
 
         # early-stopping parameters
@@ -172,11 +151,13 @@
                                       # found
         improvement_threshold = 0.995 # a relative improvement of this much is 
                                       # considered significant
-        validation_frequency  = min(self.n_train_batches, patience/2)
+        validation_frequency  = min(self.mb_per_epoch, patience/2)
                                       # go through this many 
                                       # minibatche before checking the network 
                                       # on the validation set; in this case we 
                                       # check every epoch 
+        if self.max_minibatches and validation_frequency > self.max_minibatches:
+            validation_frequency = self.max_minibatches / 2
 
         best_params          = None
         best_validation_loss = float('inf')
@@ -186,37 +167,31 @@
         done_looping = False
         epoch = 0
 
-        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
+        total_mb_index = 0
+
+        while (epoch < num_finetune) and (not done_looping):
             epoch = epoch + 1
-            minibatch_index=int(0)
+            minibatch_index = -1
             for x,y in dataset.train(minibatch_size):
-                minibatch_index +=1
-                
-                if minibatch_index > reduce:   #If maximum number of mini-batchs is used 
-                    break
-                
+                minibatch_index += 1
                 cost_ij = self.classifier.finetune(x,y)
-                iter    = epoch * self.n_train_batches + minibatch_index
+                total_mb_index += 1
 
                 self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
-                if (iter+1) % validation_frequency == 0: 
+                if (total_mb_index+1) % validation_frequency == 0: 
                     
-                    #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
-                    test_index=int(0)
-                    validation_losses=[]    
-                    for x,y in dataset.valid(minibatch_size):
-                        test_index+=1
-                        if test_index > reduce:
-                            break
-                        validation_losses.append(validate_model(x,y))
+                    iter = dataset.valid(minibatch_size)
+                    if self.max_minibatches:
+                        iter = itermax(iter, self.max_minibatches)
+                    validation_losses = [validate_model(x,y) for x,y in iter]
                     this_validation_loss = numpy.mean(validation_losses)
 
                     self.series["validation_error"].\
                         append((epoch, minibatch_index), this_validation_loss*100.)
 
                     print('epoch %i, minibatch %i, validation error %f %%' % \
-                           (epoch, minibatch_index, \
+                           (epoch, minibatch_index+1, \
                             this_validation_loss*100.))
 
 
@@ -226,21 +201,17 @@
                         #improve patience if loss improvement is good enough
                         if this_validation_loss < best_validation_loss *  \
                                improvement_threshold :
-                            patience = max(patience, iter * patience_increase)
+                            patience = max(patience, total_mb_index * patience_increase)
 
                         # save best validation score and iteration number
                         best_validation_loss = this_validation_loss
-                        best_iter = iter
+                        best_iter = total_mb_index
 
                         # test it on the test set
-                        #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
-                        test_losses=[]
-                        i=0
-                        for x,y in dataset.test(minibatch_size):
-                            i+=1
-                            if i > reduce:
-                                break
-                            test_losses.append(test_model(x,y))
+                        iter = dataset.test(minibatch_size)
+                        if self.max_minibatches:
+                            iter = itermax(iter, self.max_minibatches)
+                        test_losses = [test_model(x,y) for x,y in iter]
                         test_score = numpy.mean(test_losses)
 
                         self.series["test_error"].\
@@ -248,14 +219,18 @@
 
                         print(('     epoch %i, minibatch %i, test error of best '
                               'model %f %%') % 
-                                     (epoch, minibatch_index,
+                                     (epoch, minibatch_index+1,
                                       test_score*100.))
 
                     sys.stdout.flush()
 
+                # stop the epoch early once the max_minibatches limit is reached (useful when doing tests)
+                if self.max_minibatches and minibatch_index >= self.max_minibatches:
+                    break
+
             self.series['params'].append((epoch,), self.classifier.all_params)
 
-            if patience <= iter :
+            if patience <= total_mb_index:
                 done_looping = True
                 break
 
@@ -269,6 +244,15 @@
                'with test performance %f %%') %  
                      (best_validation_loss * 100., test_score*100.))
         print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+        
+
+    #Set parameters back to what they were right after pre-training
+    def reload_parameters(self):
+
+        for idx,x in enumerate(self.parameters_pre):
+            self.classifier.params[idx].value=copy(x)
+
+