# HG changeset patch
# User fsavard
# Date 1269458064 14400
# Node ID 694e75413413d1ea9711f7dd39e65cf29db63804
# Parent  8a3af19ae2723dfb1a3e4fcf81fcd8d0ee2cdfa6
# Parent  28b628f331b2ff6a99e76089dc4117460e61b616
Merge

diff -r 8a3af19ae272 -r 694e75413413 deep/stacked_dae/v_sylvain/nist_sda.py
--- a/deep/stacked_dae/v_sylvain/nist_sda.py	Wed Mar 24 15:13:48 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/nist_sda.py	Wed Mar 24 15:14:24 2010 -0400
@@ -68,7 +68,21 @@
                     max_minibatches=rtt)
 
     parameters=[]
-    optimizer.pretrain(datasets.nist_P07())
+    #Number of files of P07 used for pretraining
+    nb_file=0
+    if state['pretrain_choice'] == 0:
+        print('\n\tpretraining with NIST\n')
+        optimizer.pretrain(datasets.nist_all())
+    elif state['pretrain_choice'] == 1:
+        #To know how many files will be used during pretraining
+        nb_file = state['pretraining_epochs_per_layer']
+        state['pretraining_epochs_per_layer'] = 1 #Only one pass over the dataset
+        if nb_file >=100:
+            sys.exit("The code does not support that many pretraining epochs (99 max with P07).\n"+
+            "You have to correct the code (and be patient, P07 is huge !!)\n"+
+            "or reduce the number of pretraining epochs to run the code (better idea).\n")
+        print('\n\tpretraining with P07')
+        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file))
     channel.save()
 
     #Set some of the parameters used for the finetuning
@@ -89,34 +103,47 @@
 
    #Decide how the finetune is done
 
-    if finetune_choice==0:
-        print('\n\n\tfinetune avec nist\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
-    if finetune_choice==1:
-        print('\n\n\tfinetune avec P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
-    if finetune_choice==2:
-        print('\n\n\tfinetune avec nist suivi de P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
-
+    if finetune_choice == 0:
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1)
+        channel.save()
+    if finetune_choice == 1:
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
+        channel.save()
+    if finetune_choice == 2:
+        print('\n\n\tfinetune with NIST followed by P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=21)
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20)
+        channel.save()
+    if finetune_choice == 3:
+        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n\
+        All hidden unit outputs are inputs to the logistic regression\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1,special=1)
+
+
    if finetune_choice==-1:
-        print('\nSerie de 3 essais de fine-tuning')
-        print('\n\n\tfinetune avec nist\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
+        print('\nSERIES OF 3 DIFFERENT FINETUNINGS')
+        print('\n\n\tfinetune with NIST\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1)
+        channel.save()
+        print('\n\n\tfinetune with P07\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
        channel.save()
-        print('\n\n\tfinetune avec P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
+        print('\n\n\tfinetune with NIST (done earlier) followed by P07 (written here)\n\n')
+        optimizer.reload_parameters('params_finetune_NIST.txt')
+        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20)
        channel.save()
-        print('\n\n\tfinetune avec nist suivi de P07\n\n')
-        optimizer.reload_parameters()
-        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1)
-        optimizer.finetune(datasets.nist_P07(),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
+        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n\
+        All hidden unit outputs are inputs to the logistic regression\n\n')
+        optimizer.reload_parameters('params_pretrain.txt')
+        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1,special=1)
        channel.save()
 
    channel.save()

diff -r 8a3af19ae272 -r 694e75413413 deep/stacked_dae/v_sylvain/sgd_optimization.py
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Wed Mar 24 15:13:48 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Wed Mar 24 15:14:24 2010 -0400
@@ -95,8 +95,11 @@
         for epoch in xrange(self.hp.pretraining_epochs_per_layer):
             # go through the training set
             batch_index=0
+            count=0
+            num_files=0
             for x,y in dataset.train(self.hp.minibatch_size):
                 c = self.classifier.pretrain_functions[i](x)
+                count +=1
 
                 self.series["reconstruction_error"].append((epoch, batch_index), c)
                 batch_index+=1
@@ -107,11 +110,21 @@
                 # useful when doing tests
                 if self.max_minibatches and batch_index >= self.max_minibatches:
                     break
-
-            print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
-            sys.stdout.flush()
+
+                #When we pass through the data only once (the case with P07)
+                #There are approximately 800*1024=819200 examples per file (1 KB per example and the files are 800 MB)
+                if self.hp.pretraining_epochs_per_layer == 1 and count%819200 == 0:
+                    print 'Pre-training layer %i, epoch %d, cost '%(i,num_files),c
+                    num_files+=1
+                    sys.stdout.flush()
+                    self.series['params'].append((num_files,), self.classifier.all_params)
+
+            #When NIST is used
+            if self.hp.pretraining_epochs_per_layer > 1:
+                print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
+                sys.stdout.flush()
 
-            self.series['params'].append((epoch,), self.classifier.all_params)
+                self.series['params'].append((epoch,), self.classifier.all_params)
 
         end_time = time.clock()
 
@@ -127,14 +140,19 @@
 
        f.close()
 
-    def finetune(self,dataset,dataset_test,num_finetune,ind_test):
+    def finetune(self,dataset,dataset_test,num_finetune,ind_test,special=0):
+
+        if special != 0 and special != 1:
+            sys.exit('Bad value for variable special. Must be in {0,1}')
+
 
        print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
        minibatch_size = self.hp.minibatch_size
-        if ind_test == 0:
+        if ind_test == 0 or ind_test == 20:
            nom_test = "NIST"
+            nom_train="P07"
        else:
            nom_test = "P07"
+            nom_train = "NIST"
 
        # create a function to compute the mistakes that are made by the model
@@ -177,20 +195,28 @@
 
        epoch = 0
        total_mb_index = 0
+        minibatch_index = -1
 
        while (epoch < num_finetune) and (not done_looping):
            epoch = epoch + 1
-            minibatch_index = -1
+
            for x,y in dataset.train(minibatch_size):
                minibatch_index += 1
-                cost_ij = self.classifier.finetune(x,y)
+                if special == 0:
+                    cost_ij = self.classifier.finetune(x,y)
+                elif special == 1:
+                    cost_ij = self.classifier.finetune2(x,y)
                total_mb_index += 1
 
                self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
                if (total_mb_index+1) % validation_frequency == 0:
-
-                    iter = dataset.valid(minibatch_size)
+                    #minibatch_index += 1
+                    #The validation set is always NIST
+                    if ind_test == 0:
+                        iter=dataset_test.valid(minibatch_size)
+                    else:
+                        iter = dataset.valid(minibatch_size)
                    if self.max_minibatches:
                        iter = itermax(iter, self.max_minibatches)
                    validation_losses = [validate_model(x,y) for x,y in iter]
@@ -199,8 +225,8 @@
                    self.series["validation_error"].\
                        append((epoch, minibatch_index), this_validation_loss*100.)
 
-                    print('epoch %i, minibatch %i, validation error %f %%' % \
-                           (epoch, minibatch_index+1, \
+                    print('epoch %i, minibatch %i, validation error on %s : %f %%' % \
+                           (epoch, minibatch_index+1,nom_test, \
                            this_validation_loss*100.))
 
 
@@ -233,16 +259,20 @@
                        self.series["test_error"].\
                            append((epoch, minibatch_index), test_score*100.)
 
-                        print((' epoch %i, minibatch %i, test error of best '
+                        print((' epoch %i, minibatch %i, test error on dataset %s (train data) of best '
                              'model %f %%') %
-                                     (epoch, minibatch_index+1,
+                                     (epoch, minibatch_index+1,nom_train,
                                      test_score*100.))
 
                        print((' epoch %i, minibatch %i, test error on dataset %s of best '
                              'model %f %%') %
                                     (epoch, minibatch_index+1,nom_test,
                                      test_score2*100.))
-
+
+                if patience <= total_mb_index:
+                    done_looping = True
+                    break
+
                sys.stdout.flush()
 
                # useful when doing tests
@@ -251,8 +281,7 @@
 
            self.series['params'].append((epoch,), self.classifier.all_params)
 
-            if patience <= total_mb_index:
-                done_looping = True
+            if done_looping == True: #To exit the fine-tuning completely
                break
 
        end_time = time.clock()
@@ -261,19 +290,45 @@
                                    'test_score':test_score,
                                    'num_finetuning_epochs':epoch})
 
-        print(('Optimization complete with best validation score of %f %%,'
-               'with test performance %f %%') %
-                     (best_validation_loss * 100., test_score*100.))
+        print(('\nOptimization complete with best validation score of %f %%,'
+               'with test performance %f %% on dataset %s ') %
+                     (best_validation_loss * 100., test_score*100.,nom_train))
        print(('The test score on the %s dataset is %f')%(nom_test,test_score2*100.))
 
        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+
+        #Save a copy of the parameters in a file so they can be reloaded later
+
+        if special == 1:    #To keep track of the parameter values
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_stanford.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test== 0:    #To keep track of the parameter values
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_P07.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test== 1:    #For the run with 2 finetunes. It will be faster.
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_NIST.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
+        elif ind_test== 20:    #To keep track of the parameter values
+            parameters_finetune=[copy(x.value) for x in self.classifier.params]
+            f = open('params_finetune_NIST_then_P07.txt', 'w')
+            pickle.dump(parameters_finetune,f)
+            f.close()
+
 
    #Set parameters like they where right after pre-train
-    def reload_parameters(self):
+    def reload_parameters(self,which):
 
        #self.parameters_pre=pickle.load('params_pretrain.txt')
-        f = open('params_pretrain.txt')
+        f = open(which)
        self.parameters_pre=pickle.load(f)
        f.close()
        for idx,x in enumerate(self.parameters_pre):

diff -r 8a3af19ae272 -r 694e75413413 deep/stacked_dae/v_sylvain/stacked_dae.py
--- a/deep/stacked_dae/v_sylvain/stacked_dae.py	Wed Mar 24 15:13:48 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/stacked_dae.py	Wed Mar 24 15:14:24 2010 -0400
@@ -36,6 +36,7 @@
 
        # list of parameters for this layer
        self.params = [self.W, self.b]
+
 
    def negative_log_likelihood(self, y):
       return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
@@ -181,6 +182,7 @@
        # (not used for finetuning... still using ".params")
        self.all_params = []
        self.n_layers = len(hidden_layers_sizes)
+        self.logistic_params = []
 
        print "Creating SdA with params:"
        print "batch_size", batch_size
@@ -257,7 +259,7 @@
 
            self.pretrain_functions += [update_fn]
 
-        # We now need to add a logistic layer on top of the MLP
+        # We now need to add a logistic layer on top of the SDA
        self.logLayer = LogisticRegression(\
                         input = self.layers[-1].output,\
                         n_in = hidden_layers_sizes[-1], n_out = n_outs)
@@ -277,15 +279,48 @@
 
        self.finetune = theano.function([self.x,self.y], cost,
                updates = updates)#,
-                                      # givens = {
-                                      #   self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
-                                      #   self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
+
+        #STRUCTURE FOR THE FINETUNING OF THE LOGISTIC REGRESSION ON THE TOP WITH
+        #ALL HIDDEN LAYERS AS INPUT
+
+        all_h=[]
+        for i in xrange(self.n_layers):
+            all_h.append(self.layers[i].output)
+        self.all_hidden=T.concatenate(all_h,axis=1)
+
+
+        self.logLayer2 = LogisticRegression(\
+                         input = self.all_hidden,\
+                         n_in = sum(hidden_layers_sizes), n_out = n_outs)
+                         #n_in=hidden_layers_sizes[0],n_out=n_outs)
+
+        #self.logistic_params+= self.logLayer2.params
+        # construct a function that implements one step of finetuning
+
+        # compute the cost, defined as the negative log likelihood
+        cost2 = self.logLayer2.negative_log_likelihood(self.y)
+        # compute the gradients with respect to the model parameters
+        gparams2 = T.grad(cost2, self.logLayer2.params)
+
+        # compute list of updates
+        updates2 = {}
+        for param,gparam in zip(self.logLayer2.params, gparams2):
+            updates2[param] = param - gparam*finetune_lr
+
+        self.finetune2 = theano.function([self.x,self.y], cost2,
+                updates = updates2)
+
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+
+        self.errors2 = self.logLayer2.errors(self.y)
+
 
if __name__ == '__main__':
    import sys
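
Note on the special=1 / finetune2 path introduced above: it trains only a logistic regression (softmax) layer whose input is the concatenation of every hidden layer's output, while the pre-trained weights are left untouched. The standalone sketch below illustrates that pattern in plain Theano. It is not part of the changeset: the variable names, the two-layer setup, the 32x32/500/500/62 sizes and the 0.01 learning rate are illustrative assumptions, not values taken from this repository.

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')    # minibatch of flattened images
y = T.ivector('y')   # integer class labels

n_in, n_h1, n_h2, n_out = 32*32, 500, 500, 62    # illustrative sizes
rng = numpy.random.RandomState(1234)
floatX = theano.config.floatX

def shared_zeros(shape):
    return theano.shared(numpy.zeros(shape, dtype=floatX))

def shared_uniform(n_rows, n_cols):
    # small random weights, as is usual for sigmoid layers
    values = rng.uniform(-0.1, 0.1, (n_rows, n_cols))
    return theano.shared(numpy.asarray(values, dtype=floatX))

# two "pre-trained" hidden layers; their parameters are not updated below
W1, b1 = shared_uniform(n_in, n_h1), shared_zeros(n_h1)
W2, b2 = shared_uniform(n_h1, n_h2), shared_zeros(n_h2)
h1 = T.nnet.sigmoid(T.dot(x, W1) + b1)
h2 = T.nnet.sigmoid(T.dot(h1, W2) + b2)

# concatenate every hidden representation and feed it to a single softmax layer
all_hidden = T.concatenate([h1, h2], axis=1)
W_log = shared_zeros((n_h1 + n_h2, n_out))
b_log = shared_zeros(n_out)
p_y_given_x = T.nnet.softmax(T.dot(all_hidden, W_log) + b_log)

# negative log-likelihood cost and gradients w.r.t. the softmax parameters only
cost = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
gW, gb = T.grad(cost, [W_log, b_log])

finetune_lr = 0.01    # illustrative learning rate
finetune2 = theano.function(
    [x, y], cost,
    updates={W_log: W_log - finetune_lr * gW,
             b_log: b_log - finetune_lr * gb})

Calling finetune2(batch_x, batch_y) then performs one SGD step on the top classifier alone, which is what the special=1 branch in sgd_optimization.py relies on (there the hidden layers come from the pre-trained SdA rather than the random ones used in this sketch).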