# HG changeset patch
# User SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
# Date 1268673740 14400
# Node ID 9fc641d7adda89273fb5272805e1c6279266ea3e
# Parent  9b6e0af062af921fe9b3c2bd07476ed599bc41e1
Possibilité de restreindre la taille des ensembles d'entraînement, de validation et de test afin de pouvoir tester le code rapidement

diff -r 9b6e0af062af -r 9fc641d7adda deep/stacked_dae/v_sylvain/nist_sda.py
--- a/deep/stacked_dae/v_sylvain/nist_sda.py	Mon Mar 15 10:09:50 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/nist_sda.py	Mon Mar 15 13:22:20 2010 -0400
@@ -49,6 +49,7 @@
     REDUCE_TRAIN_TO = 1000
     MAX_FINETUNING_EPOCHS = 2
     REDUCE_EVERY = 10
+    MINIBATCH_SIZE=20
 
 # Possible values the hyperparameters can take. These are then
 # combined with produit_cartesien_jobs so we get a list of all
@@ -71,7 +72,7 @@
                        'hidden_layers_sizes':500,
                        'corruption_levels':0.2,
                        'minibatch_size':20,
-                       #'reduce_train_to':10000,
+                       'reduce_train_to':10000,
                        'num_hidden_layers':1})
 
 '''
@@ -94,16 +95,18 @@
 ##
 ##    print "NIST loaded"
 ##
-##    # For test runs, we don't want to use the whole dataset so
-##    # reduce it to fewer elements if asked to.
-##    rtt = None
-##    if state.has_key('reduce_train_to'):
-##        rtt = state['reduce_train_to']
-##    elif REDUCE_TRAIN_TO:
-##        rtt = REDUCE_TRAIN_TO
-##
-##    if rtt:
-##        print "Reducing training set to "+str(rtt)+ " examples"
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = int(state['reduce_train_to']/state['minibatch_size'])
+    elif REDUCE_TRAIN_TO:
+        rtt = int(REDUCE_TRAIN_TO/MINIBATCH_SIZE)
+
+    if rtt:
+        print "Reducing training set to "+str(rtt*state['minibatch_size'])+ " examples"
+    else:
+        rtt=float('inf')    #No reduction
 ##        nist.reduce_train_set(rtt)
 ##
 ##    train,valid,test = nist.get_tvt()
@@ -111,7 +114,7 @@
 
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
+    
     series = create_series(state.num_hidden_layers)
 
     print "Creating optimizer with state, ", state
@@ -120,10 +123,10 @@
                                     n_ins=n_ins, n_outs=n_outs,\
                                     series=series)
 
-    optimizer.pretrain(datasets.nist_all)
+    optimizer.pretrain(datasets.nist_all,rtt)
     channel.save()
 
-    optimizer.finetune(datasets.nist_all)
+    optimizer.finetune(datasets.nist_all,rtt)
     channel.save()
 
     return channel.COMPLETE
diff -r 9b6e0af062af -r 9fc641d7adda deep/stacked_dae/v_sylvain/sgd_optimization.py
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Mon Mar 15 10:09:50 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Mon Mar 15 13:22:20 2010 -0400
@@ -108,7 +108,7 @@
         self.pretrain(self.dataset)
         self.finetune(self.dataset)
 
-    def pretrain(self,dataset):
+    def pretrain(self,dataset,reduce):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
 
@@ -120,8 +120,11 @@
                 # go through the training set
                 batch_index=int(0)
                 for x,y in dataset.train(self.hp.minibatch_size):
+                    batch_index+=1
+                    if batch_index > reduce: #If maximum number of mini-batch is used
+                        break
                     c = self.classifier.pretrain_functions[i](x)
-                    batch_index+=1
+
                     
                     self.series["reconstruction_error"].append((epoch, batch_index), c)
                         
@@ -137,7 +140,7 @@
 
         sys.stdout.flush()
 
-    def finetune(self,dataset):
+    def finetune(self,dataset,reduce):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
         #index   = T.lscalar()    # index to a [mini]batch 
@@ -185,10 +188,13 @@
 
         while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
             epoch = epoch + 1
-            minibatch_index=int(-1)
+            minibatch_index=int(0)
             for x,y in dataset.train(minibatch_size):
+                minibatch_index +=1
                 
-                minibatch_index+=1
+                if minibatch_index > reduce:   #If maximum number of mini-batchs is used 
+                    break
+                
                 cost_ij = self.classifier.finetune(x,y)
                 iter    = epoch * self.n_train_batches + minibatch_index
 
@@ -196,14 +202,21 @@
 
                 if (iter+1) % validation_frequency == 0: 
                     
-                    validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
+                    #validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
+                    test_index=int(0)
+                    validation_losses=[]    
+                    for x,y in dataset.valid(minibatch_size):
+                        test_index+=1
+                        if test_index > reduce:
+                            break
+                        validation_losses.append(validate_model(x,y))
                     this_validation_loss = numpy.mean(validation_losses)
 
                     self.series["validation_error"].\
                         append((epoch, minibatch_index), this_validation_loss*100.)
 
-                    print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                           (epoch, minibatch_index+1, self.n_train_batches, \
+                    print('epoch %i, minibatch %i, validation error %f %%' % \
+                           (epoch, minibatch_index, \
                             this_validation_loss*100.))
 
 
@@ -220,15 +233,22 @@
                         best_iter = iter
 
                         # test it on the test set
-                        test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
+                        #test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
+                        test_losses=[]
+                        i=0
+                        for x,y in dataset.test(minibatch_size):
+                            i+=1
+                            if i > reduce:
+                                break
+                            test_losses.append(test_model(x,y))
                         test_score = numpy.mean(test_losses)
 
                         self.series["test_error"].\
                             append((epoch, minibatch_index), test_score*100.)
 
-                        print(('     epoch %i, minibatch %i/%i, test error of best '
+                        print(('     epoch %i, minibatch %i, test error of best '
                               'model %f %%') % 
-                                     (epoch, minibatch_index+1, self.n_train_batches,
+                                     (epoch, minibatch_index,
                                       test_score*100.))
 
                     sys.stdout.flush()