diff deep/stacked_dae/v_sylvain/sgd_optimization.py @ 233:02ed13244133

version pour utilisation du module dataset
author SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date Sun, 14 Mar 2010 15:07:17 -0400
parents 8a94a5c808cd
children ecb69e17950b
line wrap: on
line diff
--- a/deep/stacked_dae/v_sylvain/sgd_optimization.py	Sat Mar 13 15:45:43 2010 -0500
+++ b/deep/stacked_dae/v_sylvain/sgd_optimization.py	Sun Mar 14 15:07:17 2010 -0400
@@ -105,10 +105,10 @@
         sys.stdout.flush()
 
     def train(self):
-        self.pretrain()
-        self.finetune()
+        self.pretrain(self.dataset)
+        self.finetune(self.dataset)
 
-    def pretrain(self):
+    def pretrain(self,dataset):
         print "STARTING PRETRAINING, time = ", datetime.datetime.now()
         sys.stdout.flush()
 
@@ -118,8 +118,8 @@
             # go through pretraining epochs 
             for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                 # go through the training set
-                for batch_index in xrange(self.n_train_batches):
-                    c = self.classifier.pretrain_functions[i](batch_index)
+                for x,y in dataset.train(self.hp.minibatch_size):
+                    c = self.classifier.pretrain_functions[i](x)
 
                     self.series["reconstruction_error"].append((epoch, batch_index), c)
                         
@@ -135,24 +135,28 @@
 
         sys.stdout.flush()
 
-    def finetune(self):
+    def finetune(self,dataset):
         print "STARTING FINETUNING, time = ", datetime.datetime.now()
 
-        index   = T.lscalar()    # index to a [mini]batch 
+        #index   = T.lscalar()    # index to a [mini]batch 
         minibatch_size = self.hp.minibatch_size
 
         # create a function to compute the mistakes that are made by the model
         # on the validation set, or testing set
         shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
-        test_model = theano.function([index], self.classifier.errors,
+        test_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
                  givens = {
-                   self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                   #self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
+                   #self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                   self.classifier.x: ensemble_x,
+                   self.classifier.y: ensemble_y})
 
-        validate_model = theano.function([index], self.classifier.errors,
+        validate_model = theano.function([ensemble_x,ensemble_y], self.classifier.errors,
                 givens = {
-                   self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
-                   self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                   #self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
+                   #self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+                   self.classifier.x: ensemble_x,
+                   self.classifier.y: ensemble_y})
 
 
         # early-stopping parameters
@@ -177,16 +181,18 @@
 
         while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
             epoch = epoch + 1
-            for minibatch_index in xrange(self.n_train_batches):
-
-                cost_ij = self.classifier.finetune(minibatch_index)
+            minibatch_index=int(-1)
+            for x,y in dataset.train(minibatch_size):
+                
+                minibatch_index+=1
+                cost_ij = self.classifier.finetune(x,y)
                 iter    = epoch * self.n_train_batches + minibatch_index
 
                 self.series["training_error"].append((epoch, minibatch_index), cost_ij)
 
                 if (iter+1) % validation_frequency == 0: 
                     
-                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
+                    validation_losses = [validate_model(x,y) for x,y in dataset.valid(minibatch_size)]
                     this_validation_loss = numpy.mean(validation_losses)
 
                     self.series["validation_error"].\
@@ -210,7 +216,7 @@
                         best_iter = iter
 
                         # test it on the test set
-                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
+                        test_losses = [test_model(x,y) for x,y in dataset.test(minibatch_size)]
                         test_score = numpy.mean(test_losses)
 
                         self.series["test_error"].\