diff deep/stacked_dae/stacked_dae.py @ 186:d364a130b221

Added the base code for scalar_series. Changes to stacked_dae: fixed a problem with the input_divider (it was blocking an optimization) and added the use of the series. Also, in case I had not already committed it, I removed the pretraining-reuse mechanism: it was complicated (error prone) and it produced jobs that were far too long.
author fsavard
date Mon, 01 Mar 2010 11:45:25 -0500
parents b9ea8e2d071a
children 3632e6258642
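The input_divider fix below replaces a plain numpy constant with a theano.shared value, so the division done in the givens mapping stays inside the compiled graph as an operation on a shared variable rather than a host-side constant. The following is a minimal, self-contained sketch of that pattern; the toy data, the dummy cost and the divider value 255.0 are illustrative stand-ins, not part of the changeset.

import numpy
import theano
import theano.tensor as T

# Toy dataset standing in for train_set_x (100 examples of 784 values).
data = numpy.random.rand(100, 784).astype(theano.config.floatX)
train_set_x = theano.shared(data, borrow=True)

batch_size = 10
index = T.lscalar('index')    # minibatch index
x = T.matrix('x')             # symbolic input, as in the SdA class

# Dummy cost so the function compiles; the real pretraining or
# finetuning cost would go here.
cost = T.sum(x)

# Divider kept as a Theano shared variable: the division is part of the
# symbolic graph, so it can be optimized (and its value changed later
# with set_value) without rebuilding the compiled function.
shared_divider = theano.shared(numpy.asarray(255.0, dtype=theano.config.floatX))

fn = theano.function([index], cost,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]
                   / shared_divider})

print(fn(0))
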
--- a/deep/stacked_dae/stacked_dae.py	Fri Feb 26 17:45:52 2010 -0500
+++ b/deep/stacked_dae/stacked_dae.py	Mon Mar 01 11:45:25 2010 -0500
@@ -152,9 +152,12 @@
         self.layers             = []
         self.pretrain_functions = []
         self.params             = []
+        # MODIF: added this so we also get the b_primes
+        # (not used for finetuning... still using ".params")
+        self.all_params         = []
         self.n_layers           = len(hidden_layers_sizes)
 
-        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
+        self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
         if len(hidden_layers_sizes) < 1 :
             raiseException (' You must have at least one hidden layer ')
@@ -196,6 +199,8 @@
                           corruption_level = corruption_levels[0],\
                           input = layer_input, \
                           shared_W = layer.W, shared_b = layer.b)
+
+            self.all_params += dA_layer.params
         
             # Construct a function that trains this dA
             # compute gradients of layer parameters
@@ -209,7 +214,7 @@
             update_fn = theano.function([index], dA_layer.cost, \
                   updates = updates,
                   givens = { 
-                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
+                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider})
             # collect this function into a list
             self.pretrain_functions += [update_fn]
 
@@ -220,6 +225,7 @@
                          n_in = hidden_layers_sizes[-1], n_out = n_outs)
 
         self.params += self.logLayer.params
+        self.all_params += self.logLayer.params
         # construct a function that implements one step of finetunining
 
         # compute the cost, defined as the negative log likelihood 
@@ -234,7 +240,7 @@
         self.finetune = theano.function([index], cost, 
                 updates = updates,
                 givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
+                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider,
                   self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
         # symbolic variable that points to the number of errors made on the
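For reference, the new all_params list differs from params only in that it also carries each dA layer's reconstruction bias (b_prime); the finetuning updates keep using params. One plausible use of such a list is snapshotting every shared variable. The save_params/load_params helpers below are a hypothetical illustration of that idea, not part of this changeset.

import pickle

def save_params(all_params, path):
    # Pull the current value out of every Theano shared variable
    # (W, b and b_prime of each dA layer, plus the logistic layer's W and b).
    values = [p.get_value(borrow=False) for p in all_params]
    with open(path, 'wb') as f:
        pickle.dump(values, f)

def load_params(all_params, path):
    # Write the saved values back into the shared variables, in the same order.
    with open(path, 'rb') as f:
        values = pickle.load(f)
    for p, v in zip(all_params, values):
        p.set_value(v, borrow=False)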