diff scripts/stacked_dae/stacked_dae.py @ 139:7d8366fb90bf

Added __init__.py files throughout the tree so the scripts can be used with jobman-style paths, and made quite a few changes in stacked_dae so that pretraining work can be reused across tests where the pretraining is identical.
author fsavard
date Mon, 22 Feb 2010 13:38:25 -0500
parents 5c79a2557f2f
children
--- a/scripts/stacked_dae/stacked_dae.py	Sun Feb 21 17:30:38 2010 -0600
+++ b/scripts/stacked_dae/stacked_dae.py	Mon Feb 22 13:38:25 2010 -0500
@@ -6,6 +6,9 @@
 import time
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+from utils import update_locals
 
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
@@ -140,13 +143,16 @@
 class SdA(object):
     def __init__(self, train_set_x, train_set_y, batch_size, n_ins, 
                  hidden_layers_sizes, n_outs, 
-                 corruption_levels, rng, pretrain_lr, finetune_lr):
-       
+                 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
+        update_locals(self, locals())
+
         self.layers             = []
         self.pretrain_functions = []
         self.params             = []
         self.n_layers           = len(hidden_layers_sizes)
 
+        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
+
         if len(hidden_layers_sizes) < 1 :
             raise Exception('You must have at least one hidden layer')
 
@@ -200,7 +206,7 @@
             update_fn = theano.function([index], dA_layer.cost, \
                   updates = updates,
                   givens = { 
-                     self.x : train_set_x[index*batch_size:(index+1)*batch_size]})
+                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
             # collect this function into a list
             self.pretrain_functions += [update_fn]
 
@@ -225,7 +231,7 @@
         self.finetune = theano.function([index], cost, 
                 updates = updates,
                 givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
                   self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
 
         # symbolic variable that points to the number of errors made on the
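Because self.x is always fed minibatches divided by input_divider, any evaluation function built on self.errors must apply the same scaling. A sketch (test_set_x and test_set_y are assumed shared variables, not part of this diff):

    # Hypothetical test-error function matching the scaled givens above.
    index = T.lscalar()
    test_model = theano.function([index], sda.errors,
            givens = {
              sda.x : test_set_x[index*batch_size:(index+1)*batch_size] / sda.input_divider,
              sda.y : test_set_y[index*batch_size:(index+1)*batch_size]})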
@@ -233,23 +239,49 @@
 
         self.errors = self.logLayer.errors(self.y)
 
+    @classmethod
+    def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None):
+        assert(num_hidden_layers <= obj.n_layers)
+
+        if new_finetuning_lr is None:
+            new_finetuning_lr = obj.finetune_lr
+
+        new_sda = cls(train_set_x= obj.train_set_x, \
+                      train_set_y = obj.train_set_y,\
+                      batch_size = obj.batch_size, \
+                      n_ins= obj.n_ins, \
+                      hidden_layers_sizes = obj.hidden_layers_sizes[:num_hidden_layers], \
+                      n_outs = obj.n_outs, \
+                      corruption_levels = obj.corruption_levels[:num_hidden_layers],\
+                      rng = obj.rng,\
+                      pretrain_lr = obj.pretrain_lr, \
+                      finetune_lr = new_finetuning_lr, \
+                      input_divider = obj.input_divider )
+
+        # new_sda.layers holds only the hidden layers, so this copies exactly the reused lower layers
+        for i, layer in enumerate(new_sda.layers):
+            original_layer = obj.layers[i]
+            for p1,p2 in zip(layer.params, original_layer.params):
+                p1.value = p2.value.copy()
+
+        return new_sda
+
+    def get_params_copy(self):
+        return copy.deepcopy(self.params)
+
+    def set_params_from_copy(self, params_copy):
+        # Don't replace the shared variables themselves: the compiled
+        # functions hold references to them. Only overwrite their values.
+        for i, p in enumerate(self.params):
+            p.value = params_copy[i].value
+
+    def get_params_means(self):
+        s = []
+        for p in self.params:
+            s.append(numpy.mean(p.value))
+        return s
+
 if __name__ == '__main__':
     import sys
     args = sys.argv[1:]
 
-    if len(args) < 1:
-        print "Options: mnist, jobman_add, load_nist"
-        sys.exit(0)
-
-    if args[0] == "jobman_add":
-        jobman_add()
-    elif args[0] == "mnist":
-        sgd_optimization_mnist(dataset=MNIST_LOCATION)
-    elif args[0] == "load_nist":
-        load_nist_test()
-    elif args[0] == "nist":
-        sgd_optimization_nist()
-    elif args[0] == "pc":
-        test_produit_croise_jobs()
-
-
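Taken together, the new methods support the reuse described in the commit message: pretrain a deep SdA once, spawn a shallower copy that shares its lower layers, and snapshot parameters around fine-tuning. A usage sketch whose constructor arguments mirror the signature in this diff (all concrete values, and rng, are assumptions):

    # Hypothetical usage: pretrain a 3-hidden-layer SdA once, then fine-tune
    # a copy that reuses only its two lowest (already pretrained) layers.
    sda3 = SdA(train_set_x=train_set_x, train_set_y=train_set_y,
               batch_size=20, n_ins=32*32,
               hidden_layers_sizes=[1000, 1000, 1000], n_outs=10,
               corruption_levels=[0.1, 0.2, 0.3], rng=rng,
               pretrain_lr=0.01, finetune_lr=0.1, input_divider=255.0)
    # ... run the pretraining loop on sda3 ...
    sda2 = SdA.copy_reusing_lower_layers(sda3, 2, new_finetuning_lr=0.05)

    best_params = sda2.get_params_copy()      # snapshot before fine-tuning
    # ... fine-tune, validate, track the best validation score ...
    sda2.set_params_from_copy(best_params)    # roll back to the best snapshot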