diff deep/stacked_dae/v_sylvain/stacked_dae.py @ 351:799ad23a161f

Added the ability to use tanh units instead of sigmoids, and a sigmoid output instead of a softmax
author SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date Wed, 21 Apr 2010 14:07:53 -0400
parents 54ad8a091783
children bc4464c0894c
line diff
--- a/deep/stacked_dae/v_sylvain/stacked_dae.py	Tue Apr 20 16:38:13 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/stacked_dae.py	Wed Apr 21 14:07:53 2010 -0400
@@ -27,8 +27,10 @@
         # initialize the biases b as a vector of n_out 0s
         self.b = theano.shared( value=numpy.zeros((n_out,), 
                                             dtype = theano.config.floatX) )
-        # compute vector of class-membership probabilities in symbolic form
-        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        # compute the vector of class-membership scores. A sigmoid is used
+        # instead of a softmax so an input can later be classified as nothing
+##        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b)
         
         # compute prediction as class whose probability is maximal in 
         # symbolic form
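
The hunk above replaces the softmax output with independent per-class sigmoids so that an input can later be classified as belonging to no class at all. A minimal numpy sketch of how such a rejection rule could look is given below; the helper predict_with_reject and its reject_threshold parameter are illustrative assumptions, not part of this patch.

    import numpy as np

    def sigmoid(a):
        # element-wise logistic function
        return 1.0 / (1.0 + np.exp(-a))

    def predict_with_reject(activations, reject_threshold=0.5):
        # With independent sigmoid outputs each class score lies in (0, 1)
        # on its own, so an input whose best score stays below the threshold
        # can be labelled as "nothing" (returned here as -1). A softmax could
        # not express this directly, since its scores are forced to sum to 1.
        scores = sigmoid(activations)
        best = np.argmax(scores, axis=1)
        best_score = scores[np.arange(scores.shape[0]), best]
        return np.where(best_score >= reject_threshold, best, -1)

    # one confident example, one with uniformly weak activations
    acts = np.array([[ 3.0, -2.0, -1.5],
                     [-2.0, -2.5, -3.0]])
    print(predict_with_reject(acts))   # -> [ 0 -1]
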
@@ -71,7 +73,25 @@
 
         self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
         self.params = [self.W, self.b]
+    
 
+class TanhLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+
+        self.output = (T.tanh(T.dot(input, self.W) + self.b) + 1) /2
+        # the (+1)/2 rescaling is needed because tanh ranges over (-1, 1) while sigmoid ranges over (0, 1);
+        # tanh is used here, but the output range has to stay the same, hence the correction
+        self.params = [self.W, self.b]
 
 
 class dA(object):
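
The TanhLayer added above rescales the tanh output with (tanh(a) + 1) / 2 so that it lives in (0, 1) like the sigmoid layer it replaces; this rescaled tanh is in fact the logistic function evaluated at 2a. A quick numpy check of both properties, sketched here for illustration only:

    import numpy as np

    a = np.linspace(-4.0, 4.0, 9)
    rescaled_tanh = (np.tanh(a) + 1.0) / 2.0         # output used by TanhLayer
    sigmoid_2a    = 1.0 / (1.0 + np.exp(-2.0 * a))   # logistic with doubled slope

    # both stay strictly inside (0, 1) and are numerically identical
    print(rescaled_tanh.min() > 0.0, rescaled_tanh.max() < 1.0)   # -> True True
    print(np.allclose(rescaled_tanh, sigmoid_2a))                 # -> True
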
@@ -132,7 +152,9 @@
     # Equation (2)
     # note  : y is stored as an attribute of the class so that it can be 
     #         used later when stacking dAs. 
+    
     self.y   = T.nnet.sigmoid(T.dot(self.tilde_x, self.W      ) + self.b)
+        
     # Equation (3)
     #self.z   = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
     # Equation (4)
@@ -160,6 +182,21 @@
     #        of the reconstruction of the corresponding example of the 
     #        minibatch. We need to compute the average of all these to get 
     #        the cost of the minibatch
+    
+##    #Alternative: use a tanh instead of the sigmoid; the output is rescaled
+##    #so it stays in (0, 1), the same range as with the sigmoid version
+##    self.y   = (T.tanh(T.dot(self.tilde_x, self.W      ) + self.b)+1.0)/2.0
+##    
+##    z_a = T.dot(self.y, self.W_prime) + self.b_prime
+##    self.z = (T.tanh(z_a)+1.0) / 2.0
+##    
+##    #Clip the reconstruction away from 0 and 1 so the cross-entropy below
+##    #never computes log(0); a Python `if` on a symbolic variable would not
+##    #work here, so T.clip is used instead
+##    self.z = T.clip(self.z, 0.000001, 0.999999)
+##    
+##    self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+    
     self.cost = T.mean(self.L)
 
     self.params = [ self.W, self.b, self.b_prime ]
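
The commented-out tanh variant above is mostly concerned with keeping the reconstruction z strictly inside (0, 1) so that the cross-entropy cost never evaluates log(0). A minimal numpy sketch of that clipped cost is shown below; the helper name reconstruction_cost and its eps argument are illustrative assumptions, with eps mirroring the 0.000001 / 0.999999 bounds in the block above.

    import numpy as np

    def reconstruction_cost(x, z, eps=1e-6):
        # clip z away from 0 and 1 so that log() never receives 0
        z = np.clip(z, eps, 1.0 - eps)
        # per-example cross-entropy, then the minibatch average
        L = -np.sum(x * np.log(z) + (1.0 - x) * np.log(1.0 - z), axis=1)
        return L.mean()

    x = np.array([[1.0, 0.0, 1.0]])
    z = np.array([[1.0, 0.0, 0.5]])   # exact 0/1 values would break an unclipped log
    print(reconstruction_cost(x, z))
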
@@ -222,9 +259,13 @@
                 layer_input = self.x
             else:
                 layer_input = self.layers[-1].output
+            # We have to choose between the sigmoidal layer and the tanh layer!
 
             layer = SigmoidalLayer(rng, layer_input, input_size, 
                                    hidden_layers_sizes[i] )
+                                
+##            layer = TanhLayer(rng, layer_input, input_size, 
+##                                   hidden_layers_sizes[i] )
             # add the layer to the list of layers
             self.layers += [layer]
             self.params += layer.params
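
The last hunk leaves the choice between SigmoidalLayer and TanhLayer as a comment toggle inside the constructor. One way the same choice could be sketched as a runtime switch, assuming a hypothetical use_tanh constructor flag that is not part of this patch:

    # hypothetical sketch: pick the hidden-layer class from a flag instead of
    # commenting code in and out; `use_tanh` is an assumed constructor
    # argument and does not exist in this patch
    layer_class = TanhLayer if use_tanh else SigmoidalLayer
    layer = layer_class(rng, layer_input, input_size,
                        hidden_layers_sizes[i])
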