diff deep/stacked_dae/v_sylvain/stacked_dae.py @ 351:799ad23a161f
Added the ability to use tanh instead of sigmoids, and a sigmoid at the output instead of softmax
author    SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date      Wed, 21 Apr 2010 14:07:53 -0400
parents   54ad8a091783
children  bc4464c0894c
--- a/deep/stacked_dae/v_sylvain/stacked_dae.py	Tue Apr 20 16:38:13 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/stacked_dae.py	Wed Apr 21 14:07:53 2010 -0400
@@ -27,8 +27,10 @@
         # initialize the baises b as a vector of n_out 0s
         self.b = theano.shared( value=numpy.zeros((n_out,),
                                             dtype = theano.config.floatX) )
-        # compute vector of class-membership probabilities in symbolic form
-        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        # compute vector of class-membership. This is a sigmoid instead of
+        #a softmax to be able to classify as nothing later
+##        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b)

         # compute prediction as class whose probability is maximal in
         # symbolic form
@@ -71,7 +73,25 @@
         self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
         self.params = [self.W, self.b]

+
+class TanhLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+
+        self.output = (T.tanh(T.dot(input, self.W) + self.b) + 1) /2
+        # ( *+ 1) /2 is because tanh goes from -1 to 1 and sigmoid goes from 0 to 1
+        # I want to use tanh, but the image has to stay the same. The correction is necessary.
+        self.params = [self.W, self.b]

 
 class dA(object):
@@ -132,7 +152,9 @@
         # Equation (2)
         # note : y is stored as an attribute of the class so that it can be
         # used later when stacking dAs.
+
         self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
+
         # Equation (3)
         #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
@@ -160,6 +182,21 @@
         # of the reconstruction of the corresponding example of the
         # minibatch. We need to compute the average of all these to get
         # the cost of the minibatch
+
+##        #Or use a Tanh everything is always between 0 and 1, the range is
+##        #changed so it remain the same as when sigmoid is used
+##        self.y = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0
+##
+##        z_a = T.dot(self.y, self.W_prime) + self.b_prime
+##        self.z = (T.tanh(z_a + self.b_prime)+1.0) / 2.0
+##        #To ensure to do not have a log(0) operation
+##        if self.z <= 0:
+##            self.z = 0.000001
+##        if self.z >= 1:
+##            self.z = 0.999999
+##
+##        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+
         self.cost = T.mean(self.L)

         self.params = [ self.W, self.b, self.b_prime ]
@@ -222,9 +259,13 @@
             if i == 0 :
                 layer_input = self.x
             else:
                 layer_input = self.layers[-1].output
+            #We have to choose between sigmoidal layer or tanh layer !

             layer = SigmoidalLayer(rng, layer_input, input_size,
                                    hidden_layers_sizes[i] )
+
+##            layer = TanhLayer(rng, layer_input, input_size,
+##                              hidden_layers_sizes[i] )
             # add the layer to the
             self.layers += [layer]
             self.params += layer.params
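
Note on the commented-out tanh reconstruction (hunk at -160,6 +182,21): it guards against log(0) with Python if statements, which cannot bound a symbolic Theano expression element-wise, and it adds b_prime a second time when building self.z. Below is a minimal sketch (not part of this changeset) of the same idea with a symbolic clamp, reusing the names from the diff (tilde_x, x, W, W_prime, b, b_prime); the function name tanh_reconstruction_cost is hypothetical.

import theano.tensor as T

def tanh_reconstruction_cost(tilde_x, x, W, W_prime, b, b_prime):
    # Hidden code, rescaled from (-1, 1) to (0, 1) so its range matches the sigmoid case.
    y = (T.tanh(T.dot(tilde_x, W) + b) + 1.0) / 2.0
    # Reconstruction; b_prime is added only once here.
    z = (T.tanh(T.dot(y, W_prime) + b_prime) + 1.0) / 2.0
    # Element-wise clamp instead of a Python `if`, keeping the logs away from 0 and 1.
    z = T.clip(z, 0.000001, 0.999999)
    # Cross-entropy per example, averaged over the minibatch.
    L = - T.sum(x * T.log(z) + (1.0 - x) * T.log(1.0 - z), axis=1)
    return T.mean(L)

Dropping something like this into dA.__init__ in place of the ## block would leave the sigmoid path untouched while making the tanh variant computable.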
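
The last hunk switches between SigmoidalLayer and TanhLayer by commenting one call in and the other out. A small sketch of making that a run-time choice instead, assuming the constructor signatures shown in the diff (the helper name and the hidden_activation flag are hypothetical):

def build_hidden_layer(rng, layer_input, n_in, n_out, hidden_activation='sigmoid'):
    # Select the hidden layer class from a flag instead of editing the source.
    if hidden_activation == 'tanh':
        return TanhLayer(rng, layer_input, n_in, n_out)
    return SigmoidalLayer(rng, layer_input, n_in, n_out)

# In SdA.__init__ the loop body would then read, e.g.:
#     layer = build_hidden_layer(rng, layer_input, input_size,
#                                hidden_layers_sizes[i], hidden_activation='tanh')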