comparison deep/stacked_dae/v_sylvain/stacked_dae.py @ 368:d391ad815d89
Fix a bug in the log-likelihood function for use with sigmoid output non-linearities
author:    SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date:      Fri, 23 Apr 2010 12:12:03 -0400
parents:   14b28e43ce4e
children:  (none)
--- deep/stacked_dae/v_sylvain/stacked_dae.py@367:f24b10e43a6f
+++ deep/stacked_dae/v_sylvain/stacked_dae.py@368:d391ad815d89
                                               dtype = theano.config.floatX) )
         # initialize the baises b as a vector of n_out 0s
         self.b = theano.shared( value=numpy.zeros((n_out,),
                                             dtype = theano.config.floatX) )
         # compute vector of class-membership. This is a sigmoid instead of
-        #a softmax to be able to classify as nothing later
-##        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        #a softmax to be able later to classify as nothing
+##        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) #row-wise
         self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b)
 
         # compute prediction as class whose probability is maximal in
         # symbolic form
         self.y_pred=T.argmax(self.p_y_given_x, axis=1)
 
         # list of parameters for this layer
         self.params = [self.W, self.b]
 
 
     def negative_log_likelihood(self, y):
-        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+##        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]+T.sum(T.log(1-self.p_y_given_x), axis=1)-T.log(1-self.p_y_given_x)[T.arange(y.shape[0]),y])
+
+
+##    def kullback_leibler(self,y):
+##        return -T.mean(T.log(1/float(self.p_y_given_x))[T.arange(y.shape[0]),y])
+
 
     def errors(self, y):
         # check if y has same dimension of y_pred
         if y.ndim != self.y_pred.ndim:
             raise TypeError('y should have the same shape as self.y_pred',
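Why the fix matters: with a softmax, the class probabilities sum to one, so maximizing log p_y implicitly pushes the other classes down. With independent sigmoid units (chosen here so an example can later be classified as "nothing" when every unit is low), the old cost -mean(log p[y]) ignores the non-target units entirely and nothing stops them from saturating at 1. The corrected cost is the full one-hot binary cross-entropy, -(log p_y + sum_{k != y} log(1 - p_k)), which the new return statement computes in vectorized form as log p[y] + sum_k log(1 - p_k) - log(1 - p)[y]. A minimal numpy sketch (not part of the changeset; sizes and names are illustrative) checking that the rearrangement equals the one-hot cross-entropy:

# Sketch only: verify the new expression against a one-hot reference.
import numpy

rng = numpy.random.RandomState(0)
p = rng.uniform(0.05, 0.95, size=(4, 10))   # sigmoid outputs, batch of 4
y = numpy.array([3, 0, 7, 7])               # integer class targets
rows = numpy.arange(y.shape[0])

# Expression from the changeset:
# log p[y] + sum_k log(1 - p_k) - log(1 - p)[y]
new_nll = -numpy.mean(numpy.log(p)[rows, y]
                      + numpy.sum(numpy.log(1 - p), axis=1)
                      - numpy.log(1 - p)[rows, y])

# Reference: full binary cross-entropy against a one-hot target.
t = numpy.zeros_like(p)
t[rows, y] = 1.0
ref = -numpy.mean(numpy.sum(t * numpy.log(p)
                            + (1 - t) * numpy.log(1 - p), axis=1))

assert numpy.allclose(new_nll, ref)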
(second hunk, in the denoising autoencoder further down the same file)
 
         #Or use a Tanh everything is always between 0 and 1, the range is
         #changed so it remain the same as when sigmoid is used
         self.y = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0
 
-        z_a = T.dot(self.y, self.W_prime) + self.b_prime
-        self.z = (T.tanh(z_a )+1.0) / 2.0
+        self.z = (T.tanh(T.dot(self.y, self.W_prime) + self.b_prime)+1.0) / 2.0
         #To ensure to do not have a log(0) operation
         if self.z <= 0:
             self.z = 0.000001
         if self.z >= 1:
             self.z = 0.999999
 
-        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+        self.L = - T.sum( self.x*T.log(self.z) + (1.0-self.x)*T.log(1.0-self.z), axis=1 )
 
         self.cost = T.mean(self.L)
 
         self.params = [ self.W, self.b, self.b_prime ]
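Two details of this reconstruction cost are worth noting. First, the rescaled tanh used for self.y and self.z is exactly a sigmoid with doubled slope, (tanh(a) + 1)/2 = sigmoid(2a), so both stay in (0, 1) and the cross-entropy in self.L remains well defined. Second, because self.z is a symbolic Theano expression, the Python `if` guards above run once at graph-construction time and cannot clip values element-wise at run time; an element-wise clip (T.clip in Theano, numpy.clip below) is the usual way to keep log() away from 0. A minimal numpy sketch of both points (not part of the changeset):

# Sketch only: the tanh-rescaling identity and an element-wise log(0) guard.
import numpy

def sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

a = numpy.linspace(-5.0, 5.0, 11)

# (tanh(a) + 1) / 2 is a sigmoid with doubled slope: sigmoid(2 * a).
assert numpy.allclose((numpy.tanh(a) + 1.0) / 2.0, sigmoid(2.0 * a))

# Element-wise clipping keeps the reconstruction strictly inside (0, 1)
# before the cross-entropy is computed, so log(z) and log(1 - z) are finite.
z = sigmoid(2.0 * a)
z = numpy.clip(z, 0.000001, 0.999999)
assert (z > 0.0).all() and (z < 1.0).all()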