comparison deep/stacked_dae/v2/stacked_dae.py @ 228:851e7ad4a143

Fixed an error in the modified cost formula in stacked_dae, and removed some timers in sgd_optimization
author fsavard
date Fri, 12 Mar 2010 10:47:36 -0500
parents acae439d6572
children 02eb98d051fe
comparing revision 227:acae439d6572 with revision 228:851e7ad4a143
  131   # Equation (2)
  132   # note : y is stored as an attribute of the class so that it can be
  133   # used later when stacking dAs.
  134   self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
  135   # Equation (3)
- 136   self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+ 136   #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
  137   # Equation (4)
  138   # note : we sum over the size of a datapoint; if we are using minibatches,
  139   # L will be a vector, with one entry per example in minibatch
  140   #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
  141   #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
  142
  143   # bypassing z to avoid running to log(0)
  144   z_a = T.dot(self.y, self.W_prime) + self.b_prime
- 145   log_sigmoid = T.log(1) - T.log(1+T.exp(-z_a))
+ 145   log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a))
  146   # log(1-sigmoid(z_a))
- 147   log_1_sigmoid = -self.x - T.log(1+T.exp(-z_a))
+ 147   log_1_sigmoid = -self.z_a - T.log(1.+T.exp(-z_a))
  148   self.L = -T.sum( self.x * (log_sigmoid) \
  149            + (1.0-self.x) * (log_1_sigmoid), axis=1 )
  150
  151   # I added this epsilon to avoid getting log(0) and 1/0 in grad
  152   # This means conceptually that there'd be no probability of 0, but that
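For reference, the correction at line 147 replaces -self.x with the pre-sigmoid activation, which matches the identities log(sigmoid(a)) = -log(1+exp(-a)) and log(1-sigmoid(a)) = -a - log(1+exp(-a)). A minimal NumPy sketch of the same numerically stable reconstruction cross-entropy (illustrative only, not code from this repository; the function name and argument shapes are assumptions):

import numpy as np

def stable_cross_entropy(x, y, W_prime, b_prime):
    """Reconstruction cross-entropy computed from the pre-sigmoid
    activation z_a, so the logs never hit log(0)."""
    z_a = np.dot(y, W_prime) + b_prime              # pre-sigmoid reconstruction
    log_sigmoid = -np.log1p(np.exp(-z_a))           # log(sigmoid(z_a))
    log_1_sigmoid = -z_a - np.log1p(np.exp(-z_a))   # log(1 - sigmoid(z_a))
    # one entry per example in the minibatch, as for self.L above
    return -np.sum(x * log_sigmoid + (1.0 - x) * log_1_sigmoid, axis=1)

Note that np.exp(-z_a) can still overflow for very negative activations; np.logaddexp(0.0, -z_a) in place of np.log1p(np.exp(-z_a)) would be a more robust choice, but the sketch mirrors the structure of the Theano expression in the diff.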