ift6266: comparison deep/stacked_dae/v_sylvain/stacked_dae.py @ 351:799ad23a161f
Added the ability to use tanh instead of sigmoids, and a sigmoid at the output rather than a softmax
author | SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca> |
---|---|
date | Wed, 21 Apr 2010 14:07:53 -0400 |
parents | 54ad8a091783 |
children | bc4464c0894c |
350:625c0c3fcbdb | 351:799ad23a161f |
---|---|
25 self.W = theano.shared( value=numpy.zeros((n_in,n_out), | 25 self.W = theano.shared( value=numpy.zeros((n_in,n_out), |
26 dtype = theano.config.floatX) ) | 26 dtype = theano.config.floatX) ) |
27 # initialize the baises b as a vector of n_out 0s | 27 # initialize the baises b as a vector of n_out 0s |
28 self.b = theano.shared( value=numpy.zeros((n_out,), | 28 self.b = theano.shared( value=numpy.zeros((n_out,), |
29 dtype = theano.config.floatX) ) | 29 dtype = theano.config.floatX) ) |
30 # compute vector of class-membership probabilities in symbolic form | 30 # compute vector of class-membership. This is a sigmoid instead of |
31 self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) | 31 #a softmax to be able to classify as nothing later |
32 ## self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) | |
33 self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b) | |
32 | 34 |
33 # compute prediction as class whose probability is maximal in | 35 # compute prediction as class whose probability is maximal in |
34 # symbolic form | 36 # symbolic form |
35 self.y_pred=T.argmax(self.p_y_given_x, axis=1) | 37 self.y_pred=T.argmax(self.p_y_given_x, axis=1) |
36 | 38 |
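The first hunk replaces the softmax output of the logistic regression layer with independent sigmoids so that, as the new comment says, the model can later classify an input as belonging to no class. A small illustration of why that matters (plain numpy, not code from the repository, with made-up logit values): softmax probabilities always sum to 1, so some class always wins, while independent sigmoids can all stay low and be rejected by a threshold.

```python
import numpy

# Weak evidence for every class (values chosen for illustration only).
logits = numpy.array([-3.0, -2.5, -4.0])

softmax = numpy.exp(logits) / numpy.exp(logits).sum()
sigmoid = 1.0 / (1.0 + numpy.exp(-logits))

print(softmax, softmax.sum())   # probabilities sum to 1, argmax still picks a class
print(sigmoid, sigmoid.max())   # every output is below 0.1, so all classes can be rejected
```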
69 b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) | 71 b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) |
70 self.b = theano.shared(value= b_values) | 72 self.b = theano.shared(value= b_values) |
71 | 73 |
72 self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b) | 74 self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b) |
73 self.params = [self.W, self.b] | 75 self.params = [self.W, self.b] |
74 | 76 |
77 | |
78 class TanhLayer(object): | |
79 def __init__(self, rng, input, n_in, n_out): | |
80 self.input = input | |
81 | |
82 W_values = numpy.asarray( rng.uniform( \ | |
83 low = -numpy.sqrt(6./(n_in+n_out)), \ | |
84 high = numpy.sqrt(6./(n_in+n_out)), \ | |
85 size = (n_in, n_out)), dtype = theano.config.floatX) | |
86 self.W = theano.shared(value = W_values) | |
87 | |
88 b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) | |
89 self.b = theano.shared(value= b_values) | |
90 | |
91 self.output = (T.tanh(T.dot(input, self.W) + self.b) + 1) /2 | |
92 # ( *+ 1) /2 is because tanh goes from -1 to 1 and sigmoid goes from 0 to 1 | |
93 # I want to use tanh, but the image has to stay the same. The correction is necessary. | |
94 self.params = [self.W, self.b] | |
75 | 95 |
76 | 96 |
77 class dA(object): | 97 class dA(object): |
78 def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\ | 98 def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\ |
79 input = None, shared_W = None, shared_b = None): | 99 input = None, shared_W = None, shared_b = None): |
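The new TanhLayer above rescales its activation with (tanh(x) + 1) / 2 so that, as its comment explains, the output stays in the same (0, 1) range as the SigmoidalLayer it can replace. This rescaling is in fact exactly sigmoid(2x); a quick numpy check (not part of the changeset) confirms it:

```python
import numpy

x = numpy.linspace(-4.0, 4.0, 9)

rescaled_tanh = (numpy.tanh(x) + 1.0) / 2.0    # the TanhLayer output formula
sigmoid_2x = 1.0 / (1.0 + numpy.exp(-2.0 * x))

# Identical up to floating point error, and bounded in (0, 1) like a sigmoid.
assert numpy.allclose(rescaled_tanh, sigmoid_2x)
print(rescaled_tanh.min(), rescaled_tanh.max())
```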
130 # ``corruption_level`` | 150 # ``corruption_level`` |
131 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x | 151 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x |
132 # Equation (2) | 152 # Equation (2) |
133 # note : y is stored as an attribute of the class so that it can be | 153 # note : y is stored as an attribute of the class so that it can be |
134 # used later when stacking dAs. | 154 # used later when stacking dAs. |
155 | |
135 self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) | 156 self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) |
157 | |
136 # Equation (3) | 158 # Equation (3) |
137 #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) | 159 #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) |
138 # Equation (4) | 160 # Equation (4) |
139 # note : we sum over the size of a datapoint; if we are using minibatches, | 161 # note : we sum over the size of a datapoint; if we are using minibatches, |
140 # L will be a vector, with one entry per example in minibatch | 162 # L will be a vector, with one entry per example in minibatch |
158 # + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) | 180 # + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) |
159 # note : L is now a vector, where each element is the cross-entropy cost | 181 # note : L is now a vector, where each element is the cross-entropy cost |
160 # of the reconstruction of the corresponding example of the | 182 # of the reconstruction of the corresponding example of the |
161 # minibatch. We need to compute the average of all these to get | 183 # minibatch. We need to compute the average of all these to get |
162 # the cost of the minibatch | 184 # the cost of the minibatch |
185 | |
186 ## #Or use a Tanh everything is always between 0 and 1, the range is | |
187 ## #changed so it remain the same as when sigmoid is used | |
188 ## self.y = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0 | |
189 ## | |
190 ## z_a = T.dot(self.y, self.W_prime) + self.b_prime | |
191 ## self.z = (T.tanh(z_a + self.b_prime)+1.0) / 2.0 | |
192 ## #To ensure to do not have a log(0) operation | |
193 ## if self.z <= 0: | |
194 ## self.z = 0.000001 | |
195 ## if self.z >= 1: | |
196 ## self.z = 0.999999 | |
197 ## | |
198 ## self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) | |
199 | |
163 self.cost = T.mean(self.L) | 200 self.cost = T.mean(self.L) |
164 | 201 |
165 self.params = [ self.W, self.b, self.b_prime ] | 202 self.params = [ self.W, self.b, self.b_prime ] |
166 | 203 |
167 | 204 |
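The commented-out tanh reconstruction in the hunk above guards against log(0) with Python `if` statements on `self.z`, but `self.z` is a symbolic Theano expression, so those branches would only run once when the graph is built rather than once per example. A symbolic version of the same guard could clip the reconstruction instead; the sketch below is only illustrative (the helper name and the `eps` default are assumptions, not code from the repository):

```python
import theano.tensor as T

def clipped_cross_entropy(x, z, eps=0.000001):
    # Bound the reconstruction away from exactly 0 and 1 element-wise,
    # so neither log term can ever be evaluated at 0.
    z_safe = T.clip(z, eps, 1.0 - eps)
    L = - T.sum(x * T.log(z_safe) + (1.0 - x) * T.log(1.0 - z_safe), axis=1)
    return T.mean(L)
```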
220 # layer | 257 # layer |
221 if i == 0 : | 258 if i == 0 : |
222 layer_input = self.x | 259 layer_input = self.x |
223 else: | 260 else: |
224 layer_input = self.layers[-1].output | 261 layer_input = self.layers[-1].output |
262 #We have to choose between sigmoidal layer or tanh layer ! | |
225 | 263 |
226 layer = SigmoidalLayer(rng, layer_input, input_size, | 264 layer = SigmoidalLayer(rng, layer_input, input_size, |
227 hidden_layers_sizes[i] ) | 265 hidden_layers_sizes[i] ) |
266 | |
267 ## layer = TanhLayer(rng, layer_input, input_size, | |
268 ## hidden_layers_sizes[i] ) | |
228 # add the layer to the | 269 # add the layer to the |
229 self.layers += [layer] | 270 self.layers += [layer] |
230 self.params += layer.params | 271 self.params += layer.params |
231 | 272 |
232 # Construct a denoising autoencoder that shared weights with this | 273 # Construct a denoising autoencoder that shared weights with this |
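The last hunk keeps the TanhLayer construction commented out next to the SigmoidalLayer one, with a reminder that one of the two has to be chosen. One way to avoid editing the source for every experiment would be to make the choice a parameter of the stacked-DAE constructor; the `use_tanh` flag below is hypothetical and not part of this changeset.

```python
# Sketch of the layer-construction loop body only, assuming the surrounding
# stacked-DAE constructor from the file above; `use_tanh` would be a new
# __init__ argument.
if use_tanh:
    layer = TanhLayer(rng, layer_input, input_size,
                      hidden_layers_sizes[i])
else:
    layer = SigmoidalLayer(rng, layer_input, input_size,
                           hidden_layers_sizes[i])

self.layers += [layer]
self.params += layer.params
```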