comparison deep/stacked_dae/v_sylvain/stacked_dae.py @ 351:799ad23a161f

Add the ability to use tanh instead of sigmoids, and a sigmoid at the output rather than a softmax
author SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date Wed, 21 Apr 2010 14:07:53 -0400
parents 54ad8a091783
children bc4464c0894c
comparison of 350:625c0c3fcbdb with 351:799ad23a161f
@@ -25 +25 @@
         self.W = theano.shared( value=numpy.zeros((n_in,n_out),
                                             dtype = theano.config.floatX) )
         # initialize the biases b as a vector of n_out 0s
         self.b = theano.shared( value=numpy.zeros((n_out,),
                                             dtype = theano.config.floatX) )
-        # compute vector of class-membership probabilities in symbolic form
-        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        # compute vector of class-membership probabilities. This is a sigmoid
+        # instead of a softmax, to be able to classify as nothing later
+##        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        self.p_y_given_x = T.nnet.sigmoid(T.dot(input, self.W)+self.b)

         # compute prediction as class whose probability is maximal in
         # symbolic form
         self.y_pred=T.argmax(self.p_y_given_x, axis=1)

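Because each output unit is now an independent sigmoid rather than a component of a softmax, the class probabilities no longer have to sum to one, which is what makes it possible to "classify as nothing" later. A minimal numpy sketch of how that could be used downstream; the 0.5 threshold and the -1 "no class" label are assumptions, not part of this changeset:

    import numpy

    def predict_with_reject(p_y_given_x, threshold=0.5):
        # p_y_given_x: (n_examples, n_classes) array of per-class sigmoid outputs.
        # Return the argmax class, or -1 ("nothing") when no class is confident.
        pred = p_y_given_x.argmax(axis=1)
        pred[p_y_given_x.max(axis=1) < threshold] = -1
        return pred

    p = numpy.array([[0.9, 0.1, 0.2],
                     [0.2, 0.3, 0.1]])
    print(predict_with_reject(p))   # -> [ 0 -1]
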
@@ -69 +71 @@
         b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
         self.b = theano.shared(value= b_values)

         self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
         self.params = [self.W, self.b]


+class TanhLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        W_values = numpy.asarray( rng.uniform( \
+              low  = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+
+        self.output = (T.tanh(T.dot(input, self.W) + self.b) + 1) / 2
+        # The (... + 1) / 2 rescaling is needed because tanh ranges over (-1, 1)
+        # while sigmoid ranges over (0, 1); tanh is used here, but the output
+        # range has to stay the same, so the correction is necessary.
+        self.params = [self.W, self.b]
+

 class dA(object):
     def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\
                  input = None, shared_W = None, shared_b = None):
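For reference, the (tanh(x) + 1) / 2 rescaling used by TanhLayer is exactly sigmoid(2*x), so the layer keeps the same (0, 1) output range as SigmoidalLayer while being steeper around zero. A quick numpy check:

    import numpy

    x = numpy.linspace(-5, 5, 11)
    rescaled_tanh = (numpy.tanh(x) + 1) / 2
    sigmoid_of_2x = 1 / (1 + numpy.exp(-2 * x))
    # Both stay in (0, 1); the rescaled tanh equals sigmoid applied to 2*x.
    assert numpy.allclose(rescaled_tanh, sigmoid_of_2x)
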
@@ -130 +150 @@
         # ``corruption_level``
         self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x
         # Equation (2)
         # note : y is stored as an attribute of the class so that it can be
         #        used later when stacking dAs.
+
         self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
+
         # Equation (3)
         #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
         # Equation (4)
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
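The corruption step above draws a binomial mask that keeps each input entry with probability 1 - corruption_level and zeroes the rest. A plain numpy illustration of the same masking (the Theano version builds a symbolic graph instead; the helper name here is ours):

    import numpy

    def corrupt(x, corruption_level, rng):
        # Keep each entry with probability 1 - corruption_level and zero the rest,
        # mirroring theano_rng.binomial(x.shape, 1, 1 - corruption_level) * x.
        mask = rng.binomial(n=1, p=1 - corruption_level, size=x.shape)
        return mask * x

    rng = numpy.random.RandomState(1234)
    x = rng.uniform(size=(2, 784))
    tilde_x = corrupt(x, corruption_level=0.1, rng=rng)
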
@@ -158 +180 @@
         #                      + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
         #        the cost of the minibatch
+
+##        # Or use a tanh: the output range is shifted so that everything stays
+##        # between 0 and 1, the same as when a sigmoid is used
+##        self.y = (T.tanh(T.dot(self.tilde_x, self.W ) + self.b)+1.0)/2.0
+##
+##        z_a = T.dot(self.y, self.W_prime) + self.b_prime
+##        self.z = (T.tanh(z_a)+1.0) / 2.0
+##        # Clip the reconstruction to ensure we do not take log(0) below
+##        # (a Python `if` on a symbolic variable would not work here)
+##        self.z = T.clip(self.z, 0.000001, 0.999999)
+##
+##        self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+
         self.cost = T.mean(self.L)

         self.params = [ self.W, self.b, self.b_prime ]

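The cost being minimized is the mean over the minibatch of the per-example cross-entropy between the input x and its reconstruction z (Equation (4) above). A numpy sketch of that computation, with an eps clip added here only to avoid log(0):

    import numpy

    def reconstruction_cost(x, z, eps=1e-6):
        # Per-example cross-entropy between input x and reconstruction z,
        # then averaged over the minibatch (self.L and self.cost above).
        z = numpy.clip(z, eps, 1 - eps)
        L = -numpy.sum(x * numpy.log(z) + (1 - x) * numpy.log(1 - z), axis=1)
        return L.mean()
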
@@ -220 +257 @@
             # layer
             if i == 0 :
                 layer_input = self.x
             else:
                 layer_input = self.layers[-1].output
+            # We have to choose between a sigmoidal layer or a tanh layer!

             layer = SigmoidalLayer(rng, layer_input, input_size,
                                    hidden_layers_sizes[i] )
+
+##            layer = TanhLayer(rng, layer_input, input_size,
+##                              hidden_layers_sizes[i] )
             # add the layer to the list of layers
             self.layers += [layer]
             self.params += layer.params

             # Construct a denoising autoencoder that shared weights with this
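Since this changeset switches between SigmoidalLayer and TanhLayer by commenting lines in and out, one possible alternative is a small helper that takes a flag and sits alongside the two layer classes in this file; the use_tanh argument below is hypothetical and not part of this changeset:

    def build_hidden_layer(rng, layer_input, n_in, n_out, use_tanh=False):
        # Hypothetical switch between the two hidden-layer types defined above;
        # `use_tanh` is an assumed flag, not part of this changeset.
        if use_tanh:
            return TanhLayer(rng, layer_input, n_in, n_out)
        return SigmoidalLayer(rng, layer_input, n_in, n_out)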