Mercurial repository: ift6266
comparison deep/stacked_dae/stacked_dae.py @ 275:7b4507295eba
merge
author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
date | Mon, 22 Mar 2010 10:20:10 -0400 |
parents | acb942530923 c8fe09a65039 |
children |
274:44409b6652aa | 275:7b4507295eba |
---|---|
125 # third argument is the probability of success of any trial | 125 # third argument is the probability of success of any trial |
126 # | 126 # |
127 # this will produce an array of 0s and 1s where 1 has a | 127 # this will produce an array of 0s and 1s where 1 has a |
128 # probability of 1 - ``corruption_level`` and 0 with | 128 # probability of 1 - ``corruption_level`` and 0 with |
129 # ``corruption_level`` | 129 # ``corruption_level`` |
130 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x | 130 self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level, dtype=theano.config.floatX) * self.x |
131 # Equation (2) | 131 # Equation (2) |
132 # note : y is stored as an attribute of the class so that it can be | 132 # note : y is stored as an attribute of the class so that it can be |
133 # used later when stacking dAs. | 133 # used later when stacking dAs. |
134 self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) | 134 self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b) |
135 # Equation (3) | 135 # Equation (3) |
136 self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) | 136 #self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime) |
137 # Equation (4) | 137 # Equation (4) |
138 # note : we sum over the size of a datapoint; if we are using minibatches, | 138 # note : we sum over the size of a datapoint; if we are using minibatches, |
139 # L will be a vector, with one entry per example in minibatch | 139 # L will be a vector, with one entry per example in minibatch |
140 #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) | 140 #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) |
141 #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) | 141 #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1) |
142 | 142 |
143 # bypassing z to avoid running to log(0) | 143 # bypassing z to avoid running to log(0) |
144 #self.z_a = T.dot(self.y, self.W_prime) + self.b_prime) | 144 z_a = T.dot(self.y, self.W_prime) + self.b_prime |
145 #self.L = -T.sum( self.x * (T.log(1)-T.log(1+T.exp(-self.z_a))) \ | 145 log_sigmoid = T.log(1.) - T.log(1.+T.exp(-z_a)) |
146 # + (1.0-self.x) * (T.log(1)-T.log(1+T.exp(-self.z_a))), axis=1 ) | 146 # log(1-sigmoid(z_a)) |
| 147 log_1_sigmoid = -z_a - T.log(1.+T.exp(-z_a)) |
| 148 self.L = -T.sum( self.x * (log_sigmoid) \ |
| 149 + (1.0-self.x) * (log_1_sigmoid), axis=1 ) |
147 | 150 |
148 # I added this epsilon to avoid getting log(0) and 1/0 in grad | 151 # I added this epsilon to avoid getting log(0) and 1/0 in grad |
149 # This means conceptually that there'd be no probability of 0, but that | 152 # This means conceptually that there'd be no probability of 0, but that |
150 # doesn't seem to me as important (maybe I'm wrong?). | 153 # doesn't seem to me as important (maybe I'm wrong?). |
151 eps = 0.00000001 | 154 #eps = 0.00000001 |
152 eps_1 = 1-eps | 155 #eps_1 = 1-eps |
153 self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \ | 156 #self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \ |
154 + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) | 157 # + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 ) |
155 # note : L is now a vector, where each element is the cross-entropy cost | 158 # note : L is now a vector, where each element is the cross-entropy cost |
156 # of the reconstruction of the corresponding example of the | 159 # of the reconstruction of the corresponding example of the |
157 # minibatch. We need to compute the average of all these to get | 160 # minibatch. We need to compute the average of all these to get |
158 # the cost of the minibatch | 161 # the cost of the minibatch |
159 self.cost = T.mean(self.L) | 162 self.cost = T.mean(self.L) |
160 | 163 |
161 self.params = [ self.W, self.b, self.b_prime ] | 164 self.params = [ self.W, self.b, self.b_prime ] |
162 | 165 |
163 | 166 |
164 class SdA(object): | 167 class SdA(object): |
165 def __init__(self, train_set_x, train_set_y, batch_size, n_ins, | 168 def __init__(self, batch_size, n_ins, |
166 hidden_layers_sizes, n_outs, | 169 hidden_layers_sizes, n_outs, |
167 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0): | 170 corruption_levels, rng, pretrain_lr, finetune_lr): |
168 # Just to make sure those are not modified somewhere else afterwards | 171 # Just to make sure those are not modified somewhere else afterwards |
169 hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) | 172 hidden_layers_sizes = copy.deepcopy(hidden_layers_sizes) |
170 corruption_levels = copy.deepcopy(corruption_levels) | 173 corruption_levels = copy.deepcopy(corruption_levels) |
171 | 174 |
172 update_locals(self, locals()) | 175 update_locals(self, locals()) |
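Note on the hunk above: the explicit reconstruction `self.z` is replaced by a cross-entropy computed directly from the pre-activation `z_a`, precisely to avoid `log(0)` when the sigmoid saturates. As a reference only, here is a minimal NumPy sketch of the two ideas involved (the masking-noise corruption and the log-sigmoid identities); it is not the Theano graph from the file, and the function names are illustrative.

```python
import numpy as np

def masking_noise(x, corruption_level, rng=np.random):
    # Keep each input with probability 1 - corruption_level, zero it otherwise,
    # mirroring theano_rng.binomial(...) * self.x in the dA class.
    mask = rng.binomial(n=1, p=1.0 - corruption_level, size=x.shape)
    return mask * x

def reconstruction_cost(x, a):
    # a stands for the pre-activation z_a = dot(y, W_prime) + b_prime.
    # log(sigmoid(a))     = -log(1 + exp(-a))
    # log(1 - sigmoid(a)) = -a - log(1 + exp(-a))
    # so z = sigmoid(a) is never formed and log(0) is avoided when it saturates.
    softplus = np.log1p(np.exp(-a))
    L = -np.sum(x * (-softplus) + (1.0 - x) * (-a - softplus), axis=1)
    return L.mean()   # same reduction as self.cost = T.mean(self.L)

# Illustrative use and a sanity check against the naive form on moderate values.
x = np.random.rand(4, 6)
x_tilde = masking_noise(x, 0.3)
a = np.random.randn(4, 6)
z = 1.0 / (1.0 + np.exp(-a))
naive = -np.sum(x * np.log(z) + (1.0 - x) * np.log(1.0 - z), axis=1).mean()
assert np.isclose(naive, reconstruction_cost(x, a))
```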
185 print "corruption_levels", corruption_levels | 188 print "corruption_levels", corruption_levels |
186 print "n_ins", n_ins | 189 print "n_ins", n_ins |
187 print "n_outs", n_outs | 190 print "n_outs", n_outs |
188 print "pretrain_lr", pretrain_lr | 191 print "pretrain_lr", pretrain_lr |
189 print "finetune_lr", finetune_lr | 192 print "finetune_lr", finetune_lr |
190 print "input_divider", input_divider | |
191 print "----" | 193 print "----" |
192 | |
193 self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX)) | |
194 | 194 |
195 if len(hidden_layers_sizes) < 1 : | 195 if len(hidden_layers_sizes) < 1 : |
196 raise Exception(' You must have at least one hidden layer ') | 196 raise Exception(' You must have at least one hidden layer ') |
197 | 197 |
198 | 198 |
199 # allocate symbolic variables for the data | 199 # allocate symbolic variables for the data |
200 index = T.lscalar() # index to a [mini]batch | 200 #index = T.lscalar() # index to a [mini]batch |
201 self.x = T.matrix('x') # the data is presented as rasterized images | 201 self.x = T.matrix('x') # the data is presented as rasterized images |
202 self.y = T.ivector('y') # the labels are presented as 1D vector of | 202 self.y = T.ivector('y') # the labels are presented as 1D vector of |
203 # [int] labels | 203 # [int] labels |
204 | 204 |
205 for i in xrange( self.n_layers ): | 205 for i in xrange( self.n_layers ): |
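The refactored constructor no longer receives `train_set_x`, `train_set_y` or `input_divider`; the caller now owns minibatch slicing and scaling. A hypothetical instantiation under the new signature might look like the sketch below; all values are illustrative and not taken from the repository, and passing a `numpy.random.RandomState` as `rng` is an assumption.

```python
import numpy

# Hypothetical values only, to show the new argument list.
sda = SdA(batch_size=20,
          n_ins=32 * 32,                       # rasterized images, assumed size
          hidden_layers_sizes=[500, 500, 500],
          n_outs=62,
          corruption_levels=[0.1, 0.2, 0.3],
          rng=numpy.random.RandomState(1234),  # assumed RNG type
          pretrain_lr=0.01,
          finetune_lr=0.1)
```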
242 updates = {} | 242 updates = {} |
243 for param, gparam in zip(dA_layer.params, gparams): | 243 for param, gparam in zip(dA_layer.params, gparams): |
244 updates[param] = param - gparam * pretrain_lr | 244 updates[param] = param - gparam * pretrain_lr |
245 | 245 |
246 # create a function that trains the dA | 246 # create a function that trains the dA |
247 update_fn = theano.function([index], dA_layer.cost, \ | 247 update_fn = theano.function([self.x], dA_layer.cost, \ |
248 updates = updates, | 248 updates = updates)#, |
249 givens = { | 249 # givens = { |
250 self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider}) | 250 # self.x : ensemble}) |
| 251 # collect this function into a list |
| 252 #update_fn = theano.function([index], dA_layer.cost, \ |
| 253 # updates = updates, |
| 254 # givens = { |
| 255 # self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.shared_divider}) |
251 # collect this function into a list | 256 # collect this function into a list |
252 self.pretrain_functions += [update_fn] | 257 self.pretrain_functions += [update_fn] |
253 | 258 |
254 | 259 |
255 # We now need to add a logistic layer on top of the MLP | 260 # We now need to add a logistic layer on top of the MLP |
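Since the compiled pretraining function now takes `self.x` as an explicit input instead of an `index` resolved through `givens`, each minibatch is passed in directly. A hedged sketch of the calling pattern; `train_set_x`, `batch_size`, `input_divider` and `n_train_batches` are placeholders for whatever the caller now manages.

```python
# Old interface: the dataset lived in a shared variable inside the class and
# the compiled function was indexed into it:
#     c = sda.pretrain_functions[i](batch_index)
#
# New interface: the caller slices (and, if needed, rescales) the minibatch
# itself and feeds the raw ndarray to the Theano function.
for i in xrange(sda.n_layers):
    for batch_index in xrange(n_train_batches):
        x_batch = train_set_x[batch_index * batch_size:
                              (batch_index + 1) * batch_size] / input_divider
        c = sda.pretrain_functions[i](x_batch)
```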
268 # compute list of updates | 273 # compute list of updates |
269 updates = {} | 274 updates = {} |
270 for param,gparam in zip(self.params, gparams): | 275 for param,gparam in zip(self.params, gparams): |
271 updates[param] = param - gparam*finetune_lr | 276 updates[param] = param - gparam*finetune_lr |
272 | 277 |
273 self.finetune = theano.function([index], cost, | 278 self.finetune = theano.function([self.x,self.y], cost, |
274 updates = updates, | 279 updates = updates)#, |
275 givens = { | 280 # givens = { |
276 self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, | 281 # self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.shared_divider, |
277 self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) | 282 # self.y : train_set_y[index*batch_size:(index+1)*batch_size]} ) |
278 | 283 |
279 # symbolic variable that points to the number of errors made on the | 284 # symbolic variable that points to the number of errors made on the |
280 # minibatch given by self.x and self.y | 285 # minibatch given by self.x and self.y |
281 | 286 |
282 self.errors = self.logLayer.errors(self.y) | 287 self.errors = self.logLayer.errors(self.y) |
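`finetune` follows the same givens-free convention, and the symbolic `self.errors` can be compiled into an evaluation function in the same way. A sketch only; `x_batch`, `y_batch`, `x_valid`, `y_valid` are placeholder arrays (labels as int32 to match `T.ivector`), and the compiled `test_model` is not part of the diff.

```python
import theano

# Fine-tuning step: minibatch data and labels are fed directly.
c = sda.finetune(x_batch, y_batch)

# Error rate on held-out data, compiled under the same convention.
test_model = theano.function([sda.x, sda.y], sda.errors)
err = test_model(x_valid, y_valid)
```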