ift6266: comparison of baseline/mlp/ratio_classes/mlp_nist_ratio.py @ 443:89a49dae6cf3
summary:   merge
author:    Xavier Glorot <glorotxa@iro.umontreal.ca>
date:      Mon, 03 May 2010 18:38:58 -0400
parents:   d8129a09ffb1
children:  (none)
--- baseline/mlp/ratio_classes/mlp_nist_ratio.py  (442:d5b2b6397a5a)
+++ baseline/mlp/ratio_classes/mlp_nist_ratio.py  (443:89a49dae6cf3)
@@ -22,12 +22,11 @@
 to do lr first, then add regularization)
 
 """
 __docformat__ = 'restructedtext en'
 
-import ift6266
-from scripts import setup_batches
+import setup_batches
 import pdb
 import numpy
 
 import theano
 import theano.tensor as T
@@ -48,11 +47,11 @@
 sigmoid function while the top layer is a softmax layer.
 """
 
 
 
-    def __init__(self, input, n_in, n_hidden, n_out,learning_rate):
+    def __init__(self, input, n_in, n_hidden, n_out,learning_rate, test_subclass):
        """Initialize the parameters for the multilayer perceptron
 
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)
 
@@ -111,16 +110,24 @@
         # symbolic expression computing the values of the top layer
         self.p_y_given_x= T.nnet.softmax(T.dot(self.hidden, self.W2)+self.b2)
 
         # compute prediction as class whose probability is maximal in
         # symbolic form
-        self.y_pred = T.argmax( self.p_y_given_x, axis =1)
-        self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
-
-
-
-
+        #self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+        #self.y_pred_num = T.argmax( self.p_y_given_x[0:9], axis =1)
+
+        self.test_subclass = test_subclass
+
+        #if (self.test_subclass == "u"):
+        #    self.y_pred = T.argmax( self.p_y_given_x[10:35], axis =1) + 10
+        #elif (self.test_subclass == "l"):
+        #    self.y_pred = T.argmax( self.p_y_given_x[35:], axis =1) + 35
+        #elif (self.test_subclass == "d"):
+        #    self.y_pred = T.argmax( self.p_y_given_x[0:9], axis =1)
+        #else:
+        self.y_pred = T.argmax( self.p_y_given_x, axis =1)
+
         # L1 norm ; one regularization option is to enforce L1 norm to
         # be small
         self.L1 = abs(self.W1).sum() + abs(self.W2).sum()
 
         # square of L2 norm ; one regularization option is to enforce
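
A note on the commented-out block above: it is meant to restrict predictions to a single character subclass by taking the argmax over a slice of the softmax output and adding the slice offset back. As written, though, an expression like p_y_given_x[10:35] slices minibatch rows rather than class columns (column slicing would need [:, 10:36]), and the bounds look off by one, which may be why the block is disabled. A minimal NumPy sketch of the intended idea, assuming the usual 62-class NIST layout (0-9 digits, 10-35 uppercase, 36-61 lowercase):

    import numpy as np

    # fake softmax output for a minibatch of 4 examples over 62 classes
    p_y_given_x = np.random.dirichlet(np.ones(62), size=4)

    # half-open column ranges per subclass (assumed layout)
    subclass_slices = {"d": (0, 10), "u": (10, 36), "l": (36, 62)}

    lo, hi = subclass_slices["u"]
    # argmax over the subclass columns only, then shift back to full labels
    y_pred = np.argmax(p_y_given_x[:, lo:hi], axis=1) + lo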
@@ -176,13 +183,13 @@
                         L1_reg = 0.00,\
                         L2_reg = 0.0001,\
                         nb_max_exemples=1000000,\
                         batch_size=20,\
                         nb_hidden = 500,\
-                        nb_targets = 62,\
+                        nb_targets = 26,\
                         tau=1e6,\
-                        main_class="d",\
+                        main_class="l",\
                         start_ratio=1,\
                         end_ratio=1):
 
 
     configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
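
This hunk retargets the run from the full 62-way NIST output (10 digits + 26 uppercase + 26 lowercase) to a 26-way softmax over a single letter case, with lowercase as the new default main class. The label arithmetic this implies, using the offsets defined later in the patch:

    # assumed NIST layout: 0-9 digits, 10-35 uppercase, 36-61 lowercase
    raw_label = 38                       # a lowercase letter in the full layout
    class_offset = 36                    # offset used for main_class == "l"
    target = raw_label - class_offset    # -> 2, valid for nb_targets = 26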
@@ -214,12 +221,13 @@
     print 'finished parsing the data'
     # construct the logistic regression class
     classifier = MLP( input=x.reshape((batch_size,32*32)),\
                         n_in=32*32,\
                         n_hidden=nb_hidden,\
-                        n_out=nb_targets,
-                        learning_rate=learning_rate)
+                        n_out=nb_targets,\
+                        learning_rate=learning_rate,\
+                        test_subclass=main_class)
 
 
 
 
     # the cost we minimize during training is the negative log likelihood of
@@ -283,11 +291,17 @@
     n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples
     n_iter = n_iter/n_minibatches + 1 #round up
     n_iter=max(1,n_iter) # run at least once on short debug call
     time_n=0 #in unit of exemples
 
-
+    if (main_class == "u"):
+        class_offset = 10
+    elif (main_class == "l"):
+        class_offset = 36
+    else:
+        class_offset = 0
+
 
     if verbose == True:
         print 'looping at most %d times through the data set' %n_iter
     for iter in xrange(n_iter* n_minibatches):
 
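
The offset table mirrors the assumed label layout: uppercase starts at 10, lowercase at 36, and the else branch leaves digits (and the full 62-class case) at offset 0. A quick NumPy check of the remapping that the following hunks apply to every batch; values are illustrative:

    import numpy as np

    class_offset = 36                    # main_class == "l"
    y_raw = np.array([36, 40, 61])       # lowercase labels in 62-class space
    y = y_raw - class_offset             # -> [0, 4, 25]
    assert ((0 <= y) & (y < 26)).all()   # fits the 26-way softmax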
@@ -300,20 +314,24 @@
             classifier.lr.value = tau*initial_lr/(tau+time_n)
 
         # get the minibatches corresponding to `iter` modulo
         # `len(train_batches)`
         x,y = train_batches[ minibatch_index ]
+
+        y = y - class_offset
+
         # convert to float
         x_float = x/255.0
         cost_ij = train_model(x_float,y)
 
         if (iter+1) % validation_frequency == 0:
             # compute zero-one loss on validation set
 
             this_validation_loss = 0.
             for x,y in validation_batches:
                 # sum up the errors for each minibatch
+                y = y - class_offset
                 x_float = x/255.0
                 this_validation_loss += test_model(x_float,y)
             # get the average by dividing with the number of minibatches
             this_validation_loss /= len(validation_batches)
             #save the validation loss
@@ -321,10 +339,11 @@
 
             #get the training error rate
             this_train_loss=0
             for x,y in train_batches:
                 # sum up the errors for each minibatch
+                y = y - class_offset
                 x_float = x/255.0
                 this_train_loss += test_model(x_float,y)
             # get the average by dividing with the number of minibatches
             this_train_loss /= len(train_batches)
             #save the training loss
@@ -353,10 +372,11 @@
                 # so we continue exploring
                 patience=nb_max_exemples/batch_size
                 # test it on the test set
                 test_score = 0.
                 for x,y in test_batches:
+                    y = y - class_offset
                     x_float=x/255.0
                     test_score += test_model(x_float,y)
                 test_score /= len(test_batches)
                 if verbose == True:
                     print((' epoch %i, minibatch %i/%i, test error of best '
@@ -379,10 +399,11 @@
                 test_score = 0.
                 #cap the patience so we are allowed one more validation error
                 #calculation before aborting
                 patience = iter+validation_frequency+1
                 for x,y in test_batches:
+                    y = y - class_offset
                     x_float=x/255.0
                     test_score += test_model(x_float,y)
                 test_score /= len(test_batches)
                 if verbose == True:
                     print ' validation error is going up, possibly stopping soon'
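
With this hunk the same y = y - class_offset line appears at four sites (training, validation, and both test passes). A small helper, purely a suggestion rather than part of the patch, would keep the four sites in sync:

    def remap_labels(y, class_offset):
        """Shift raw 62-class NIST labels into the reduced target range."""
        return y - class_offset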
@@ -419,17 +440,14 @@
 if __name__ == '__main__':
     mlp_full_nist(True)
 
 def jobman_mlp_full_nist(state,channel):
     (train_error,validation_error,test_error,nb_exemples,time)=mlp_full_nist(learning_rate=state.learning_rate,\
-                                nb_max_exemples=state.nb_max_exemples,\
                                 nb_hidden=state.nb_hidden,\
-                                adaptive_lr=state.adaptive_lr,\
-                                tau=state.tau,\
-                                main_class=state.main_class,\
-                                start_ratio=state.start_ratio,\
-                                end_ratio=state.end_ratio)
+                                main_class=state.main_class,\
+                                start_ratio=state.ratio,\
+                                end_ratio=state.ratio)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error
     state.nb_exemples=nb_exemples
     state.time=time
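
The jobman wrapper drops the nb_max_exemples, adaptive_lr, and tau hyperparameters and collapses start_ratio/end_ratio into a single state.ratio field. A minimal sketch of a state object that could drive this wrapper; the DD attribute-dict stands in for jobman's state type and is an assumption here:

    class DD(dict):
        """Bare-bones attribute dict, standing in for jobman's DD."""
        __getattr__ = dict.__getitem__
        __setattr__ = dict.__setitem__

    state = DD(learning_rate=0.1, nb_hidden=500, main_class="l", ratio=1)
    # jobman_mlp_full_nist(state, channel=None)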