ift6266: comparison of baseline/mlp/mlp_nist.py @ 377:0b7e64e8e93f
branch merge
author:   Arnaud Bergeron <abergeron@gmail.com>
date:     Sun, 25 Apr 2010 17:12:03 -0400
parents:  76b7182dd32e
children: 60a4432b8071
376:01445a75c702 | 377:0b7e64e8e93f |
---|---|
21 to do lr first, then add regularization) | 21 to do lr first, then add regularization) |
22 | 22 |
23 """ | 23 """ |
24 __docformat__ = 'restructuredtext en' | 24 __docformat__ = 'restructuredtext en' |
25 | 25 |
26 import sys | |
26 import pdb | 27 import pdb |
27 import numpy | 28 import numpy |
28 import pylab | 29 import pylab |
29 import theano | 30 import theano |
30 import theano.tensor as T | 31 import theano.tensor as T |
161 # represents a mistake in prediction | 162 # represents a mistake in prediction |
162 return T.mean(T.neq(self.y_pred, y)) | 163 return T.mean(T.neq(self.y_pred, y)) |
163 else: | 164 else: |
164 raise NotImplementedError() | 165 raise NotImplementedError() |
165 | 166 |
167 def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz', | |
168 data_set=0): | |
169 | |
170 | |
171 | |
172 # allocate symbolic variables for the data | |
173 x = T.fmatrix() # the data is presented as rasterized images | |
174 y = T.lvector() # the labels are presented as 1D vector of | |
175 # [long int] labels | |
176 | |
177 # load the data set and create an mlp based on the dimensions of the model | |
178 model=numpy.load(model_name) | |
179 W1=model['W1'] | |
180 W2=model['W2'] | |
181 b1=model['b1'] | |
182 b2=model['b2'] | |
183 nb_hidden=b1.shape[0] | |
184 input_dim=W1.shape[0] | |
185 nb_targets=b2.shape[0] | |
186 learning_rate=0.1 | |
187 | |
188 | |
189 if data_set==0: | |
190 dataset=datasets.nist_all() | |
191 elif data_set==1: | |
192 dataset=datasets.nist_P07() | |
193 | |
194 | |
195 classifier = MLP( input=x,\ | |
196 n_in=input_dim,\ | |
197 n_hidden=nb_hidden,\ | |
198 n_out=nb_targets, | |
199 learning_rate=learning_rate) | |
200 | |
201 | |
202 #overwrite weights with weights from the model |
203 classifier.W1.value=W1 | |
204 classifier.W2.value=W2 | |
205 classifier.b1.value=b1 | |
206 classifier.b2.value=b2 | |
207 | |
208 | |
209 cost = classifier.negative_log_likelihood(y) \ | |
210 + 0.0 * classifier.L1 \ | |
211 + 0.0 * classifier.L2_sqr | |
212 | |
213 # compiling a theano function that computes the mistakes that are made by | |
214 # the model on a minibatch | |
215 test_model = theano.function([x,y], classifier.errors(y)) | |
216 | |
217 | |
218 | |
219 #get the test error | |
220 #use a small batch size (20 here) so we can get the sub-class error |
221 #without messing with matrices (will be upgraded later) | |
222 test_score=0 | |
223 temp=0 | |
224 for xt,yt in dataset.test(20): | |
225 test_score += test_model(xt,yt) | |
226 temp = temp+1 | |
227 test_score /= temp | |
228 | |
229 | |
230 return test_score*100 | |
231 | |
232 | |
233 | |
234 | |
235 | |
166 | 236 |
167 def mlp_full_nist( verbose = 1,\ | 237 def mlp_full_nist( verbose = 1,\ |
168 adaptive_lr = 0,\ | 238 adaptive_lr = 0,\ |
169 data_set=0,\ | 239 data_set=0,\ |
170 learning_rate=0.01,\ | 240 learning_rate=0.01,\ |
172 L2_reg = 0.0001,\ | 242 L2_reg = 0.0001,\ |
173 nb_max_exemples=1000000,\ | 243 nb_max_exemples=1000000,\ |
174 batch_size=20,\ | 244 batch_size=20,\ |
175 nb_hidden = 30,\ | 245 nb_hidden = 30,\ |
176 nb_targets = 62, | 246 nb_targets = 62, |
177 tau=1e6,\ | 247 tau=1e6,\ |
178 lr_t2_factor=0.5): | 248 lr_t2_factor=0.5,\ |
179 | 249 init_model=0,\ |
180 | 250 channel=0): |
251 | |
252 | |
253 if channel!=0: | |
254 channel.save() | |
181 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] | 255 configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr] |
182 | 256 |
183 #save initial learning rate if classical adaptive lr is used | 257 #save initial learning rate if classical adaptive lr is used |
184 initial_lr=learning_rate | 258 initial_lr=learning_rate |
185 max_div_count=3 | 259 max_div_count=1000 |
186 | 260 |
187 | 261 |
188 total_validation_error_list = [] | 262 total_validation_error_list = [] |
189 total_train_error_list = [] | 263 total_train_error_list = [] |
190 learning_rate_list=[] | 264 learning_rate_list=[] |
193 | 267 |
194 if data_set==0: | 268 if data_set==0: |
195 dataset=datasets.nist_all() | 269 dataset=datasets.nist_all() |
196 elif data_set==1: | 270 elif data_set==1: |
197 dataset=datasets.nist_P07() | 271 dataset=datasets.nist_P07() |
272 elif data_set==2: | |
273 dataset=datasets.PNIST07() | |
198 | 274 |
199 | 275 |
200 | 276 |
201 | 277 |
202 ishape = (32,32) # this is the size of NIST images | 278 ishape = (32,32) # this is the size of NIST images |
213 n_hidden=nb_hidden,\ | 289 n_hidden=nb_hidden,\ |
214 n_out=nb_targets, | 290 n_out=nb_targets, |
215 learning_rate=learning_rate) | 291 learning_rate=learning_rate) |
216 | 292 |
217 | 293 |
294 # check if we want to initialise the weights with a previously calculated model | |
295 # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets) | |
296 if init_model!=0: | |
297 old_model=numpy.load(init_model) | |
298 classifier.W1.value=old_model['W1'] | |
299 classifier.W2.value=old_model['W2'] | |
300 classifier.b1.value=old_model['b1'] | |
301 classifier.b2.value=old_model['b2'] | |
218 | 302 |
219 | 303 |
220 # the cost we minimize during training is the negative log likelihood of | 304 # the cost we minimize during training is the negative log likelihood of |
221 # the model plus the regularization terms (L1 and L2); cost is expressed | 305 # the model plus the regularization terms (L1 and L2); cost is expressed |
222 # here symbolically | 306 # here symbolically |
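The two new keyword arguments above, `init_model` and `channel`, let a run be warm-started from a previously saved set of weights and report progress through an experiment channel (`channel.save()` is called whenever one is provided). A minimal sketch of such a call, not part of the changeset; the path and the value of `nb_hidden` are hypothetical, and `nb_hidden`/`nb_targets` must match the shapes stored in the archive, as the comment in the diff warns:

```python
# Hypothetical warm-start call; the model path and nb_hidden are illustrative only.
# The archive is expected to hold W1, W2, b1, b2, as written by
# numpy.savez('model.npy', ...) at the end of mlp_full_nist.
mlp_full_nist(verbose=1,
              data_set=0,                                  # 0 = nist_all, 1 = nist_P07, 2 = PNIST07
              nb_hidden=500,                               # must match the saved W1/b1 dimensions
              init_model='some_previous_run/model.npy.npz',
              channel=0)                                   # 0 = no experiment channel
```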
287 temp=0 | 371 temp=0 |
288 divergence_flag=0 | 372 divergence_flag=0 |
289 | 373 |
290 | 374 |
291 | 375 |
292 if verbose == 1: | 376 |
293 print 'starting training' | 377 print 'starting training' |
378 sys.stdout.flush() | |
294 while(minibatch_index*batch_size<nb_max_exemples): | 379 while(minibatch_index*batch_size<nb_max_exemples): |
295 | 380 |
296 for x, y in dataset.train(batch_size): | 381 for x, y in dataset.train(batch_size): |
297 | 382 |
298 #if we are using the classic learning rate decay, adjust it before training on the current mini-batch | 383 #if we are using the classic learning rate decay, adjust it before training on the current mini-batch |
301 | 386 |
302 | 387 |
303 #train model | 388 #train model |
304 cost_ij = train_model(x,y) | 389 cost_ij = train_model(x,y) |
305 | 390 |
306 if (minibatch_index+1) % validation_frequency == 0: | 391 if (minibatch_index) % validation_frequency == 0: |
307 #save the current learning rate | 392 #save the current learning rate |
308 learning_rate_list.append(classifier.lr.value) | 393 learning_rate_list.append(classifier.lr.value) |
309 divergence_flag_list.append(divergence_flag) | 394 divergence_flag_list.append(divergence_flag) |
395 | |
396 | |
310 | 397 |
311 # compute the validation error | 398 # compute the validation error |
312 this_validation_loss = 0. | 399 this_validation_loss = 0. |
313 temp=0 | 400 temp=0 |
314 for xv,yv in dataset.valid(1): | 401 for xv,yv in dataset.valid(1): |
317 temp=temp+1 | 404 temp=temp+1 |
318 # get the average by dividing with the number of minibatches | 405 # get the average by dividing with the number of minibatches |
319 this_validation_loss /= temp | 406 this_validation_loss /= temp |
320 #save the validation loss | 407 #save the validation loss |
321 total_validation_error_list.append(this_validation_loss) | 408 total_validation_error_list.append(this_validation_loss) |
322 if verbose == 1: | 409 |
323 print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % | 410 print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % |
324 (epoch, minibatch_index+1,classifier.lr.value, | 411 (epoch, minibatch_index+1,classifier.lr.value, |
325 this_validation_loss*100.)) | 412 this_validation_loss*100.)) |
413 sys.stdout.flush() | |
414 | |
415 #save temp results to check during training | |
416 numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\ | |
417 learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list) | |
326 | 418 |
327 # if we got the best validation score until now | 419 # if we got the best validation score until now |
328 if this_validation_loss < best_validation_loss: | 420 if this_validation_loss < best_validation_loss: |
329 # save best validation score and iteration number | 421 # save best validation score and iteration number |
330 best_validation_loss = this_validation_loss | 422 best_validation_loss = this_validation_loss |
342 temp =0 | 434 temp =0 |
343 for xt,yt in dataset.test(batch_size): | 435 for xt,yt in dataset.test(batch_size): |
344 test_score += test_model(xt,yt) | 436 test_score += test_model(xt,yt) |
345 temp = temp+1 | 437 temp = temp+1 |
346 test_score /= temp | 438 test_score /= temp |
347 if verbose == 1: | 439 |
348 print(('epoch %i, minibatch %i, test error of best ' | 440 print(('epoch %i, minibatch %i, test error of best ' |
349 'model %f %%') % | 441 'model %f %%') % |
350 (epoch, minibatch_index+1, | 442 (epoch, minibatch_index+1, |
351 test_score*100.)) | 443 test_score*100.)) |
444 sys.stdout.flush() | |
352 | 445 |
353 # if the validation error is going up, we are overfitting (or oscillating) | 446 # if the validation error is going up, we are overfitting (or oscillating) |
354 # check if we are allowed to continue and if we will adjust the learning rate | 447 # check if we are allowed to continue and if we will adjust the learning rate |
355 elif this_validation_loss >= best_validation_loss: | 448 elif this_validation_loss >= best_validation_loss: |
356 | 449 |
372 temp=0 | 465 temp=0 |
373 for xt,yt in dataset.test(batch_size): | 466 for xt,yt in dataset.test(batch_size): |
374 test_score += test_model(xt,yt) | 467 test_score += test_model(xt,yt) |
375 temp=temp+1 | 468 temp=temp+1 |
376 test_score /= temp | 469 test_score /= temp |
377 if verbose == 1: | 470 |
378 print ' validation error is going up, possibly stopping soon' | 471 print ' validation error is going up, possibly stopping soon' |
379 print((' epoch %i, minibatch %i, test error of best ' | 472 print((' epoch %i, minibatch %i, test error of best ' |
380 'model %f %%') % | 473 'model %f %%') % |
381 (epoch, minibatch_index+1, | 474 (epoch, minibatch_index+1, |
382 test_score*100.)) | 475 test_score*100.)) |
476 sys.stdout.flush() | |
383 | 477 |
384 | 478 |
385 | 479 |
386 # check early stop condition | 480 # check early stop condition |
387 if divergence_flag==max_div_count: | 481 if divergence_flag==max_div_count: |
391 | 485 |
392 #check if we have seen enough examples | 486 #check if we have seen enough examples |
393 #force one epoch at least | 487 #force one epoch at least |
394 if epoch>0 and minibatch_index*batch_size>nb_max_exemples: | 488 if epoch>0 and minibatch_index*batch_size>nb_max_exemples: |
395 break | 489 break |
490 | |
491 | |
492 | |
396 | 493 |
397 | 494 |
398 time_n= time_n + batch_size | 495 time_n= time_n + batch_size |
399 minibatch_index = minibatch_index + 1 | 496 minibatch_index = minibatch_index + 1 |
400 | 497 |
401 # we have finished looping through the training set | 498 # we have finished looping through the training set |
402 epoch = epoch+1 | 499 epoch = epoch+1 |
403 end_time = time.clock() | 500 end_time = time.clock() |
404 if verbose == 1: | 501 |
405 print(('Optimization complete. Best validation score of %f %% ' | 502 print(('Optimization complete. Best validation score of %f %% ' |
406 'obtained at iteration %i, with test performance %f %%') % | 503 'obtained at iteration %i, with test performance %f %%') % |
407 (best_validation_loss * 100., best_iter, test_score*100.)) | 504 (best_validation_loss * 100., best_iter, test_score*100.)) |
408 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) | 505 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) |
409 print minibatch_index | 506 print minibatch_index |
507 sys.stdout.flush() | |
410 | 508 |
411 #save the model and the weights | 509 #save the model and the weights |
412 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) | 510 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) |
413 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ | 511 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ |
414 learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list) | 512 learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list) |
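Both `numpy.savez` calls above append a `.npz` extension to the given filename, so the final curves end up in `results.npy.npz` (and the periodic snapshots in `temp_results.npy.npz`). A quick sketch, not part of the changeset, of how the saved curves could be inspected with pylab, which the file already imports:

```python
import numpy
import pylab

# numpy.savez appends '.npz', so 'results.npy' is stored as 'results.npy.npz'
results = numpy.load('results.npy.npz')

# keys follow the savez call in mlp_full_nist
valid_errors = results['total_validation_error_list']
lr_values = results['learning_rate_list']

pylab.subplot(2, 1, 1)
pylab.plot(valid_errors)
pylab.ylabel('validation error')
pylab.subplot(2, 1, 2)
pylab.plot(lr_values)
pylab.ylabel('learning rate')
pylab.show()
```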
425 nb_hidden=state.nb_hidden,\ | 523 nb_hidden=state.nb_hidden,\ |
426 adaptive_lr=state.adaptive_lr,\ | 524 adaptive_lr=state.adaptive_lr,\ |
427 tau=state.tau,\ | 525 tau=state.tau,\ |
428 verbose = state.verbose,\ | 526 verbose = state.verbose,\ |
429 lr_t2_factor=state.lr_t2_factor, | 527 lr_t2_factor=state.lr_t2_factor, |
430 data_set=state.data_set) | 528 data_set=state.data_set, |
529 channel=channel) | |
431 state.train_error=train_error | 530 state.train_error=train_error |
432 state.validation_error=validation_error | 531 state.validation_error=validation_error |
433 state.test_error=test_error | 532 state.test_error=test_error |
434 state.nb_exemples=nb_exemples | 533 state.nb_exemples=nb_exemples |
435 state.time=time | 534 state.time=time |
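For reference, the new `mlp_get_nist_error` helper added earlier in this changeset reloads such a saved model and returns its NIST test error as a percentage; a minimal usage sketch with a hypothetical path:

```python
# Hypothetical model path; the archive must contain W1, W2, b1 and b2.
err = mlp_get_nist_error(model_name='some_previous_run/model.npy.npz',
                         data_set=0)   # 0 = nist_all, 1 = nist_P07
print 'NIST test error: %f %%' % err
```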