comparison baseline/mlp/mlp_nist.py @ 377:0b7e64e8e93f

branch merge
author Arnaud Bergeron <abergeron@gmail.com>
date Sun, 25 Apr 2010 17:12:03 -0400
parents 76b7182dd32e
children 60a4432b8071
comparing 376:01445a75c702 with 377:0b7e64e8e93f

   21  to do lr first, then add regularization)
   22  
   23  """
   24  __docformat__ = 'restructedtext en'
   25  
+  26  import sys
   27  import pdb
   28  import numpy
   29  import pylab
   30  import theano
   31  import theano.tensor as T
  162              # represents a mistake in prediction
  163              return T.mean(T.neq(self.y_pred, y))
  164          else:
  165              raise NotImplementedError()
  166  
+ 167  def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz',
+ 168                         data_set=0):
+ 169  
+ 170  
+ 171  
+ 172      # allocate symbolic variables for the data
+ 173      x = T.fmatrix()  # the data is presented as rasterized images
+ 174      y = T.lvector()  # the labels are presented as 1D vector of
+ 175                       # [long int] labels
+ 176  
+ 177      # load the data set and create an mlp based on the dimensions of the model
+ 178      model=numpy.load(model_name)
+ 179      W1=model['W1']
+ 180      W2=model['W2']
+ 181      b1=model['b1']
+ 182      b2=model['b2']
+ 183      nb_hidden=b1.shape[0]
+ 184      input_dim=W1.shape[0]
+ 185      nb_targets=b2.shape[0]
+ 186      learning_rate=0.1
+ 187  
+ 188  
+ 189      if data_set==0:
+ 190          dataset=datasets.nist_all()
+ 191      elif data_set==1:
+ 192          dataset=datasets.nist_P07()
+ 193  
+ 194  
+ 195      classifier = MLP( input=x,\
+ 196                        n_in=input_dim,\
+ 197                        n_hidden=nb_hidden,\
+ 198                        n_out=nb_targets,
+ 199                        learning_rate=learning_rate)
+ 200  
+ 201  
+ 202      #overwrite weights with weights from model
+ 203      classifier.W1.value=W1
+ 204      classifier.W2.value=W2
+ 205      classifier.b1.value=b1
+ 206      classifier.b2.value=b2
+ 207  
+ 208  
+ 209      cost = classifier.negative_log_likelihood(y) \
+ 210          + 0.0 * classifier.L1 \
+ 211          + 0.0 * classifier.L2_sqr
+ 212  
+ 213      # compiling a theano function that computes the mistakes that are made by
+ 214      # the model on a minibatch
+ 215      test_model = theano.function([x,y], classifier.errors(y))
+ 216  
+ 217  
+ 218  
+ 219      #get the test error
+ 220      #use a batch size of 1 so we can get the sub-class error
+ 221      #without messing with matrices (will be upgraded later)
+ 222      test_score=0
+ 223      temp=0
+ 224      for xt,yt in dataset.test(20):
+ 225          test_score += test_model(xt,yt)
+ 226          temp = temp+1
+ 227      test_score /= temp
+ 228  
+ 229  
+ 230      return test_score*100
+ 231  
+ 232  
+ 233  
+ 234  
+ 235  
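Taken together, the new helper simply rebuilds an MLP of matching dimensions, loads the saved weights and averages classifier.errors over the test stream. A minimal driver might look like the sketch below; the model path is a placeholder and the datasets module is assumed to be importable exactly as elsewhere in mlp_nist.py.

# Illustrative driver only: 'my_best_model.npy.npz' is a hypothetical path,
# not one produced by this changeset.
if __name__ == '__main__':
    err = mlp_get_nist_error(model_name='my_best_model.npy.npz', data_set=0)
    print 'NIST test error: %f %%' % err
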
  236  
  237  def mlp_full_nist( verbose = 1,\
  238                     adaptive_lr = 0,\
  239                     data_set=0,\
  240                     learning_rate=0.01,\
  242                     L2_reg = 0.0001,\
  243                     nb_max_exemples=1000000,\
  244                     batch_size=20,\
  245                     nb_hidden = 30,\
  246                     nb_targets = 62,
  247                     tau=1e6,\
- 178                     lr_t2_factor=0.5):
- 179  
- 180  
+ 248                     lr_t2_factor=0.5,\
+ 249                     init_model=0,\
+ 250                     channel=0):
+ 251  
+ 252  
+ 253      if channel!=0:
+ 254          channel.save()
  255      configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
  256  
  257      #save initial learning rate if classical adaptive lr is used
  258      initial_lr=learning_rate
- 185      max_div_count=3
+ 259      max_div_count=1000
  260  
  261  
  262      total_validation_error_list = []
  263      total_train_error_list = []
  264      learning_rate_list=[]
  267  
  268      if data_set==0:
  269          dataset=datasets.nist_all()
  270      elif data_set==1:
  271          dataset=datasets.nist_P07()
+ 272      elif data_set==2:
+ 273          dataset=datasets.PNIST07()
  274  
  275  
  276  
  277  
  278      ishape = (32,32) # this is the size of NIST images
  289                        n_hidden=nb_hidden,\
  290                        n_out=nb_targets,
  291                        learning_rate=learning_rate)
  292  
  293  
+ 294      # check if we want to initialise the weights with a previously calculated model
+ 295      # dimensions must be consistent between old model and current configuration!!!!!! (nb_hidden and nb_targets)
+ 296      if init_model!=0:
+ 297          old_model=numpy.load(init_model)
+ 298          classifier.W1.value=old_model['W1']
+ 299          classifier.W2.value=old_model['W2']
+ 300          classifier.b1.value=old_model['b1']
+ 301          classifier.b2.value=old_model['b2']
  302  
  303  
  304      # the cost we minimize during training is the negative log likelihood of
  305      # the model plus the regularization terms (L1 and L2); cost is expressed
  306      # here symbolically
  371      temp=0
  372      divergence_flag=0
  373  
  374  
  375  
- 292      if verbose == 1:
- 293          print 'starting training'
+ 376  
+ 377      print 'starting training'
+ 378      sys.stdout.flush()
  379      while(minibatch_index*batch_size<nb_max_exemples):
  380  
  381          for x, y in dataset.train(batch_size):
  382  
  383              #if we are using the classic learning rate decay, adjust it before training of the current mini-batch
  386  
  387  
  388              #train model
  389              cost_ij = train_model(x,y)
  390  
- 306              if (minibatch_index+1) % validation_frequency == 0:
+ 391              if (minibatch_index) % validation_frequency == 0:
  392                  #save the current learning rate
  393                  learning_rate_list.append(classifier.lr.value)
  394                  divergence_flag_list.append(divergence_flag)
+ 395  
+ 396  
  397  
  398                  # compute the validation error
  399                  this_validation_loss = 0.
  400                  temp=0
  401                  for xv,yv in dataset.valid(1):
  404                      temp=temp+1
  405                  # get the average by dividing with the number of minibatches
  406                  this_validation_loss /= temp
  407                  #save the validation loss
  408                  total_validation_error_list.append(this_validation_loss)
- 322                  if verbose == 1:
+ 409  
  410                  print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') %
  411                      (epoch, minibatch_index+1,classifier.lr.value,
  412                      this_validation_loss*100.))
+ 413                  sys.stdout.flush()
+ 414  
+ 415                  #save temp results to check during training
+ 416                  numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\
+ 417                      learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list)
  418  
  419                  # if we got the best validation score until now
  420                  if this_validation_loss < best_validation_loss:
  421                      # save best validation score and iteration number
  422                      best_validation_loss = this_validation_loss
  434                      temp =0
  435                      for xt,yt in dataset.test(batch_size):
  436                          test_score += test_model(xt,yt)
  437                          temp = temp+1
  438                      test_score /= temp
- 347                      if verbose == 1:
+ 439  
  440                      print(('epoch %i, minibatch %i, test error of best '
  441                             'model %f %%') %
  442                            (epoch, minibatch_index+1,
  443                             test_score*100.))
+ 444                      sys.stdout.flush()
  445  
  446                  # if the validation error is going up, we are overfitting (or oscillating)
  447                  # check if we are allowed to continue and if we will adjust the learning rate
  448                  elif this_validation_loss >= best_validation_loss:
  449  
  465                      temp=0
  466                      for xt,yt in dataset.test(batch_size):
  467                          test_score += test_model(xt,yt)
  468                          temp=temp+1
  469                      test_score /= temp
- 377                      if verbose == 1:
+ 470  
  471                      print ' validation error is going up, possibly stopping soon'
  472                      print((' epoch %i, minibatch %i, test error of best '
  473                             'model %f %%') %
  474                            (epoch, minibatch_index+1,
  475                             test_score*100.))
+ 476                      sys.stdout.flush()
  477  
  478  
  479  
  480                  # check early stop condition
  481                  if divergence_flag==max_div_count:
  485  
  486                  #check if we have seen enough examples
  487                  #force one epoch at least
  488                  if epoch>0 and minibatch_index*batch_size>nb_max_exemples:
  489                      break
+ 490  
+ 491  
+ 492  
  493  
  494  
  495              time_n= time_n + batch_size
  496              minibatch_index = minibatch_index + 1
  497  
  498          # we have finished looping through the training set
  499          epoch = epoch+1
  500      end_time = time.clock()
- 404      if verbose == 1:
+ 501  
  502      print(('Optimization complete. Best validation score of %f %% '
  503             'obtained at iteration %i, with test performance %f %%') %
  504            (best_validation_loss * 100., best_iter, test_score*100.))
  505      print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
  506      print minibatch_index
+ 507      sys.stdout.flush()
  508  
  509      #save the model and the weights
  510      numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
  511      numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\
  512          learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list)
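One detail worth noting about the two calls just above: numpy.savez appends a '.npz' suffix when the file name does not already carry one, so the archives land on disk as 'model.npy.npz' and 'results.npy.npz' (which is also why the default model_name in mlp_get_nist_error ends in '.npy.npz'). A small sketch of reading the training curves back, assuming it runs from the directory where training wrote them:

# Reload the result curves written by mlp_full_nist; note the extra '.npz'
# suffix that numpy.savez added to the requested file name.
results = numpy.load('results.npy.npz')
print results['config']
print results['total_validation_error_list']
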
  523                          nb_hidden=state.nb_hidden,\
  524                          adaptive_lr=state.adaptive_lr,\
  525                          tau=state.tau,\
  526                          verbose = state.verbose,\
  527                          lr_t2_factor=state.lr_t2_factor,
- 430                          data_set=state.data_set)
+ 528                          data_set=state.data_set,
+ 529                          channel=channel)
  530      state.train_error=train_error
  531      state.validation_error=validation_error
  532      state.test_error=test_error
  533      state.nb_exemples=nb_exemples
  534      state.time=time