comparison baseline/mlp/mlp_nist.py @ 322:743907366476

code clean up in progress
author xaviermuller
date Tue, 06 Apr 2010 16:00:52 -0400
parents 1e4bf5a5b46d
children 7a7615f940e8
comparing 321:403b9e6ecfaa with 322:743907366476
31 import time 31 import time
32 import theano.tensor.nnet 32 import theano.tensor.nnet
33 import pylearn 33 import pylearn
34 import theano,pylearn.version,ift6266 34 import theano,pylearn.version,ift6266
35 from pylearn.io import filetensor as ft 35 from pylearn.io import filetensor as ft
36 from ift6266 import datasets
36 37
37 data_path = '/data/lisa/data/nist/by_class/' 38 data_path = '/data/lisa/data/nist/by_class/'
38 39
39 class MLP(object): 40 class MLP(object):
40 """Multi-Layer Perceptron Class 41 """Multi-Layer Perceptron Class
163 raise NotImplementedError() 164 raise NotImplementedError()
164 165
165 166
166 def mlp_full_nist( verbose = 1,\ 167 def mlp_full_nist( verbose = 1,\
167 adaptive_lr = 0,\ 168 adaptive_lr = 0,\
168 train_data = 'all/all_train_data.ft',\ 169 data_set=0,\
169 train_labels = 'all/all_train_labels.ft',\
170 test_data = 'all/all_test_data.ft',\
171 test_labels = 'all/all_test_labels.ft',\
172 learning_rate=0.01,\ 170 learning_rate=0.01,\
173 L1_reg = 0.00,\ 171 L1_reg = 0.00,\
174 L2_reg = 0.0001,\ 172 L2_reg = 0.0001,\
175 nb_max_exemples=1000000,\ 173 nb_max_exemples=1000000,\
176 batch_size=20,\ 174 batch_size=20,\
177 nb_hidden = 500,\ 175 nb_hidden = 30,\
178 nb_targets = 62, 176 nb_targets = 62,
179 tau=1e6,\ 177 tau=1e6,\
180 lr_t2_factor=0.5): 178 lr_t2_factor=0.5):
181 179
182 180
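As an illustration, a call to the rewritten function using only keyword arguments visible in the new signature above; the values shown are simply the new defaults (adaptive_lr=1 and adaptive_lr=2 select the two learning-rate schemes handled later in the training loop):

    # Illustrative call; every keyword below appears in the new signature.
    mlp_full_nist(verbose=1,
                  adaptive_lr=0,            # 1: cut lr on validation increase, 2: 1/t decay
                  data_set=0,               # 0 selects datasets.nist_all() below
                  learning_rate=0.01,
                  L1_reg=0.00,
                  L2_reg=0.0001,
                  nb_max_exemples=1000000,
                  batch_size=20,
                  nb_hidden=30,
                  nb_targets=62,
                  tau=1e6,
                  lr_t2_factor=0.5)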
188 total_validation_error_list = [] 186 total_validation_error_list = []
189 total_train_error_list = [] 187 total_train_error_list = []
190 learning_rate_list=[] 188 learning_rate_list=[]
191 best_training_error=float('inf'); 189 best_training_error=float('inf');
192 190
193 191 if data_set==0:
194 192 dataset=datasets.nist_all()
195 193
196 f = open(data_path+train_data) 194
197 g= open(data_path+train_labels) 195
198 h = open(data_path+test_data)
199 i= open(data_path+test_labels)
200
201 raw_train_data = ft.read(f)
202 raw_train_labels = ft.read(g)
203 raw_test_data = ft.read(h)
204 raw_test_labels = ft.read(i)
205
206 f.close()
207 g.close()
208 i.close()
209 h.close()
210 #create a validation set the same size as the test set
211 #use the end of the training array for this purpose
212 #discard the leftover examples so each split size is a multiple of batch_size
213 test_size=len(raw_test_labels)
214 test_size = int(test_size/batch_size)
215 test_size*=batch_size
216 train_size = len(raw_train_data)
217 train_size = int(train_size/batch_size)
218 train_size*=batch_size
219 validation_size =test_size
220 offset = train_size-test_size
221 if verbose == 1:
222 print 'train size = %d' %train_size
223 print 'test size = %d' %test_size
224 print 'valid size = %d' %validation_size
225 print 'offset = %d' %offset
226
227
228 train_set = (raw_train_data,raw_train_labels)
229 train_batches = []
230 for i in xrange(0, train_size-test_size, batch_size):
231 train_batches = train_batches + \
232 [(raw_train_data[i:i+batch_size], raw_train_labels[i:i+batch_size])]
233
234 test_batches = []
235 for i in xrange(0, test_size, batch_size):
236 test_batches = test_batches + \
237 [(raw_test_data[i:i+batch_size], raw_test_labels[i:i+batch_size])]
238
239 validation_batches = []
240 for i in xrange(0, test_size, batch_size):
241 validation_batches = validation_batches + \
242 [(raw_train_data[offset+i:offset+i+batch_size], raw_train_labels[offset+i:offset+i+batch_size])]
243
244 196
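The block removed above (opening the filetensor files by hand, reading them with ft.read, carving a validation set out of the end of the training array, and building explicit lists of minibatches) is what this cleanup replaces with the ift6266.datasets helpers: datasets.nist_all() returns an object whose train, valid and test methods yield (x, y) minibatches, as used in the new training loop below. A minimal sketch of that iterator interface as this diff appears to assume it (the method names are taken from the new code; everything else is illustrative):

    # Sketch only: iterate a few minibatches from the assumed dataset object.
    from ift6266 import datasets

    dataset = datasets.nist_all()

    seen = 0
    for x, y in dataset.train(20):      # x: rasterized 32x32 images, y: integer labels
        seen += len(y)
        if seen >= 200:                 # look at a handful of batches, then stop
            break

    # the validation and test splits expose the same generator-style interface
    for xv, yv in dataset.valid(1):
        break
    for xt, yt in dataset.test(20):
        break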
245 ishape = (32,32) # this is the size of NIST images 197 ishape = (32,32) # this is the size of NIST images
246 198
247 # allocate symbolic variables for the data 199 # allocate symbolic variables for the data
248 x = T.fmatrix() # the data is presented as rasterized images 200 x = T.fmatrix() # the data is presented as rasterized images
249 y = T.lvector() # the labels are presented as 1D vector of 201 y = T.lvector() # the labels are presented as 1D vector of
250 # [long int] labels 202 # [long int] labels
251 203
252 if verbose==1: 204
253 print 'finished parsing the data'
254 # construct the MLP classifier 205 # construct the MLP classifier
255 classifier = MLP( input=x.reshape((batch_size,32*32)),\ 206 classifier = MLP( input=x,\
256 n_in=32*32,\ 207 n_in=32*32,\
257 n_hidden=nb_hidden,\ 208 n_hidden=nb_hidden,\
258 n_out=nb_targets, 209 n_out=nb_targets,
259 learning_rate=learning_rate) 210 learning_rate=learning_rate)
260 211
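The lines elided between this hunk and the next (old 261-286, new 212-237) are where cost and updates must be built, since the compiled train_model below uses both and the signature carries L1_reg and L2_reg penalties. A generic Theano sketch of what a regularized cost with plain SGD updates usually looks like at this point; the attribute names (negative_log_likelihood, L1, L2_sqr, params, and lr as a shared learning-rate variable) are assumptions in the style of the Theano MLP tutorial, not necessarily this class's exact API, and the snippet relies on the surrounding function scope for y, L1_reg and L2_reg:

    # Assumed-attribute sketch of the elided cost/updates construction.
    cost = classifier.negative_log_likelihood(y) \
           + L1_reg * classifier.L1 \
           + L2_reg * classifier.L2_sqr

    # one gradient per parameter, then a plain SGD update rule for each;
    # classifier.lr is assumed to be the shared variable whose .value the loop adjusts
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - classifier.lr * gparam)
               for param, gparam in zip(classifier.params, gparams)]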
287 238
288 # compiling a theano function `train_model` that returns the cost but, at 239 # compiling a theano function `train_model` that returns the cost but, at
289 # the same time, updates the parameters of the model based on the rules 240 # the same time, updates the parameters of the model based on the rules
290 # defined in `updates` 241 # defined in `updates`
291 train_model = theano.function([x, y], cost, updates = updates ) 242 train_model = theano.function([x, y], cost, updates = updates )
292 n_minibatches = len(train_batches) 243
244
245
293 246
294 247
295 248
296 249
297 250
301 #2) validation error is going up twice in a row (probable overfitting) 254 #2) validation error is going up twice in a row (probable overfitting)
302 255
303 # This means we no longer stop on slow convergence, since low learning rates 256 # This means we no longer stop on slow convergence, since low learning rates
304 # made that criterion trigger too early. 257 # made that criterion trigger too early.
305 258
306 # no longer relevant 259 #approximate number of samples in the training set
307 patience =nb_max_exemples/batch_size 260 #this is just to have a validation frequency
261 #roughly proportional to the training set size
262 n_minibatches = 650000/batch_size
263
264
265 patience = nb_max_exemples/batch_size #in units of minibatches
308 patience_increase = 2 # wait this much longer when a new best is 266 patience_increase = 2 # wait this much longer when a new best is
309 # found 267 # found
310 improvement_threshold = 0.995 # a relative improvement of this much is 268 improvement_threshold = 0.995 # a relative improvement of this much is
311 # considered significant 269 # considered significant
312 validation_frequency = n_minibatches/4 270 validation_frequency = n_minibatches/4
313 271
314 272
315 273
316 274
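For concreteness, what these early-stopping defaults work out to with batch_size=20 and nb_max_exemples=1000000 (650000 is the approximate training-set size mentioned in the comment above):

    # Worked numbers for the defaults above.
    batch_size = 20
    n_minibatches = 650000 / batch_size          # 32500 minibatches per approximate epoch
    validation_frequency = n_minibatches / 4     # 8125 minibatches ~ 162500 examples
    patience = 1000000 / batch_size              # 50000 minibatches before giving up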
317 best_params = None 275
318 best_validation_loss = float('inf') 276 best_validation_loss = float('inf')
319 best_iter = 0 277 best_iter = 0
320 test_score = 0. 278 test_score = 0.
321 start_time = time.clock() 279 start_time = time.clock()
322 n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples
323 n_iter = n_iter/n_minibatches + 1 #round up
324 n_iter=max(1,n_iter) # run at least once on short debug call
325 time_n=0 #in units of examples 280 time_n=0 #in units of examples
326 281 minibatch_index=0
327 282 epoch=0
328 283 temp=0
284
285
286
329 if verbose == 1: 287 if verbose == 1:
330 print 'looping at most %d times through the data set' %n_iter 288 print 'looking at at most %i examples' %nb_max_exemples
331 for iter in xrange(n_iter* n_minibatches): 289 while(minibatch_index*batch_size<nb_max_exemples):
332 290
333 # get epoch and minibatch index 291 for x, y in dataset.train(batch_size):
334 epoch = iter / n_minibatches 292
335 minibatch_index = iter % n_minibatches
336
337
338 if adaptive_lr==2:
339 classifier.lr.value = tau*initial_lr/(tau+time_n)
340
341
342 # get the minibatches corresponding to `iter` modulo
343 # `len(train_batches)`
344 x,y = train_batches[ minibatch_index ]
345 # convert to float
346 x_float = x/255.0
347 cost_ij = train_model(x_float,y)
348
349 if (iter+1) % validation_frequency == 0:
350 # compute zero-one loss on validation set
351 293
352 this_validation_loss = 0. 294 minibatch_index = minibatch_index + 1
353 for x,y in validation_batches: 295 if adaptive_lr==2:
354 # sum up the errors for each minibatch 296 classifier.lr.value = tau*initial_lr/(tau+time_n)
355 x_float = x/255.0 297
356 this_validation_loss += test_model(x_float,y)
357 # get the average by dividing by the number of minibatches
358 this_validation_loss /= len(validation_batches)
359 #save the validation loss
360 total_validation_error_list.append(this_validation_loss)
361 298
362 #get the training error rate 299 #train model
363 this_train_loss=0 300 cost_ij = train_model(x,y)
364 for x,y in train_batches: 301
365 # sum up the errors for each minibatch 302 if (minibatch_index+1) % validation_frequency == 0:
366 x_float = x/255.0
367 this_train_loss += test_model(x_float,y)
368 # get the average by dividing by the number of minibatches
369 this_train_loss /= len(train_batches)
370 #save the training error
371 total_train_error_list.append(this_train_loss)
372 if(this_train_loss<best_training_error):
373 best_training_error=this_train_loss
374 303
375 if verbose == 1: 304 #save the current learning rate
376 print('epoch %i, minibatch %i/%i, validation error %f, training error %f %%' % \ 305 learning_rate_list.append(classifier.lr.value)
377 (epoch, minibatch_index+1, n_minibatches, \ 306
378 this_validation_loss*100.,this_train_loss*100)) 307 # compute the validation error
379 print 'learning rate = %f' %classifier.lr.value 308 this_validation_loss = 0.
380 print 'time = %i' %time_n 309 temp=0
381 310 for xv,yv in dataset.valid(1):
382 311 # sum up the errors for each minibatch
383 #save the learning rate 312 axxa=test_model(xv,yv)
384 learning_rate_list.append(classifier.lr.value) 313 this_validation_loss += axxa
385 314 temp=temp+1
386 315 # get the average by dividing by the number of minibatches
387 # if we got the best validation score until now 316 this_validation_loss /= temp
388 if this_validation_loss < best_validation_loss: 317 #save the validation loss
389 # save best validation score and iteration number 318 total_validation_error_list.append(this_validation_loss)
390 best_validation_loss = this_validation_loss
391 best_iter = iter
392 # reset patience if we are going down again
393 # so we continue exploring
394 patience=nb_max_exemples/batch_size
395 # test it on the test set
396 test_score = 0.
397 for x,y in test_batches:
398 x_float=x/255.0
399 test_score += test_model(x_float,y)
400 test_score /= len(test_batches)
401 if verbose == 1: 319 if verbose == 1:
402 print((' epoch %i, minibatch %i/%i, test error of best ' 320 print(('epoch %i, minibatch %i, learning rate %f, current validation error %f ') %
403 'model %f %%') % 321 (epoch, minibatch_index+1,classifier.lr.value,
404 (epoch, minibatch_index+1, n_minibatches, 322 this_validation_loss*100.))
405 test_score*100.)) 323
406 324 # if we got the best validation score until now
407 # if the validation error is going up, we are overfitting (or oscillating) 325 if this_validation_loss < best_validation_loss:
408 # stop converging but run at least to next validation 326 # save best validation score and iteration number
409 # to check overfitting or oscillation 327 best_validation_loss = this_validation_loss
410 # the saved weights of the model will be a bit off in that case 328 best_iter = minibatch_index
411 elif this_validation_loss >= best_validation_loss: 329 # reset patience if we are going down again
412 #calculate the test error at this point and exit 330 # so we continue exploring
413 # test it on the test set 331 patience=nb_max_exemples/batch_size
414 # however, if adaptive_lr is true, try reducing the lr to 332 # test it on the test set
415 # get us out of an oscillation 332 # test it on the test set
416 if adaptive_lr==1: 334 temp =0
417 classifier.lr.value=classifier.lr.value*lr_t2_factor 335 for xt,yt in dataset.test(batch_size):
418 336 test_score += test_model(xt,yt)
419 test_score = 0. 337 temp = temp+1
420 #cap the patience so we are allowed one more validation error 338 test_score /= temp
421 #calculation before aborting 339 if verbose == 1:
422 patience = iter+validation_frequency+1 340 print(('epoch %i, minibatch %i, test error of best '
423 for x,y in test_batches: 341 'model %f %%') %
424 x_float=x/255.0 342 (epoch, minibatch_index+1,
425 test_score += test_model(x_float,y) 343 test_score*100.))
426 test_score /= len(test_batches) 344
427 if verbose == 1: 345 # if the validation error is going up, we are overfitting (or oscillating)
428 print ' validation error is going up, possibly stopping soon' 346 # stop converging but run at least to next validation
429 print((' epoch %i, minibatch %i/%i, test error of best ' 347 # to check overfitting or oscillation
430 'model %f %%') % 348 # the saved weights of the model will be a bit off in that case
431 (epoch, minibatch_index+1, n_minibatches, 349 elif this_validation_loss >= best_validation_loss:
432 test_score*100.)) 350 #calculate the test error at this point and exit
433 351 # test it on the test set
434 352 # however, if adaptive_lr is true, try reducing the lr to
435 353 # get us out of an oscillation
436 354 if adaptive_lr==1:
437 if iter>patience: 355 classifier.lr.value=classifier.lr.value*lr_t2_factor
438 print 'we have diverged' 356
439 break 357 test_score = 0.
440 358 #cap the patience so we are allowed one more validation error
441 359 #calculation before aborting
442 time_n= time_n + batch_size 360 patience = minibatch_index+validation_frequency+1
361 temp=0
362 for xt,yt in dataset.test(batch_size):
363 test_score += test_model(xt,yt)
364 temp=temp+1
365 test_score /= temp
366 if verbose == 1:
367 print ' validation error is going up, possibly stopping soon'
368 print((' epoch %i, minibatch %i, test error of best '
369 'model %f %%') %
370 (epoch, minibatch_index+1,
371 test_score*100.))
372
373
374
375
376 if minibatch_index>patience:
377 print 'we have diverged'
378 break
379
380
381 time_n= time_n + batch_size
382 epoch = epoch+1
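Two learning-rate schedules are interleaved in the loop above: with adaptive_lr==2 the rate decays smoothly as examples are seen, lr = tau*initial_lr/(tau+time_n), and with adaptive_lr==1 the rate is cut by lr_t2_factor whenever the validation error stops improving. A small sketch of both rules; initial_lr is assumed to hold the starting learning rate and is set outside the hunks shown here:

    # Sketch of the two adaptive learning-rate rules used above.
    def decayed_lr(initial_lr, tau, time_n):
        # adaptive_lr == 2: 1/t-style decay, time_n counts examples seen
        return tau * initial_lr / (tau + time_n)

    def reduced_lr(current_lr, lr_t2_factor=0.5):
        # adaptive_lr == 1: multiplicative cut when validation error goes up
        return current_lr * lr_t2_factor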
443 end_time = time.clock() 383 end_time = time.clock()
444 if verbose == 1: 384 if verbose == 1:
445 print(('Optimization complete. Best validation score of %f %% ' 385 print(('Optimization complete. Best validation score of %f %% '
446 'obtained at iteration %i, with test performance %f %%') % 386 'obtained at iteration %i, with test performance %f %%') %
447 (best_validation_loss * 100., best_iter, test_score*100.)) 387 (best_validation_loss * 100., best_iter, test_score*100.))
448 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) 388 print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
449 print iter 389 print minibatch_index
450 390
451 #save the model and the weights 391 #save the model and the weights
452 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) 392 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
453 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ 393 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\
454 learning_rate_list=learning_rate_list) 394 learning_rate_list=learning_rate_list)
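A note on the two numpy.savez calls above: savez writes an .npz archive and appends that extension when the name does not already end in it, so the files should land on disk as model.npy.npz and results.npy.npz; configuration is also not defined in any hunk shown here and must come from elsewhere in the file. A hedged sketch of reading the saved arrays back:

    # Sketch: load the arrays saved by numpy.savez above.
    import numpy

    model = numpy.load('model.npy.npz')
    W1, b1 = model['W1'], model['b1']
    W2, b2 = model['W2'], model['b2']

    results = numpy.load('results.npy.npz')
    valid_curve = results['total_validation_error_list']
    lr_curve = results['learning_rate_list']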