comparison baseline/mlp/mlp_nist.py @ 322:743907366476
code clean up in progress
author | xaviermuller |
---|---|
date | Tue, 06 Apr 2010 16:00:52 -0400 |
parents | 1e4bf5a5b46d |
children | 7a7615f940e8 |
321:403b9e6ecfaa | 322:743907366476 |
---|---|
31 import time | 31 import time |
32 import theano.tensor.nnet | 32 import theano.tensor.nnet |
33 import pylearn | 33 import pylearn |
34 import theano,pylearn.version,ift6266 | 34 import theano,pylearn.version,ift6266 |
35 from pylearn.io import filetensor as ft | 35 from pylearn.io import filetensor as ft |
36 from ift6266 import datasets | |
36 | 37 |
37 data_path = '/data/lisa/data/nist/by_class/' | 38 data_path = '/data/lisa/data/nist/by_class/' |
38 | 39 |
39 class MLP(object): | 40 class MLP(object): |
40 """Multi-Layer Perceptron Class | 41 """Multi-Layer Perceptron Class |
163 raise NotImplementedError() | 164 raise NotImplementedError() |
164 | 165 |
165 | 166 |
166 def mlp_full_nist( verbose = 1,\ | 167 def mlp_full_nist( verbose = 1,\ |
167 adaptive_lr = 0,\ | 168 adaptive_lr = 0,\ |
168 train_data = 'all/all_train_data.ft',\ | 169 data_set=0,\ |
169 train_labels = 'all/all_train_labels.ft',\ | |
170 test_data = 'all/all_test_data.ft',\ | |
171 test_labels = 'all/all_test_labels.ft',\ | |
172 learning_rate=0.01,\ | 170 learning_rate=0.01,\ |
173 L1_reg = 0.00,\ | 171 L1_reg = 0.00,\ |
174 L2_reg = 0.0001,\ | 172 L2_reg = 0.0001,\ |
175 nb_max_exemples=1000000,\ | 173 nb_max_exemples=1000000,\ |
176 batch_size=20,\ | 174 batch_size=20,\ |
177 nb_hidden = 500,\ | 175 nb_hidden = 30,\ |
178 nb_targets = 62, | 176 nb_targets = 62, |
179 tau=1e6,\ | 177 tau=1e6,\ |
180 lr_t2_factor=0.5): | 178 lr_t2_factor=0.5): |
181 | 179 |
182 | 180 |
188 total_validation_error_list = [] | 186 total_validation_error_list = [] |
189 total_train_error_list = [] | 187 total_train_error_list = [] |
190 learning_rate_list=[] | 188 learning_rate_list=[] |
191 best_training_error=float('inf'); | 189 best_training_error=float('inf'); |
192 | 190 |
193 | 191 if data_set==0: |
194 | 192 dataset=datasets.nist_all() |
195 | 193 |
196 f = open(data_path+train_data) | 194 |
197 g= open(data_path+train_labels) | 195 |
198 h = open(data_path+test_data) | |
199 i= open(data_path+test_labels) | |
200 | |
201 raw_train_data = ft.read(f) | |
202 raw_train_labels = ft.read(g) | |
203 raw_test_data = ft.read(h) | |
204 raw_test_labels = ft.read(i) | |
205 | |
206 f.close() | |
207 g.close() | |
208 i.close() | |
209 h.close() | |
210 #create a validation set the same size as the test size | |
211 #use the end of the training array for this purpose | |
212 #discard the last remaining so we get a %batch_size number | |
213 test_size=len(raw_test_labels) | |
214 test_size = int(test_size/batch_size) | |
215 test_size*=batch_size | |
216 train_size = len(raw_train_data) | |
217 train_size = int(train_size/batch_size) | |
218 train_size*=batch_size | |
219 validation_size =test_size | |
220 offset = train_size-test_size | |
221 if verbose == 1: | |
222 print 'train size = %d' %train_size | |
223 print 'test size = %d' %test_size | |
224 print 'valid size = %d' %validation_size | |
225 print 'offset = %d' %offset | |
226 | |
227 | |
228 train_set = (raw_train_data,raw_train_labels) | |
229 train_batches = [] | |
230 for i in xrange(0, train_size-test_size, batch_size): | |
231 train_batches = train_batches + \ | |
232 [(raw_train_data[i:i+batch_size], raw_train_labels[i:i+batch_size])] | |
233 | |
234 test_batches = [] | |
235 for i in xrange(0, test_size, batch_size): | |
236 test_batches = test_batches + \ | |
237 [(raw_test_data[i:i+batch_size], raw_test_labels[i:i+batch_size])] | |
238 | |
239 validation_batches = [] | |
240 for i in xrange(0, test_size, batch_size): | |
241 validation_batches = validation_batches + \ | |
242 [(raw_train_data[offset+i:offset+i+batch_size], raw_train_labels[offset+i:offset+i+batch_size])] | |
243 | |
244 | 196 |
245 ishape = (32,32) # this is the size of NIST images | 197 ishape = (32,32) # this is the size of NIST images |
246 | 198 |
247 # allocate symbolic variables for the data | 199 # allocate symbolic variables for the data |
248 x = T.fmatrix() # the data is presented as rasterized images | 200 x = T.fmatrix() # the data is presented as rasterized images |
249 y = T.lvector() # the labels are presented as 1D vector of | 201 y = T.lvector() # the labels are presented as 1D vector of |
250 # [long int] labels | 202 # [long int] labels |
251 | 203 |
252 if verbose==1: | 204 |
253 print 'finished parsing the data' | |
254 # construct the logistic regression class | 205 # construct the logistic regression class |
255 classifier = MLP( input=x.reshape((batch_size,32*32)),\ | 206 classifier = MLP( input=x,\ |
256 n_in=32*32,\ | 207 n_in=32*32,\ |
257 n_hidden=nb_hidden,\ | 208 n_hidden=nb_hidden,\ |
258 n_out=nb_targets, | 209 n_out=nb_targets, |
259 learning_rate=learning_rate) | 210 learning_rate=learning_rate) |
260 | 211 |
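This hunk deletes the whole hand-written data pipeline in favor of `datasets.nist_all()`, and the classifier is now built on the plain symbolic `x` rather than `x.reshape((batch_size,32*32))`. For reference, a condensed sketch of what the removed lines did: trim each split to a multiple of `batch_size`, carve the validation set out of the tail of the training array, and slice everything into `(data, labels)` minibatches (paths are the defaults from the removed keyword arguments):

```python
# Condensed sketch of the deleted loading/batching code, not new functionality.
from pylearn.io import filetensor as ft

data_path  = '/data/lisa/data/nist/by_class/'
batch_size = 20

raw_train_data   = ft.read(open(data_path + 'all/all_train_data.ft'))
raw_train_labels = ft.read(open(data_path + 'all/all_train_labels.ft'))
raw_test_labels  = ft.read(open(data_path + 'all/all_test_labels.ft'))

# trim both splits to a multiple of batch_size
train_size = (len(raw_train_data)  // batch_size) * batch_size
test_size  = (len(raw_test_labels) // batch_size) * batch_size
offset     = train_size - test_size   # validation set = tail of the training array

train_batches = [(raw_train_data[i:i + batch_size],
                  raw_train_labels[i:i + batch_size])
                 for i in xrange(0, train_size - test_size, batch_size)]

validation_batches = [(raw_train_data[offset + i:offset + i + batch_size],
                       raw_train_labels[offset + i:offset + i + batch_size])
                      for i in xrange(0, test_size, batch_size)]
```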
287 | 238 |
288 # compiling a theano function `train_model` that returns the cost, but in | 239 # compiling a theano function `train_model` that returns the cost, but in |
289 # the same time updates the parameter of the model based on the rules | 240 # the same time updates the parameter of the model based on the rules |
290 # defined in `updates` | 241 # defined in `updates` |
291 train_model = theano.function([x, y], cost, updates = updates ) | 242 train_model = theano.function([x, y], cost, updates = updates ) |
292 n_minibatches = len(train_batches) | 243 |
244 | |
245 | |
293 | 246 |
294 | 247 |
295 | 248 |
296 | 249 |
297 | 250 |
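The `train_model` compilation is unchanged in this hunk, but it is the piece the rewritten loop below relies on: because `updates` is passed to `theano.function`, a single call returns the minibatch cost and applies the gradient step in the same pass. A small self-contained illustration of that pattern with a toy softmax model (not the MLP class defined in this file):

```python
# Standalone illustration of theano.function(..., updates=...):
# one compiled call returns the cost *and* applies the parameter update.
import numpy
import theano
import theano.tensor as T

x = T.fmatrix('x')   # minibatch of rasterized 32x32 images
y = T.lvector('y')   # integer class labels

W = theano.shared(numpy.zeros((32 * 32, 62), dtype=theano.config.floatX), 'W')
p_y = T.nnet.softmax(T.dot(x, W))
cost = -T.mean(T.log(p_y)[T.arange(y.shape[0]), y])   # negative log-likelihood

learning_rate = 0.01
updates = [(W, W - learning_rate * T.grad(cost, W))]

train_model = theano.function([x, y], cost, updates=updates)
```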
301 #2) validation error is going up twice in a row(probable overfitting) | 254 #2) validation error is going up twice in a row(probable overfitting) |
302 | 255 |
303 # This means we no longer stop on slow convergence as low learning rates stopped | 256 # This means we no longer stop on slow convergence as low learning rates stopped |
304 # too fast. | 257 # too fast. |
305 | 258 |
306 # no longer relevant | 259 #approximate number of samples in the training set |
307 patience =nb_max_exemples/batch_size | 260 #this is just to have a validation frequency |
261 #roughly proportional to the training set |
262 n_minibatches = 650000/batch_size | |
263 | |
264 | |
265 patience =nb_max_exemples/batch_size #in units of minibatch | |
308 patience_increase = 2 # wait this much longer when a new best is | 266 patience_increase = 2 # wait this much longer when a new best is |
309 # found | 267 # found |
310 improvement_threshold = 0.995 # a relative improvement of this much is | 268 improvement_threshold = 0.995 # a relative improvement of this much is |
311 # considered significant | 269 # considered significant |
312 validation_frequency = n_minibatches/4 | 270 validation_frequency = n_minibatches/4 |
313 | 271 |
314 | 272 |
315 | 273 |
316 | 274 |
317 best_params = None | 275 |
318 best_validation_loss = float('inf') | 276 best_validation_loss = float('inf') |
319 best_iter = 0 | 277 best_iter = 0 |
320 test_score = 0. | 278 test_score = 0. |
321 start_time = time.clock() | 279 start_time = time.clock() |
322 n_iter = nb_max_exemples/batch_size # nb of max times we are allowed to run through all exemples | |
323 n_iter = n_iter/n_minibatches + 1 #round up | |
324 n_iter=max(1,n_iter) # run at least once on short debug call | |
325 time_n=0 #in unit of exemples | 280 time_n=0 #in unit of exemples |
326 | 281 minibatch_index=0 |
327 | 282 epoch=0 |
328 | 283 temp=0 |
284 | |
285 | |
286 | |
329 if verbose == 1: | 287 if verbose == 1: |
330 print 'looping at most %d times through the data set' %n_iter | 288 print 'looking at most at %i exemples' %nb_max_exemples |
331 for iter in xrange(n_iter* n_minibatches): | 289 while(minibatch_index*batch_size<nb_max_exemples): |
332 | 290 |
333 # get epoch and minibatch index | 291 for x, y in dataset.train(batch_size): |
334 epoch = iter / n_minibatches | 292 |
335 minibatch_index = iter % n_minibatches | |
336 | |
337 | |
338 if adaptive_lr==2: | |
339 classifier.lr.value = tau*initial_lr/(tau+time_n) | |
340 | |
341 | |
342 # get the minibatches corresponding to `iter` modulo | |
343 # `len(train_batches)` | |
344 x,y = train_batches[ minibatch_index ] | |
345 # convert to float | |
346 x_float = x/255.0 | |
347 cost_ij = train_model(x_float,y) | |
348 | |
349 if (iter+1) % validation_frequency == 0: | |
350 # compute zero-one loss on validation set | |
351 | 293 |
352 this_validation_loss = 0. | 294 minibatch_index = minibatch_index + 1 |
353 for x,y in validation_batches: | 295 if adaptive_lr==2: |
354 # sum up the errors for each minibatch | 296 classifier.lr.value = tau*initial_lr/(tau+time_n) |
355 x_float = x/255.0 | 297 |
356 this_validation_loss += test_model(x_float,y) | |
357 # get the average by dividing with the number of minibatches | |
358 this_validation_loss /= len(validation_batches) | |
359 #save the validation loss | |
360 total_validation_error_list.append(this_validation_loss) | |
361 | 298 |
362 #get the training error rate | 299 #train model |
363 this_train_loss=0 | 300 cost_ij = train_model(x,y) |
364 for x,y in train_batches: | 301 |
365 # sum up the errors for each minibatch | 302 if (minibatch_index+1) % validation_frequency == 0: |
366 x_float = x/255.0 | |
367 this_train_loss += test_model(x_float,y) | |
368 # get the average by dividing with the number of minibatches | |
369 this_train_loss /= len(train_batches) | |
370 #save the validation loss | |
371 total_train_error_list.append(this_train_loss) | |
372 if(this_train_loss<best_training_error): | |
373 best_training_error=this_train_loss | |
374 | 303 |
375 if verbose == 1: | 304 #save the current learning rate |
376 print('epoch %i, minibatch %i/%i, validation error %f, training error %f %%' % \ | 305 learning_rate_list.append(classifier.lr.value) |
377 (epoch, minibatch_index+1, n_minibatches, \ | 306 |
378 this_validation_loss*100.,this_train_loss*100)) | 307 # compute the validation error |
379 print 'learning rate = %f' %classifier.lr.value | 308 this_validation_loss = 0. |
380 print 'time = %i' %time_n | 309 temp=0 |
381 | 310 for xv,yv in dataset.valid(1): |
382 | 311 # sum up the errors for each minibatch |
383 #save the learning rate | 312 axxa=test_model(xv,yv) |
384 learning_rate_list.append(classifier.lr.value) | 313 this_validation_loss += axxa |
385 | 314 temp=temp+1 |
386 | 315 # get the average by dividing with the number of minibatches |
387 # if we got the best validation score until now | 316 this_validation_loss /= temp |
388 if this_validation_loss < best_validation_loss: | 317 #save the validation loss |
389 # save best validation score and iteration number | 318 total_validation_error_list.append(this_validation_loss) |
390 best_validation_loss = this_validation_loss | |
391 best_iter = iter | |
392 # reset patience if we are going down again | |
393 # so we continue exploring | |
394 patience=nb_max_exemples/batch_size | |
395 # test it on the test set | |
396 test_score = 0. | |
397 for x,y in test_batches: | |
398 x_float=x/255.0 | |
399 test_score += test_model(x_float,y) | |
400 test_score /= len(test_batches) | |
401 if verbose == 1: | 319 if verbose == 1: |
402 print((' epoch %i, minibatch %i/%i, test error of best ' | 320 print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % |
403 'model %f %%') % | 321 (epoch, minibatch_index+1,classifier.lr.value, |
404 (epoch, minibatch_index+1, n_minibatches, | 322 this_validation_loss*100.)) |
405 test_score*100.)) | 323 |
406 | 324 # if we got the best validation score until now |
407 # if the validation error is going up, we are overfitting (or oscillating) | 325 if this_validation_loss < best_validation_loss: |
408 # stop converging but run at least to next validation | 326 # save best validation score and iteration number |
410 # to check overfitting or oscillation | 347 # to check overfitting or oscillation |
410 # the saved weights of the model will be a bit off in that case | 328 best_iter = minibatch_index |
411 elif this_validation_loss >= best_validation_loss: | 329 # reset patience if we are going down again |
412 #calculate the test error at this point and exit | 330 # so we continue exploring |
413 # test it on the test set | 331 patience=nb_max_exemples/batch_size |
414 # however, if adaptive_lr is true, try reducing the lr to | 332 # test it on the test set |
415 # get us out of an oscillation | 353 # get us out of an oscillation |
416 if adaptive_lr==1: | 334 temp =0 |
417 classifier.lr.value=classifier.lr.value*lr_t2_factor | 335 for xt,yt in dataset.test(batch_size): |
418 | 336 test_score += test_model(xt,yt) |
419 test_score = 0. | 337 temp = temp+1 |
420 #cap the patience so we are allowed one more validation error | 338 test_score /= temp |
421 #calculation before aborting | 339 if verbose == 1: |
422 patience = iter+validation_frequency+1 | 340 print(('epoch %i, minibatch %i, test error of best ' |
423 for x,y in test_batches: | 341 'model %f %%') % |
424 x_float=x/255.0 | 342 (epoch, minibatch_index+1, |
425 test_score += test_model(x_float,y) | 343 test_score*100.)) |
426 test_score /= len(test_batches) | 344 |
427 if verbose == 1: | 345 # if the validation error is going up, we are overfitting (or oscillating) |
428 print ' validation error is going up, possibly stopping soon' | 346 # stop converging but run at least to next validation |
429 print((' epoch %i, minibatch %i/%i, test error of best ' | 347 # to check overfitting or ocsillation |
430 'model %f %%') % | 348 # the saved weights of the model will be a bit off in that case |
431 (epoch, minibatch_index+1, n_minibatches, | 349 elif this_validation_loss >= best_validation_loss: |
432 test_score*100.)) | 350 #calculate the test error at this point and exit |
433 | 351 # test it on the test set |
434 | 352 # however, if adaptive_lr is true, try reducing the lr to |
435 | 353 # get us out of an oscilliation |
436 | 354 if adaptive_lr==1: |
437 if iter>patience: | 355 classifier.lr.value=classifier.lr.value*lr_t2_factor |
438 print 'we have diverged' | 356 |
439 break | 357 test_score = 0. |
440 | 358 #cap the patience so we are allowed one more validation error |
441 | 359 #calculation before aborting |
442 time_n= time_n + batch_size | 360 patience = minibatch_index+validation_frequency+1 |
361 temp=0 | |
362 for xt,yt in dataset.test(batch_size): | |
363 test_score += test_model(xt,yt) | |
364 temp=temp+1 | |
365 test_score /= temp | |
366 if verbose == 1: | |
367 print ' validation error is going up, possibly stopping soon' | |
368 print((' epoch %i, minibatch %i, test error of best ' | |
369 'model %f %%') % | |
370 (epoch, minibatch_index+1, | |
371 test_score*100.)) | |
372 | |
373 | |
374 | |
375 | |
376 if minibatch_index>patience: | |
377 print 'we have diverged' | |
378 break | |
379 | |
380 | |
381 time_n= time_n + batch_size | |
382 epoch = epoch+1 | |
443 end_time = time.clock() | 383 end_time = time.clock() |
444 if verbose == 1: | 384 if verbose == 1: |
445 print(('Optimization complete. Best validation score of %f %% ' | 385 print(('Optimization complete. Best validation score of %f %% ' |
446 'obtained at iteration %i, with test performance %f %%') % | 386 'obtained at iteration %i, with test performance %f %%') % |
447 (best_validation_loss * 100., best_iter, test_score*100.)) | 387 (best_validation_loss * 100., best_iter, test_score*100.)) |
448 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) | 388 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) |
449 print iter | 389 print minibatch_index |
450 | 390 |
451 #save the model and the weights | 391 #save the model and the weights |
452 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) | 392 numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value) |
453 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ | 393 numpy.savez('results.npy',config=configuration,total_train_error_list=total_train_error_list,total_validation_error_list=total_validation_error_list,\ |
454 learning_rate_list=learning_rate_list) | 394 learning_rate_list=learning_rate_list) |
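For reference, the two learning-rate schedules that the rewritten loop keeps from the earlier revision: with `adaptive_lr == 2` the rate is reset before every minibatch to `tau*initial_lr/(tau+time_n)`, where `time_n` counts training examples seen and `initial_lr` is presumably the starting `learning_rate` set in an elided part of the function; with `adaptive_lr == 1` the rate is multiplied by `lr_t2_factor` whenever the validation error goes up. A small standalone sketch of the two rules (the helper names are illustrative, not from the file):

```python
# Sketch of the two schedules selected by the adaptive_lr flag in mlp_full_nist.
def decayed_lr(initial_lr, time_n, tau=1e6):
    # adaptive_lr == 2: smooth 1/t decay applied before every minibatch;
    # time_n is the number of training examples seen so far.
    return tau * initial_lr / (tau + time_n)

def reduced_lr(current_lr, lr_t2_factor=0.5):
    # adaptive_lr == 1: multiplicative cut, applied only when the
    # validation error stops improving.
    return current_lr * lr_t2_factor

if __name__ == '__main__':
    lr0 = 0.01
    for seen in (0, 250000, 1000000, 4000000):
        print('after %7d examples: lr = %.6f' % (seen, decayed_lr(lr0, seen)))
    print('after a validation increase: lr = %.6f' % reduced_lr(lr0))
```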