comparison baseline/conv_mlp/convolutional_mlp.py @ 270:d41fe003fade

Convolutional network with the correct dataset
author Jeremy Eustache <jeremy.eustache@voila.fr>
date Sat, 20 Mar 2010 15:49:55 -0400
parents a491d3600a77
children
comparing 269:4533350d7361 with 270:d41fe003fade

22 """ 22 """
23 23
24 import numpy, theano, cPickle, gzip, time 24 import numpy, theano, cPickle, gzip, time
25 import theano.tensor as T 25 import theano.tensor as T
26 import theano.sandbox.softsign 26 import theano.sandbox.softsign
27 import sys
27 import pylearn.datasets.MNIST 28 import pylearn.datasets.MNIST
28 from pylearn.io import filetensor as ft 29 from pylearn.io import filetensor as ft
29 from theano.sandbox import conv, downsample 30 from theano.sandbox import conv, downsample
31
32 from ift6266 import datasets
30 import theano,pylearn.version,ift6266 33 import theano,pylearn.version,ift6266
31 34
32 class LeNetConvPoolLayer(object): 35 class LeNetConvPoolLayer(object):
33 36
34 def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)): 37 def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
@@ -176 +179 @@
             return T.mean(T.neq(self.y_pred, y))
         else:
             raise NotImplementedError()


-def load_dataset(fname,batch=20):
-
-    # directory that contains the NIST data
-    # the following path will work if you are logged in on a machine
-    # of the DIRO network
-    datapath = '/data/lisa/data/nist/by_class/'
-    # the .ft file stores the NIST digits in an efficient format: the digits
-    # are kept in an NxD matrix, where N is the number of images and D is the
-    # number of pixels per image (32x32 = 1024). Each pixel is a value between
-    # 0 and 255, corresponding to a grey level. The values are stored as
-    # uint8, i.e. bytes.
-    f = open(datapath+'digits/digits_train_data.ft')
-    # Make sure you have enough memory to load the whole dataset into RAM.
-    # Otherwise, use ft.arraylike, a class built specifically for files that
-    # should not be loaded into RAM.
-    d = ft.read(f)
-
-    # NB: do not forget to divide the pixel values by 255. if you use the data
-    # as inputs to a neural network and want inputs between 0 and 1.
-    # digits_train_data.ft holds the images, digits_train_labels.ft holds the
-    # labels
-    f = open(datapath+'digits/digits_train_labels.ft')
-    labels = ft.read(f)
-
-
-    # Load the dataset
-    #f = gzip.open(fname,'rb')
-    #train_set, valid_set, test_set = cPickle.load(f)
-    #f.close()
-
-    # make minibatches of size 20
-    batch_size = batch    # size of the minibatch
-
-    # Dealing with the training set
-    # get the list of training images (x) and their labels (y)
-    (train_set_x, train_set_y) = (d[:200000,:],labels[:200000])
-    # initialize the list of training minibatches with empty list
-    train_batches = []
-    for i in xrange(0, len(train_set_x), batch_size):
-        # add to the list of minibatches the minibatch starting at
-        # position i, ending at position i+batch_size
-        # a minibatch is a pair ; the first element of the pair is a list
-        # of datapoints, the second element is the list of corresponding
-        # labels
-        train_batches = train_batches + \
-            [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])]
-
-    #print train_batches[500]
-
-    # Dealing with the validation set
-    (valid_set_x, valid_set_y) = (d[200000:270000,:],labels[200000:270000])
-    # initialize the list of validation minibatches
-    valid_batches = []
-    for i in xrange(0, len(valid_set_x), batch_size):
-        valid_batches = valid_batches + \
-            [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])]
-
-    # Dealing with the testing set
-    (test_set_x, test_set_y) = (d[270000:340000,:],labels[270000:340000])
-    # initialize the list of testing minibatches
-    test_batches = []
-    for i in xrange(0, len(test_set_x), batch_size):
-        test_batches = test_batches + \
-            [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])]
-
-
-    return train_batches, valid_batches, test_batches
-
-
-def evaluate_lenet5(learning_rate=0.1, n_iter=200, batch_size=20, n_kern0=20, n_kern1=50, n_layer=3, filter_shape0=5, filter_shape1=5, dataset='mnist.pkl.gz'):
+def evaluate_lenet5(learning_rate=0.1, n_iter=200, batch_size=20, n_kern0=20, n_kern1=50, n_layer=3, filter_shape0=5, filter_shape1=5, sigmoide_size=500, dataset='mnist.pkl.gz'):
     rng = numpy.random.RandomState(23455)

     print 'Before load dataset'
-    train_batches, valid_batches, test_batches = load_dataset(dataset,batch_size)
+    dataset=datasets.nist_digits
+    train_batches= dataset.train(batch_size)
+    valid_batches=dataset.valid(batch_size)
+    test_batches=dataset.test(batch_size)
+    #print valid_batches.shape
+    #print test_batches.shape
     print 'After load dataset'

     ishape = (32,32)     # this is the size of NIST images
     n_kern2=80
     n_kern3=100
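
The deleted load_dataset above carried the only documentation of the raw NIST filetensor layout: each row of digits_train_data.ft is one 32x32 image stored as 1024 uint8 grey levels that should be rescaled to [0, 1], and digits_train_labels.ft holds the matching labels. For readers without the shared ift6266 datasets module, a minimal sketch of that manual loading pattern (a hypothetical helper, assuming the DIRO path used above and that ft.read accepts an open file object, as in the removed code) could look like:

    from pylearn.io import filetensor as ft

    def load_nist_digit_batches(datapath='/data/lisa/data/nist/by_class/', batch_size=20):
        # each row of the .ft data file is one 32x32 image stored as 1024 uint8 grey levels
        data = ft.read(open(datapath + 'digits/digits_train_data.ft'))
        labels = ft.read(open(datapath + 'digits/digits_train_labels.ft'))
        # rescale grey levels from [0, 255] to [0, 1] before feeding them to the network
        data = data / 255.
        # cut the first 200000 examples into (x, y) minibatches, as load_dataset did
        return [(data[i:i + batch_size], labels[i:i + batch_size])
                for i in xrange(0, 200000, batch_size)]

The changeset replaces all of this with the datasets.nist_digits object, whose train/valid/test methods stream (x, y) minibatches directly.
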
@@ -303 +241 @@
     else:

         fshape0=(32-filter_shape0+1)/2
         layer1_input = layer0.output.flatten(2)
         # construct a fully-connected sigmoidal layer
-        layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape0*fshape0, n_out=500)
+        layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape0*fshape0, n_out=sigmoide_size)

-        layer2 = LogisticRegression(input=layer1.output, n_in=500, n_out=10)
+        layer2 = LogisticRegression(input=layer1.output, n_in=sigmoide_size, n_out=10)
         cost = layer2.negative_log_likelihood(y)
         test_model = theano.function([x,y], layer2.errors(y))
         params = layer2.params+ layer1.params + layer0.params

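
The fshape expressions encode the usual size bookkeeping: a 'valid' convolution shrinks an s x s map to (s-f+1) x (s-f+1), and the (2,2) max-pooling halves each side. With the default 5x5 filters on 32x32 NIST images:

    # feature-map size after a 'valid' convolution followed by (2,2) max-pooling
    filter_shape0 = filter_shape1 = 5             # the defaults of evaluate_lenet5
    fshape0 = (32 - filter_shape0 + 1) / 2        # (32-5+1)/2 = 14
    fshape1 = (fshape0 - filter_shape1 + 1) / 2   # (14-5+1)/2 = 5

These are the sizes that the n_in=n_kern0*fshape0*fshape0 and n_in=n_kern1*fshape1*fshape1 terms feed into the sigmoidal layer.
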
@@ -333 +271 @@
             filter_shape=(n_kern3,n_kern2,filter_shape3,filter_shape3), poolsize=(2,2))

         layer4_input = layer3.output.flatten(2)

         layer4 = SigmoidalLayer(rng, input=layer4_input,
-                                n_in=n_kern3*fshape3*fshape3, n_out=500)
+                                n_in=n_kern3*fshape3*fshape3, n_out=sigmoide_size)


-        layer5 = LogisticRegression(input=layer4.output, n_in=500, n_out=10)
+        layer5 = LogisticRegression(input=layer4.output, n_in=sigmoide_size, n_out=10)

         cost = layer5.negative_log_likelihood(y)

         test_model = theano.function([x,y], layer5.errors(y))

@@ -352 +290 @@
         fshape1=(fshape0-filter_shape1+1)/2
         fshape2=(fshape1-filter_shape2+1)/2
         layer3_input = layer2.output.flatten(2)

         layer3 = SigmoidalLayer(rng, input=layer3_input,
-                                n_in=n_kern2*fshape2*fshape2, n_out=500)
+                                n_in=n_kern2*fshape2*fshape2, n_out=sigmoide_size)


-        layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)
+        layer4 = LogisticRegression(input=layer3.output, n_in=sigmoide_size, n_out=10)

         cost = layer4.negative_log_likelihood(y)

         test_model = theano.function([x,y], layer4.errors(y))

@@ -376 +314 @@
         # This will generate a matrix of shape (20,32*4*4) = (20,512)
         layer2_input = layer1.output.flatten(2)

         # construct a fully-connected sigmoidal layer
         layer2 = SigmoidalLayer(rng, input=layer2_input,
-                                n_in=n_kern1*fshape1*fshape1, n_out=500)
+                                n_in=n_kern1*fshape1*fshape1, n_out=sigmoide_size)


         # classify the values of the fully-connected sigmoidal layer
-        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+        layer3 = LogisticRegression(input=layer2.output, n_in=sigmoide_size, n_out=10)

         # the cost we minimize during training is the NLL of the model
         cost = layer3.negative_log_likelihood(y)

         # create a function to compute the mistakes that are made by the model

@@ -412 +350 @@

     ###############
     # TRAIN MODEL #
     ###############

-    n_minibatches = len(train_batches)
+    #n_minibatches = len(train_batches)
+    n_minibatches=0
+    n_valid=0
+    n_test=0
+    for x, y in dataset.train(batch_size):
+        if x.shape[0] == batch_size:
+            n_minibatches+=1
+    n_minibatches*=batch_size
+    print n_minibatches
+
+    for x, y in dataset.valid(batch_size):
+        if x.shape[0] == batch_size:
+            n_valid+=1
+    n_valid*=batch_size
+    print n_valid
+
+    for x, y in dataset.test(batch_size):
+        if x.shape[0] == batch_size:
+            n_test+=1
+    n_test*=batch_size
+    print n_test
+

     # early-stopping parameters
     patience = 10000    # look at this many examples regardless
     patience_increase = 2     # wait this much longer when a new best is
                               # found
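
The three counting passes added at the top of this hunk repeat one pattern: walk the stream, count only full minibatches, and convert the count into a number of examples. A compact, equivalent helper (purely illustrative, not part of the changeset) would be:

    def count_full_batch_examples(stream, batch_size):
        # number of examples that arrive in minibatches of exactly batch_size elements
        return batch_size * sum(1 for x, y in stream if x.shape[0] == batch_size)

    n_minibatches = count_full_batch_examples(dataset.train(batch_size), batch_size)
    n_valid = count_full_batch_examples(dataset.valid(batch_size), batch_size)
    n_test = count_full_batch_examples(dataset.test(batch_size), batch_size)

Note that, as in the code above, the value stored in n_minibatches is an example count rather than a batch count, because of the final multiplication by batch_size.
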
@@ -431 +390 @@
     best_validation_loss = float('inf')
     best_iter = 0
     test_score = 0.
     start_time = time.clock()

+
     # have a maximum of `n_iter` iterations through the entire dataset
-    for iter in xrange(n_iter * n_minibatches):
-
-        # get epoch and minibatch index
-        epoch = iter / n_minibatches
-        minibatch_index = iter % n_minibatches
-
-        # get the minibatches corresponding to `iter` modulo
-        # `len(train_batches)`
-        x,y = train_batches[ minibatch_index ]
-
-        if iter %100 == 0:
-            print 'training @ iter = ', iter
-        cost_ij = train_model(x,y)
-
-        if (iter+1) % validation_frequency == 0:
-
-            # compute zero-one loss on validation set
-            this_validation_loss = 0.
-            for x,y in valid_batches:
-                # sum up the errors for each minibatch
-                this_validation_loss += test_model(x,y)
-
-            # get the average by dividing with the number of minibatches
-            this_validation_loss /= len(valid_batches)
-            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                  (epoch, minibatch_index+1, n_minibatches, \
-                   this_validation_loss*100.))
-
-
-            # if we got the best validation score until now
-            if this_validation_loss < best_validation_loss:
-
-                #improve patience if loss improvement is good enough
-                if this_validation_loss < best_validation_loss * \
-                       improvement_threshold :
-                    patience = max(patience, iter * patience_increase)
-
-                # save best validation score and iteration number
-                best_validation_loss = this_validation_loss
-                best_iter = iter
-
-                # test it on the test set
-                test_score = 0.
-                for x,y in test_batches:
-                    test_score += test_model(x,y)
-                test_score /= len(test_batches)
-                print((' epoch %i, minibatch %i/%i, test error of best '
-                       'model %f %%') %
-                      (epoch, minibatch_index+1, n_minibatches,
-                       test_score*100.))
-
-        if patience <= iter :
-            break
+    iter=0
+    for epoch in xrange(n_iter):
+        for x, y in train_batches:
+            if x.shape[0] != batch_size:
+                continue
+            iter+=1
+
+            # get epoch and minibatch index
+            #epoch = iter / n_minibatches
+            minibatch_index = iter % n_minibatches
+
+            if iter %100 == 0:
+                print 'training @ iter = ', iter
+            cost_ij = train_model(x,y)
+
+
+            # compute zero-one loss on validation set
+            this_validation_loss = 0.
+            for x,y in valid_batches:
+                if x.shape[0] != batch_size:
+                    continue
+                # sum up the errors for each minibatch
+                this_validation_loss += test_model(x,y)
+
+            # get the average by dividing with the number of minibatches
+            this_validation_loss /= n_valid
+            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                  (epoch, minibatch_index+1, n_minibatches, \
+                   this_validation_loss*100.))
+
+
+            # if we got the best validation score until now
+            if this_validation_loss < best_validation_loss:
+
+                #improve patience if loss improvement is good enough
+                if this_validation_loss < best_validation_loss * \
+                       improvement_threshold :
+                    patience = max(patience, iter * patience_increase)
+
+                # save best validation score and iteration number
+                best_validation_loss = this_validation_loss
+                best_iter = iter
+
+                # test it on the test set
+                test_score = 0.
+                for x,y in test_batches:
+                    if x.shape[0] != batch_size:
+                        continue
+                    test_score += test_model(x,y)
+                test_score /= n_test
+                print((' epoch %i, minibatch %i/%i, test error of best '
+                       'model %f %%') %
+                      (epoch, minibatch_index+1, n_minibatches,
+                       test_score*100.))
+
+            if patience <= iter :
+                break

     end_time = time.clock()
     print('Optimization complete.')
     print('Best validation score of %f %% obtained at iteration %i,'\
           'with test performance %f %%' %
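
The patience bookkeeping above is the usual early-stopping recipe: train for at least `patience` updates, and push `patience` further out whenever the validation loss improves by more than improvement_threshold. A self-contained illustration with made-up loss values (the improvement_threshold value here is illustrative only; the real one, like validation_frequency, is defined in an unchanged part of the file not shown in this comparison):

    patience, patience_increase = 10000, 2   # values set in the hunk above
    improvement_threshold = 0.995            # illustrative value, not taken from this excerpt
    best_validation_loss = float('inf')

    for iter, this_validation_loss in [(3000, 0.20), (6000, 0.12), (15000, 0.13)]:
        if this_validation_loss < best_validation_loss:
            if this_validation_loss < best_validation_loss * improvement_threshold:
                # a large enough improvement buys more patience
                patience = max(patience, iter * patience_increase)
            best_validation_loss = this_validation_loss
        if patience <= iter:
            print 'stopping early at update', iter   # patience grew to 12000 < 15000
            break
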
@@ -500 +464 @@
 if __name__ == '__main__':
     evaluate_lenet5()

 def experiment(state, channel):
     print 'start experiment'
-    (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.n_layer, state.filter_shape0, state.filter_shape1)
+    (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.n_layer, state.filter_shape0, state.filter_shape1,state.sigmoide_size)
     print 'end experiment'
+
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])

     state.best_validation_loss = best_validation_loss
     state.test_score = test_score
     state.minutes_trained = minutes_trained
     state.iter = iter
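
For completeness, evaluate_lenet5 can also be called directly with the new hyper-parameter instead of going through a jobman-style state object; a minimal sketch using only the defaults from the new signature (and the 4-tuple that experiment() above unpacks):

    (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(
        learning_rate=0.1, n_iter=200, batch_size=20,
        n_kern0=20, n_kern1=50, n_layer=3,
        filter_shape0=5, filter_shape1=5, sigmoide_size=500)
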