comparison baseline/conv_mlp/convolutional_mlp.py @ 270:d41fe003fade
Convolutional network with the correct dataset
| author | Jeremy Eustache <jeremy.eustache@voila.fr> |
| --- | --- |
| date | Sat, 20 Mar 2010 15:49:55 -0400 |
| parents | a491d3600a77 |
| children | |
269:4533350d7361 | 270:d41fe003fade |
---|---|
22 """ | 22 """ |
23 | 23 |
24 import numpy, theano, cPickle, gzip, time | 24 import numpy, theano, cPickle, gzip, time |
25 import theano.tensor as T | 25 import theano.tensor as T |
26 import theano.sandbox.softsign | 26 import theano.sandbox.softsign |
| 27 import sys |
27 import pylearn.datasets.MNIST | 28 import pylearn.datasets.MNIST |
28 from pylearn.io import filetensor as ft | 29 from pylearn.io import filetensor as ft |
29 from theano.sandbox import conv, downsample | 30 from theano.sandbox import conv, downsample |
| 31 |
| 32 from ift6266 import datasets |
30 import theano,pylearn.version,ift6266 | 33 import theano,pylearn.version,ift6266 |
31 | 34 |
32 class LeNetConvPoolLayer(object): | 35 class LeNetConvPoolLayer(object): |
33 | 36 |
34 def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)): | 37 def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)): |
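Only the constructor signature of LeNetConvPoolLayer appears in this comparison. For orientation, a minimal sketch of what a conv + max-pool layer with this signature typically computes in Theano code of this vintage; the weight initialisation scale and the tanh nonlinearity are assumptions, not lines from this file:

```python
# Sketch only: mirrors the usual LeNet-style conv/pool layer, not this file's body.
import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import conv           # the file imports theano.sandbox.conv
from theano.tensor.signal import downsample   # the file imports theano.sandbox.downsample

class ConvPoolSketch(object):
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        # filter_shape = (n_filters, n_input_maps, filter_h, filter_w)
        fan_in = numpy.prod(filter_shape[1:])
        W_bound = numpy.sqrt(6. / fan_in)      # assumed initialisation scale
        self.W = theano.shared(numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX))
        self.b = theano.shared(numpy.zeros((filter_shape[0],),
                                           dtype=theano.config.floatX))
        # convolve the input feature maps with the filters, then max-pool
        conv_out = conv.conv2d(input, self.W,
                               filter_shape=filter_shape, image_shape=image_shape)
        pooled_out = downsample.max_pool_2d(conv_out, poolsize, ignore_border=True)
        # add the per-filter bias and squash
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.W, self.b]
```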
176 return T.mean(T.neq(self.y_pred, y)) | 179 return T.mean(T.neq(self.y_pred, y)) |
177 else: | 180 else: |
178 raise NotImplementedError() | 181 raise NotImplementedError() |
179 | 182 |
180 | 183 |
181 def load_dataset(fname,batch=20): | 184 def evaluate_lenet5(learning_rate=0.1, n_iter=200, batch_size=20, n_kern0=20, n_kern1=50, n_layer=3, filter_shape0=5, filter_shape1=5, sigmoide_size=500, dataset='mnist.pkl.gz'): |
182 | |
183 # directory that contains the NIST data |
184 # the following directory will work if you are logged in on a machine |
185 # of the DIRO network |
186 datapath = '/data/lisa/data/nist/by_class/' | |
187 # the .ft file contains the NIST digits in an efficient format. The digits |
188 # are stored in an NxD matrix, where N is the number of images and D is |
189 # the number of pixels per image (32x32 = 1024). Each pixel of the image is a |
190 # value between 0 and 255, corresponding to a grey level. The values are |
191 # stored as uint8, i.e. as bytes. |
192 f = open(datapath+'digits/digits_train_data.ft') | |
193 # Check that you have enough memory to load the full dataset into |
194 # memory. Otherwise, use ft.arraylike, a class built |
195 # specifically for files that you do not want to load into RAM. |
196 d = ft.read(f) | |
197 | |
198 # NB: do not forget to divide the pixel values by 255. if you ever |
199 # use the data as inputs to a neural network and you |
200 # want inputs between 0 and 1. |
201 # digits_train_data.ft contains the images, digits_train_labels.ft contains the |
202 # labels |
203 f = open(datapath+'digits/digits_train_labels.ft') | |
204 labels = ft.read(f) | |
205 | |
206 | |
207 # Load the dataset | |
208 #f = gzip.open(fname,'rb') | |
209 #train_set, valid_set, test_set = cPickle.load(f) | |
210 #f.close() | |
211 | |
212 # make minibatches of size 20 | |
213 batch_size = batch # size of the minibatch |
214 | |
215 # Dealing with the training set | |
216 # get the list of training images (x) and their labels (y) | |
217 (train_set_x, train_set_y) = (d[:200000,:],labels[:200000]) | |
218 # initialize the list of training minibatches with empty list | |
219 train_batches = [] | |
220 for i in xrange(0, len(train_set_x), batch_size): | |
221 # add to the list of minibatches the minibatch starting at | |
222 # position i, ending at position i+batch_size | |
223 # a minibatch is a pair ; the first element of the pair is a list | |
224 # of datapoints, the second element is the list of corresponding | |
225 # labels | |
226 train_batches = train_batches + \ | |
227 [(train_set_x[i:i+batch_size], train_set_y[i:i+batch_size])] | |
228 | |
229 #print train_batches[500] | |
230 | |
231 # Dealing with the validation set | |
232 (valid_set_x, valid_set_y) = (d[200000:270000,:],labels[200000:270000]) | |
233 # initialize the list of validation minibatches | |
234 valid_batches = [] | |
235 for i in xrange(0, len(valid_set_x), batch_size): | |
236 valid_batches = valid_batches + \ | |
237 [(valid_set_x[i:i+batch_size], valid_set_y[i:i+batch_size])] | |
238 | |
239 # Dealing with the testing set | |
240 (test_set_x, test_set_y) = (d[270000:340000,:],labels[270000:340000]) | |
241 # initialize the list of testing minibatches | |
242 test_batches = [] | |
243 for i in xrange(0, len(test_set_x), batch_size): | |
244 test_batches = test_batches + \ | |
245 [(test_set_x[i:i+batch_size], test_set_y[i:i+batch_size])] | |
246 | |
247 | |
248 return train_batches, valid_batches, test_batches | |
249 | |
250 | |
251 def evaluate_lenet5(learning_rate=0.1, n_iter=200, batch_size=20, n_kern0=20, n_kern1=50, n_layer=3, filter_shape0=5, filter_shape1=5, dataset='mnist.pkl.gz'): | |
252 rng = numpy.random.RandomState(23455) | 185 rng = numpy.random.RandomState(23455) |
253 | 186 |
254 print 'Before load dataset' | 187 print 'Before load dataset' |
255 train_batches, valid_batches, test_batches = load_dataset(dataset,batch_size) | 188 dataset=datasets.nist_digits |
| 189 train_batches= dataset.train(batch_size) |
| 190 valid_batches=dataset.valid(batch_size) |
| 191 test_batches=dataset.test(batch_size) |
| 192 #print valid_batches.shape |
| 193 #print test_batches.shape |
256 print 'After load dataset' | 194 print 'After load dataset' |
257 | 195 |
258 ishape = (32,32) # this is the size of NIST images | 196 ishape = (32,32) # this is the size of NIST images |
259 n_kern2=80 | 197 n_kern2=80 |
260 n_kern3=100 | 198 n_kern3=100 |
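The hunk above drops the hand-rolled load_dataset (a filetensor read followed by manual slicing into minibatch lists) in favour of the iterators from the ift6266 datasets module. A minimal usage sketch of that interface as the new code uses it; nist_digits and the train/valid/test methods appear in the diff, while the shapes noted in the comments are inferred from the 32x32 NIST description above:

```python
# Sketch of the minibatch interface the new code relies on (illustrative).
from ift6266 import datasets

batch_size = 20
dataset = datasets.nist_digits

for x, y in dataset.train(batch_size):
    # x: presumably a (batch_size, 32*32) array of grey-level pixels,
    # y: the corresponding digit labels
    if x.shape[0] != batch_size:
        # the training loop below skips the final, shorter minibatch
        continue
    # ... feed (x, y) to train_model(x, y), as in the training loop below
```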
303 else: | 241 else: |
304 | 242 |
305 fshape0=(32-filter_shape0+1)/2 | 243 fshape0=(32-filter_shape0+1)/2 |
306 layer1_input = layer0.output.flatten(2) | 244 layer1_input = layer0.output.flatten(2) |
307 # construct a fully-connected sigmoidal layer | 245 # construct a fully-connected sigmoidal layer |
308 layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape0*fshape0, n_out=500) | 246 layer1 = SigmoidalLayer(rng, input=layer1_input,n_in=n_kern0*fshape0*fshape0, n_out=sigmoide_size) |
309 | 247 |
310 layer2 = LogisticRegression(input=layer1.output, n_in=500, n_out=10) | 248 layer2 = LogisticRegression(input=layer1.output, n_in=sigmoide_size, n_out=10) |
311 cost = layer2.negative_log_likelihood(y) | 249 cost = layer2.negative_log_likelihood(y) |
312 test_model = theano.function([x,y], layer2.errors(y)) | 250 test_model = theano.function([x,y], layer2.errors(y)) |
313 params = layer2.params+ layer1.params + layer0.params | 251 params = layer2.params+ layer1.params + layer0.params |
314 | 252 |
315 | 253 |
333 filter_shape=(n_kern3,n_kern2,filter_shape3,filter_shape3), poolsize=(2,2)) | 271 filter_shape=(n_kern3,n_kern2,filter_shape3,filter_shape3), poolsize=(2,2)) |
334 | 272 |
335 layer4_input = layer3.output.flatten(2) | 273 layer4_input = layer3.output.flatten(2) |
336 | 274 |
337 layer4 = SigmoidalLayer(rng, input=layer4_input, | 275 layer4 = SigmoidalLayer(rng, input=layer4_input, |
338 n_in=n_kern3*fshape3*fshape3, n_out=500) | 276 n_in=n_kern3*fshape3*fshape3, n_out=sigmoide_size) |
339 | 277 |
340 | 278 |
341 layer5 = LogisticRegression(input=layer4.output, n_in=500, n_out=10) | 279 layer5 = LogisticRegression(input=layer4.output, n_in=sigmoide_size, n_out=10) |
342 | 280 |
343 cost = layer5.negative_log_likelihood(y) | 281 cost = layer5.negative_log_likelihood(y) |
344 | 282 |
345 test_model = theano.function([x,y], layer5.errors(y)) | 283 test_model = theano.function([x,y], layer5.errors(y)) |
346 | 284 |
352 fshape1=(fshape0-filter_shape1+1)/2 | 290 fshape1=(fshape0-filter_shape1+1)/2 |
353 fshape2=(fshape1-filter_shape2+1)/2 | 291 fshape2=(fshape1-filter_shape2+1)/2 |
354 layer3_input = layer2.output.flatten(2) | 292 layer3_input = layer2.output.flatten(2) |
355 | 293 |
356 layer3 = SigmoidalLayer(rng, input=layer3_input, | 294 layer3 = SigmoidalLayer(rng, input=layer3_input, |
357 n_in=n_kern2*fshape2*fshape2, n_out=500) | 295 n_in=n_kern2*fshape2*fshape2, n_out=sigmoide_size) |
358 | 296 |
359 | 297 |
360 layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10) | 298 layer4 = LogisticRegression(input=layer3.output, n_in=sigmoide_size, n_out=10) |
361 | 299 |
362 cost = layer4.negative_log_likelihood(y) | 300 cost = layer4.negative_log_likelihood(y) |
363 | 301 |
364 test_model = theano.function([x,y], layer4.errors(y)) | 302 test_model = theano.function([x,y], layer4.errors(y)) |
365 | 303 |
376 # This will generate a matrix of shape (20, n_kern1*fshape1*fshape1), i.e. (20,1250) with the defaults | 314 # This will generate a matrix of shape (20, n_kern1*fshape1*fshape1), i.e. (20,1250) with the defaults |
377 layer2_input = layer1.output.flatten(2) | 315 layer2_input = layer1.output.flatten(2) |
378 | 316 |
379 # construct a fully-connected sigmoidal layer | 317 # construct a fully-connected sigmoidal layer |
380 layer2 = SigmoidalLayer(rng, input=layer2_input, | 318 layer2 = SigmoidalLayer(rng, input=layer2_input, |
381 n_in=n_kern1*fshape1*fshape1, n_out=500) | 319 n_in=n_kern1*fshape1*fshape1, n_out=sigmoide_size) |
382 | 320 |
383 | 321 |
384 # classify the values of the fully-connected sigmoidal layer | 322 # classify the values of the fully-connected sigmoidal layer |
385 layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) | 323 layer3 = LogisticRegression(input=layer2.output, n_in=sigmoide_size, n_out=10) |
386 | 324 |
387 # the cost we minimize during training is the NLL of the model | 325 # the cost we minimize during training is the NLL of the model |
388 cost = layer3.negative_log_likelihood(y) | 326 cost = layer3.negative_log_likelihood(y) |
389 | 327 |
390 # create a function to compute the mistakes that are made by the model | 328 # create a function to compute the mistakes that are made by the model |
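Several of the hunks above rewire the fully-connected layer width to the new sigmoide_size parameter, and its input width comes from the fshape arithmetic. A worked example with the default hyper-parameters (5x5 filters, 2x2 pooling; integer division as in Python 2), purely for illustration:

```python
# Worked shape arithmetic mirroring the fshape formulas above (illustrative).
filter_shape0 = filter_shape1 = 5
pool = 2
n_kern0, n_kern1 = 20, 50

fshape0 = (32 - filter_shape0 + 1) / pool        # (32-5+1)/2 = 14
fshape1 = (fshape0 - filter_shape1 + 1) / pool   # (14-5+1)/2 = 5

print n_kern0 * fshape0 * fshape0   # 3920: flattened width after one conv/pool layer
print n_kern1 * fshape1 * fshape1   # 1250: flattened width after two conv/pool layers
```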
412 | 350 |
413 ############### | 351 ############### |
414 # TRAIN MODEL # | 352 # TRAIN MODEL # |
415 ############### | 353 ############### |
416 | 354 |
417 n_minibatches = len(train_batches) | 355 #n_minibatches = len(train_batches) |
| 356 n_minibatches=0 |
| 357 n_valid=0 |
| 358 n_test=0 |
| 359 for x, y in dataset.train(batch_size): |
| 360 if x.shape[0] == batch_size: |
| 361 n_minibatches+=1 |
| 362 n_minibatches*=batch_size |
| 363 print n_minibatches |
| 364 |
| 365 for x, y in dataset.valid(batch_size): |
| 366 if x.shape[0] == batch_size: |
| 367 n_valid+=1 |
| 368 n_valid*=batch_size |
| 369 print n_valid |
| 370 |
| 371 for x, y in dataset.test(batch_size): |
| 372 if x.shape[0] == batch_size: |
| 373 n_test+=1 |
| 374 n_test*=batch_size |
| 375 print n_test |
| 376 |
418 | 377 |
419 # early-stopping parameters | 378 # early-stopping parameters |
420 patience = 10000 # look at this many examples regardless | 379 patience = 10000 # look at this many examples regardless |
421 patience_increase = 2 # wait this much longer when a new best is | 380 patience_increase = 2 # wait this much longer when a new best is |
422 # found | 381 # found |
431 best_validation_loss = float('inf') | 390 best_validation_loss = float('inf') |
432 best_iter = 0 | 391 best_iter = 0 |
433 test_score = 0. | 392 test_score = 0. |
434 start_time = time.clock() | 393 start_time = time.clock() |
435 | 394 |
| 395 |
436 # have a maximum of `n_iter` iterations through the entire dataset | 396 # have a maximum of `n_iter` iterations through the entire dataset |
437 for iter in xrange(n_iter * n_minibatches): | 397 iter=0 |
438 | 398 for epoch in xrange(n_iter): |
439 # get epoch and minibatch index | 399 for x, y in train_batches: |
440 epoch = iter / n_minibatches | 400 if x.shape[0] != batch_size: |
441 minibatch_index = iter % n_minibatches | 401 continue |
442 | 402 iter+=1 |
443 # get the minibatches corresponding to `iter` modulo | 403 |
444 # `len(train_batches)` | 404 # get epoch and minibatch index |
445 x,y = train_batches[ minibatch_index ] | 405 #epoch = iter / n_minibatches |
446 | 406 minibatch_index = iter % n_minibatches |
447 if iter %100 == 0: | 407 |
448 print 'training @ iter = ', iter | 408 if iter %100 == 0: |
449 cost_ij = train_model(x,y) | 409 print 'training @ iter = ', iter |
450 | 410 cost_ij = train_model(x,y) |
451 if (iter+1) % validation_frequency == 0: | 411 |
452 | 412 |
453 # compute zero-one loss on validation set | 413 # compute zero-one loss on validation set |
454 this_validation_loss = 0. | 414 this_validation_loss = 0. |
455 for x,y in valid_batches: | 415 for x,y in valid_batches: |
456 # sum up the errors for each minibatch | 416 if x.shape[0] != batch_size: |
457 this_validation_loss += test_model(x,y) | 417 continue |
458 | 418 # sum up the errors for each minibatch |
459 # get the average by dividing with the number of minibatches | 419 this_validation_loss += test_model(x,y) |
460 this_validation_loss /= len(valid_batches) | 420 |
461 print('epoch %i, minibatch %i/%i, validation error %f %%' % \ | 421 # get the average by dividing with the number of minibatches |
462 (epoch, minibatch_index+1, n_minibatches, \ | 422 this_validation_loss /= n_valid |
463 this_validation_loss*100.)) | 423 print('epoch %i, minibatch %i/%i, validation error %f %%' % \ |
464 | 424 (epoch, minibatch_index+1, n_minibatches, \ |
465 | 425 this_validation_loss*100.)) |
466 # if we got the best validation score until now | 426 |
467 if this_validation_loss < best_validation_loss: | 427 |
468 | 428 # if we got the best validation score until now |
469 #improve patience if loss improvement is good enough | 429 if this_validation_loss < best_validation_loss: |
470 if this_validation_loss < best_validation_loss * \ | 430 |
471 improvement_threshold : | 431 #improve patience if loss improvement is good enough |
472 patience = max(patience, iter * patience_increase) | 432 if this_validation_loss < best_validation_loss * \ |
473 | 433 improvement_threshold : |
474 # save best validation score and iteration number | 434 patience = max(patience, iter * patience_increase) |
475 best_validation_loss = this_validation_loss | 435 |
476 best_iter = iter | 436 # save best validation score and iteration number |
477 | 437 best_validation_loss = this_validation_loss |
478 # test it on the test set | 438 best_iter = iter |
479 test_score = 0. | 439 |
480 for x,y in test_batches: | 440 # test it on the test set |
481 test_score += test_model(x,y) | 441 test_score = 0. |
482 test_score /= len(test_batches) | 442 for x,y in test_batches: |
483 print((' epoch %i, minibatch %i/%i, test error of best ' | 443 if x.shape[0] != batch_size: |
484 'model %f %%') % | 444 continue |
485 (epoch, minibatch_index+1, n_minibatches, | 445 test_score += test_model(x,y) |
486 test_score*100.)) | 446 test_score /= n_test |
487 | 447 print((' epoch %i, minibatch %i/%i, test error of best ' |
488 if patience <= iter : | 448 'model %f %%') % |
489 break | 449 (epoch, minibatch_index+1, n_minibatches, |
| 450 test_score*100.)) |
| 451 |
| 452 if patience <= iter : |
| 453 break |
490 | 454 |
491 end_time = time.clock() | 455 end_time = time.clock() |
492 print('Optimization complete.') | 456 print('Optimization complete.') |
493 print('Best validation score of %f %% obtained at iteration %i,'\ | 457 print('Best validation score of %f %% obtained at iteration %i,'\ |
494 'with test performance %f %%' % | 458 'with test performance %f %%' % |
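The restructured training loop spreads the patience-based early stopping over several hunks, and validation_frequency is defined in a part of the file not shown here. A distilled, self-contained toy of the same scheme, with a random number standing in for the validation error that test_model computes in the real code:

```python
# Toy illustration of the early-stopping logic used above, not the file's code.
import random

def run_early_stopping(n_epochs=200, batches_per_epoch=100,
                       validation_frequency=100,
                       patience=10000, patience_increase=2,
                       improvement_threshold=0.995):
    best_loss, best_iter, it = float('inf'), 0, 0
    for epoch in xrange(n_epochs):
        for minibatch in xrange(batches_per_epoch):
            it += 1
            # train_model(x, y) would be called here on the current minibatch
            if it % validation_frequency == 0:
                this_loss = random.random()   # stand-in for the validation error
                if this_loss < best_loss:
                    # a clear improvement buys extra patience
                    if this_loss < best_loss * improvement_threshold:
                        patience = max(patience, it * patience_increase)
                    best_loss, best_iter = this_loss, it
            if patience <= it:
                return best_loss, best_iter
    return best_loss, best_iter

print run_early_stopping()
```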
500 if __name__ == '__main__': | 464 if __name__ == '__main__': |
501 evaluate_lenet5() | 465 evaluate_lenet5() |
502 | 466 |
503 def experiment(state, channel): | 467 def experiment(state, channel): |
504 print 'start experiment' | 468 print 'start experiment' |
505 (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.n_layer, state.filter_shape0, state.filter_shape1) | 469 (best_validation_loss, test_score, minutes_trained, iter) = evaluate_lenet5(state.learning_rate, state.n_iter, state.batch_size, state.n_kern0, state.n_kern1, state.n_layer, state.filter_shape0, state.filter_shape1,state.sigmoide_size) |
506 print 'end experiment' | 470 print 'end experiment' |
| 471 |
| 472 pylearn.version.record_versions(state,[theano,ift6266,pylearn]) |
507 | 473 |
508 state.best_validation_loss = best_validation_loss | 474 state.best_validation_loss = best_validation_loss |
509 state.test_score = test_score | 475 state.test_score = test_score |
510 state.minutes_trained = minutes_trained | 476 state.minutes_trained = minutes_trained |
511 state.iter = iter | 477 state.iter = iter |
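For completeness, a hypothetical direct call to evaluate_lenet5 with its default hyper-parameters; the keyword names and the returned tuple mirror the new signature and the unpacking in experiment() above, and the values are simply the defaults, not a recommended configuration:

```python
# Illustrative invocation only; normally experiment(state, channel) is driven by
# the job scheduler and reads these same values from the supplied state object.
best_validation_loss, test_score, minutes_trained, iter = evaluate_lenet5(
    learning_rate=0.1, n_iter=200, batch_size=20,
    n_kern0=20, n_kern1=50, n_layer=3,
    filter_shape0=5, filter_shape1=5, sigmoide_size=500)
print best_validation_loss, test_score
```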