writeup/techreport.tex @ 460:fe292653a0f8

adds last table of results
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Thu, 27 May 2010 21:53:24 -0600
parents c0f738f0cef0
children 9609c5cf9b6b
 & NIST test & NISTP test & P07 test & NIST test digits \\ \hline
Humans & 18.2\% $\pm$.1\% & 39.4\%$\pm$.1\% & 46.9\%$\pm$.1\% & $>1.1\%$ \\ \hline
SDA0 & 23.7\% $\pm$.14\% & 65.2\%$\pm$.34\% & 97.45\%$\pm$.06\% & 2.7\% $\pm$.14\% \\ \hline
SDA1 & 17.1\% $\pm$.13\% & 29.7\%$\pm$.3\% & 29.7\%$\pm$.3\% & 1.4\% $\pm$.1\% \\ \hline
SDA2 & 18.7\% $\pm$.13\% & 33.6\%$\pm$.3\% & 39.9\%$\pm$.17\% & 1.7\% $\pm$.1\% \\ \hline
MLP0 & 24.2\% $\pm$.15\% & 68.8\%$\pm$.33\% & 78.70\%$\pm$.14\% & 3.45\% $\pm$.15\% \\ \hline
MLP1 & 23.0\% $\pm$.15\% & 41.8\%$\pm$.35\% & 90.4\%$\pm$.1\% & 3.85\% $\pm$.16\% \\ \hline
MLP2 & 24.3\% $\pm$.15\% & 46.0\%$\pm$.35\% & 54.7\%$\pm$.17\% & 4.85\% $\pm$.18\% \\ \hline
\end{tabular}
\end{center}
\end{table}

\subsection{Perturbed Training Data More Helpful for SDAE}

\begin{table}
\caption{Relative change in error rates due to the use of perturbed training
data: NISTP for the MLP1/SDA1 models, P07 for the MLP2/SDA2 models.
A positive value indicates that training on the perturbed data helped on the
given test set (the first three columns are on the 62-class tasks; the last
is on the clean 10-class digit task). Clearly, the deep learning models
benefited more from perturbed training data, even when testing on clean data,
whereas the MLP trained on perturbed data performed worse on the clean digits
and about the same on the clean characters.}
\label{tab:perturbation-effect}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
 & NIST test & NISTP test & P07 test & NIST test digits \\ \hline
SDA0/SDA1-1 & 38\% & 84\% & 228\% & 93\% \\ \hline
SDA0/SDA2-1 & 27\% & 94\% & 144\% & 59\% \\ \hline
MLP0/MLP1-1 & 5.2\% & 65\% & -13\% & -10\% \\ \hline
MLP0/MLP2-1 & -0.4\% & 49\% & 44\% & -29\% \\ \hline
\end{tabular}
\end{center}
\end{table}
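
For concreteness, each entry is a ratio of test errors minus one, read off
the previous table; e.g., the SDA0/SDA1 entry on the NIST test set is
\[
\frac{\mathrm{SDA0\ error}}{\mathrm{SDA1\ error}} - 1
  = \frac{23.7\%}{17.1\%} - 1 \approx 0.386,
\]
which appears, up to rounding of the reported error rates, as the 38\% in
the first cell.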

\subsection{Multi-Task Learning Effects}

As previously seen, the SDA is better able to benefit from the
transformations applied to the data than the MLP. In this experiment we
define three tasks: recognizing digits (knowing that the input is a digit),
recognizing upper case characters (knowing that the input is one), and
recognizing lower case characters (knowing that the input is one). We
consider the digit classification task as the target task, and we want to
evaluate whether training on the other tasks helps or hurts, and whether
the effect differs between MLPs and SDAs. The goal is to find out if deep
learning can benefit more (or less) from multiple related tasks
(i.e., the multi-task setting) than a corresponding purely supervised
shallow learner.

We use a single-hidden-layer MLP with 1000 hidden units, and an SDA
with 3 hidden layers (1000 hidden units per layer), pre-trained and
fine-tuned on NIST.
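
To make the per-task evaluation concrete, the following is a minimal sketch
(in Python/NumPy, not the authors' code) of one way the error of the
multi-task model can be computed when the task is known at test time: the
argmax over the 62 outputs is simply restricted to the classes of the given
task. The class layout (digits first, then lower case, then upper case) is a
hypothetical ordering chosen for illustration.
\begin{verbatim}
import numpy as np

# Hypothetical layout of the 62-way output (illustration only):
# 0-9 digits, 10-35 lower case, 36-61 upper case.
TASKS = {"digits": slice(0, 10),
         "lower":  slice(10, 36),
         "upper":  slice(36, 62)}

def task_error(probs, labels, task):
    # Error rate knowing the input belongs to `task`:
    # restrict the argmax to that task's output units.
    s = TASKS[task]
    pred = np.argmax(probs[:, s], axis=1) + s.start
    return np.mean(pred != labels)

# Toy usage: random "network outputs" for 5 digit images.
rng = np.random.default_rng(0)
probs = rng.random((5, 62))
probs /= probs.sum(axis=1, keepdims=True)
labels = rng.integers(0, 10, size=5)
print(task_error(probs, labels, "digits"))
\end{verbatim}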

Our results show that the MLP benefits marginally from the multi-task setting
in the case of digits (5\% relative improvement) but is actually hurt in the
case of characters (respectively 3\% and 4\% worse for lower and upper case
characters). On the other hand, the SDA benefited from the multi-task setting,
with relative error rate improvements of 27\%, 15\% and 13\% respectively for
digits, lower and upper case characters, as shown in
Table~\ref{tab:multi-task}.

\begin{table}
\caption{Test error rates and relative change in error rates due to the use of
a multi-task setting, i.e., training on each task in isolation vs.\ training
on all three tasks together, for MLPs vs.\ SDAs. The SDA benefits much
more from the multi-task setting. All experiments use only the
unperturbed NIST data, with validation error used for model selection.
Relative improvement is 1 - multi-task error / single-task error.}
\label{tab:multi-task}
\begin{center}
\begin{tabular}{|l|r|r|r|} \hline
 & single-task & multi-task & relative \\
 & setting & setting & improvement \\ \hline
MLP-digits & 3.99\% & 3.77\% & 5.6\% \\ \hline
MLP-lower & 16.8\% & 17.4\% & -3.6\% \\ \hline
MLP-upper & 7.54\% & 7.84\% & -4.1\% \\ \hline
SDA-digits & 3.56\% & 2.6\% & 27\% \\ \hline
SDA-lower & 14.4\% & 12.3\% & 15\% \\ \hline
SDA-upper & 6.78\% & 5.93\% & 13\% \\ \hline
\end{tabular}
\end{center}
\end{table}
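
For example, reading the SDA-digits row with this definition:
\[
1 - \frac{2.6\%}{3.56\%} \approx 0.27,
\]
i.e., the 27\% relative improvement that the multi-task setting brings over
training on the digits alone.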

\section{Conclusions}

\bibliography{strings,ml,aigaion,specials}
\bibliographystyle{mlapa}