comparison writeup/nips2010_submission.tex @ 491:19eab4daf212

merge
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Mon, 31 May 2010 22:15:44 -0400
parents d6cf4912abb0 ee9836baade3
children a194ce5a4249
%\bibliographystyle{plainnat}
\bibliographystyle{unsrtnat}
%\bibliographystyle{apalike}
}

\newpage

\centerline{APPENDIX FOR {\bf Deep Self-Taught Learning for Handwritten Character Recognition}}

\vspace*{1cm}

\begin{table}[h]
\caption{Overall comparison of error rates ($\pm$ std.err.; see the note on standard
errors below the table) on 62 character classes (10 digits +
26 lower case + 26 upper case), except for the last column, which is on the 10 digits only,
between the deep architecture with pre-training
(SDA=Stacked Denoising Autoencoder) and the ordinary shallow architecture
(MLP=Multi-Layer Perceptron). All models were trained using a validation set to select
hyper-parameters and other training choices:
\{SDA,MLP\}0 are trained on the original NIST data,
\{SDA,MLP\}1 are trained on the perturbed NISTP data, and \{SDA,MLP\}2 are trained on the
perturbed P07 data.
The human error rate on digits is a lower bound because it does not count digits that were
recognized as letters. For comparison, results found in the literature
on NIST digit classification using the same test set are included.}
\label{tab:sda-vs-mlp-vs-humans}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
       & NIST test          & NISTP test        & P07 test           & NIST test digits \\ \hline
Humans & 18.2\% $\pm$.1\%   & 39.4\%$\pm$.1\%   & 46.9\%$\pm$.1\%    & $1.4\%$ \\ \hline
SDA0   & 23.7\% $\pm$.14\%  & 65.2\%$\pm$.34\%  & 97.45\%$\pm$.06\%  & 2.7\% $\pm$.14\% \\ \hline
SDA1   & 17.1\% $\pm$.13\%  & 29.7\%$\pm$.3\%   & 29.7\%$\pm$.3\%    & 1.4\% $\pm$.1\% \\ \hline
SDA2   & 18.7\% $\pm$.13\%  & 33.6\%$\pm$.3\%   & 39.9\%$\pm$.17\%   & 1.7\% $\pm$.1\% \\ \hline
MLP0   & 24.2\% $\pm$.15\%  & 68.8\%$\pm$.33\%  & 78.70\%$\pm$.14\%  & 3.45\% $\pm$.15\% \\ \hline
MLP1   & 23.0\% $\pm$.15\%  & 41.8\%$\pm$.35\%  & 90.4\%$\pm$.1\%    & 3.85\% $\pm$.16\% \\ \hline
MLP2   & 24.3\% $\pm$.15\%  & 46.0\%$\pm$.35\%  & 54.7\%$\pm$.17\%   & 4.85\% $\pm$.18\% \\ \hline
\citep{Granger+al-2007}  & & & & 4.95\% $\pm$.18\% \\ \hline
\citep{Cortes+al-2000}   & & & & 3.71\% $\pm$.16\% \\ \hline
\citep{Oliveira+al-2002} & & & & 2.4\% $\pm$.13\% \\ \hline
\citep{Milgram+al-2005}  & & & & 2.1\% $\pm$.12\% \\ \hline
\end{tabular}
\end{center}
\end{table}

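Note on standard errors: the text does not spell out how the std.err.\ values in
Table~\ref{tab:sda-vs-mlp-vs-humans} are computed. A minimal sketch, assuming they are the
usual binomial standard errors of a proportion $\hat{p}$ (the measured error rate) estimated
from $n$ test examples, is
\[
\mathrm{SE}(\hat{p}) \;=\; \sqrt{\frac{\hat{p}\,(1-\hat{p})}{n}} .
\]
Under this assumption (a back-of-the-envelope check, since $n$ is not stated here), the human
row with $\hat{p}=18.2\%$ and $\mathrm{SE}=0.1\%$ would correspond to a test set of roughly
\[
n \;\approx\; \frac{\hat{p}\,(1-\hat{p})}{\mathrm{SE}^2}
  \;=\; \frac{0.182 \times 0.818}{(0.001)^2} \;\approx\; 1.5 \times 10^{5}
\]
examples.
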
\begin{table}[h]
\caption{Relative change in error rates due to the use of perturbed training data,
either using NISTP, for the MLP1/SDA1 models, or using P07, for the MLP2/SDA2 models.
A positive value indicates that training on the perturbed data helped on the
given test set (the first three columns are on the 62-class task and the last one is
on the clean 10-class digit task); a worked example of the computation is given below the table.
Clearly, the deep learning models benefited more from the perturbed training data, even when
testing on clean data, whereas the MLPs trained on perturbed data performed worse on the
clean digits and about the same on the clean characters.}
\label{tab:perturbation-effect}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
            & NIST test & NISTP test & P07 test & NIST test digits \\ \hline
SDA0/SDA1-1 & 38\%      & 84\%       & 228\%    & 93\% \\ \hline
SDA0/SDA2-1 & 27\%      & 94\%       & 144\%    & 59\% \\ \hline
MLP0/MLP1-1 & 5.2\%     & 65\%       & -13\%    & -10\% \\ \hline
MLP0/MLP2-1 & -0.4\%    & 49\%       & 44\%     & -29\% \\ \hline
\end{tabular}
\end{center}
\end{table}

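Worked example for Table~\ref{tab:perturbation-effect}: reading each row label as the ratio of
the corresponding error rates from Table~\ref{tab:sda-vs-mlp-vs-humans} minus one reproduces
the entries. On the NIST test set,
\[
\frac{\mathrm{err}(\mathrm{MLP0})}{\mathrm{err}(\mathrm{MLP1})} - 1
 \;=\; \frac{24.2\%}{23.0\%} - 1 \;\approx\; 5.2\% ,
\]
and on the NIST test digits,
\[
\frac{\mathrm{err}(\mathrm{SDA0})}{\mathrm{err}(\mathrm{SDA1})} - 1
 \;=\; \frac{2.7\%}{1.4\%} - 1 \;\approx\; 93\% .
\]
(Recomputing other entries from the rounded rates shown may differ in the last digit, as the
table was presumably computed from unrounded error rates.)
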
\begin{table}[h]
\caption{Test error rates and relative change in error rates due to the use of
a multi-task setting, i.e., training on each task in isolation vs.\ training
on all three tasks together, for MLPs vs.\ SDAs. The SDA benefits much
more from the multi-task setting. All experiments are on the
unperturbed NIST data only, using validation error for model selection.
Relative improvement is $1 - (\mbox{single-task error})/(\mbox{multi-task error})$;
a worked example is given below the table.}
\label{tab:multi-task}
\begin{center}
\begin{tabular}{|l|r|r|r|} \hline
           & single-task & multi-task & relative \\
           & setting     & setting    & improvement \\ \hline
MLP-digits & 3.77\%      & 3.99\%     & 5.6\% \\ \hline
MLP-lower  & 17.4\%      & 16.8\%     & -4.1\% \\ \hline
MLP-upper  & 7.84\%      & 7.54\%     & -3.6\% \\ \hline
SDA-digits & 2.6\%       & 3.56\%     & 27\% \\ \hline
SDA-lower  & 12.3\%      & 14.4\%     & 15\% \\ \hline
SDA-upper  & 5.93\%      & 6.78\%     & 13\% \\ \hline
\end{tabular}
\end{center}
\end{table}

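Worked example for the last column of Table~\ref{tab:multi-task}, using the SDA-digits row:
\[
1 - \frac{\mbox{single-task error}}{\mbox{multi-task error}}
 \;=\; 1 - \frac{2.6\%}{3.56\%} \;\approx\; 27\% ,
\]
matching the tabulated value. (As above, the table entries were presumably computed from
unrounded error rates, so recomputing other rows from the rounded rates shown may differ
slightly.)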

\end{document}