comparison writeup/nips2010_submission.tex @ 488:6c9ff48e15cd

Moved the tables into a separate supplementary material file
author dumitru@dumitru.mtv.corp.google.com
date Mon, 31 May 2010 19:07:35 -0700
parents 6beaf3328521
children ee9836baade3
%\bibliographystyle{plainnat}
\bibliographystyle{unsrtnat}
%\bibliographystyle{apalike}
}

\newpage

\centerline{APPENDIX FOR {\bf Deep Self-Taught Learning for Handwritten Character Recognition}}

\vspace*{1cm}

\begin{table}[h]
\caption{Overall comparison of error rates ($\pm$ std.\ err.) between a deep architecture
with pre-training (SDA = Stacked Denoising Autoencoder) and an ordinary shallow architecture
(MLP = Multi-Layer Perceptron), on 62 character classes (10 digits +
26 lower case + 26 upper case), except for the last column, which is on digits only.
All models use a validation set to select hyper-parameters and other training choices.
\{SDA,MLP\}0 are trained on the original NIST data,
\{SDA,MLP\}1 are trained on the perturbed NISTP data, and \{SDA,MLP\}2 are trained on P07.
The human error rate on digits is a lower bound because it does not count digits that were
recognized as letters. For comparison, results from the literature
on NIST digit classification using the same test set are included.}
\label{tab:sda-vs-mlp-vs-humans}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
Humans & 18.2\%$\pm$.1\% & 39.4\%$\pm$.1\% & 46.9\%$\pm$.1\% & 1.4\% \\ \hline
SDA0 & 23.7\%$\pm$.14\% & 65.2\%$\pm$.34\% & 97.45\%$\pm$.06\% & 2.7\%$\pm$.14\% \\ \hline
SDA1 & 17.1\%$\pm$.13\% & 29.7\%$\pm$.3\% & 29.7\%$\pm$.3\% & 1.4\%$\pm$.1\% \\ \hline
SDA2 & 18.7\%$\pm$.13\% & 33.6\%$\pm$.3\% & 39.9\%$\pm$.17\% & 1.7\%$\pm$.1\% \\ \hline
MLP0 & 24.2\%$\pm$.15\% & 68.8\%$\pm$.33\% & 78.70\%$\pm$.14\% & 3.45\%$\pm$.15\% \\ \hline
MLP1 & 23.0\%$\pm$.15\% & 41.8\%$\pm$.35\% & 90.4\%$\pm$.1\% & 3.85\%$\pm$.16\% \\ \hline
MLP2 & 24.3\%$\pm$.15\% & 46.0\%$\pm$.35\% & 54.7\%$\pm$.17\% & 4.85\%$\pm$.18\% \\ \hline
\citep{Granger+al-2007} & & & & 4.95\%$\pm$.18\% \\ \hline
\citep{Cortes+al-2000} & & & & 3.71\%$\pm$.16\% \\ \hline
\citep{Oliveira+al-2002} & & & & 2.4\%$\pm$.13\% \\ \hline
\citep{Milgram+al-2005} & & & & 2.1\%$\pm$.12\% \\ \hline
\end{tabular}
\end{center}
\end{table}
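
To make the architectural difference concrete, the sketch below shows the unsupervised
pre-training step that distinguishes the SDA from the MLP: a single denoising autoencoder
layer with tied weights, trained to reconstruct the clean input from a corrupted copy.
This is a minimal, illustrative NumPy implementation, not the code used for the experiments
above; the layer sizes, corruption level, and learning rate are placeholder values.

\begin{verbatim}
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

class DenoisingAutoencoderLayer:
    """One layer of a stacked denoising autoencoder (illustrative only)."""

    def __init__(self, n_visible, n_hidden, seed=0):
        self.rng = np.random.default_rng(seed)
        # Small random tied weights: W encodes, W.T decodes.
        self.W = self.rng.normal(0.0, 0.01, size=(n_visible, n_hidden))
        self.b_hid = np.zeros(n_hidden)
        self.b_vis = np.zeros(n_visible)

    def train_step(self, x, corruption=0.25, lr=0.1):
        # Corrupt the input by zeroing a random subset of pixels.
        keep = self.rng.random(x.shape) > corruption
        x_tilde = x * keep
        # Encode the corrupted input, then decode (reconstruct).
        h = sigmoid(x_tilde @ self.W + self.b_hid)
        z = sigmoid(h @ self.W.T + self.b_vis)
        # Cross-entropy reconstruction loss against the CLEAN input.
        eps = 1e-7
        loss = -np.mean(np.sum(x * np.log(z + eps)
                               + (1 - x) * np.log(1 - z + eps), axis=1))
        # Backpropagation with tied weights (averaged over the mini-batch).
        dz = (z - x) / x.shape[0]          # grad wrt decoder pre-activation
        dh = (dz @ self.W) * h * (1 - h)   # grad wrt encoder pre-activation
        grad_W = x_tilde.T @ dh + dz.T @ h
        self.W -= lr * grad_W
        self.b_hid -= lr * dh.sum(axis=0)
        self.b_vis -= lr * dz.sum(axis=0)
        return loss

# Hypothetical usage: pre-train on a mini-batch of 32x32 binary images.
layer = DenoisingAutoencoderLayer(n_visible=1024, n_hidden=500)
x = (np.random.default_rng(1).random((20, 1024)) > 0.5).astype(float)
for epoch in range(10):
    print(layer.train_step(x))  # reconstruction loss should trend down
\end{verbatim}

In the full SDA, several such layers are pre-trained greedily, each on the hidden
representation produced by the previous one, before the whole stack is fine-tuned by
supervised gradient descent; the MLP baseline uses supervised training only.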

\begin{table}[h]
\caption{Relative change in error rates due to the use of perturbed training data,
either NISTP (for the MLP1/SDA1 models) or P07 (for the MLP2/SDA2 models).
A positive value indicates that training on the perturbed data helped on the
given test set (the first three columns are on the 62-class task; the last is
on the clean 10-class digits). Clearly, the deep learning models benefited more
from perturbed training data, even when tested on clean data, whereas the MLPs
trained on perturbed data performed worse on the clean digits and about the same
on the clean characters.}
\label{tab:perturbation-effect}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
SDA0/SDA1-1 & 38\% & 84\% & 228\% & 93\% \\ \hline
SDA0/SDA2-1 & 27\% & 94\% & 144\% & 59\% \\ \hline
MLP0/MLP1-1 & 5.2\% & 65\% & -13\% & -10\% \\ \hline
MLP0/MLP2-1 & -0.4\% & 49\% & 44\% & -29\% \\ \hline
\end{tabular}
\end{center}
\end{table}
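
Concretely, each entry of Table~\ref{tab:perturbation-effect} is the ratio of the two
corresponding error rates from Table~\ref{tab:sda-vs-mlp-vs-humans}, minus one. For
example, for SDA2 on the NIST test set,
\[
\frac{\mbox{SDA0 error}}{\mbox{SDA2 error}} - 1 = \frac{23.7\%}{18.7\%} - 1 \approx 27\%.
\]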

\begin{table}[h]
\caption{Test error rates and relative change in error rates due to the use of
a multi-task setting, i.e., training on each task in isolation vs.\ training
on all three tasks together, for MLPs vs.\ SDAs. The SDA benefits much
more from the multi-task setting. All experiments are on the
unperturbed NIST data only, using validation error for model selection.
Relative improvement is $1 - \mbox{single-task error}/\mbox{multi-task error}$.}
\label{tab:multi-task}
\begin{center}
\begin{tabular}{|l|r|r|r|} \hline
& single-task & multi-task & relative \\
& setting & setting & improvement \\ \hline
MLP-digits & 3.77\% & 3.99\% & 5.6\% \\ \hline
MLP-lower & 17.4\% & 16.8\% & -4.1\% \\ \hline
MLP-upper & 7.84\% & 7.54\% & -3.6\% \\ \hline
SDA-digits & 2.6\% & 3.56\% & 27\% \\ \hline
SDA-lower & 12.3\% & 14.4\% & 15\% \\ \hline
SDA-upper & 5.93\% & 6.78\% & 13\% \\ \hline
\end{tabular}
\end{center}
\end{table}
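
As a worked instance of the formula in the caption, the SDA-digits row gives
\[
1 - \frac{\mbox{single-task error}}{\mbox{multi-task error}}
  = 1 - \frac{2.6\%}{3.56\%} \approx 27\%,
\]
matching the value in the last column.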

\end{document}