writeup/nips2010_submission_supplementary.tex @ 488:6c9ff48e15cd

Moved the tables into a separate supplementary material file
author dumitru@dumitru.mtv.corp.google.com
date Mon, 31 May 2010 19:07:35 -0700
\documentclass{article} % For LaTeX2e
\usepackage{nips10submit_e,times}

\usepackage{amsthm,amsmath,amssymb,bbold,bbm}
\usepackage{algorithm,algorithmic}
\usepackage[utf8]{inputenc}
\usepackage{graphicx,subfigure}
\usepackage[numbers]{natbib}

\title{Deep Self-Taught Learning for Handwritten Character Recognition\\
\emph{Supplementary Material}}

\begin{document}

\maketitle

These tables correspond to Figures 3 and 4 of the paper and contain the raw error rates for each model and dataset considered.

\begin{table}[h]
\caption{Overall comparison of error rates ($\pm$ std.\ err.) on the 62 character classes
(10 digits + 26 lower case + 26 upper case), except for the last column, which is on the
10 digits only, between the deep architecture with pre-training
(SDA = Stacked Denoising Autoencoder) and the ordinary shallow architecture
(MLP = Multi-Layer Perceptron). \{SDA,MLP\}0 are trained on the original NIST data,
\{SDA,MLP\}1 on the perturbed NISTP, and \{SDA,MLP\}2 on the perturbed P07,
in all cases using a validation set to select hyper-parameters and other training choices.
The human error rate on digits is a lower bound because it does not count digits that were
recognized as letters. For comparison, results from the literature
on NIST digit classification using the same test set are included.}
\label{tab:sda-vs-mlp-vs-humans}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
Humans & 18.2\% $\pm$.1\% & 39.4\%$\pm$.1\% & 46.9\%$\pm$.1\% & 1.4\% \\ \hline
SDA0 & 23.7\% $\pm$.14\% & 65.2\%$\pm$.34\% & 97.45\%$\pm$.06\% & 2.7\% $\pm$.14\% \\ \hline
SDA1 & 17.1\% $\pm$.13\% & 29.7\%$\pm$.3\% & 29.7\%$\pm$.3\% & 1.4\% $\pm$.1\% \\ \hline
SDA2 & 18.7\% $\pm$.13\% & 33.6\%$\pm$.3\% & 39.9\%$\pm$.17\% & 1.7\% $\pm$.1\% \\ \hline
MLP0 & 24.2\% $\pm$.15\% & 68.8\%$\pm$.33\% & 78.70\%$\pm$.14\% & 3.45\% $\pm$.15\% \\ \hline
MLP1 & 23.0\% $\pm$.15\% & 41.8\%$\pm$.35\% & 90.4\%$\pm$.1\% & 3.85\% $\pm$.16\% \\ \hline
MLP2 & 24.3\% $\pm$.15\% & 46.0\%$\pm$.35\% & 54.7\%$\pm$.17\% & 4.85\% $\pm$.18\% \\ \hline
\citep{Granger+al-2007} & & & & 4.95\% $\pm$.18\% \\ \hline
\citep{Cortes+al-2000} & & & & 3.71\% $\pm$.16\% \\ \hline
\citep{Oliveira+al-2002} & & & & 2.4\% $\pm$.13\% \\ \hline
\citep{Milgram+al-2005} & & & & 2.1\% $\pm$.12\% \\ \hline
\end{tabular}
\end{center}
\end{table}
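
A note on the $\pm$ values: the tables do not state how the standard errors were computed, so the following is only a sketch, under the assumption that each error rate $\hat{p}$ is a proportion estimated from $n$ independent test examples. The standard error of such a proportion is
\begin{equation*}
\mathrm{SE}(\hat{p}) \;=\; \sqrt{\frac{\hat{p}\,(1-\hat{p})}{n}},
\end{equation*}
which shrinks as $1/\sqrt{n}$ with the size of the test set.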

\begin{table}[h]
\caption{Relative change in error rates due to the use of perturbed training data,
either NISTP (for the MLP1/SDA1 models) or P07 (for the MLP2/SDA2 models).
A positive value indicates that training on the perturbed data helped on the
given test set (the first three columns are on the 62-class task; the last is
on the clean 10-class digits). Clearly, the deep learning models benefited more
from perturbed training data, even when tested on clean data, whereas the MLPs
trained on perturbed data performed worse on the clean digits and about the same
on the clean characters.}
\label{tab:perturbation-effect}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
SDA0/SDA1 $-$ 1 & 38\% & 84\% & 228\% & 93\% \\ \hline
SDA0/SDA2 $-$ 1 & 27\% & 94\% & 144\% & 59\% \\ \hline
MLP0/MLP1 $-$ 1 & 5.2\% & 65\% & -13\% & -10\% \\ \hline
MLP0/MLP2 $-$ 1 & -0.4\% & 49\% & 44\% & -29\% \\ \hline
\end{tabular}
\end{center}
\end{table}
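
To make the arithmetic behind Table~\ref{tab:perturbation-effect} explicit: as the row labels indicate, each entry is the corresponding ratio of error rates from Table~\ref{tab:sda-vs-mlp-vs-humans}, minus one. For example, on the NIST test set,
\begin{equation*}
\frac{\text{MLP0 error}}{\text{MLP1 error}} - 1 \;=\; \frac{24.2\%}{23.0\%} - 1 \;\approx\; 0.052 \;=\; 5.2\%,
\end{equation*}
the first entry of the MLP0/MLP1 $-$ 1 row.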

\begin{table}[h]
\caption{Test error rates and relative change in error rates due to the use of
a multi-task setting, i.e., training on each task in isolation vs.\ training
on all three tasks together, for MLPs vs.\ SDAs. The SDA benefits much
more from the multi-task setting. All experiments are on the
unperturbed NIST data, using validation error for model selection.
Relative improvement is $1 - (\text{single-task error})/(\text{multi-task error})$.}
\label{tab:multi-task}
\begin{center}
\begin{tabular}{|l|r|r|r|} \hline
& single-task & multi-task & relative \\
& setting & setting & improvement \\ \hline
MLP-digits & 3.77\% & 3.99\% & 5.6\% \\ \hline
MLP-lower & 17.4\% & 16.8\% & -4.1\% \\ \hline
MLP-upper & 7.84\% & 7.54\% & -3.6\% \\ \hline
SDA-digits & 2.6\% & 3.56\% & 27\% \\ \hline
SDA-lower & 12.3\% & 14.4\% & 15\% \\ \hline
SDA-upper & 5.93\% & 6.78\% & 13\% \\ \hline
\end{tabular}
\end{center}
\end{table}
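
As a worked instance of the relative-improvement formula in the caption, for SDA-digits,
\begin{equation*}
1 - \frac{\text{single-task error}}{\text{multi-task error}} \;=\; 1 - \frac{2.6\%}{3.56\%} \;\approx\; 0.27 \;=\; 27\%,
\end{equation*}
matching the last column of Table~\ref{tab:multi-task}.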

{\small
\bibliography{strings,ml,aigaion,specials}
%\bibliographystyle{plainnat}
\bibliographystyle{unsrtnat}
%\bibliographystyle{apalike}
}

\end{document}