\documentclass{article} % For LaTeX2e
\usepackage{nips10submit_e,times}

\usepackage{amsthm,amsmath,amssymb,bbold,bbm}
\usepackage{algorithm,algorithmic}
\usepackage[utf8]{inputenc}
\usepackage{graphicx,subfigure}
\usepackage[numbers]{natbib}

\title{Deep Self-Taught Learning for Handwritten Character Recognition\\
\emph{Supplementary Material}}

\begin{document}

\maketitle

These tables correspond to Figures 3 and 4 of the main paper and contain the raw error rates for each model and dataset considered.

\begin{table}[h]
\caption{Overall comparison of error rates ($\pm$ std.~err.) on 62 character classes (10 digits +
26 lower case + 26 upper case), except for the last column, which is on digits only, between a deep
architecture with pre-training (SDA = Stacked Denoising Autoencoder) and an ordinary shallow
architecture (MLP = Multi-Layer Perceptron). The models shown were trained either on the original
NIST data or on perturbed data (NISTP or P07), using a validation set to select hyper-parameters
and other training choices: \{SDA,MLP\}0 are trained on NIST,
\{SDA,MLP\}1 on NISTP, and \{SDA,MLP\}2 on P07.
The human error rate on digits is a lower bound because it does not count digits that were
recognized as letters. For comparison, results found in the literature
on NIST digit classification using the same test set are included.}
\label{tab:sda-vs-mlp-vs-humans}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
 & NIST test & NISTP test & P07 test & NIST test digits \\ \hline
Humans & 18.2\% $\pm$.1\% & 39.4\% $\pm$.1\% & 46.9\% $\pm$.1\% & 1.4\% \\ \hline
SDA0 & 23.7\% $\pm$.14\% & 65.2\% $\pm$.34\% & 97.45\% $\pm$.06\% & 2.7\% $\pm$.14\% \\ \hline
SDA1 & 17.1\% $\pm$.13\% & 29.7\% $\pm$.3\% & 29.7\% $\pm$.3\% & 1.4\% $\pm$.1\% \\ \hline
SDA2 & 18.7\% $\pm$.13\% & 33.6\% $\pm$.3\% & 39.9\% $\pm$.17\% & 1.7\% $\pm$.1\% \\ \hline
MLP0 & 24.2\% $\pm$.15\% & 68.8\% $\pm$.33\% & 78.70\% $\pm$.14\% & 3.45\% $\pm$.15\% \\ \hline
MLP1 & 23.0\% $\pm$.15\% & 41.8\% $\pm$.35\% & 90.4\% $\pm$.1\% & 3.85\% $\pm$.16\% \\ \hline
MLP2 & 24.3\% $\pm$.15\% & 46.0\% $\pm$.35\% & 54.7\% $\pm$.17\% & 4.85\% $\pm$.18\% \\ \hline
\citep{Granger+al-2007} & & & & 4.95\% $\pm$.18\% \\ \hline
\citep{Cortes+al-2000} & & & & 3.71\% $\pm$.16\% \\ \hline
\citep{Oliveira+al-2002} & & & & 2.4\% $\pm$.13\% \\ \hline
\citep{Milgram+al-2005} & & & & 2.1\% $\pm$.12\% \\ \hline
\end{tabular}
\end{center}
\end{table}
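The uncertainties quoted in Table~\ref{tab:sda-vs-mlp-vs-humans} are standard errors on the
measured error rates. As a rough sketch of where numbers of this magnitude come from (the exact
procedure is not restated in this supplement, so the simple binomial form below is an assumption),
the standard error of an error rate $\hat p$ measured on a test set of $n$ examples is
\begin{equation*}
% assumption: simple binomial sampling error; the original procedure is not restated here
\mathrm{SE}(\hat p) = \sqrt{\frac{\hat p\,(1-\hat p)}{n}}.
\end{equation*}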

\begin{table}[h]
\caption{Relative change in error rates due to the use of perturbed training data:
NISTP for the MLP1/SDA1 models, P07 for the MLP2/SDA2 models.
A positive value indicates that training on the perturbed data helped on the
given test set (the first three columns are on the 62-class task; the last is
on the clean 10-class digits). Clearly, the deep learning models benefited more
from the perturbed training data, even when tested on clean data, whereas the MLPs
trained on perturbed data performed worse on the clean digits and about the same
on the clean characters.}
\label{tab:perturbation-effect}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
 & NIST test & NISTP test & P07 test & NIST test digits \\ \hline
SDA0/SDA1-1 & 38\% & 84\% & 228\% & 93\% \\ \hline
SDA0/SDA2-1 & 27\% & 94\% & 144\% & 59\% \\ \hline
MLP0/MLP1-1 & 5.2\% & 65\% & -13\% & -10\% \\ \hline
MLP0/MLP2-1 & -0.4\% & 49\% & 44\% & -29\% \\ \hline
\end{tabular}
\end{center}
\end{table}
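Each row label is read as a ratio of error rates from Table~\ref{tab:sda-vs-mlp-vs-humans},
minus one. For example, the first entry compares SDA0 and SDA1 on the NIST test set:
\begin{equation*}
% worked example: SDA0 and SDA1 NIST test errors taken from Table 1
\frac{23.7\%}{17.1\%} - 1 \approx 0.38 = 38\%.
\end{equation*}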

\begin{table}[h]
\caption{Test error rates and relative change in error rates due to the use of
a multi-task setting, i.e., training on each task in isolation vs.\ training
on all three tasks together, for MLPs vs.\ SDAs. The SDA benefits much
more from the multi-task setting. All experiments are on the
unperturbed NIST data only, using validation error for model selection.
Relative improvement is $1 - (\mbox{single-task error})/(\mbox{multi-task error})$.}
\label{tab:multi-task}
\begin{center}
\begin{tabular}{|l|r|r|r|} \hline
 & single-task & multi-task & relative \\
 & setting & setting & improvement \\ \hline
MLP-digits & 3.77\% & 3.99\% & 5.6\% \\ \hline
MLP-lower & 17.4\% & 16.8\% & -4.1\% \\ \hline
MLP-upper & 7.84\% & 7.54\% & -3.6\% \\ \hline
SDA-digits & 2.6\% & 3.56\% & 27\% \\ \hline
SDA-lower & 12.3\% & 14.4\% & 15\% \\ \hline
SDA-upper & 5.93\% & 6.78\% & 13\% \\ \hline
\end{tabular}
\end{center}
\end{table}
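To make the caption's formula concrete, the SDA-digits row gives
\begin{equation*}
% worked example: single-task and multi-task errors taken from the SDA-digits row above
1 - \frac{2.6\%}{3.56\%} \approx 0.27 = 27\%,
\end{equation*}
which is the relative improvement reported in the last column.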

{\small
\bibliography{strings,ml,aigaion,specials}
%\bibliographystyle{plainnat}
\bibliographystyle{unsrtnat}
%\bibliographystyle{apalike}
}

\end{document}