diff writeup/nips2010_submission.tex @ 488:6c9ff48e15cd

Moved the tables into a separate supplementary material file
author dumitru@dumitru.mtv.corp.google.com
date Mon, 31 May 2010 19:07:35 -0700
parents 6beaf3328521
children ee9836baade3
line wrap: on
line diff
--- a/writeup/nips2010_submission.tex	Mon May 31 21:50:00 2010 -0400
+++ b/writeup/nips2010_submission.tex	Mon May 31 19:07:35 2010 -0700
@@ -602,84 +602,5 @@
 %\bibliographystyle{apalike}
 }
 
-\newpage
-
-\centerline{APPENDIX FOR {\bf Deep Self-Taught Learning for Handwritten Character Recognition}}
-
-\vspace*{1cm}
-
-\begin{table}[h]
-\caption{Overall comparison of error rates ($\pm$ std.err.) on 62 character classes (10 digits +
-26 lower + 26 upper), except for the last column -- digits only, between deep architecture with pre-training
-(SDA=Stacked Denoising Autoencoder) and ordinary shallow architecture 
-(MLP=Multi-Layer Perceptron). The models shown are all trained using perturbed data (NISTP or P07)
-and using a validation set to select hyper-parameters and other training choices. 
-\{SDA,MLP\}0 are trained on NIST,
-\{SDA,MLP\}1 are trained on NISTP, and \{SDA,MLP\}2 are trained on P07.
-The human error rate on digits is a lower bound because it does not count digits that were
-recognized as letters. For comparison, the results found in the literature
-on NIST digits classification using the same test set are included.}
-\label{tab:sda-vs-mlp-vs-humans}
-\begin{center}
-\begin{tabular}{|l|r|r|r|r|} \hline
-      & NIST test          & NISTP test       & P07 test       & NIST test digits   \\ \hline
-Humans&   18.2\% $\pm$.1\%   &  39.4\%$\pm$.1\%   &  46.9\%$\pm$.1\%  &  $1.4\%$ \\ \hline 
-SDA0   &  23.7\% $\pm$.14\%  &  65.2\%$\pm$.34\%  & 97.45\%$\pm$.06\%  & 2.7\% $\pm$.14\%\\ \hline 
-SDA1   &  17.1\% $\pm$.13\%  &  29.7\%$\pm$.3\%  & 29.7\%$\pm$.3\%  & 1.4\% $\pm$.1\%\\ \hline 
-SDA2   &  18.7\% $\pm$.13\%  &  33.6\%$\pm$.3\%  & 39.9\%$\pm$.17\%  & 1.7\% $\pm$.1\%\\ \hline 
-MLP0   &  24.2\% $\pm$.15\%  & 68.8\%$\pm$.33\%  & 78.70\%$\pm$.14\%  & 3.45\% $\pm$.15\% \\ \hline 
-MLP1   &  23.0\% $\pm$.15\%  &  41.8\%$\pm$.35\%  & 90.4\%$\pm$.1\%  & 3.85\% $\pm$.16\% \\ \hline 
-MLP2   &  24.3\% $\pm$.15\%  &  46.0\%$\pm$.35\%  & 54.7\%$\pm$.17\%  & 4.85\% $\pm$.18\% \\ \hline 
-\citep{Granger+al-2007} &     &                    &                   & 4.95\% $\pm$.18\% \\ \hline
-\citep{Cortes+al-2000} &      &                    &                   & 3.71\% $\pm$.16\% \\ \hline
-\citep{Oliveira+al-2002} &    &                    &                   & 2.4\% $\pm$.13\% \\ \hline
-\citep{Milgram+al-2005} &      &                    &                   & 2.1\% $\pm$.12\% \\ \hline
-\end{tabular}
-\end{center}
-\end{table}
-
-\begin{table}[h]
-\caption{Relative change in error rates due to the use of perturbed training data,
-either using NISTP, for the MLP1/SDA1 models, or using P07, for the MLP2/SDA2 models.
-A positive value indicates that training on the perturbed data helped for the
-given test set (the first 3 columns are on the 62-class tasks and the last one is
-on the clean 10-class digits). Clearly, the deep learning models did benefit more
-from perturbed training data, even when testing on clean data, whereas the MLP
-trained on perturbed data performed worse on the clean digits and about the same
-on the clean characters. }
-\label{tab:perturbation-effect}
-\begin{center}
-\begin{tabular}{|l|r|r|r|r|} \hline
-      & NIST test          & NISTP test      & P07 test       & NIST test digits   \\ \hline
-SDA0/SDA1-1   &  38\%      &  84\%           & 228\%          &  93\% \\ \hline 
-SDA0/SDA2-1   &  27\%      &  94\%           & 144\%          &  59\% \\ \hline 
-MLP0/MLP1-1   &  5.2\%     &  65\%           & -13\%          & -10\%  \\ \hline 
-MLP0/MLP2-1   &  -0.4\%    &  49\%           & 44\%           & -29\% \\ \hline 
-\end{tabular}
-\end{center}
-\end{table}
-
-\begin{table}[h]
-\caption{Test error rates and relative change in error rates due to the use of
-a multi-task setting, i.e., training on each task in isolation vs training
-for all three tasks together, for MLPs vs SDAs. The SDA benefits much
-more from the multi-task setting. All experiments are only on the
-unperturbed NIST data, using validation error for model selection.
-Relative improvement is 1 - single-task error / multi-task error.}
-\label{tab:multi-task}
-\begin{center}
-\begin{tabular}{|l|r|r|r|} \hline
-             & single-task  & multi-task  & relative \\ 
-             & setting      & setting     & improvement \\ \hline
-MLP-digits   &  3.77\%      &  3.99\%     & 5.6\%   \\ \hline 
-MLP-lower   &  17.4\%      &  16.8\%     &  -4.1\%    \\ \hline 
-MLP-upper   &  7.84\%     &  7.54\%      & -3.6\%    \\ \hline 
-SDA-digits   &  2.6\%      &  3.56\%     & 27\%    \\ \hline 
-SDA-lower   &  12.3\%      &  14.4\%    & 15\%    \\ \hline 
-SDA-upper   &  5.93\%     &  6.78\%      & 13\%    \\ \hline 
-\end{tabular}
-\end{center}
-\end{table}
-
 
 \end{document}