writeup/nips2010_submission_supplementary.tex @ 488:6c9ff48e15cd

Moved the tables into a separate supplementary material file
author dumitru@dumitru.mtv.corp.google.com
date Mon, 31 May 2010 19:07:35 -0700
\documentclass{article} % For LaTeX2e
\usepackage{nips10submit_e,times}

\usepackage{amsthm,amsmath,amssymb,bbold,bbm}
\usepackage{algorithm,algorithmic}
\usepackage[utf8]{inputenc}
\usepackage{graphicx,subfigure}
\usepackage[numbers]{natbib}

\title{Deep Self-Taught Learning for Handwritten Character Recognition\\
\emph{Supplementary Material}}

\begin{document}

\maketitle

These tables correspond to Figures 3 and 4 of the paper and contain the raw error rates for each model and dataset considered.

\begin{table}[h]
\caption{Overall comparison of error rates ($\pm$ std.\ err.) on the 62 character classes
(10 digits + 26 lower case + 26 upper case), except for the last column, which is on the
10 digits only, between the deep architecture with pre-training
(SDA = Stacked Denoising Autoencoder) and the ordinary shallow architecture
(MLP = Multi-Layer Perceptron). \{SDA,MLP\}0 are trained on the original NIST data,
\{SDA,MLP\}1 on the perturbed NISTP, and \{SDA,MLP\}2 on the perturbed P07,
in all cases using a validation set to select hyper-parameters and other training choices.
The human error rate on digits is a lower bound because it does not count digits that were
recognized as letters. For comparison, results from the literature
on NIST digit classification using the same test set are included.}
\label{tab:sda-vs-mlp-vs-humans}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
Humans & 18.2\% $\pm$.1\% & 39.4\%$\pm$.1\% & 46.9\%$\pm$.1\% & 1.4\% \\ \hline
SDA0 & 23.7\% $\pm$.14\% & 65.2\%$\pm$.34\% & 97.45\%$\pm$.06\% & 2.7\% $\pm$.14\% \\ \hline
SDA1 & 17.1\% $\pm$.13\% & 29.7\%$\pm$.3\% & 29.7\%$\pm$.3\% & 1.4\% $\pm$.1\% \\ \hline
SDA2 & 18.7\% $\pm$.13\% & 33.6\%$\pm$.3\% & 39.9\%$\pm$.17\% & 1.7\% $\pm$.1\% \\ \hline
MLP0 & 24.2\% $\pm$.15\% & 68.8\%$\pm$.33\% & 78.70\%$\pm$.14\% & 3.45\% $\pm$.15\% \\ \hline
MLP1 & 23.0\% $\pm$.15\% & 41.8\%$\pm$.35\% & 90.4\%$\pm$.1\% & 3.85\% $\pm$.16\% \\ \hline
MLP2 & 24.3\% $\pm$.15\% & 46.0\%$\pm$.35\% & 54.7\%$\pm$.17\% & 4.85\% $\pm$.18\% \\ \hline
\citep{Granger+al-2007} & & & & 4.95\% $\pm$.18\% \\ \hline
\citep{Cortes+al-2000} & & & & 3.71\% $\pm$.16\% \\ \hline
\citep{Oliveira+al-2002} & & & & 2.4\% $\pm$.13\% \\ \hline
\citep{Milgram+al-2005} & & & & 2.1\% $\pm$.12\% \\ \hline
\end{tabular}
\end{center}
\end{table}
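
A note on the $\pm$ values: the tables do not state how the standard errors were computed, so the following is only a sketch, under the assumption that each error rate $\hat{p}$ is a proportion estimated from $n$ independent test examples. The standard error of such a proportion is
\begin{equation*}
\mathrm{SE}(\hat{p}) \;=\; \sqrt{\frac{\hat{p}\,(1-\hat{p})}{n}},
\end{equation*}
which shrinks as $1/\sqrt{n}$ with the size of the test set.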

\begin{table}[h]
\caption{Relative change in error rates due to the use of perturbed training data,
either NISTP (for the MLP1/SDA1 models) or P07 (for the MLP2/SDA2 models).
A positive value indicates that training on the perturbed data helped on the
given test set (the first three columns are on the 62-class task; the last is
on the clean 10-class digits). Clearly, the deep learning models benefited more
from perturbed training data, even when tested on clean data, whereas the MLPs
trained on perturbed data performed worse on the clean digits and about the same
on the clean characters.}
\label{tab:perturbation-effect}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
SDA0/SDA1 $-$ 1 & 38\% & 84\% & 228\% & 93\% \\ \hline
SDA0/SDA2 $-$ 1 & 27\% & 94\% & 144\% & 59\% \\ \hline
MLP0/MLP1 $-$ 1 & 5.2\% & 65\% & -13\% & -10\% \\ \hline
MLP0/MLP2 $-$ 1 & -0.4\% & 49\% & 44\% & -29\% \\ \hline
\end{tabular}
\end{center}
\end{table}
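
To make the arithmetic behind Table~\ref{tab:perturbation-effect} explicit: as the row labels indicate, each entry is the corresponding ratio of error rates from Table~\ref{tab:sda-vs-mlp-vs-humans}, minus one. For example, on the NIST test set,
\begin{equation*}
\frac{\text{MLP0 error}}{\text{MLP1 error}} - 1 \;=\; \frac{24.2\%}{23.0\%} - 1 \;\approx\; 0.052 \;=\; 5.2\%,
\end{equation*}
the first entry of the MLP0/MLP1 $-$ 1 row.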

\begin{table}[h]
\caption{Test error rates and relative change in error rates due to the use of
a multi-task setting, i.e., training on each task in isolation vs.\ training
on all three tasks together, for MLPs vs.\ SDAs. The SDA benefits much
more from the multi-task setting. All experiments are on the
unperturbed NIST data, using validation error for model selection.
Relative improvement is $1 - (\text{single-task error})/(\text{multi-task error})$.}
\label{tab:multi-task}
\begin{center}
\begin{tabular}{|l|r|r|r|} \hline
& single-task & multi-task & relative \\
& setting & setting & improvement \\ \hline
MLP-digits & 3.77\% & 3.99\% & 5.6\% \\ \hline
MLP-lower & 17.4\% & 16.8\% & -4.1\% \\ \hline
MLP-upper & 7.84\% & 7.54\% & -3.6\% \\ \hline
SDA-digits & 2.6\% & 3.56\% & 27\% \\ \hline
SDA-lower & 12.3\% & 14.4\% & 15\% \\ \hline
SDA-upper & 5.93\% & 6.78\% & 13\% \\ \hline
\end{tabular}
\end{center}
\end{table}
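
As a worked instance of the relative-improvement formula in the caption, for SDA-digits,
\begin{equation*}
1 - \frac{\text{single-task error}}{\text{multi-task error}} \;=\; 1 - \frac{2.6\%}{3.56\%} \;\approx\; 0.27 \;=\; 27\%,
\end{equation*}
matching the last column of Table~\ref{tab:multi-task}.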

{\small
\bibliography{strings,ml,aigaion,specials}
%\bibliographystyle{plainnat}
\bibliographystyle{unsrtnat}
%\bibliographystyle{apalike}
}

\end{document}