# HG changeset patch # User Yoshua Bengio # Date 1272887178 14400 # Node ID a6d339033d03e4e70757b3980e02bec49e57f579 # Parent 479f2f518fc954f25eb36895d108c6527ee5f61f added AMT diff -r 479f2f518fc9 -r a6d339033d03 writeup/ml.bib --- a/writeup/ml.bib Mon May 03 06:17:54 2010 -0400 +++ b/writeup/ml.bib Mon May 03 07:46:18 2010 -0400 @@ -25727,3 +25727,27 @@ } +@inproceedings{SnowEtAl2008, + author = {Snow, R. and O'Connor, B. and Jurafsky, D. and Ng, A.}, + booktitle = {Proc. Empirical Methods in NLP}, + pages = {254--263}, + title = {Cheap and Fast -- But is it Good? Evaluating Non-Expert Annotations for Natural Language Tasks}, + year = {2008} +} + + +@inproceedings{SorokinAndForsyth2008, + author = {Sorokin, A. and Forsyth, D.}, + booktitle = {CVPR Workshops}, + pages = {1--8}, + title = {Utility data annotation with {Amazon Mechanical Turk}}, + year = {2008} +} + +@inproceedings{whitehill09, + title = {Whose Vote Should Count More: Optimal Integration of Labels from Labelers of Unknown Expertise}, + author = {Whitehill, J. and Ruvolo, P. and Wu, T. and Bergsma, J. and Movellan, J.}, + booktitle = {NIPS 22}, + pages = {2035--2043}, + year = {2009} +} diff -r 479f2f518fc9 -r a6d339033d03 writeup/techreport.tex --- a/writeup/techreport.tex Mon May 03 06:17:54 2010 -0400 +++ b/writeup/techreport.tex Mon May 03 07:46:18 2010 -0400 @@ -31,7 +31,10 @@ We find that the SDA outperforms its shallow counterpart, an ordinary Multi-Layer Perceptron, and that it is better able to take advantage of the additional -generated data. +generated data, as well as better able to take advantage of +training from more classes than those of interest in the end. +In fact, we find that the SDA reaches human performance as +estimated via Amazon Mechanical Turk on the NIST test characters. 
\end{abstract} \section{Introduction} @@ -325,16 +328,36 @@ \section{Experimental Results} -\subsection{SDA vs MLP} +\subsection{SDA vs MLP vs Humans} +We compare here the best MLP (according to validation set error) that we found against +the best SDA (again according to validation set error), along with a precise estimate +of human performance obtained via Amazon's Mechanical Turk (AMT) +service\footnote{http://mturk.com}. AMT users are paid small amounts +of money to perform tasks for which human intelligence is required. +Mechanical Turk has been used extensively in natural language +processing \cite{SnowEtAl2008} and vision +\cite{SorokinAndForsyth2008,whitehill09}. AMT users were presented +with 10 character images and asked to type 10 corresponding ASCII +characters. Hence they were forced to make a hard choice among the +62 character classes. Three users classified each image, allowing us +to estimate inter-human variability (shown as +/- in parentheses below). + +\begin{table} +\caption{Overall comparison of error rates on 62 character classes (10 digits + +26 lower + 26 upper), except for the last column -- digits only, between deep architecture with pre-training +(SDA=Stacked Denoising Autoencoder), ordinary shallow architecture +(MLP=Multi-Layer Perceptron), and the human error rate estimated via Amazon Mechanical Turk. } +\label{tab:sda-vs-mlp-vs-humans} \begin{center} -\begin{tabular}{lcc} - & train w/ & train w/ \\ - & NIST & P07 + NIST \\ \hline -SDA & & \\ \hline -MLP & & \\ \hline +\begin{tabular}{|l|r|r|r|r|} \hline + & NIST test & NISTP test & P07 test & NIST test digits \\ \hline +Humans& & & & \\ \hline +SDA & & & &\\ \hline +MLP & & & & \\ \hline \end{tabular} \end{center} +\end{table} \subsection{Perturbed Training Data More Helpful for SDAE}