changeset 438:a6d339033d03
added AMT
author    Yoshua Bengio <bengioy@iro.umontreal.ca>
date      Mon, 03 May 2010 07:46:18 -0400
parents   479f2f518fc9
children  5ca2936f2062
files     writeup/ml.bib writeup/techreport.tex
diffstat  2 files changed, 54 insertions(+), 7 deletions(-)
--- a/writeup/ml.bib	Mon May 03 06:17:54 2010 -0400
+++ b/writeup/ml.bib	Mon May 03 07:46:18 2010 -0400
@@ -25727,3 +25727,27 @@
 }
+@inproceedings{SnowEtAl2008,
+    author = {Snow, R. and O'Connor, B. and Jurafsky, D. and Ng, A.},
+    booktitle = {Proc. Empirical Methods in NLP},
+    pages = {254--263},
+    title = {Cheap and Fast -- But is it Good? Evaluating Non-Expert Annotations for Natural Language Tasks},
+    year = {2008}
+}
+
+
+@inproceedings{SorokinAndForsyth2008,
+    author = {Sorokin, A. and Forsyth, D.},
+    booktitle = {CVPR Workshops},
+    pages = {1--8},
+    title = {Utility data annotation with Amazon Mechanical Turk},
+    year = {2008}
+}
+
+@inproceedings{whitehill09,
+    title = {Whose Vote Should Count More: Optimal Integration of Labels from Labelers of Unknown Expertise},
+    author = {J. Whitehill and P. Ruvolo and T. Wu and J. Bergsma and J. Movellan},
+    booktitle = {NIPS 22},
+    pages = {2035--2043},
+    year = 2009
+}
--- a/writeup/techreport.tex	Mon May 03 06:17:54 2010 -0400
+++ b/writeup/techreport.tex	Mon May 03 07:46:18 2010 -0400
@@ -31,7 +31,10 @@
 We find that the SDA outperforms its shallow counterpart,
 an ordinary Multi-Layer Perceptron,
 and that it is better able to take advantage of the additional
-generated data.
+generated data, as well as better able to take advantage of
+training on more classes than those of interest in the end.
+In fact, we find that the SDA reaches human performance, as
+estimated via Amazon Mechanical Turk, on the NIST test characters.
 \end{abstract}

 \section{Introduction}
@@ -325,16 +328,36 @@

 \section{Experimental Results}

-\subsection{SDA vs MLP}
+\subsection{SDA vs MLP vs Humans}
+We compare here the best MLP (according to validation set error) against
+the best SDA (again according to validation set error), along with an estimate
+of human performance obtained via Amazon's Mechanical Turk (AMT)
+service\footnote{http://mturk.com}. AMT users are paid small amounts
+of money to perform tasks for which human intelligence is required.
+Mechanical Turk has been used extensively in natural language
+processing \cite{SnowEtAl2008} and vision
+\cite{SorokinAndForsyth2008,whitehill09}. AMT users were presented
+with 10 character images and asked to type the 10 corresponding ASCII
+characters. Hence they were forced to make a hard choice among the
+62 character classes. Three users classified each image, allowing us
+to estimate inter-human variability (shown as +/- in parentheses below).
+
+\begin{table}
+\caption{Overall comparison of error rates on 62 character classes (10 digits +
+26 lower + 26 upper), except for the last column (digits only), between the deep architecture with pre-training
+(SDA=Stacked Denoising Autoencoder) and the ordinary shallow architecture
+(MLP=Multi-Layer Perceptron).}
+\label{tab:sda-vs-mlp-vs-humans}
 \begin{center}
-\begin{tabular}{lcc}
-      & train w/  & train w/   \\
-      & NIST      & P07 + NIST \\ \hline
-SDA   &           &            \\ \hline
-MLP   &           &            \\ \hline
+\begin{tabular}{|l|r|r|r|r|} \hline
+      & NIST test & NISTP test & P07 test & NIST test digits \\ \hline
+Humans&           &            &          &                  \\ \hline
+SDA   &           &            &          &                  \\ \hline
+MLP   &           &            &          &                  \\ \hline
 \end{tabular}
 \end{center}
+\end{table}

 \subsection{Perturbed Training Data More Helpful for SDAE}
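The new subsection describes the labeling protocol: each NIST test image receives three AMT labels over the 62 classes, the mean disagreement with the true class gives the human error rate, and the spread across labelers is reported as the +/- inter-human variability. As a rough, hypothetical illustration only (this is not code from this repository, and the function and variable names are made up), such an estimate could be computed along these lines in Python:

# Hypothetical sketch, not code from this repository: given the true class of
# each test image and the three AMT labels collected for it, estimate the mean
# human error rate and the +/- spread across labelers reported in the table.

def human_error_estimate(true_labels, amt_labels):
    """true_labels: list of N class identifiers.
    amt_labels: list of N tuples, one label per AMT worker slot.
    Returns (mean error rate, +/- spread across labelers)."""
    n_raters = len(amt_labels[0])
    n_images = len(true_labels)
    per_rater = []
    for r in range(n_raters):
        # Error rate of labeling pass r taken on its own.
        errors = sum(1 for truth, votes in zip(true_labels, amt_labels)
                     if votes[r] != truth)
        per_rater.append(errors / float(n_images))
    mean = sum(per_rater) / n_raters
    # Sample standard deviation across the raters: the +/- variability.
    spread = (sum((e - mean) ** 2 for e in per_rater) / (n_raters - 1)) ** 0.5
    return mean, spread


if __name__ == "__main__":
    # Toy example: three images, classes drawn from the 62 NIST classes.
    truth = ["a", "B", "3"]
    votes = [("a", "a", "o"), ("B", "8", "B"), ("3", "3", "3")]
    err, pm = human_error_estimate(truth, votes)
    print("human error: %.1f%% +/- %.1f%%" % (100 * err, 100 * pm))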