annotate writeup/contributions.tex @ 626:75dbbe409578

Added code for deep mlp, experiment code to go along with it. Also added code I used to filter the P07 / PNIST07 datasets to keep only digits.
author fsavard
date Wed, 16 Mar 2011 13:43:32 -0400
parents f5a198b2854a
children
rev   line source
586
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
1 \documentclass{article} % For LaTeX2e
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
2 \usepackage{times}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
3 \usepackage{wrapfig}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
4 \usepackage{amsthm,amsmath,bbm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
5 \usepackage[psamsfonts]{amssymb}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
6 \usepackage{algorithm,algorithmic}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
7 \usepackage[utf8]{inputenc}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
8 \usepackage{graphicx,subfigure}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
9 \usepackage[numbers]{natbib}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
10
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
11 \addtolength{\textwidth}{10mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
12 \addtolength{\evensidemargin}{-5mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
13 \addtolength{\oddsidemargin}{-5mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
14
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
15 %\setlength\parindent{0mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
16
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
17 \begin{document}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
18
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
19 \begin{center}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
20 {\Large Deep Self-Taught Learning for Handwritten Character Recognition}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
21
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
22 {\bf \large Information on Main Contributions}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
23 \end{center}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
24
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
25 \setlength{\parindent}{0cm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
26
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
27 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
28 \section*{Background and Related Contributions}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
29 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
30 %{\large \bf Background and Related Contributions}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
31
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
32 Recent theoretical and empirical work in statistical machine learning has
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
33 demonstrated the potential of learning algorithms for {\bf deep
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
34 architectures}, i.e., function classes obtained by composing multiple
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
35 levels of representation
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
36 \citep{Hinton06,ranzato-07-small,Bengio-nips-2006,VincentPLarochelleH2008,ranzato-08,Larochelle-jmlr-2009,Salakhutdinov+Hinton-2009,HonglakL2009,HonglakLNIPS2009,Jarrett-ICCV2009,Taylor-cvpr-2010}.
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
37 See~\citet{Bengio-2009} for a review of deep learning algorithms.
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
38
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
39 {\bf Self-taught learning}~\citep{RainaR2007} is a paradigm that combines
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
40 principles of semi-supervised and multi-task learning: the learner can
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
41 exploit examples that are unlabeled and possibly come from a distribution
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
42 different from the target distribution, e.g., from other classes than those
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
43 of interest. Self-taught learning has already been applied to deep
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
44 learners, but mostly to show the advantage of unlabeled
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
45 examples~\citep{Bengio-2009,WestonJ2008-small}.
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
46
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
47 There already are theoretical arguments~\citep{baxter95a} supporting the claim
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
48 that learning an {\bf intermediate representation} shared across tasks can be
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
49 beneficial for multi-task learning. It has also already been argued~\citep{Bengio-2009}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
50 that {\bf multiple levels of representation} can bring a benefit over a single level.
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
51
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
52 %{\large \bf Main Claim}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
53 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
54 \section*{Main Claim}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
55 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
56
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
57 We claim that deep learners, with several levels of representation, can
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
58 benefit more from self-taught learning than shallow learners (with a single
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
59 level), both in the context of the multi-task setting and from {\em
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
60 out-of-distribution examples} in general.
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
61
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
62 %{\large \bf Contribution to Machine Learning}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
63 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
64 \section*{Contribution to Machine Learning}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
65 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
66
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
67 We show evidence for the above claim in a large-scale setting, with
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
68 a training set consisting of hundreds of millions of examples, in the
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
69 context of handwritten character recognition with 62 classes (upper-case,
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
70 lower-case, digits).
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
71
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
72 %{\large \bf Evidence to Support the Claim}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
73 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
74 \section*{Evidence to Support the Claim}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
75 %\vspace*{-2mm}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
76
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
77 In the above experimental setting, we show that {\em deep learners benefited
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
78 significantly more from the multi-task setting than a corresponding shallow
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
79 learner}, and that they benefited more from {\em distorted (out-of-distribution) examples}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
80 (i.e., from a distribution larger than the one from which test examples come).
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
81
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
82 In addition, we show that they {\em beat previously published results} on this task
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
83 (the NIST Special Database 19)
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
84 and {\bf reach human-level performance} on both handwritten digit classification and
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
85 62-class handwritten character recognition.
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
86
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
87 \newpage
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
88
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
89 {\small
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
90 \bibliography{strings,strings-short,strings-shorter,ift6266_ml,specials,aigaion-shorter}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
91 %\bibliographystyle{plainnat}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
92 \bibliographystyle{unsrtnat}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
93 %\bibliographystyle{apalike}
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
94 }
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
95
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
96
f5a198b2854a contributions.tex
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
diff changeset
97 \end{document}