% writeup/nipswp_submission.tex @ 597:5ab605c9a7d9
% NIPS deep learning workshop submission, compressed to 8 pages (boulanni, 14 Oct 2010)
%\documentclass[twoside,11pt]{article} % For LaTeX2e
\documentclass{article} % For LaTeX2e
\usepackage{nips10submit_e}
\usepackage{times}
\usepackage{wrapfig}
\usepackage{amsthm}
\usepackage{amsmath}
\usepackage{bbm}
\usepackage[utf8]{inputenc}
\usepackage[psamsfonts]{amssymb}
%\usepackage{algorithm,algorithmic} % not used after all
\usepackage{graphicx,subfigure}
\usepackage[numbers]{natbib}

\addtolength{\textwidth}{10mm}
\addtolength{\evensidemargin}{-5mm}
\addtolength{\oddsidemargin}{-5mm}

%\setlength\parindent{0mm}

\begin{document}

\title{Deep Self-Taught Learning for Handwritten Character Recognition}
\author{
Yoshua Bengio \and
Frédéric Bastien \and
Arnaud Bergeron \and
Nicolas Boulanger-Lewandowski \and
Thomas Breuel \and
Youssouf Chherawala \and
Moustapha Cisse \and
Myriam Côté \and
Dumitru Erhan \and
Jeremy Eustache \and
Xavier Glorot \and
Xavier Muller \and
Sylvain Pannetier Lebeuf \and
Razvan Pascanu \and
Salah Rifai \and
Francois Savard \and
Guillaume Sicard
}
\date{{\tt bengioy@iro.umontreal.ca}, Dept. IRO, U. Montreal, P.O. Box 6128, Centre-Ville branch, H3C 3J7, Montreal (Qc), Canada}
%\jmlrheading{}{2010}{}{10/2010}{XX/2011}{Yoshua Bengio et al}
%\editor{}

%\makeanontitle
\maketitle

%{\bf Running title: Deep Self-Taught Learning}

\vspace*{-2mm}
\begin{abstract}
Recent theoretical and empirical work in statistical machine learning has demonstrated the potential of learning algorithms for deep architectures, i.e., function classes obtained by composing multiple levels of representation. Self-taught learning (exploiting unlabeled examples or examples from other distributions) has already been applied to deep learners, but mostly to show the advantage of unlabeled examples. Here we explore the advantage brought by {\em out-of-distribution examples}. For this purpose we developed a powerful generator of stochastic variations and noise processes for character images, including not only affine transformations but also slant, local elastic deformations, changes in thickness, background images, grey level changes, contrast, occlusion, and various types of noise. The out-of-distribution examples are obtained from these highly distorted images or by including examples of object classes different from those in the target test set. We show that {\em deep learners benefit more from out-of-distribution examples than a corresponding shallow learner}, at least in a large-scale handwritten character recognition setting. In fact, we show that they {\em beat previously published results and reach human-level performance}.
\end{abstract}
\vspace*{-3mm}

%\begin{keywords}
%Deep learning, self-taught learning, out-of-distribution examples, handwritten character recognition, multi-task learning
%\end{keywords}
%\keywords{self-taught learning \and multi-task learning \and out-of-distribution examples \and deep learning \and handwriting recognition}



\section{Introduction}
\vspace*{-1mm}

{\bf Deep Learning} has emerged as a promising new area of research in
statistical machine learning~\citep{Hinton06,ranzato-07-small,Bengio-nips-2006,VincentPLarochelleH2008,ranzato-08,TaylorHintonICML2009,Larochelle-jmlr-2009,Salakhutdinov+Hinton-2009,HonglakL2009,HonglakLNIPS2009,Jarrett-ICCV2009,Taylor-cvpr-2010}. See \citet{Bengio-2009} for a review.
Learning algorithms for deep architectures are centered on the learning
of useful representations of data, which are better suited to the task at hand,
and are organized in a hierarchy with multiple levels.
This is in part inspired by observations of the mammalian visual cortex,
which consists of a chain of processing elements, each of which is associated with a
different representation of the raw visual input. In fact,
it was found recently that the features learnt in deep architectures resemble
those observed in the first two of these stages (in areas V1 and V2
of the visual cortex) \citep{HonglakL2008}, and that they become more and
more invariant to factors of variation (such as camera movement) in
higher layers~\citep{Goodfellow2009}.
Learning a hierarchy of features increases the
ease and practicality of developing representations that are at once
tailored to specific tasks, yet able to borrow statistical strength
from other related tasks (e.g., modeling different kinds of objects). Finally, learning the
feature representation can lead to higher-level (more abstract, more
general) features that are more robust to unanticipated sources of
variance present in real data.

{\bf Self-taught learning}~\citep{RainaR2007} is a paradigm that combines principles
of semi-supervised and multi-task learning: the learner can exploit examples
that are unlabeled and possibly come from a distribution different from the target
distribution, e.g., from classes other than those of interest.
It has already been shown that deep learners can clearly take advantage of
unsupervised learning and unlabeled examples~\citep{Bengio-2009,WestonJ2008-small},
but more needs to be done to explore the impact
of {\em out-of-distribution} examples and of the {\em multi-task} setting
(one exception is~\citet{CollobertR2008}, which uses a different kind
of learning algorithm). In particular, the {\em relative
advantage of deep learning} for these settings has not been evaluated.
The hypothesis discussed in the conclusion is that in the context of
multi-task learning and the availability of out-of-distribution training examples,
a deep hierarchy of features
may be better able to provide {\em sharing of statistical strength}
between different regions in input space or different tasks, compared to
a shallow learner.

\iffalse
Whereas a deep architecture can in principle be more powerful than a
shallow one in terms of representation, depth appears to render the
training problem more difficult in terms of optimization and local minima.
It is also only recently that successful algorithms were proposed to
overcome some of these difficulties. All are based on unsupervised
learning, often in a greedy layer-wise ``unsupervised pre-training''
stage~\citep{Bengio-2009}. One of these layer initialization techniques,
applied here, is the Denoising
Auto-encoder~(DA)~\citep{VincentPLarochelleH2008-very-small} (see Figure~\ref{fig:da}),
which
performed similarly or better than previously proposed Restricted Boltzmann
Machines in terms of unsupervised extraction of a hierarchy of features
useful for classification. Each layer is trained to denoise its
input, creating a layer of features that can be used as input for the next layer.
\fi

%The principle is that each layer starting from
%the bottom is trained to encode its input (the output of the previous
%layer) and to reconstruct it from a corrupted version. After this
%unsupervised initialization, the stack of DAs can be
%converted into a deep supervised feedforward neural network and fine-tuned by
%stochastic gradient descent.

%
The {\bf main claim} of this paper is that deep learners (with several levels of representation) can
{\bf benefit more from self-taught learning than shallow learners} (with a single
level), both in the context of the multi-task setting and from {\em
out-of-distribution examples} in general. Because we are able to improve on state-of-the-art
performance and reach human-level performance
on a large-scale task, we consider that this paper is also a contribution
toward advancing the application of machine learning to handwritten character recognition.
More precisely, we ask and answer the following questions:

%\begin{enumerate}
$\bullet$ %\item
Do the good results previously obtained with deep architectures on the
MNIST digit images generalize to the setting of a similar but much larger and richer
dataset, the NIST special database 19, with 62 classes and around 800k examples?

$\bullet$ %\item
To what extent does the perturbation of input images (e.g., adding
noise, affine transformations, background images) make the resulting
classifiers better not only on similarly perturbed images but also on
the {\em original clean examples}? We study this question in the
context of the 62-class and 10-class tasks of the NIST special database 19.

$\bullet$ %\item
Do deep architectures {\em benefit {\bf more} from such out-of-distribution}
examples, i.e., do they benefit more from the self-taught learning~\citep{RainaR2007} framework?
We use highly perturbed examples to generate out-of-distribution examples.

$\bullet$ %\item
Similarly, does the feature learning step in deep learning algorithms benefit {\bf more}
from training with moderately {\em different classes} (i.e., a multi-task learning scenario) than
a corresponding shallow and purely supervised architecture?
We train on 62 classes and test on 10 (digits) or 26 (upper-case or lower-case letters)
to answer this question.
%\end{enumerate}

Our experimental results provide positive evidence for all of these questions,
as well as {\em classifiers that reach human-level performance on 62-class isolated character
recognition and beat previously published results on the NIST dataset (special database 19)}.
To achieve these results, we introduce in the next section a sophisticated system
for stochastically transforming character images and then explain the methodology,
which is based on training with or without these transformed images and testing on
clean ones. We measure the relative advantage of out-of-distribution examples
(perturbed or out-of-class)
for a deep learner vs.\ a supervised shallow one.
Code for generating these transformations as well as for the deep learning
algorithms is made available at {\tt http://hg.assembla.com/ift6266}.
We also estimate the relative advantage for deep learners of training with
classes other than those of interest, by comparing learners trained on all
62 classes with learners trained on only a subset (on which they
are then tested).
The conclusion discusses
the more general question of why deep learners may benefit so much from
the self-taught learning framework. Since out-of-distribution data
(perturbed or from other related classes) is very common, this conclusion
is of practical importance.

\vspace*{-3mm}
%\newpage
\section{Perturbed and Transformed Character Images}
\label{s:perturbations}
\vspace*{-2mm}

Figure~\ref{fig:transform} shows the different transformations we used to stochastically
transform $32 \times 32$ source images (such as the one in Fig.~\ref{fig:torig})
in order to obtain data from a larger distribution which
covers a domain substantially larger than the clean characters distribution from
which we start.
Although character transformations have been used before to
improve character recognizers, this effort is on a larger scale, both
in the number of classes and in the complexity of the transformations, and hence
in the complexity of the learning task.
The code for these transformations (mostly Python) is available at
{\tt http://anonymous.url.net}. All the modules in the pipeline share
a global control parameter ($0 \le complexity \le 1$) that allows one to modulate the
amount of deformation or noise introduced.
There are two main parts in the pipeline. The first one,
from slant to pinch below, performs transformations. The second
part, from blur to contrast, adds different kinds of noise.
More details can be found in~\citep{ift6266-tr-anonymous}.
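
To make the shared interface concrete, the following minimal Python/NumPy sketch
(illustrative only: it is not the released code, and the two example modules are
simplified stand-ins) shows how every module consumes the same image and the same
global {\tt complexity} parameter:
{\small
\begin{verbatim}
import numpy as np

def slant(image, complexity, rng):
    # Shear rows horizontally; the maximal shear grows with complexity.
    h, w = image.shape
    shear = rng.uniform(-1.0, 1.0) * complexity
    out = np.empty_like(image)
    for y in range(h):
        out[y] = np.roll(image[y], int(round(shear * (y - h / 2.0))))
    return out

def salt_and_pepper(image, complexity, rng):
    # Flip a fraction of pixels to 0 or 1; the fraction grows with complexity.
    mask = rng.uniform(size=image.shape) < 0.2 * complexity
    noise = (rng.uniform(size=image.shape) > 0.5).astype(image.dtype)
    return np.where(mask, noise, image)

PIPELINE = [slant, salt_and_pepper]   # transformations first, then noise

def perturb(image, rng):
    complexity = rng.uniform(0.0, 1.0)   # shared global control parameter
    for module in PIPELINE:
        image = module(image, complexity, rng)
    return image
\end{verbatim}
}
With this kind of interface, producing an arbitrarily large perturbed dataset amounts to
repeatedly drawing a clean source image and calling {\tt perturb} on it with a random
number generator such as {\tt np.random.RandomState}.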

\begin{figure}[ht]
\centering
\subfigure[Original]{\includegraphics[scale=0.6]{images/Original.png}\label{fig:torig}}
\subfigure[Thickness]{\includegraphics[scale=0.6]{images/Thick_only.png}}
\subfigure[Slant]{\includegraphics[scale=0.6]{images/Slant_only.png}}
\subfigure[Affine Transformation]{\includegraphics[scale=0.6]{images/Affine_only.png}}
\subfigure[Local Elastic Deformation]{\includegraphics[scale=0.6]{images/Localelasticdistorsions_only.png}}
\subfigure[Pinch]{\includegraphics[scale=0.6]{images/Pinch_only.png}}
%Noise
\subfigure[Motion Blur]{\includegraphics[scale=0.6]{images/Motionblur_only.png}}
\subfigure[Occlusion]{\includegraphics[scale=0.6]{images/occlusion_only.png}}
\subfigure[Gaussian Smoothing]{\includegraphics[scale=0.6]{images/Bruitgauss_only.png}}
\subfigure[Pixels Permutation]{\includegraphics[scale=0.6]{images/Permutpixel_only.png}}
\subfigure[Gaussian Noise]{\includegraphics[scale=0.6]{images/Distorsiongauss_only.png}}
\subfigure[Background Image Addition]{\includegraphics[scale=0.6]{images/background_other_only.png}}
\subfigure[Salt \& Pepper]{\includegraphics[scale=0.6]{images/Poivresel_only.png}}
\subfigure[Scratches]{\includegraphics[scale=0.6]{images/Rature_only.png}}
\subfigure[Grey Level \& Contrast]{\includegraphics[scale=0.6]{images/Contrast_only.png}}
\caption{Transformation modules}
\label{fig:transform}
\vspace*{-2mm}
\end{figure}

\vspace*{-3mm}
\section{Experimental Setup}
\vspace*{-1mm}

Much previous work on deep learning has been performed on
the MNIST digits task~\citep{Hinton06,ranzato-07-small,Bengio-nips-2006,Salakhutdinov+Hinton-2009},
with 60~000 examples, and variants involving 10~000
examples~\citep{Larochelle-jmlr-toappear-2008,VincentPLarochelleH2008}.
The focus here is on much larger training sets, from 10 times
to 1000 times larger, and 62 classes.

The first step in constructing the larger datasets (called NISTP and P07) is to sample from
a {\em data source}: {\bf NIST} (NIST database 19), {\bf Fonts}, {\bf Captchas},
and {\bf OCR data} (scanned machine-printed characters). Once a character
is sampled from one of these sources (chosen randomly), the second step is to
apply the pipeline of transformations and/or noise processes described in Section~\ref{s:perturbations}.
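
Schematically, and continuing the sketch above, generating one example boils down to
the following two-step procedure (illustrative only: the per-source sampler below is a
placeholder for the actual source-specific code, and the relative weighting of the
sources is not shown):
{\small
\begin{verbatim}
def sample_source(rng):
    # Placeholder: draw a clean 32x32 character image and its class label
    # from one of the data sources (NIST, Fonts, Captchas, OCR data).
    return rng.uniform(size=(32, 32)), rng.randint(62)

SOURCES = [sample_source] * 4          # one sampler per data source

def generate_example(rng):
    image, label = SOURCES[rng.randint(len(SOURCES))](rng)   # step 1
    return perturb(image, rng), label                        # step 2
\end{verbatim}
}
Calling {\tt generate\_example} repeatedly yields an arbitrarily large perturbed training set.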

To provide a baseline of error rate comparison we also estimate human performance
on both the 62-class task and the 10-class digits task.
We compare the best Multi-Layer Perceptrons (MLP) against
the best Stacked Denoising Auto-encoders (SDA), when
both models' hyper-parameters are selected to minimize the validation set error.
We also provide a comparison against a precise estimate
of human performance obtained via Amazon's Mechanical Turk (AMT)
service ({\tt http://mturk.com}).
AMT users are paid small amounts
of money to perform tasks for which human intelligence is required.
Mechanical Turk has been used extensively in natural language processing and vision.
%processing \citep{SnowEtAl2008} and vision
%\citep{SorokinAndForsyth2008,whitehill09}.
AMT users were presented
with 10 character images (from a test set) and asked to choose the 10 corresponding ASCII
characters. They were forced to choose a single character class (either among the
62 or the 10 character classes) for each image.
Eighty subjects classified 2500 images per (dataset, task) pair.
Different human labelers sometimes provided different labels for the same
example; because each image was classified by 3 different persons, we could
estimate the error variance due to this effect.
The average error of humans on the 62-class task NIST test set
is 18.2\%, with a standard error of 0.1\%.
275 |
\vspace*{-3mm}
\subsection{Data Sources}
\vspace*{-2mm}

%\begin{itemize}
%\item
{\bf NIST.}
Our main source of characters is the NIST Special Database 19~\citep{Grother-1995},
widely used for training and testing character
recognition systems~\citep{Granger+al-2007,Cortes+al-2000,Oliveira+al-2002-short,Milgram+al-2005}.
The dataset is composed of 814255 digits and characters (upper and lower cases), with hand-checked classifications,
extracted from handwritten sample forms of 3600 writers. The characters are labelled by one of the 62 classes
corresponding to ``0''-``9'', ``A''-``Z'' and ``a''-``z''. The dataset contains 8 parts (partitions) of varying complexity.
The fourth partition (called $hsf_4$, 82587 examples),
experimentally recognized to be the most difficult one, is the one recommended
by NIST as a testing set and is used in our work as well as some previous work~\citep{Granger+al-2007,Cortes+al-2000,Oliveira+al-2002-short,Milgram+al-2005}
for that purpose. We randomly split the remainder (731668 examples) into a training set and a validation set for
model selection.
The performances reported in previous work on that dataset mostly concern only the digits.
Here we use all the classes, both in the training and testing phases. This is especially
useful to estimate the effect of a multi-task setting.
The distribution of the classes in the NIST training and test sets differs
substantially, with relatively many more digits in the test set, and a more uniform distribution
of letters in the test set (whereas in the training set they are distributed
more like in natural text).
%\vspace*{-1mm}

%\item
{\bf Fonts.}
In order to have a good variety of sources we downloaded a large number of free fonts from:
{\tt http://cg.scs.carleton.ca/\textasciitilde luc/freefonts.html}.
% TODO: pointless to anonymize, it's not pointing to our work
Including the operating system's (Windows 7) fonts, there is a total of $9817$ different fonts from which we can choose uniformly.
The chosen {\tt ttf} file is either used as input to the Captcha generator (see next item) or, by producing a corresponding image,
directly as input to our models.
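As an illustration (this is not the project's actual generator; the font path, glyph
position and sizes below are placeholders), a character image can be rendered from a
{\tt ttf} file along the following lines using the PIL library:
\begin{verbatim}
from PIL import Image, ImageDraw, ImageFont

def render_char(ttf_path, char, size=32):
    # Load the TrueType font at a height that roughly fills the image.
    font = ImageFont.truetype(ttf_path, int(size * 0.8))
    img = Image.new("L", (size, size), color=0)    # black background
    draw = ImageDraw.Draw(img)
    draw.text((4, 2), char, fill=255, font=font)   # white glyph
    return img                                     # 32x32 grey-level image

# e.g. img = render_char("some_free_font.ttf", "A")
\end{verbatim}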
\vspace*{-1mm}

%\item
{\bf Captchas.}
The Captcha data source is an adaptation of the \emph{pycaptcha} library (a Python-based captcha generator) for
generating characters of the same format as the NIST dataset. This software is based on
a random character class generator and various kinds of transformations similar to those described in the previous sections.
In order to increase the variability of the generated data, many different fonts are used for generating the characters.
Transformations (slant, distortions, rotation, translation) are applied to each randomly generated character with a complexity
depending on the value of the complexity parameter provided by the user of the data source.
%Two levels of complexity are allowed and can be controlled via an easy to use facade class. %TODO: what's a facade class?
\vspace*{-1mm}

%\item
{\bf OCR data.}
A large set (2 million) of scanned, OCRed and manually verified machine-printed
characters were included as an
additional source. This set is part of a larger corpus being collected by the Image Understanding
Pattern Recognition Research group led by Thomas Breuel at the University of Kaiserslautern
({\tt http://www.iupr.com}), which will be publicly released.
%TODO: let's hope that Thomas is not a reviewer! :) Seriously though, maybe we should anonymize this
%\end{itemize}

\vspace*{-3mm}
\subsection{Data Sets}
\vspace*{-2mm}

All data sets contain 32$\times$32 grey-level images (values in $[0,1]$) associated with a label
from one of the 62 character classes.
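For reference, the 62 classes can be enumerated as follows (a small Python sketch;
the particular index ordering shown here is only an illustration, not necessarily the
one used internally by our code):
\begin{verbatim}
import string

# 62 character classes: digits, upper-case letters, lower-case letters.
CLASSES = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
assert len(CLASSES) == 62
label_of = {c: i for i, c in enumerate(CLASSES)}   # e.g. label_of['A'] == 10
\end{verbatim}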
%\begin{itemize}
\vspace*{-1mm}

%\item
{\bf NIST.} This is the raw NIST special database 19~\citep{Grother-1995}. It has
\{651668 / 80000 / 82587\} \{training / validation / test\} examples.
\vspace*{-1mm}

%\item
{\bf P07.} This dataset is obtained by taking raw characters from all four of the above sources
and sending them through the transformation pipeline described in section \ref{s:perturbations}.
For each new example to generate, a data source is selected with probability $10\%$ from the fonts,
$25\%$ from the captchas, $25\%$ from the OCR data and $40\%$ from NIST. We apply all the transformations in the
order given above, and for each of them we sample uniformly a \emph{complexity} in the range $[0,0.7]$.
It has \{81920000 / 80000 / 20000\} \{training / validation / test\} examples.
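For concreteness, the per-example generation procedure can be sketched as follows
(an illustrative Python sketch, not the actual implementation; the sampler and
transformation callables are placeholders, only the source probabilities and the
complexity range come from the setup described above):
\begin{verbatim}
import random

def generate_example(sources, transformations, max_complexity=0.7):
    # sources: list of (probability, sampler) pairs, e.g.
    #   [(0.10, font_sampler), (0.25, captcha_sampler),
    #    (0.25, ocr_sampler), (0.40, nist_sampler)]
    # transformations: callables applied in a fixed order.
    probs, samplers = zip(*sources)
    image, label = random.choices(samplers, weights=probs)[0]()
    # Each transformation gets its own complexity drawn
    # uniformly in [0, max_complexity] (0.7 for P07).
    for transform in transformations:
        image = transform(image, random.uniform(0.0, max_complexity))
    return image, label
\end{verbatim}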
\vspace*{-1mm}

%\item
{\bf NISTP.} This one is equivalent to P07 (complexity parameter of $0.7$ with the same proportions of data sources)
except that we only apply
transformations from slant to pinch. Therefore, the character is
transformed but no additional noise is added to the image, giving images
closer to the NIST dataset.
It has \{81920000 / 80000 / 20000\} \{training / validation / test\} examples.
%\end{itemize}
\vspace*{-3mm}
\subsection{Models and their Hyperparameters}
\vspace*{-2mm}

The experiments are performed using MLPs (with a single
hidden layer) and SDAs.
\emph{Hyper-parameters are selected based on the {\bf NISTP} validation set error.}

{\bf Multi-Layer Perceptrons (MLP).}
Whereas previous work had compared deep architectures to both shallow MLPs and
SVMs, we only compared to MLPs here because of the very large datasets used
(making the use of SVMs computationally challenging because of their quadratic
scaling behavior). Preliminary experiments training SVMs (libSVM) on subsets of the training
set small enough to fit in memory yielded substantially worse results
than those obtained with MLPs. For training on nearly a billion examples
(with the perturbed data), MLPs and SDAs are much more convenient than
classifiers based on kernel methods.
The MLP has a single hidden layer with $\tanh$ activation functions, and softmax (normalized
exponentials) on the output layer for estimating $P(class | image)$.
The number of hidden units is taken in $\{300,500,800,1000,1500\}$.
Training examples are presented in minibatches of size 20. A constant learning
rate was chosen among $\{0.001, 0.01, 0.025, 0.075, 0.1, 0.5\}$.
%through preliminary experiments (measuring performance on a validation set),
%and $0.1$ (which was found to work best) was then selected for optimizing on
%the whole training sets.
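Explicitly, writing $W,b$ for the hidden-layer parameters and $V,c$ for the output-layer
parameters (notation introduced here only for the sake of this equation), the MLP computes
\begin{equation*}
h = \tanh(W x + b), \qquad
P(\mathrm{class}=i \,|\, x) = \frac{e^{(V h + c)_i}}{\sum_{j=1}^{62} e^{(V h + c)_j}},
\end{equation*}
and the parameters are tuned by stochastic gradient descent on the resulting
classification criterion (the usual negative log-likelihood of the correct class for a
softmax output), using the minibatches and constant learning rates described above.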
\vspace*{-1mm}

{\bf Stacked Denoising Auto-Encoders (SDA).}
Various auto-encoder variants and Restricted Boltzmann Machines (RBMs)
can be used to initialize the weights of each layer of a deep MLP (with many hidden
layers)~\citep{Hinton06,ranzato-07-small,Bengio-nips-2006},
apparently setting the parameters in a
basin of attraction of supervised gradient descent that yields better
generalization~\citep{Erhan+al-2010}. This initial {\em unsupervised
pre-training phase} uses all of the training images but not the training labels.
Each layer is trained in turn to produce a new representation of its input
(starting from the raw pixels).
It is hypothesized that the
advantage brought by this procedure stems from a better prior,
on the one hand taking advantage of the link between the input
distribution $P(x)$ and the conditional distribution of interest
$P(y|x)$ (as in semi-supervised learning), and on the other hand
taking advantage of the expressive power and bias implicit in the
deep architecture (whereby complex concepts are expressed as
compositions of simpler ones through a deep hierarchy).

\begin{figure}[ht]
\vspace*{-2mm}
\centerline{\resizebox{0.8\textwidth}{!}{\includegraphics{images/denoising_autoencoder_small.pdf}}}
\vspace*{-2mm}
\caption{Illustration of the computations and training criterion for the denoising
auto-encoder used to pre-train each layer of the deep architecture. Input $x$ of
the layer (i.e. raw input or output of previous layer)
is corrupted into $\tilde{x}$ and encoded into code $y$ by the encoder $f_\theta(\cdot)$.
The decoder $g_{\theta'}(\cdot)$ maps $y$ to reconstruction $z$, which
is compared to the uncorrupted input $x$ through the loss function
$L_H(x,z)$, whose expected value is approximately minimized during training
by tuning $\theta$ and $\theta'$.}
\label{fig:da}
\vspace*{-2mm}
\end{figure}

Here we chose to use the Denoising
Auto-encoder~\citep{VincentPLarochelleH2008} as the building block for
these deep hierarchies of features, as it is simple to train and
explain (see Figure~\ref{fig:da}, as well as the
tutorial and code at {\tt http://deeplearning.net/tutorial}),
provides efficient inference, and yielded results
comparable to or better than those of RBMs in a series of experiments
\citep{VincentPLarochelleH2008}. During training, a Denoising
Auto-encoder is presented with a stochastically corrupted version
of the input and trained to reconstruct the uncorrupted input,
forcing the hidden units to represent the leading regularities in
the data. Here we use the random binary masking corruption
(which sets to 0 a random subset of the inputs).
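In equations, following the notation of Figure~\ref{fig:da} (the sigmoid activations and
cross-entropy reconstruction loss written below are the standard choices from
\citep{VincentPLarochelleH2008}, spelled out here for completeness):
\begin{align*}
\tilde{x} &\sim q(\tilde{x}|x) \;\; \text{(masking corruption: a random subset of the entries of $x$ is set to 0)},\\
y &= f_\theta(\tilde{x}) = \mathrm{sigm}(W \tilde{x} + b),\qquad
z = g_{\theta'}(y) = \mathrm{sigm}(W' y + b'),\\
L_H(x,z) &= -\textstyle\sum_k \left[ x_k \log z_k + (1-x_k)\log(1-z_k) \right].
\end{align*}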
Once it is trained, in a purely unsupervised way,
its hidden units' activations can
be used as inputs for training a second one, etc.
After this unsupervised pre-training stage, the parameters
are used to initialize a deep MLP, which is fine-tuned by
the same standard procedure used to train MLPs (see previous section).
The SDA hyper-parameters are the same as for the MLP, with the addition of the
amount of corruption noise (we used the masking noise process, whereby a
fixed proportion of the input values, randomly selected, are zeroed), and a
separate learning rate for the unsupervised pre-training stage (selected
from the same set as above). The fraction of inputs corrupted was selected
among $\{10\%, 20\%, 50\%\}$. Another hyper-parameter is the number
of hidden layers, but it was fixed to 3 based on previous work with
SDAs on MNIST~\citep{VincentPLarochelleH2008}. The size of the hidden
layers was kept constant across layers, and the best results
were obtained with the largest value that we could experiment
with given our patience, namely 1000 hidden units.
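The resulting greedy layer-wise procedure can be sketched as follows (an illustrative
NumPy sketch, not the GPU implementation used for the experiments; initialization,
learning rate and number of epochs below are placeholders):
\begin{verbatim}
import numpy as np

def sigm(a):
    return 1.0 / (1.0 + np.exp(-a))

def pretrain_dae_layer(X, n_hidden, corruption=0.2, lr=0.01, epochs=1,
                       rng=np.random):
    # X: (n_examples, n_in) inputs in [0, 1].
    # Trains one denoising auto-encoder by single-example gradient descent
    # on the cross-entropy reconstruction loss, with masking corruption.
    n_in = X.shape[1]
    W  = rng.uniform(-0.1, 0.1, (n_hidden, n_in)); b  = np.zeros(n_hidden)
    Wp = rng.uniform(-0.1, 0.1, (n_in, n_hidden)); bp = np.zeros(n_in)
    for _ in range(epochs):
        for x in X:
            x_tilde = x * (rng.rand(n_in) >= corruption)  # masking noise
            y = sigm(W @ x_tilde + b)                     # encoder
            z = sigm(Wp @ y + bp)                         # reconstruction
            dz = z - x                          # grad. wrt decoder pre-activation
            dy = (Wp.T @ dz) * y * (1.0 - y)    # grad. wrt encoder pre-activation
            Wp -= lr * np.outer(dz, y); bp -= lr * dz
            W  -= lr * np.outer(dy, x_tilde); b -= lr * dy
    H = sigm(X @ W.T + b)   # codes fed as input to the next layer
    return (W, b), H

# Stacking: each layer is trained on the codes of the previous one,
# then all (W, b) pairs initialize the hidden layers of a deep MLP.
# H = X_train
# for _ in range(3):
#     (W, b), H = pretrain_dae_layer(H, n_hidden=1000)
\end{verbatim}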
\vspace*{-1mm}

\begin{figure}[ht]
%\vspace*{-2mm}
\centerline{\resizebox{.99\textwidth}{!}{\includegraphics{images/error_rates_charts.pdf}}}
%\vspace*{-3mm}
\caption{SDAx are the {\bf deep} models. Error bars indicate a 95\% confidence interval. 0 indicates that the model was trained
on NIST, 1 on NISTP, and 2 on P07. Left: overall results
of all models, on NIST and NISTP test sets.
Right: error rates on NIST test digits only, along with the previous results from the
literature~\citep{Granger+al-2007,Cortes+al-2000,Oliveira+al-2002-short,Milgram+al-2005}
respectively based on ART, nearest neighbors, MLPs, and SVMs.}
\label{fig:error-rates-charts}
\vspace*{-2mm}
\end{figure}
\begin{figure}[ht]
\vspace*{-3mm}
\centerline{\resizebox{.99\textwidth}{!}{\includegraphics{images/improvements_charts.pdf}}}
\vspace*{-3mm}
\caption{Relative improvement in error rate due to self-taught learning.
Left: Improvement (or loss, when negative)
induced by out-of-distribution examples (perturbed data).
Right: Improvement (or loss, when negative) induced by multi-task
learning (training on all classes and testing only on either digits,
upper case, or lower case). The deep learner (SDA) benefits more from
both self-taught learning scenarios, compared to the shallow MLP.}
\label{fig:improvements-charts}
\vspace*{-2mm}
\end{figure}
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
492 \section{Experimental Results} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
493 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
494 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
495 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
496 %\subsection{SDA vs MLP vs Humans} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
497 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
498 The models are either trained on NIST (MLP0 and SDA0), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
499 NISTP (MLP1 and SDA1), or P07 (MLP2 and SDA2), and tested |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
500 on either NIST, NISTP or P07, either on the 62-class task |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
501 or on the 10-digits task. Training (including about half |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
502 for unsupervised pre-training, for DAs) on the larger |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
503 datasets takes around one day on a GPU-285. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
504 Figure~\ref{fig:error-rates-charts} summarizes the results obtained, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
505 comparing humans, the three MLPs (MLP0, MLP1, MLP2) and the three SDAs (SDA0, SDA1, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
506 SDA2), along with the previous results on the digits NIST special database |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
507 19 test set from the literature, respectively based on ARTMAP neural |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
508 networks ~\citep{Granger+al-2007}, fast nearest-neighbor search |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
509 ~\citep{Cortes+al-2000}, MLPs ~\citep{Oliveira+al-2002-short}, and SVMs |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
510 ~\citep{Milgram+al-2005}.% More detailed and complete numerical results |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
511 %(figures and tables, including standard errors on the error rates) can be |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
512 %found in Appendix. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
513 The deep learner not only outperformed the shallow ones and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
514 previously published performance (in a statistically and qualitatively |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
515 significant way) but when trained with perturbed data |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
516 reaches human performance on both the 62-class task |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
517 and the 10-class (digits) task. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
518 17\% error (SDA1) or 18\% error (humans) may seem large but a large |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
519 majority of the errors from humans and from SDA1 are from out-of-context |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
520 confusions (e.g. a vertical bar can be a ``1'', an ``l'' or an ``L'', and a |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
521 ``c'' and a ``C'' are often indistinguishible). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
522 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
In addition, as shown in the left of
Figure~\ref{fig:improvements-charts}, the relative improvement in error
rate brought by self-taught learning is greater for the SDA, and these
differences with the MLP are statistically and qualitatively
significant.
The left side of the figure shows the improvement to the clean
NIST test set error brought by the use of out-of-distribution examples
(i.e. the perturbed examples from NISTP or P07).
Relative percent change is measured by taking
$100 \% \times$ (original model's error / perturbed-data model's error - 1).
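As a worked example of this computation, a clean NIST test error that drops
from 23.7\% to 17.1\% when the model is trained on perturbed data corresponds
to a relative improvement of $100\% \times (23.7/17.1 - 1) \approx 38\%$
(numbers chosen only to illustrate the computation).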
The right side of
Figure~\ref{fig:improvements-charts} shows the relative improvement
brought by the use of a multi-task setting, in which the same model is
trained for more classes than the target classes of interest (i.e. training
with all 62 classes when the target classes are respectively the digits,
lower-case, or upper-case characters). Again, whereas the gain from the
multi-task setting is marginal or negative for the MLP, it is substantial
for the SDA. Note that to simplify these multi-task experiments, only the original
NIST dataset is used. For example, the MLP-digits bar shows that the relative
percent improvement in MLP error rate on the NIST digits test set
is $100\% \times$ (single-task
model's error / multi-task model's error - 1). The single-task model is
trained with only 10 outputs (one per digit), seeing only digit examples,
whereas the multi-task model is trained with 62 outputs, with all 62
character classes as examples. Hence the hidden units are shared across
all tasks. For the multi-task model, the digit error rate is measured by
comparing the correct digit class with the output class associated with the
maximum conditional probability among the digit class outputs only. The
setting is similar for the other two target classes (lower-case characters
and upper-case characters).
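As an illustrative sketch only (not the code used in our experiments), this
restricted-argmax evaluation and the relative-change computation of
Figure~\ref{fig:improvements-charts} can be written as follows, assuming the
model returns, for each example, a vector of 62 conditional probabilities with
the 10 digit classes in the first positions:
{\small
\begin{verbatim}
import numpy as np

def digit_error_rate(probs, true_digits):
    # probs: (n, 62) conditional probabilities from the multi-task model;
    # columns 0-9 are assumed to hold the digit classes.
    # The prediction is the argmax restricted to the digit outputs only.
    pred = np.argmax(probs[:, :10], axis=1)
    return np.mean(pred != true_digits)

def relative_change(error_a, error_b):
    # 100% x (error_a / error_b - 1), the relative percent change
    # plotted as bars in the figure.
    return 100.0 * (error_a / error_b - 1.0)
\end{verbatim}
}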
%%\vspace*{-1mm}
%\subsection{Perturbed Training Data More Helpful for SDA}
%%\vspace*{-1mm}

%%\vspace*{-1mm}
%\subsection{Multi-Task Learning Effects}
%%\vspace*{-1mm}

\iffalse
As previously seen, the SDA is better able to benefit from the
transformations applied to the data than the MLP. In this experiment we
define three tasks: recognizing digits (knowing that the input is a digit),
recognizing upper case characters (knowing that the input is one), and
recognizing lower case characters (knowing that the input is one). We
consider the digit classification task as the target task and we want to
evaluate whether training with the other tasks can help or hurt, and
whether the effect is different for MLPs versus SDAs. The goal is to find
out if deep learning can benefit more (or less) from multiple related tasks
(i.e. the multi-task setting) compared to a corresponding purely supervised
shallow learner.

We use a single hidden layer MLP with 1000 hidden units, and an SDA
with 3 hidden layers (1000 hidden units per layer), pre-trained and
fine-tuned on NIST.

Our results show that the MLP benefits marginally from the multi-task setting
in the case of digits (5\% relative improvement) but is actually hurt in the case
of characters (respectively 3\% and 4\% worse for lower-case and upper-case characters).
On the other hand the SDA benefited from the multi-task setting, with relative
error rate improvements of 27\%, 15\% and 13\% respectively for digits,
lower-case and upper-case characters, as shown in Table~\ref{tab:multi-task}.
\fi

\vspace*{-2mm}
\section{Conclusions and Discussion}
\vspace*{-2mm}

We have found that the self-taught learning framework is more beneficial
to a deep learner than to a traditional shallow and purely
supervised learner. More precisely,
the answers are positive for all the questions asked in the introduction.
%\begin{itemize}

$\bullet$ %\item
{\bf Do the good results previously obtained with deep architectures on the
MNIST digits generalize to a much larger and richer (but similar)
dataset, the NIST special database 19, with 62 classes and around 800k examples}?
Yes, the SDA {\em systematically outperformed the MLP and all the previously
published results on this dataset} (the ones that we are aware of), {\em in fact reaching human-level
performance} at around 17\% error on the 62-class task and 1.4\% on the digits.

$\bullet$ %\item
{\bf To what extent do self-taught learning scenarios help deep learners,
and do they help them more than shallow supervised ones}?
We found that distorted training examples not only made the resulting
classifier better on similarly perturbed images but also on
the {\em original clean examples}, and, more importantly (and this is the
more novel finding), that deep architectures benefit more from such
{\em out-of-distribution} examples.
MLPs were helped by perturbed training examples when tested on perturbed input
images (65\% relative improvement on NISTP),
but were only marginally helped (5\% relative improvement on all classes)
or even hurt (10\% relative loss on digits)
with respect to clean examples. On the other hand, the deep SDAs
were significantly boosted by these out-of-distribution examples.
Similarly, whereas the improvement due to the multi-task setting was marginal or
negative for the MLP (from +5.6\% to -3.6\% relative change),
it was quite significant for the SDA (from +13\% to +27\% relative change),
which may be explained by the arguments below.
%\end{itemize}

In the original self-taught learning framework~\citep{RainaR2007}, the
out-of-sample examples were used as a source of unsupervised data, and
experiments showed its positive effects in a \emph{limited labeled data}
scenario. However, many of the results by \citet{RainaR2007} (who used a
shallow, sparse coding approach) suggest that the {\em relative gain of self-taught
learning vs ordinary supervised learning} diminishes as the number of labeled examples increases.
We note instead that, for deep
architectures, our experiments show that such a positive effect is accomplished
even in a scenario with a \emph{large number of labeled examples},
i.e., here, the relative gain of self-taught learning is probably preserved
in the asymptotic regime.

{\bf Why would deep learners benefit more from the self-taught learning framework}?
The key idea is that the lower layers of the predictor compute a hierarchy
of features that can be shared across tasks or across variants of the
input distribution. A theoretical analysis of generalization improvements
due to sharing of intermediate features across tasks already points
towards that explanation~\cite{baxter95a}.
Intermediate features that can be used in different
contexts can be estimated in a way that allows sharing of statistical
strength. Features extracted through many levels are more likely to
be more abstract and more invariant to some of the factors of variation
in the underlying distribution (as the experiments in~\citet{Goodfellow2009} suggest),
increasing the likelihood that they would be useful for a larger array
of tasks and input conditions.
Therefore, we hypothesize that both depth and unsupervised
pre-training play a part in explaining the advantages observed here, and future
experiments could attempt to tease apart these factors.
And why would deep learners benefit from the self-taught learning
scenarios even when the number of labeled examples is very large?
We hypothesize that this is related to the hypotheses studied
in~\citet{Erhan+al-2010}, where it was found that online learning on a huge
dataset did not make the advantage of the deep learning bias vanish,
and a similar phenomenon
may be happening here. We hypothesize that unsupervised pre-training
of a deep hierarchy with self-taught learning initializes the
model in the basin of attraction of supervised gradient descent
that corresponds to better generalization. Furthermore, such good
basins of attraction are not discovered by pure supervised learning
(with or without self-taught settings) from random initialization, and more labeled examples
do not allow the shallow or purely supervised models to discover
the kind of better basins associated
with deep learning and self-taught learning.

A Flash demo of the recognizer (where both the MLP and the SDA can be compared)
can be executed on-line at {\tt http://deep.host22.com}.

\iffalse
\section*{Appendix I: Detailed Numerical Results}

These tables correspond to Figures 2 and 3 and contain the raw error rates for each model and dataset considered.
They also contain additional data such as test errors on P07 and standard errors.

\begin{table}[ht]
\caption{Overall comparison of error rates ($\pm$ std.err.) on 62 character classes (10 digits +
26 lower + 26 upper), except for the last column -- digits only, between a deep architecture with pre-training
(SDA=Stacked Denoising Autoencoder) and an ordinary shallow architecture
(MLP=Multi-Layer Perceptron). The models shown were trained on either the original NIST data
or perturbed data (NISTP or P07),
using a validation set to select hyper-parameters and other training choices.
\{SDA,MLP\}0 are trained on NIST,
\{SDA,MLP\}1 are trained on NISTP, and \{SDA,MLP\}2 are trained on P07.
The human error rate on digits is a lower bound because it does not count digits that were
recognized as letters. For comparison, the results found in the literature
on NIST digits classification using the same test set are included.}
\label{tab:sda-vs-mlp-vs-humans}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
Humans & 18.2\% $\pm$.1\% & 39.4\%$\pm$.1\% & 46.9\%$\pm$.1\% & $1.4\%$ \\ \hline
SDA0 & 23.7\% $\pm$.14\% & 65.2\%$\pm$.34\% & 97.45\%$\pm$.06\% & 2.7\% $\pm$.14\%\\ \hline
SDA1 & 17.1\% $\pm$.13\% & 29.7\%$\pm$.3\% & 29.7\%$\pm$.3\% & 1.4\% $\pm$.1\%\\ \hline
SDA2 & 18.7\% $\pm$.13\% & 33.6\%$\pm$.3\% & 39.9\%$\pm$.17\% & 1.7\% $\pm$.1\%\\ \hline
MLP0 & 24.2\% $\pm$.15\% & 68.8\%$\pm$.33\% & 78.70\%$\pm$.14\% & 3.45\% $\pm$.15\% \\ \hline
MLP1 & 23.0\% $\pm$.15\% & 41.8\%$\pm$.35\% & 90.4\%$\pm$.1\% & 3.85\% $\pm$.16\% \\ \hline
MLP2 & 24.3\% $\pm$.15\% & 46.0\%$\pm$.35\% & 54.7\%$\pm$.17\% & 4.85\% $\pm$.18\% \\ \hline
\citep{Granger+al-2007} & & & & 4.95\% $\pm$.18\% \\ \hline
\citep{Cortes+al-2000} & & & & 3.71\% $\pm$.16\% \\ \hline
\citep{Oliveira+al-2002} & & & & 2.4\% $\pm$.13\% \\ \hline
\citep{Milgram+al-2005} & & & & 2.1\% $\pm$.12\% \\ \hline
\end{tabular}
\end{center}
\end{table}

\begin{table}[ht]
\caption{Relative change in error rates due to the use of perturbed training data,
either using NISTP, for the MLP1/SDA1 models, or using P07, for the MLP2/SDA2 models.
A positive value indicates that training on the perturbed data helped for the
given test set (the first 3 columns are on the 62-class task and the last one
on the clean 10-class digits). Clearly, the deep learning models did benefit more
from perturbed training data, even when testing on clean data, whereas the MLP
trained on perturbed data performed worse on the clean digits and about the same
on the clean characters.}
\label{tab:perturbation-effect}
\begin{center}
\begin{tabular}{|l|r|r|r|r|} \hline
& NIST test & NISTP test & P07 test & NIST test digits \\ \hline
SDA0/SDA1-1 & 38\% & 84\% & 228\% & 93\% \\ \hline
SDA0/SDA2-1 & 27\% & 94\% & 144\% & 59\% \\ \hline
MLP0/MLP1-1 & 5.2\% & 65\% & -13\% & -10\% \\ \hline
MLP0/MLP2-1 & -0.4\% & 49\% & 44\% & -29\% \\ \hline
\end{tabular}
\end{center}
\end{table}

\begin{table}[ht]
\caption{Test error rates and relative change in error rates due to the use of
a multi-task setting, i.e., training on each task in isolation vs training
for all three tasks together, for MLPs vs SDAs. The SDA benefits much
more from the multi-task setting. All experiments are on the
unperturbed NIST data only, using validation error for model selection.
Relative improvement is 1 - single-task error / multi-task error.}
\label{tab:multi-task}
\begin{center}
\begin{tabular}{|l|r|r|r|} \hline
& single-task & multi-task & relative \\
& setting & setting & improvement \\ \hline
MLP-digits & 3.77\% & 3.99\% & 5.6\% \\ \hline
MLP-lower & 17.4\% & 16.8\% & -4.1\% \\ \hline
MLP-upper & 7.84\% & 7.54\% & -3.6\% \\ \hline
SDA-digits & 2.6\% & 3.56\% & 27\% \\ \hline
SDA-lower & 12.3\% & 14.4\% & 15\% \\ \hline
SDA-upper & 5.93\% & 6.78\% & 13\% \\ \hline
\end{tabular}
\end{center}
\end{table}

\fi

%\afterpage{\clearpage}
%\clearpage
{
%\bibliographystyle{spbasic} % basic style, author-year citations
\bibliographystyle{plainnat}
\bibliography{strings,strings-short,strings-shorter,ift6266_ml,specials,aigaion-shorter}
%\bibliographystyle{unsrtnat}
%\bibliographystyle{apalike}
}

\end{document}