Mercurial > ift6266
annotate writeup/nipswp_submission.tex @ 647:47af8a002530 tip
changed Theano to ift6266 and remove numpy as we do not use code from numpy in this repository
author | Razvan Pascanu <r.pascanu@gmail.com> |
---|---|
date | Wed, 17 Oct 2012 09:26:14 -0400 |
parents | a0fdc1f134da |
children |
rev | line source |
---|---|
597
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
1 %\documentclass[twoside,11pt]{article} % For LaTeX2e |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
2 \documentclass{article} % For LaTeX2e |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
3 \usepackage{nips10submit_e} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
4 \usepackage{times} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
5 \usepackage{wrapfig} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
6 \usepackage{amsthm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
7 \usepackage{amsmath} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
8 \usepackage{bbm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
9 \usepackage[utf8]{inputenc} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
10 \usepackage[psamsfonts]{amssymb} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
11 %\usepackage{algorithm,algorithmic} % not used after all |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
12 \usepackage{graphicx,subfigure} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
13 \usepackage[numbers]{natbib} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
14 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
15 \addtolength{\textwidth}{10mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
16 \addtolength{\evensidemargin}{-5mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
17 \addtolength{\oddsidemargin}{-5mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
18 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
19 %\setlength\parindent{0mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
20 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
21 \begin{document} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
22 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
23 \title{Deep Self-Taught Learning for Handwritten Character Recognition} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
24 \author{ |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
25 Yoshua Bengio \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
26 Frédéric Bastien \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
27 Arnaud Bergeron \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
28 Nicolas Boulanger-Lewandowski \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
29 Thomas Breuel \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
30 Youssouf Chherawala \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
31 Moustapha Cisse \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
32 Myriam Côté \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
33 Dumitru Erhan \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
34 Jeremy Eustache \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
35 Xavier Glorot \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
36 Xavier Muller \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
37 Sylvain Pannetier Lebeuf \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
38 Razvan Pascanu \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
39 Salah Rifai \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
40 Francois Savard \and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
41 Guillaume Sicard |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
42 } |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
43 \date{{\tt bengioy@iro.umontreal.ca}, Dept. IRO, U. Montreal, P.O. Box 6128, Centre-Ville branch, H3C 3J7, Montreal (Qc), Canada} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
44 %\jmlrheading{}{2010}{}{10/2010}{XX/2011}{Yoshua Bengio et al} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
45 %\editor{} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
46 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
47 %\makeanontitle |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
48 \maketitle |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
49 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
50 %{\bf Running title: Deep Self-Taught Learning} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
51 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
52 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
53 \begin{abstract} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
54 Recent theoretical and empirical work in statistical machine learning has demonstrated the potential of learning algorithms for deep architectures, i.e., function classes obtained by composing multiple levels of representation. Self-taught learning (exploiting unlabeled examples or examples from other distributions) has already been applied to deep learners, but mostly to show the advantage of unlabeled examples. Here we explore the advantage brought by {\em out-of-distribution examples}. For this purpose we developed a powerful generator of stochastic variations and noise processes for character images, including not only affine transformations but also slant, local elastic deformations, changes in thickness, background images, grey level changes, contrast, occlusion, and various types of noise. The out-of-distribution examples are obtained from these highly distorted images or by including examples of object classes different from those in the target test set. We show that {\em deep learners benefit more from out-of-distribution examples than a corresponding shallow learner}, at least in a large-scale handwritten character recognition setting. In fact, we show that they {\em beat previously published results and reach human-level performance}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
55 \end{abstract} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
56 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
57 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
58 %\begin{keywords} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
59 %Deep learning, self-taught learning, out-of-distribution examples, handwritten character recognition, multi-task learning |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
60 %\end{keywords} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
61 %\keywords{self-taught learning \and multi-task learning \and out-of-distribution examples \and deep learning \and handwriting recognition} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
62 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
63 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
64 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
65 \section{Introduction} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
66 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
67 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
68 {\bf Deep Learning} has emerged as a promising new area of research in |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
69 statistical machine learning~\citep{Hinton06,ranzato-07-small,Bengio-nips-2006,VincentPLarochelleH2008,ranzato-08,TaylorHintonICML2009,Larochelle-jmlr-2009,Salakhutdinov+Hinton-2009,HonglakL2009,HonglakLNIPS2009,Jarrett-ICCV2009,Taylor-cvpr-2010}. See \citet{Bengio-2009} for a review. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
70 Learning algorithms for deep architectures are centered on the learning |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
71 of useful representations of data, which are better suited to the task at hand, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
72 and are organized in a hierarchy with multiple levels. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
73 This is in part inspired by observations of the mammalian visual cortex, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
74 which consists of a chain of processing elements, each of which is associated with a |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
75 different representation of the raw visual input. In fact, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
76 it was found recently that the features learnt in deep architectures resemble |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
77 those observed in the first two of these stages (in areas V1 and V2 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
78 of the visual cortex) \citep{HonglakL2008}, and that they become more and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
79 more invariant to factors of variation (such as camera movement) in |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
80 higher layers~\citep{Goodfellow2009}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
81 Learning a hierarchy of features increases the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
82 ease and practicality of developing representations that are at once |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
83 tailored to specific tasks, yet are able to borrow statistical strength |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
84 from other related tasks (e.g., modeling different kinds of objects). Finally, learning the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
85 feature representation can lead to higher-level (more abstract, more |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
86 general) features that are more robust to unanticipated sources of |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
87 variance extant in real data. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
88 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
89 {\bf Self-taught learning}~\citep{RainaR2007} is a paradigm that combines principles |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
90 of semi-supervised and multi-task learning: the learner can exploit examples |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
91 that are unlabeled and possibly come from a distribution different from the target |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
92 distribution, e.g., from other classes than those of interest. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
93 It has already been shown that deep learners can clearly take advantage of |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
94 unsupervised learning and unlabeled examples~\citep{Bengio-2009,WestonJ2008-small}, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
95 but more needs to be done to explore the impact |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
96 of {\em out-of-distribution} examples and of the {\em multi-task} setting |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
97 (one exception is~\citep{CollobertR2008}, which uses a different kind |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
98 of learning algorithm). In particular, the {\em relative |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
99 advantage of deep learning} for these settings has not been evaluated. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
100 The hypothesis discussed in the conclusion is that in the context of |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
101 multi-task learning and the availability of out-of-distribution training examples, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
102 a deep hierarchy of features |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
103 may be better able to provide {\em sharing of statistical strength} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
104 between different regions in input space or different tasks, compared to |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
105 a shallow learner. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
106 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
107 \iffalse |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
108 Whereas a deep architecture can in principle be more powerful than a |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
109 shallow one in terms of representation, depth appears to render the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
110 training problem more difficult in terms of optimization and local minima. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
111 It is also only recently that successful algorithms were proposed to |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
112 overcome some of these difficulties. All are based on unsupervised |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
113 learning, often in a greedy layer-wise ``unsupervised pre-training'' |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
114 stage~\citep{Bengio-2009}. One of these layer initialization techniques, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
115 applied here, is the Denoising |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
116 Auto-encoder~(DA)~\citep{VincentPLarochelleH2008-very-small} (see Figure~\ref{fig:da}), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
117 which |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
118 performed similarly to or better than previously proposed Restricted Boltzmann |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
119 Machines in terms of unsupervised extraction of a hierarchy of features |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
120 useful for classification. Each layer is trained to denoise its |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
121 input, creating a layer of features that can be used as input for the next layer. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
122 \fi |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
123 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
124 %The principle is that each layer starting from |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
125 %the bottom is trained to encode its input (the output of the previous |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
126 %layer) and to reconstruct it from a corrupted version. After this |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
127 %unsupervised initialization, the stack of DAs can be |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
128 %converted into a deep supervised feedforward neural network and fine-tuned by |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
129 %stochastic gradient descent. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
130 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
131 % |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
132 The {\bf main claim} of this paper is that deep learners (with several levels of representation) can |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
133 {\bf benefit more from self-taught learning than shallow learners} (with a single |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
134 level), both in the context of the multi-task setting and from {\em |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
135 out-of-distribution examples} in general. Because we are able to improve on state-of-the-art |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
136 performance and reach human-level performance |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
137 on a large-scale task, we consider that this paper is also a contribution |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
138 to advance the application of machine learning to handwritten character recognition. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
139 More precisely, we ask and answer the following questions: |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
140 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
141 %\begin{enumerate} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
142 $\bullet$ %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
143 Do the good results previously obtained with deep architectures on the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
144 MNIST digit images generalize to the setting of a similar but much larger and richer |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
145 dataset, the NIST special database 19, with 62 classes and around 800k examples? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
146 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
147 $\bullet$ %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
148 To what extent does the perturbation of input images (e.g., adding |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
149 noise, affine transformations, background images) make the resulting |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
150 classifiers better not only on similarly perturbed images but also on |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
151 the {\em original clean examples}? We study this question in the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
152 context of the 62-class and 10-class tasks of the NIST special database 19. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
153 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
154 $\bullet$ %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
155 Do deep architectures {\em benefit {\bf more} from such out-of-distribution} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
156 examples, i.e. do they benefit more from the self-taught learning~\citep{RainaR2007} framework? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
157 We use highly perturbed examples to generate out-of-distribution examples. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
158 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
159 $\bullet$ %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
160 Similarly, does the feature learning step in deep learning algorithms benefit {\bf more} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
161 from training with moderately {\em different classes} (i.e. a multi-task learning scenario) than |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
162 a corresponding shallow and purely supervised architecture? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
163 We train on 62 classes and test on 10 (digits) or 26 (upper case or lower case) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
164 to answer this question. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
165 %\end{enumerate} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
166 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
167 Our experimental results provide positive evidence towards all of these questions, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
168 as well as {\em classifiers that reach human-level performance on 62-class isolated character |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
169 recognition and beat previously published results on the NIST dataset (special database 19)}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
170 To achieve these results, we introduce in the next section a sophisticated system |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
171 for stochastically transforming character images and then explain the methodology, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
172 which is based on training with or without these transformed images and testing on |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
173 clean ones. We measure the relative advantage of out-of-distribution examples |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
174 (perturbed or out-of-class) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
175 for a deep learner vs a supervised shallow one. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
176 Code for generating these transformations as well as for the deep learning |
598
a0fdc1f134da
minor changes to nips workshop submission
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
597
diff
changeset
|
177 algorithms is made available at {\tt http://anonymous.url.net}.%{\tt http://hg.assembla.com/ift6266}. |
597
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
178 We also estimate the relative advantage for deep learners of training with |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
179 other classes than those of interest, by comparing learners trained with |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
180 62 classes with learners trained with only a subset (on which they |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
181 are then tested). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
182 The conclusion discusses |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
183 the more general question of why deep learners may benefit so much from |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
184 the self-taught learning framework. Since out-of-distribution data |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
185 (perturbed or from other related classes) is very common, this conclusion |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
186 is of practical importance. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
187 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
188 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
189 %\newpage |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
190 \section{Perturbed and Transformed Character Images} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
191 \label{s:perturbations} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
192 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
193 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
194 Figure~\ref{fig:transform} shows the different transformations we used to stochastically |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
195 transform $32 \times 32$ source images (such as the one in Fig.~\ref{fig:torig}) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
196 in order to obtain data from a larger distribution which |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
197 covers a domain substantially larger than the clean characters distribution from |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
198 which we start. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
199 Although character transformations have been used before to |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
200 improve character recognizers, this effort is on a large scale both |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
201 in number of classes and in the complexity of the transformations, hence |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
202 in the complexity of the learning task. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
203 The code for these transformations (mostly python) is available at |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
204 {\tt http://anonymous.url.net}. All the modules in the pipeline share |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
205 a global control parameter ($0 \le \mathit{complexity} \le 1$) that allows one to modulate the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
206 amount of deformation or noise introduced. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
207 There are two main parts in the pipeline. The first one, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
208 from slant to pinch below, performs transformations. The second |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
209 part, from blur to contrast, adds different kinds of noise. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
210 More details can be found in~\citep{ift6266-tr-anonymous}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
211 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
212 \begin{figure}[ht] |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
213 \centering |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
214 \subfigure[Original]{\includegraphics[scale=0.6]{images/Original.png}\label{fig:torig}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
215 \subfigure[Thickness]{\includegraphics[scale=0.6]{images/Thick_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
216 \subfigure[Slant]{\includegraphics[scale=0.6]{images/Slant_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
217 \subfigure[Affine Transformation]{\includegraphics[scale=0.6]{images/Affine_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
218 \subfigure[Local Elastic Deformation]{\includegraphics[scale=0.6]{images/Localelasticdistorsions_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
219 \subfigure[Pinch]{\includegraphics[scale=0.6]{images/Pinch_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
220 %Noise |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
221 \subfigure[Motion Blur]{\includegraphics[scale=0.6]{images/Motionblur_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
222 \subfigure[Occlusion]{\includegraphics[scale=0.6]{images/occlusion_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
223 \subfigure[Gaussian Smoothing]{\includegraphics[scale=0.6]{images/Bruitgauss_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
224 \subfigure[Pixels Permutation]{\includegraphics[scale=0.6]{images/Permutpixel_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
225 \subfigure[Gaussian Noise]{\includegraphics[scale=0.6]{images/Distorsiongauss_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
226 \subfigure[Background Image Addition]{\includegraphics[scale=0.6]{images/background_other_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
227 \subfigure[Salt \& Pepper]{\includegraphics[scale=0.6]{images/Poivresel_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
228 \subfigure[Scratches]{\includegraphics[scale=0.6]{images/Rature_only.png}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
229 \subfigure[Grey Level \& Contrast]{\includegraphics[scale=0.6]{images/Contrast_only.png}} |
598
a0fdc1f134da
minor changes to nips workshop submission
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
597
diff
changeset
|
230 \caption{Top left (a): example original image. Others (b--o): examples of the effect |
a0fdc1f134da
minor changes to nips workshop submission
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
597
diff
changeset
|
231 of each transformation module taken separately. Actual perturbed examples are obtained by |
a0fdc1f134da
minor changes to nips workshop submission
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
597
diff
changeset
|
232 a pipeline of these, with random choices about which module to apply and how much perturbation |
a0fdc1f134da
minor changes to nips workshop submission
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
597
diff
changeset
|
233 to apply.} |
597
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
234 \label{fig:transform} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
235 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
236 \end{figure} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
237 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
238 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
239 \section{Experimental Setup} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
240 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
241 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
242 Much previous work on deep learning has been performed on |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
243 the MNIST digits task~\citep{Hinton06,ranzato-07-small,Bengio-nips-2006,Salakhutdinov+Hinton-2009}, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
244 with 60~000 examples, and variants involving 10~000 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
245 examples~\citep{Larochelle-jmlr-toappear-2008,VincentPLarochelleH2008}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
246 The focus here is on much larger training sets, from 10 times to |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
247 1000 times larger, and 62 classes. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
248 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
249 The first step in constructing the larger datasets (called NISTP and P07) is to sample from |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
250 a {\em data source}: {\bf NIST} (NIST database 19), {\bf Fonts}, {\bf Captchas}, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
251 and {\bf OCR data} (scanned machine printed characters). Once a character |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
252 is sampled from one of these sources (chosen randomly), the second step is to |
598
a0fdc1f134da
minor changes to nips workshop submission
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
597
diff
changeset
|
253 apply a pipeline of transformations and/or noise processes outlined in Section~\ref{s:perturbations}. |
597
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
254 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
255 To provide a baseline of error rate comparison we also estimate human performance |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
256 on both the 62-class task and the 10-class digits task. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
257 We compare the best Multi-Layer Perceptrons (MLP) against |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
258 the best Stacked Denoising Auto-encoders (SDA), when |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
259 both models' hyper-parameters are selected to minimize the validation set error. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
260 We also provide a comparison against a precise estimate |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
261 of human performance obtained via Amazon's Mechanical Turk (AMT) |
598
a0fdc1f134da
minor changes to nips workshop submission
Yoshua Bengio <bengioy@iro.umontreal.ca>
parents:
597
diff
changeset
|
262 service ({\tt http://mturk.com}). |
597
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
263 AMT users are paid small amounts |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
264 of money to perform tasks for which human intelligence is required. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
265 Mechanical Turk has been used extensively in natural language processing and vision. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
266 %processing \citep{SnowEtAl2008} and vision |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
267 %\citep{SorokinAndForsyth2008,whitehill09}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
268 AMT users were presented |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
269 with 10 character images (from a test set) and asked to choose 10 corresponding ASCII |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
270 characters. They were forced to choose a single character class (either among the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
271 62 or 10 character classes) for each image. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
272 80 subjects classified 2500 images per (dataset,task) pair. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
273 Different human labelers sometimes provided a different label for the same |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
274 example, and we were able to estimate the error variance due to this effect |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
275 because each image was classified by 3 different persons. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
276 The average error of humans on the 62-class task NIST test set |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
277 is 18.2\%, with a standard error of 0.1\%. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
278 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
279 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
280 \subsection{Data Sources} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
281 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
282 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
283 %\begin{itemize} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
284 %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
285 {\bf NIST.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
286 Our main source of characters is the NIST Special Database 19~\citep{Grother-1995}, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
287 widely used for training and testing character |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
288 recognition systems~\citep{Granger+al-2007,Cortes+al-2000,Oliveira+al-2002-short,Milgram+al-2005}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
289 The dataset is composed of 814255 digits and characters (upper and lower cases), with hand checked classifications, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
290 extracted from handwritten sample forms of 3600 writers. The characters are labelled by one of the 62 classes |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
291 corresponding to ``0''--``9'', ``A''--``Z'' and ``a''--``z''. The dataset contains 8 parts (partitions) of varying complexity. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
292 The fourth partition (called $hsf_4$, 82587 examples), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
293 experimentally recognized to be the most difficult one, is the one recommended |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
294 by NIST as a testing set and is used in our work as well as some previous work~\citep{Granger+al-2007,Cortes+al-2000,Oliveira+al-2002-short,Milgram+al-2005} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
295 for that purpose. We randomly split the remainder (731668 examples) into a training set and a validation set for |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
296 model selection. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
297 The performances reported by previous work on that dataset mostly use only the digits. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
298 Here we use all the classes both in the training and testing phase. This is especially |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
299 useful to estimate the effect of a multi-task setting. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
300 The distribution of the classes in the NIST training and test sets differs |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
301 substantially, with relatively many more digits in the test set, and a more uniform distribution |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
302 of letters in the test set (whereas in the training set they are distributed |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
303 more like in natural text). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
304 %\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
305 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
306 %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
307 {\bf Fonts.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
308 In order to have a good variety of sources we downloaded a large number of free fonts from: |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
309 {\tt http://cg.scs.carleton.ca/\textasciitilde luc/freefonts.html}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
310 % TODO: pointless to anonymize, it's not pointing to our work |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
311 Including the operating system's (Windows 7) fonts, there is a total of $9817$ different fonts that we can choose uniformly from. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
312 The chosen {\tt ttf} file is either used as input of the Captcha generator (see next item) or, by producing a corresponding image, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
313 directly as input to our models. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
314 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
315 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
316 %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
317 {\bf Captchas.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
318 The Captcha data source is an adaptation of the \emph{pycaptcha} library (a python based captcha generator library) for |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
319 generating characters of the same format as the NIST dataset. This software is based on |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
320 a random character class generator and various kinds of transformations similar to those described in the previous sections. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
321 In order to increase the variability of the data generated, many different fonts are used for generating the characters. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
322 Transformations (slant, distortions, rotation, translation) are applied to each randomly generated character with a complexity |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
323 depending on the value of the complexity parameter provided by the user of the data source. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
324 %Two levels of complexity are allowed and can be controlled via an easy to use facade class. %TODO: what's a facade class? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
325 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
326 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
327 %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
328 {\bf OCR data.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
329 A large set (2 million) of scanned, OCRed and manually verified machine-printed |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
330 characters were included as an |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
331 additional source. This set is part of a larger corpus being collected by the Image Understanding |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
332 Pattern Recognition Research group led by Thomas Breuel at the University of Kaiserslautern |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
333 ({\tt http://www.iupr.com}), and which will be publicly released. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
334 %TODO: let's hope that Thomas is not a reviewer! :) Seriously though, maybe we should anonymize this |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
335 %\end{itemize} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
336 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
337 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
338 \subsection{Data Sets} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
339 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
340 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
341 All data sets contain 32$\times$32 grey-level images (values in $[0,1]$) associated with a label |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
342 from one of the 62 character classes. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
343 %\begin{itemize} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
344 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
345 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
346 %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
347 {\bf NIST.} This is the raw NIST special database 19~\citep{Grother-1995}. It has |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
348 \{651668 / 80000 / 82587\} \{training / validation / test\} examples. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
349 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
350 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
351 %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
352 {\bf P07.} This dataset is obtained by taking raw characters from all four of the above sources |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
353 and sending them through the transformation pipeline described in section \ref{s:perturbations}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
354 For each new example to generate, a data source is selected with probability $10\%$ from the fonts, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
355 $25\%$ from the captchas, $25\%$ from the OCR data and $40\%$ from NIST. We apply all the transformations in the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
356 order given above, and for each of them we sample uniformly a \emph{complexity} in the range $[0,0.7]$. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
357 It has \{81920000 / 80000 / 20000\} \{training / validation / test\} examples. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
358 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
359 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
360 %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
361 {\bf NISTP.} This one is equivalent to P07 (complexity parameter of $0.7$ with the same proportions of data sources) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
362 except that we only apply |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
363 transformations from slant to pinch. Therefore, the character is |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
364 transformed but no additional noise is added to the image, giving images |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
365 closer to the NIST dataset. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
366 It has \{81920000 / 80000 / 20000\} \{training / validation / test\} examples. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
367 %\end{itemize} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
368 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
369 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
370 \subsection{Models and their Hyperparameters} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
371 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
372 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
373 The experiments are performed using MLPs (with a single |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
374 hidden layer) and SDAs. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
375 \emph{Hyper-parameters are selected based on the {\bf NISTP} validation set error.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
376 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
377 {\bf Multi-Layer Perceptrons (MLP).} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
378 Whereas previous work had compared deep architectures to both shallow MLPs and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
379 SVMs, we only compared to MLPs here because of the very large datasets used |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
380 (making the use of SVMs computationally challenging because of their quadratic |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
381 scaling behavior). Preliminary experiments on training SVMs (libSVM) with subsets of the training |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
382 set allowing the program to fit in memory yielded substantially worse results |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
383 than those obtained with MLPs. For training on nearly a billion examples |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
384 (with the perturbed data), the MLPs and SDAs are much more convenient than |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
385 classifiers based on kernel methods. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
386 The MLP has a single hidden layer with $\tanh$ activation functions, and softmax (normalized |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
387 exponentials) on the output layer for estimating $P(\mathrm{class} \mid \mathrm{image})$. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
388 The number of hidden units is taken in $\{300,500,800,1000,1500\}$. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
389 Training examples are presented in minibatches of size 20. A constant learning |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
390 rate was chosen among $\{0.001, 0.01, 0.025, 0.075, 0.1, 0.5\}$. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
391 %through preliminary experiments (measuring performance on a validation set), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
392 %and $0.1$ (which was found to work best) was then selected for optimizing on |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
393 %the whole training sets. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
394 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
395 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
396 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
397 {\bf Stacked Denoising Auto-Encoders (SDA).} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
398 Various auto-encoder variants and Restricted Boltzmann Machines (RBMs) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
399 can be used to initialize the weights of each layer of a deep MLP (with many hidden |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
400 layers)~\citep{Hinton06,ranzato-07-small,Bengio-nips-2006}, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
401 apparently setting parameters in the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
402 basin of attraction of supervised gradient descent yielding better |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
403 generalization~\citep{Erhan+al-2010}. This initial {\em unsupervised |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
404 pre-training phase} uses all of the training images but not the training labels. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
405 Each layer is trained in turn to produce a new representation of its input |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
406 (starting from the raw pixels). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
407 It is hypothesized that the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
408 advantage brought by this procedure stems from a better prior, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
409 on the one hand taking advantage of the link between the input |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
410 distribution $P(x)$ and the conditional distribution of interest |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
411 $P(y|x)$ (like in semi-supervised learning), and on the other hand |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
412 taking advantage of the expressive power and bias implicit in the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
413 deep architecture (whereby complex concepts are expressed as |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
414 compositions of simpler ones through a deep hierarchy). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
415 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
416 \begin{figure}[ht] |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
417 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
418 \centerline{\resizebox{0.8\textwidth}{!}{\includegraphics{images/denoising_autoencoder_small.pdf}}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
419 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
420 \caption{Illustration of the computations and training criterion for the denoising |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
421 auto-encoder used to pre-train each layer of the deep architecture. Input $x$ of |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
422 the layer (i.e. raw input or output of previous layer) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
423 is corrupted into $\tilde{x}$ and encoded into code $y$ by the encoder $f_\theta(\cdot)$. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
424 The decoder $g_{\theta'}(\cdot)$ maps $y$ to reconstruction $z$, which |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
425 is compared to the uncorrupted input $x$ through the loss function |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
426 $L_H(x,z)$, whose expected value is approximately minimized during training |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
427 by tuning $\theta$ and $\theta'$.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
428 \label{fig:da} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
429 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
430 \end{figure} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
431 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
432 Here we chose to use the Denoising |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
433 Auto-encoder~\citep{VincentPLarochelleH2008} as the building block for |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
434 these deep hierarchies of features, as it is simple to train and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
435 explain (see Figure~\ref{fig:da}, as well as |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
436 the tutorial and code at {\tt http://deeplearning.net/tutorial}), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
437 provides efficient inference, and yielded results |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
438 comparable to or better than RBMs in a series of experiments |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
439 \citep{VincentPLarochelleH2008}. During training, a Denoising |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
440 Auto-encoder is presented with a stochastically corrupted version |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
441 of the input and trained to reconstruct the uncorrupted input, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
442 forcing the hidden units to represent the leading regularities in |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
443 the data. Here we use the random binary masking corruption |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
444 (which sets to 0 a random subset of the inputs). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
445 Once it is trained, in a purely unsupervised way, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
446 its hidden units' activations can |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
447 be used as inputs for training a second one, etc. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
448 After this unsupervised pre-training stage, the parameters |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
449 are used to initialize a deep MLP, which is fine-tuned by |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
450 the same standard procedure used to train MLPs (see previous section). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
451 The SDA hyper-parameters are the same as for the MLP, with the addition of the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
452 amount of corruption noise (we used the masking noise process, whereby a |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
453 fixed proportion of the input values, randomly selected, are zeroed), and a |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
454 separate learning rate for the unsupervised pre-training stage (selected |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
455 from the same above set). The fraction of inputs corrupted was selected |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
456 among $\{10\%, 20\%, 50\%\}$. Another hyper-parameter is the number |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
457 of hidden layers but it was fixed to 3 based on previous work with |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
458 SDAs on MNIST~\citep{VincentPLarochelleH2008}. The size of the hidden |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
459 layers was kept constant across hidden layers, and the best results |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
460 were obtained with the largest values that we could experiment |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
461 with given our patience, with 1000 hidden units. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
462 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
463 \vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
464 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
465 \begin{figure}[ht] |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
466 %\vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
467 \centerline{\resizebox{.99\textwidth}{!}{\includegraphics{images/error_rates_charts.pdf}}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
468 %\vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
469 \caption{SDAx are the {\bf deep} models. Error bars indicate a 95\% confidence interval. 0 indicates that the model was trained |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
470 on NIST, 1 on NISTP, and 2 on P07. Left: overall results |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
471 of all models, on NIST and NISTP test sets. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
472 Right: error rates on NIST test digits only, along with the previous results from |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
473 literature~\citep{Granger+al-2007,Cortes+al-2000,Oliveira+al-2002-short,Milgram+al-2005} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
474 respectively based on ART, nearest neighbors, MLPs, and SVMs.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
475 \label{fig:error-rates-charts} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
476 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
477 \end{figure} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
478 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
479 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
480 \begin{figure}[ht] |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
481 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
482 \centerline{\resizebox{.99\textwidth}{!}{\includegraphics{images/improvements_charts.pdf}}} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
483 \vspace*{-3mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
484 \caption{Relative improvement in error rate due to self-taught learning. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
485 Left: Improvement (or loss, when negative) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
486 induced by out-of-distribution examples (perturbed data). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
487 Right: Improvement (or loss, when negative) induced by multi-task |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
488 learning (training on all classes and testing only on either digits, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
489 upper case, or lower-case). The deep learner (SDA) benefits more from |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
490 both self-taught learning scenarios, compared to the shallow MLP.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
491 \label{fig:improvements-charts} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
492 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
493 \end{figure} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
494 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
495 \section{Experimental Results} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
496 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
497 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
498 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
499 %\subsection{SDA vs MLP vs Humans} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
500 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
501 The models are either trained on NIST (MLP0 and SDA0), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
502 NISTP (MLP1 and SDA1), or P07 (MLP2 and SDA2), and tested |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
503 on either NIST, NISTP or P07, either on the 62-class task |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
504 or on the 10-digits task. Training (including about half |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
505 for unsupervised pre-training, for DAs) on the larger |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
506 datasets takes around one day on a GPU-285. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
507 Figure~\ref{fig:error-rates-charts} summarizes the results obtained, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
508 comparing humans, the three MLPs (MLP0, MLP1, MLP2) and the three SDAs (SDA0, SDA1, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
509 SDA2), along with the previous results on the digits NIST special database |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
510 19 test set from the literature, respectively based on ARTMAP neural |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
511 networks~\citep{Granger+al-2007}, fast nearest-neighbor search |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
512 \citep{Cortes+al-2000}, MLPs~\citep{Oliveira+al-2002-short}, and SVMs |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
513 \citep{Milgram+al-2005}.% More detailed and complete numerical results |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
514 %(figures and tables, including standard errors on the error rates) can be |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
515 %found in Appendix. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
516 The deep learner not only outperformed the shallow ones and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
517 previously published performance (in a statistically and qualitatively |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
518 significant way) but, when trained with perturbed data, also |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
519 reaches human performance on both the 62-class task |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
520 and the 10-class (digits) task. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
521 17\% error (SDA1) or 18\% error (humans) may seem large but a large |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
522 majority of the errors from humans and from SDA1 are from out-of-context |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
523 confusions (e.g. a vertical bar can be a ``1'', an ``l'' or an ``L'', and a |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
524 ``c'' and a ``C'' are often indistinguishable). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
525 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
526 In addition, as shown in the left of |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
527 Figure~\ref{fig:improvements-charts}, the relative improvement in error |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
528 rate brought by self-taught learning is greater for the SDA, and these |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
529 differences with the MLP are statistically and qualitatively |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
530 significant. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
531 The left side of the figure shows the improvement to the clean |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
532 NIST test set error brought by the use of out-of-distribution examples |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
533 (i.e. the perturbed examples from NISTP or P07). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
534 Relative percent change is measured by taking |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
535 $100 \% \times$ (original model's error / perturbed-data model's error - 1). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
536 The right side of |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
537 Figure~\ref{fig:improvements-charts} shows the relative improvement |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
538 brought by the use of a multi-task setting, in which the same model is |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
539 trained for more classes than the target classes of interest (i.e. training |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
540 with all 62 classes when the target classes are respectively the digits, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
541 lower-case, or upper-case characters). Again, whereas the gain from the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
542 multi-task setting is marginal or negative for the MLP, it is substantial |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
543 for the SDA. Note that to simplify these multi-task experiments, only the original |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
544 NIST dataset is used. For example, the MLP-digits bar shows the relative |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
545 percent improvement in MLP error rate on the NIST digits test set |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
546 computed as $100\% \times$ (single-task |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
547 model's error / multi-task model's error - 1). The single-task model is |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
548 trained with only 10 outputs (one per digit), seeing only digit examples, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
549 whereas the multi-task model is trained with 62 outputs, with all 62 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
550 character classes as examples. Hence the hidden units are shared across |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
551 all tasks. For the multi-task model, the digit error rate is measured by |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
552 comparing the correct digit class with the output class associated with the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
553 maximum conditional probability among only the digit classes outputs. The |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
554 setting is similar for the other two target classes (lower case characters |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
555 and upper case characters). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
556 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
557 %\subsection{Perturbed Training Data More Helpful for SDA} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
558 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
559 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
560 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
561 %\subsection{Multi-Task Learning Effects} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
562 %%\vspace*{-1mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
563 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
564 \iffalse |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
565 As previously seen, the SDA is better able to benefit from the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
566 transformations applied to the data than the MLP. In this experiment we |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
567 define three tasks: recognizing digits (knowing that the input is a digit), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
568 recognizing upper case characters (knowing that the input is one), and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
569 recognizing lower case characters (knowing that the input is one). We |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
570 consider the digit classification task as the target task and we want to |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
571 evaluate whether training with the other tasks can help or hurt, and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
572 whether the effect is different for MLPs versus SDAs. The goal is to find |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
573 out if deep learning can benefit more (or less) from multiple related tasks |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
574 (i.e. the multi-task setting) compared to a corresponding purely supervised |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
575 shallow learner. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
576 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
577 We use a single hidden layer MLP with 1000 hidden units, and a SDA |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
578 with 3 hidden layers (1000 hidden units per layer), pre-trained and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
579 fine-tuned on NIST. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
580 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
581 Our results show that the MLP benefits marginally from the multi-task setting |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
582 in the case of digits (5\% relative improvement) but is actually hurt in the case |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
583 of characters (respectively 3\% and 4\% worse for lower and upper case characters). |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
584 On the other hand the SDA benefited from the multi-task setting, with relative |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
585 error rate improvements of 27\%, 15\% and 13\% respectively for digits, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
586 lower and upper case characters, as shown in Table~\ref{tab:multi-task}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
587 \fi |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
588 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
589 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
590 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
591 \section{Conclusions and Discussion} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
592 \vspace*{-2mm} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
593 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
594 We have found that the self-taught learning framework is more beneficial |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
595 to a deep learner than to a traditional shallow and purely |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
596 supervised learner. More precisely, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
597 the answers are positive for all the questions asked in the introduction. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
598 %\begin{itemize} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
599 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
600 $\bullet$ %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
601 {\bf Do the good results previously obtained with deep architectures on the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
602 MNIST digits generalize to a much larger and richer (but similar) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
603 dataset, the NIST special database 19, with 62 classes and around 800k examples}? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
604 Yes, the SDA {\em systematically outperformed the MLP and all the previously |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
605 published results on this dataset} (the ones that we are aware of), {\em in fact reaching human-level |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
606 performance} at around 17\% error on the 62-class task and 1.4\% on the digits, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
607 and beating previously published results on the same data. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
608 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
609 $\bullet$ %\item |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
610 {\bf To what extent do self-taught learning scenarios help deep learners, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
611 and do they help them more than shallow supervised ones}? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
612 We found that distorted training examples not only made the resulting |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
613 classifier better on similarly perturbed images but also on |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
614 the {\em original clean examples}, and more importantly and more novel, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
615 that deep architectures benefit more from such {\em out-of-distribution} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
616 examples. MLPs were helped by perturbed training examples when tested on perturbed input |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
617 images (65\% relative improvement on NISTP) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
618 but only marginally helped (5\% relative improvement on all classes) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
619 or even hurt (10\% relative loss on digits) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
620 with respect to clean examples. On the other hand, the deep SDAs |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
621 were significantly boosted by these out-of-distribution examples. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
622 Similarly, whereas the improvement due to the multi-task setting was marginal or |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
623 negative for the MLP (from +5.6\% to -3.6\% relative change), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
624 it was quite significant for the SDA (from +13\% to +27\% relative change), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
625 which may be explained by the arguments below. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
626 %\end{itemize} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
627 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
628 In the original self-taught learning framework~\citep{RainaR2007}, the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
629 out-of-sample examples were used as a source of unsupervised data, and |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
630 experiments showed its positive effects in a \emph{limited labeled data} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
631 scenario. However, many of the results by \citet{RainaR2007} (who used a |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
632 shallow, sparse coding approach) suggest that the {\em relative gain of self-taught |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
633 learning vs ordinary supervised learning} diminishes as the number of labeled examples increases. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
634 We note instead that, for deep |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
635 architectures, our experiments show that such a positive effect is accomplished |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
636 even in a scenario with a \emph{large number of labeled examples}, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
637 i.e., here, the relative gain of self-taught learning is probably preserved |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
638 in the asymptotic regime. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
639 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
640 {\bf Why would deep learners benefit more from the self-taught learning framework}? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
641 The key idea is that the lower layers of the predictor compute a hierarchy |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
642 of features that can be shared across tasks or across variants of the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
643 input distribution. A theoretical analysis of generalization improvements |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
644 due to sharing of intermediate features across tasks already points |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
645 towards that explanation~\citep{baxter95a}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
646 Intermediate features that can be used in different |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
647 contexts can be estimated in a way that allows sharing of statistical |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
648 strength. Features extracted through many levels are more likely to |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
649 be more abstract and more invariant to some of the factors of variation |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
650 in the underlying distribution (as the experiments in~\citet{Goodfellow2009} suggest), |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
651 increasing the likelihood that they would be useful for a larger array |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
652 of tasks and input conditions. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
653 Therefore, we hypothesize that both depth and unsupervised |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
654 pre-training play a part in explaining the advantages observed here, and future |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
655 experiments could attempt to tease apart these factors. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
656 And why would deep learners benefit from the self-taught learning |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
657 scenarios even when the number of labeled examples is very large? |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
658 We hypothesize that this is related to the hypotheses studied |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
659 in~\citet{Erhan+al-2010}. In~\citet{Erhan+al-2010} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
660 it was found that online learning on a huge dataset did not make the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
661 advantage of the deep learning bias vanish, and a similar phenomenon |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
662 may be happening here. We hypothesize that unsupervised pre-training |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
663 of a deep hierarchy with self-taught learning initializes the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
664 model in the basin of attraction of supervised gradient descent |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
665 that corresponds to better generalization. Furthermore, such good |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
666 basins of attraction are not discovered by pure supervised learning |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
667 (with or without self-taught settings) from random initialization, and more labeled examples |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
668 do not allow the shallow or purely supervised models to discover |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
669 the kind of better basins associated |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
670 with deep learning and self-taught learning. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
671 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
672 A Flash demo of the recognizer (where both the MLP and the SDA can be compared) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
673 can be executed on-line at {\tt http://deep.host22.com}. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
674 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
675 \iffalse |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
676 \section*{Appendix I: Detailed Numerical Results} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
677 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
678 These tables correspond to Figures 2 and 3 and contain the raw error rates for each model and dataset considered. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
679 They also contain additional data such as test errors on P07 and standard errors. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
680 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
681 \begin{table}[ht] |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
682 \caption{Overall comparison of error rates ($\pm$ std.err.) on 62 character classes (10 digits + |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
683 26 lower + 26 upper), except for the last column -- digits only, between deep architecture with pre-training |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
684 (SDA=Stacked Denoising Autoencoder) and ordinary shallow architecture |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
685 (MLP=Multi-Layer Perceptron). The models shown are all trained using perturbed data (NISTP or P07) |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
686 and using a validation set to select hyper-parameters and other training choices. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
687 \{SDA,MLP\}0 are trained on NIST, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
688 \{SDA,MLP\}1 are trained on NISTP, and \{SDA,MLP\}2 are trained on P07. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
689 The human error rate on digits is a lower bound because it does not count digits that were |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
690 recognized as letters. For comparison, the results found in the literature |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
691 on NIST digits classification using the same test set are included.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
692 \label{tab:sda-vs-mlp-vs-humans} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
693 \begin{center} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
694 \begin{tabular}{|l|r|r|r|r|} \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
695 & NIST test & NISTP test & P07 test & NIST test digits \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
696 Humans& 18.2\% $\pm$.1\% & 39.4\%$\pm$.1\% & 46.9\%$\pm$.1\% & $1.4\%$ \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
697 SDA0 & 23.7\% $\pm$.14\% & 65.2\%$\pm$.34\% & 97.45\%$\pm$.06\% & 2.7\% $\pm$.14\%\\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
698 SDA1 & 17.1\% $\pm$.13\% & 29.7\%$\pm$.3\% & 29.7\%$\pm$.3\% & 1.4\% $\pm$.1\%\\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
699 SDA2 & 18.7\% $\pm$.13\% & 33.6\%$\pm$.3\% & 39.9\%$\pm$.17\% & 1.7\% $\pm$.1\%\\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
700 MLP0 & 24.2\% $\pm$.15\% & 68.8\%$\pm$.33\% & 78.70\%$\pm$.14\% & 3.45\% $\pm$.15\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
701 MLP1 & 23.0\% $\pm$.15\% & 41.8\%$\pm$.35\% & 90.4\%$\pm$.1\% & 3.85\% $\pm$.16\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
702 MLP2 & 24.3\% $\pm$.15\% & 46.0\%$\pm$.35\% & 54.7\%$\pm$.17\% & 4.85\% $\pm$.18\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
703 \citep{Granger+al-2007} & & & & 4.95\% $\pm$.18\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
704 \citep{Cortes+al-2000} & & & & 3.71\% $\pm$.16\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
705 \citep{Oliveira+al-2002} & & & & 2.4\% $\pm$.13\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
706 \citep{Milgram+al-2005} & & & & 2.1\% $\pm$.12\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
707 \end{tabular} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
708 \end{center} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
709 \end{table} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
710 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
711 \begin{table}[ht] |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
712 \caption{Relative change in error rates due to the use of perturbed training data, |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
713 either using NISTP, for the MLP1/SDA1 models, or using P07, for the MLP2/SDA2 models. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
714 A positive value indicates that training on the perturbed data helped for the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
715 given test set (the first 3 columns are on the 62-class tasks and the last one is |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
716 on the clean 10-class digits). Clearly, the deep learning models did benefit more |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
717 from perturbed training data, even when testing on clean data, whereas the MLP |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
718 trained on perturbed data performed worse on the clean digits and about the same |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
719 on the clean characters. } |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
720 \label{tab:perturbation-effect} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
721 \begin{center} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
722 \begin{tabular}{|l|r|r|r|r|} \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
723 & NIST test & NISTP test & P07 test & NIST test digits \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
724 SDA0/SDA1-1 & 38\% & 84\% & 228\% & 93\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
725 SDA0/SDA2-1 & 27\% & 94\% & 144\% & 59\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
726 MLP0/MLP1-1 & 5.2\% & 65\% & -13\% & -10\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
727 MLP0/MLP2-1 & -0.4\% & 49\% & 44\% & -29\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
728 \end{tabular} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
729 \end{center} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
730 \end{table} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
731 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
732 \begin{table}[ht] |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
733 \caption{Test error rates and relative change in error rates due to the use of |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
734 a multi-task setting, i.e., training on each task in isolation vs training |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
735 for all three tasks together, for MLPs vs SDAs. The SDA benefits much |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
736 more from the multi-task setting. All experiments were run only on the |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
737 unperturbed NIST data, using validation error for model selection. |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
738 Relative improvement is 1 - single-task error / multi-task error.} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
739 \label{tab:multi-task} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
740 \begin{center} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
741 \begin{tabular}{|l|r|r|r|} \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
742 & single-task & multi-task & relative \\ |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
743 & setting & setting & improvement \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
744 MLP-digits & 3.77\% & 3.99\% & 5.6\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
745 MLP-lower & 17.4\% & 16.8\% & -4.1\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
746 MLP-upper & 7.84\% & 7.54\% & -3.6\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
747 SDA-digits & 2.6\% & 3.56\% & 27\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
748 SDA-lower & 12.3\% & 14.4\% & 15\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
749 SDA-upper & 5.93\% & 6.78\% & 13\% \\ \hline |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
750 \end{tabular} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
751 \end{center} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
752 \end{table} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
753 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
754 \fi |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
755 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
756 %\afterpage{\clearpage} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
757 %\clearpage |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
758 { |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
759 %\bibliographystyle{spbasic} % basic style, author-year citations |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
760 \bibliographystyle{plainnat} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
761 \bibliography{strings,strings-short,strings-shorter,ift6266_ml,specials,aigaion-shorter} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
762 %\bibliographystyle{unsrtnat} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
763 %\bibliographystyle{apalike} |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
764 } |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
765 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
766 |
5ab605c9a7d9
NIPS deep learning workshop submission new .tex file compressed to 8 pages
boulanni <nicolas_boulanger@hotmail.com>
parents:
diff
changeset
|
767 \end{document} |