% view writeup/techreport.tex @ 404:1509b9bba4cc
%
% added digit/char error
% author xaviermuller
% date Wed, 28 Apr 2010 11:45:14 -0400
% parents 4c840798d290
% children fe2e2964e7a3
% line wrap: on
% line source

\documentclass[12pt,letterpaper]{article}
\usepackage[utf8]{inputenc}
\usepackage{graphicx}
\usepackage{times}
\usepackage{mlapa}

\begin{document}
\title{Generating and Exploiting Perturbed Training Data for Deep Architectures}
\author{The IFT6266 Gang}
\date{April 2010, Technical Report, Dept.\ IRO, U. Montreal}

\maketitle

\begin{abstract}
Recent theoretical and empirical work in statistical machine learning has
demonstrated the importance of learning algorithms for deep
architectures, i.e., function classes obtained by composing multiple
non-linear transformations. In the area of handwriting recognition,
deep learning algorithms
have so far been evaluated on rather small datasets with a few tens of thousands
of examples. Here we propose a powerful generator of variations
of examples for character images based on a pipeline of stochastic
transformations that include not only the usual affine transformations
but also the addition of slant, local elastic deformations, changes
in thickness, background images, color, contrast, occlusion, and
various types of pixel and spatially correlated noise.
We evaluate a deep learning algorithm (Stacked Denoising Autoencoders)
on the task of learning to classify digits and letters transformed
with this pipeline, using the hundreds of millions of generated examples
and testing on the full NIST test set.
We find that the SDA outperforms its
shallow counterpart, an ordinary Multi-Layer Perceptron,
and that it is better able to take advantage of the additional
generated data.
\end{abstract}

\section{Introduction}

Deep Learning has emerged as a promising new area of research in
statistical machine learning (see~\emcite{Bengio-2009} for a review).
Learning algorithms for deep architectures are centered on the learning
of useful representations of data, which are better suited to the task at hand.
This is in great part inspired by observations of the mammalian visual cortex, 
which consists of a chain of processing elements, each of which is associated with a
different representation. In fact,
it was found recently that the features learnt in deep architectures resemble
those observed in the first two of these stages (in areas V1 and V2
of visual cortex)~\cite{HonglakL2008}.
Processing images typically involves transforming the raw pixel data into
new \textbf{representations} that can be used for analysis or classification.
For example, a principal component analysis representation linearly projects 
the input image into a lower-dimensional feature space.
Why learn a representation?  Current practice in the computer vision
literature converts the raw pixels into a hand-crafted representation
(e.g.\ SIFT features~\cite{Lowe04}), but deep learning algorithms
tend to discover similar features in their first few 
levels~\cite{HonglakL2008,ranzato-08,Koray-08,VincentPLarochelleH2008-very-small}.
Learning increases the
ease and practicality of developing representations that are at once
tailored to specific tasks, yet are able to borrow statistical strength
from other related tasks (e.g., modeling different kinds of objects). Finally, learning the
feature representation can lead to higher-level (more abstract, more
general) features that are more robust to unanticipated sources of
variance extant in real data.

Whereas a deep architecture can in principle be more powerful than a shallow
one in terms of representation, depth appears to render the training problem
more difficult in terms of optimization and local minima.
It is also only recently that
successful algorithms were proposed to overcome some of these
difficulties.

\section{Perturbation and Transformation of Character Images}

\subsection{Affine Transformations}
\subsection{Adding Slant}
\subsection{Local Elastic Deformations}
\subsection{Changing Thickness}
\subsection{Occlusion}
\subsection{Background Images}
\subsection{Salt and Pepper Noise}
\subsection{Spatially Gaussian Noise}
\subsection{Color and Contrast Changes}

\begin{figure}[htbp]
\centering
\includegraphics[width=.99\textwidth]{images/example_t.png}
\caption{Illustration of the pipeline of stochastic
transformations applied to the image of a lower-case t
(the upper left image). Each image in the pipeline (going from
left to right, top row first, then bottom row) shows the result
of applying one of the modules in the pipeline. The last image
(bottom right) is used as a training example.}
\label{fig:pipeline}
\end{figure}

\section{Learning Algorithms for Deep Architectures}

\section{Experimental Setup}

\subsection{Training Datasets}

\subsubsection{Data Sources}

\begin{itemize}
\item {\bf NIST}
\item {\bf Fonts}
\item {\bf Captchas}
\item {\bf OCR data}
\end{itemize}

\subsubsection{Data Sets}
\begin{itemize}
\item {\bf NIST}
\item {\bf P07}
\item {\bf NISTP} \emph{(note: use the name NISTP rather than PNIST, to remain politically correct\ldots)}
\end{itemize}

\subsection{Models and their Hyperparameters}

\subsubsection{Multi-Layer Perceptrons (MLP)}

\subsubsection{Stacked Denoising Autoencoders (SDA)}

\section{Experimental Results}

\subsection{SDA vs MLP}

\begin{center}
\begin{tabular}{lcc}
      & train w/   & train w/    \\
      & NIST       & P07 + NIST  \\ \hline 
SDA   &            &             \\ \hline 
MLP   &            &             \\ \hline 
\end{tabular}
\end{center}

\subsection{Perturbed Training Data More Helpful for SDA}

\subsection{Training with More Classes than Necessary}

\section{Conclusions}

\bibliography{strings,ml,aigaion}
\bibliographystyle{mlapa}

\end{document}