diff writeup/nips2010_submission.tex @ 551:8f365abf171d

separate the transfo image
author Frederic Bastien <nouiz@nouiz.org>
date Wed, 02 Jun 2010 17:00:11 -0400
parents 662299f265ab
children 35c611363291
line wrap: on
line diff
--- a/writeup/nips2010_submission.tex	Wed Jun 02 15:44:46 2010 -0400
+++ b/writeup/nips2010_submission.tex	Wed Jun 02 17:00:11 2010 -0400
@@ -133,8 +133,18 @@
 \vspace*{-1mm}
 \section{Perturbation and Transformation of Character Images}
 \label{s:perturbations}
+{\large\bf Transformations}
+
 \vspace*{-1mm}
 
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\vspace{1.2cm}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Original:}
 This section describes the different transformations we used to stochastically
 transform source images in order to obtain data from a larger distribution which
 covers a domain substantially larger than the clean characters distribution from
@@ -152,33 +162,32 @@
 There are two main parts in the pipeline. The first one,
 from slant to pinch below, performs transformations. The second
 part, from blur to contrast, adds different kinds of noise.
+\end{minipage}
 
-\begin{figure}[ht]
-\vspace*{-2mm}
-\centerline{\resizebox{.9\textwidth}{!}{\includegraphics{images/transfo.png}}}
-% TODO: METTRE LE NOM DE LA TRANSFO A COTE DE CHAQUE IMAGE
-\caption{Illustration of each transformation applied alone to the same image
-of an upper-case h (top left). First row (from left to right) : original image, slant,
-thickness, affine transformation (translation, rotation, shear), 
-local elastic deformation; second row (from left to right) :
-pinch, motion blur, occlusion, pixel permutation, Gaussian noise; third row (from left to right) :
-background image, salt and pepper noise, spatially Gaussian noise, scratches,
-grey level and contrast changes.}
-\label{fig:transfo}
-\vspace*{-2mm}
-\end{figure}
 
-{\large\bf Transformations}
-
-\vspace*{0.5mm}
-
-{\bf Slant.} 
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Slant_only.PNG}
+\label{fig:Slant}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+%\centering
+{\bf Slant:}
 Each row of the image is shifted
 proportionally to its height: $shift = round(slant \times height)$.  
 $slant \sim U[-complexity,complexity]$.
-\vspace*{-1mm}
+\vspace{1.2cm}
+\end{minipage}
+
 
-{\bf Thickness.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Thick_only.PNG}
+\label{fig:Think}
+\vspace{.9cm}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Thickness:}
 Morphological operators of dilation and erosion~\citep{Haralick87,Serra82}
 are applied. The neighborhood of each pixel is multiplied
 element-wise with a {\em structuring element} matrix.
@@ -190,9 +199,18 @@
 where $m=10$ for dilation and $m=6$ for erosion (to avoid completely erasing thin characters).  
 A neutral element (no transformation) 
 is always present in the set.
-\vspace*{-1mm}
+\vspace{.4cm}
+\end{minipage}
+\vspace{-.7cm}
+
 
-{\bf Affine Transformations.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Affine_only.png}
+\label{fig:Affine}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Affine Transformations:}
 A $2 \times 3$ affine transform matrix (with
 6 parameters $(a,b,c,d,e,f)$) is sampled according to the $complexity$ level.
 Output pixel $(x,y)$ takes the value of input pixel
@@ -204,18 +222,33 @@
 complexity,1+3 \times complexity]$, $b$ and $e$ $\sim U[-3 \times complexity,3
 \times complexity]$ and $c$ and $f$ $\sim U[-4 \times complexity, 4 \times
 complexity]$.
-\vspace*{-1mm}
+\end{minipage}
 
-{\bf Local Elastic Deformations.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Localelasticdistorsions_only.PNG}
+\label{fig:Elastic}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Local Elastic Deformations:}
 This filter induces a ``wiggly'' effect in the image, following~\citet{SimardSP03-short},
 which provides more details. 
 The intensity of the displacement fields is given by 
 $\alpha = \sqrt[3]{complexity} \times 10.0$, which are 
 convolved with a Gaussian 2D kernel (resulting in a blur) of
 standard deviation $\sigma = 10 - 7 \times\sqrt[3]{complexity}$.
-\vspace*{-1mm}
+\vspace{.4cm}
+\end{minipage}
+\vspace{-.7cm}
 
-{\bf Pinch.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Pinch_only.PNG}
+\label{fig:Pinch}
+\vspace{.6cm}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Pinch:}
 This is the ``Whirl and pinch'' GIMP filter but with whirl set to 0.
 A pinch is ``similar to projecting the image onto an elastic
 surface and pressing or pulling on the center of the surface'' (GIMP documentation manual).
@@ -230,22 +263,38 @@
 The actual value is given by bilinear interpolation considering the pixels
 around the (non-integer) source position thus found.
 Here $pinch \sim U[-complexity, 0.7 \times complexity]$.
+%\vspace{1.5cm}
+\end{minipage}
 
-\vspace*{0.5mm}
+\vspace{.1cm}
 
 {\large\bf Injecting Noise}
 
-\vspace*{0.5mm}
-
-{\bf Motion Blur.}
+\vspace*{-.2cm}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Motion Blur:}
 This is GIMP's ``linear motion blur'' 
 with parameters $length$ and $angle$. The value of
 a pixel in the final image is approximately the  mean value of the first $length$ pixels
 found by moving in the $angle$ direction. 
 Here $angle \sim U[0,360]$ degrees, and $length \sim {\rm Normal}(0,(3 \times complexity)^2)$.
-\vspace*{-1mm}
+\vspace{.7cm}
+\end{minipage}
+
+\vspace*{-5mm}
 
-{\bf Occlusion.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Occlusion:}
 Selects a random rectangle from an {\em occluder} character
 image and places it over the original {\em occluded}
 image. Pixels are combined by taking the max(occluder,occluded),
@@ -254,35 +303,76 @@
 The destination positions in the occluded image are also sampled
 according to a normal distribution (more details in~\citet{ift6266-tr-anonymous}).
 This filter is skipped with probability 60\%.
-\vspace*{-1mm}
+\vspace{.4cm}
+\end{minipage}
 
-{\bf Pixel Permutation.}
-This filter permutes neighbouring pixels. It first selects 
+\vspace*{-5mm}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Pixel Permutation:}
+This filter permutes neighbouring pixels. It first selects
 a fraction $\frac{complexity}{3}$ of pixels randomly in the image. Each of them is then
 sequentially exchanged with another pixel in its $V4$ neighbourhood.
 This filter is skipped with probability 80\%.
-\vspace*{-1mm}
+\vspace{.8cm}
+\end{minipage}
+
 
-{\bf Gaussian Noise.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Gaussian Noise:}
 This filter simply adds, to each pixel of the image independently, a
 noise $\sim Normal(0,(\frac{complexity}{10})^2)$.
 This filter is skipped with probability 70\%.
-\vspace*{-1mm}
+\vspace{1.1cm}
+\end{minipage}
+\vspace{-.7cm}
 
-{\bf Background Images.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Background Images:}
 Following~\citet{Larochelle-jmlr-2009}, this transformation adds a random
 background behind the letter, from a randomly chosen natural image,
 with contrast adjustments depending on $complexity$, to preserve
 more or less of the original character image.
-\vspace*{-1mm}
+\vspace{.8cm}
+\end{minipage}
+\vspace{-.7cm}
 
-{\bf Salt and Pepper Noise.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Salt and Pepper Noise:}
 This filter adds noise $\sim U[0,1]$ to random subsets of pixels.
 The number of selected pixels is $0.2 \times complexity$.
 This filter is skipped with probability 75\%.
-\vspace*{-1mm}
+\vspace{.9cm}
+\end{minipage}
+\vspace{-.7cm}
 
-{\bf Spatially Gaussian Noise.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\vspace{.5cm}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Spatially Gaussian Noise:}
 Different regions of the image are spatially smoothed by convolving
 the image with a symmetric Gaussian kernel of
 size and variance chosen uniformly in the ranges $[12,12 + 20 \times
@@ -296,9 +386,17 @@
 computed from the following element-wise operation: $\frac{image + filtered
   image \times mask}{mask+1}$.
 This filter is skipped with probability 75\%.
-\vspace*{-1mm}
+\end{minipage}
+\vspace{-.7cm}
 
-{\bf Scratches.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+\vspace{.4cm}
+{\bf Scratches:}
 The scratches module places line-like white patches on the image.  The
 lines are heavily transformed images of the digit ``1'' (one), chosen
 at random among 500 such 1 images,
@@ -309,13 +407,24 @@
 by an amount controlled by $complexity$.
 This filter is skipped with probability 85\%. The probabilities
 of applying 1, 2, or 3 patches are (50\%,30\%,20\%).
-\vspace*{-1mm}
+\end{minipage}
+\vspace{-.7cm}
 
-{\bf Grey Level and Contrast Changes.}
+\begin{minipage}[b]{0.14\linewidth}
+\centering
+\includegraphics[scale=.45]{images/Original.PNG}
+\label{fig:Original}
+\end{minipage}%
+\hspace{0.3cm}\begin{minipage}[b]{0.86\linewidth}
+{\bf Grey Level and Contrast Changes:}
 This filter changes the contrast and may invert the image polarity (white
 to black and black to white). The contrast is $C \sim U[1-0.85 \times complexity,1]$ 
 so the image is normalized into $[\frac{1-C}{2},1-\frac{1-C}{2}]$. The
 polarity is inverted with probability 50\%.
+\vspace{.7cm}
+\end{minipage}
+\vspace{-.7cm}
+
 
 \iffalse
 \begin{figure}[ht]