# HG changeset patch
# User Dumitru Erhan <dumitru.erhan@gmail.com>
# Date 1267211738 18000
# Node ID 1f5937e9e5308da70563789925217b60008a513a
# Parent  17ae5a1a4dd1e6c41a2fd2e6b4fab644f9110df5
More moves - transformations into data_generation, added "deep" folder

diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/BruitGauss.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/BruitGauss.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,138 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout de bruit gaussien dans les donnees. A chaque iteration, un bruit poivre 
+et sel est ajoute, puis un lissage gaussien autour de ce point est ajoute.
+On fait un nombre d'iteration = 1024*complexity/25 ce qui equivaud
+a complexity/25 des points qui recoivent le centre du noyau gaussien.
+Il y en a beaucoup moins que le bruit poivre et sel, car la transformation
+est plutôt aggressive et touche beaucoup de pixels autour du centre 
+
+La grandeur de la gaussienne ainsi que son ecart type sont definit par complexity 
+et par une composante aleatoire normale.
+
+On a 25 % de chances d'effectuer le bruitage
+
+Ce fichier prend pour acquis que les images sont donnees une a la fois
+sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy
+#import random
+import scipy
+from scipy import ndimage
+
+class BruitGauss():
+    
+    def __init__(self,complexity=1,seed=6378):
+        self.nb_chngmax =10 #Le nombre de pixels changes. Seulement pour fin de calcul
+        self.grandeurmax = 20
+        self.sigmamax = 6.0
+        self.regenerate_parameters(complexity)
+        self.seed=seed
+        
+        #numpy.random.seed(self.seed)
+        
+    def get_seed(self):
+        return self.seed
+        
+    def get_settings_names(self):
+        return ['nb_chng','sigma_gauss','grandeur']
+
+    def regenerate_parameters(self, complexity):
+        self.effectuer =numpy.random.binomial(1,0.25)    ##### On a 25% de faire un bruit #####
+
+        
+        if self.effectuer and complexity > 0:
+            self.nb_chng=3+int(numpy.random.rand()*self.nb_chngmax*complexity)
+            self.sigma_gauss=2.0 + numpy.random.rand()*self.sigmamax*complexity
+            self.grandeur=12+int(numpy.random.rand()*self.grandeurmax*complexity)
+                        #creation du noyau gaussien
+            self.gauss=numpy.zeros((self.grandeur,self.grandeur))
+            x0 = y0 = self.grandeur/2.0
+            for i in xrange(self.grandeur):
+                for j in xrange(self.grandeur):
+                    self.gauss[i,j]=numpy.exp(-((i-x0)**2 + (j-y0)**2) / self.sigma_gauss**2)
+            #creation de la fenetre de moyennage
+            self.moy=numpy.zeros((self.grandeur,self.grandeur))
+            x0 = y0 = self.grandeur/2
+            for i in xrange(0,self.grandeur):
+                for j in xrange(0,self.grandeur):
+                    self.moy[i,j]=((numpy.sqrt(2*(self.grandeur/2.0)**2) -\
+                                 numpy.sqrt(numpy.abs(i-self.grandeur/2.0)**2+numpy.abs(j-self.grandeur/2.0)**2))/numpy.sqrt((self.grandeur/2.0)**2))**5
+        else:
+            self.sigma_gauss = 1 # eviter division par 0
+            self.grandeur=1
+            self.nb_chng = 0
+            self.effectuer = 0
+        
+        return self._get_current_parameters()
+
+    def _get_current_parameters(self):
+        return [self.nb_chng,self.sigma_gauss,self.grandeur]
+
+    
+    def transform_image(self, image):
+        """Blend a blurred copy of image into random windows (float32 result); image is returned as is when effectuer is 0."""
+        if self.effectuer == 0:
+            return image
+        image=image.reshape((32,32))
+        filtered_image = ndimage.convolve(image,self.gauss,mode='constant')
+        assert image.shape == filtered_image.shape
+        # Rescale the blurred image using the min/max of the input image.
+        filtered_image = (filtered_image - filtered_image.min() + image.min()) / (filtered_image.max() - filtered_image.min() + image.min()) * image.max()
+        # Build the blending mask: self.moy is accumulated around nb_chng random centres.
+        Mask = numpy.zeros((32,32))
+        for i in xrange(0,self.nb_chng):
+            x_bruit=int(numpy.random.randint(0,32))
+            y_bruit=int(numpy.random.randint(0,32))
+            # The offsets trim the window where it would stick out of the 32x32 image.
+            offsetxmin = 0
+            offsetxmax = 0
+            offsetymin = 0
+            offsetymax = 0
+            if x_bruit < self.grandeur / 2:
+                offsetxmin = self.grandeur / 2 - x_bruit
+            if 32-x_bruit < numpy.ceil(self.grandeur / 2.0):
+                offsetxmax = numpy.ceil(self.grandeur / 2.0) - (32-x_bruit)
+            if y_bruit < self.grandeur / 2:
+                offsetymin = self.grandeur / 2 - y_bruit
+            if 32-y_bruit < numpy.ceil(self.grandeur / 2.0):
+                offsetymax = numpy.ceil(self.grandeur / 2.0) - (32-y_bruit)
+            Mask[x_bruit - self.grandeur/2 + offsetxmin : x_bruit + numpy.ceil(self.grandeur/2.0) - offsetxmax,\
+                    y_bruit - self.grandeur/2 + offsetymin : y_bruit + numpy.ceil(self.grandeur/2.0)-  offsetymax] +=\
+                        self.moy[offsetxmin:self.grandeur - offsetxmax,offsetymin:self.grandeur - offsetymax] 
+        return numpy.asarray((image + filtered_image*Mask)/(Mask+1),dtype='float32')
+
+#---TESTS---
+
+def _load_image():
+    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
+    d = ft.read(f)
+    w=numpy.asarray(d[0])
+    return (w/255.0).astype('float')
+
+def _test(complexite):
+    img=_load_image()
+    transfo = BruitGauss()
+    pylab.imshow(img.reshape((32,32)))
+    pylab.show()
+    print transfo.get_settings_names()
+    print transfo.regenerate_parameters(complexite)
+    
+    img_trans=transfo.transform_image(img)
+    
+    pylab.imshow(img_trans.reshape((32,32)))
+    pylab.show()
+    
+
+if __name__ == '__main__':
+    from pylearn.io import filetensor as ft
+    import pylab
+    _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/DistorsionGauss.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/DistorsionGauss.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout d'une composante aleatoire dans chaque pixel de l'image.
+C'est une distorsion gaussienne de moyenne 0 et d'écart type complexity/10
+
+Il y a 30% d'effectuer le bruitage
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy
+import random
+
+class DistorsionGauss():
+    
+    def __init__(self,seed=3459):
+        self.ecart_type=0.1 #L'ecart type de la gaussienne
+        self.effectuer=1    #1=on effectue et 0=rien faire
+        self.seed=seed
+        
+        #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
+        #numpy.random.seed(self.seed) 
+        #random.seed(self.seed)
+        
+    def get_settings_names(self):
+        return ['effectuer']
+    
+    def get_seed(self):
+        return self.seed
+    
+    def get_settings_names_determined_by_complexity(self,complexity):
+        return ['ecart_type']
+
+    def regenerate_parameters(self, complexity):
+        self.ecart_type=float(complexity)/10
+        self.effectuer =numpy.random.binomial(1,0.3)    ##### On a 30% de faire un bruit #####
+        return self._get_current_parameters()
+
+    def _get_current_parameters(self):
+        return [self.effectuer]
+    
+    def get_parameters_determined_by_complexity(self,complexity):
+        return [float(complexity)/10]
+    
+    def transform_image(self, image):
+        if self.effectuer == 0:
+            return image
+        
+        image=image.reshape(1024,1)
+        aleatoire=numpy.zeros((1024,1)).astype('float32')
+        for i in xrange(0,1024):
+            aleatoire[i]=float(random.gauss(0,self.ecart_type))
+        image=image+aleatoire
+        
+        
+        #Ramener tout entre 0 et 1. Ancienne facon de normaliser.
+        #Resultats moins interessant je trouve.
+##        if numpy.min(image) < 0:
+##            image-=numpy.min(image)
+##        if numpy.max(image) > 1:
+##            image/=numpy.max(image)
+            
+        for i in xrange(0,1024):
+            image[i]=min(1,max(0,image[i]))
+            
+        return image.reshape(32,32)
+
+
+#---TESTS---
+
+def _load_image():
+    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
+    d = ft.read(f)
+    w=numpy.asarray(d[random.randint(0,100)])
+    return (w/255.0).astype('float')
+
+def _test(complexite):
+    img=_load_image()
+    transfo = DistorsionGauss()
+    pylab.imshow(img.reshape((32,32)))
+    pylab.show()
+    print transfo.get_settings_names()
+    print transfo.regenerate_parameters(complexite)
+    
+    img_trans=transfo.transform_image(img)
+    
+    pylab.imshow(img_trans.reshape((32,32)))
+    pylab.show()
+    
+
+if __name__ == '__main__':
+    from pylearn.io import filetensor as ft
+    import pylab
+    for i in xrange(0,5):
+        _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/Occlusion.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/Occlusion.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,161 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Adds occlusion noise to the original image.
+
+The noise is a patch sampled from a second image and pasted on the left or
+right side of the original image. The patch may also be pasted over the
+centre of the original image, but paler.
+
+The file /data/lisa/data/ift6266h10/echantillon_occlusion.ft
+(on the DIRO network) is required.
+
+There is a 40% chance of applying an occlusion.
+
+Sylvain Pannetier Lebeuf, as part of IFT6266, winter 2010
+
+'''
+
+
+import numpy
+
+from pylearn.io import filetensor as ft
+
+class Occlusion():
+    
+    def __init__(self,seed=9854):
+        #Ces 4 variables representent la taille du "crop" sur l'image2
+        #Ce "crop" est pris a partie de image1[15,15], le milieu de l'image1
+        self.haut=2
+        self.bas=2
+        self.gauche=2
+        self.droite=2
+        
+        #Ces deux variables representent le deplacement en x et y par rapport
+        #au milieu du bord gauche ou droit
+        self.x_arrivee=0
+        self.y_arrivee=0
+        
+        #Cette variable =1 si l'image est mise a gauche et -1 si a droite
+        #et =0 si au centre, mais plus pale
+        self.endroit=-1
+        
+        #Cette variable determine l'opacite de l'ajout dans le cas ou on est au milieu
+        self.opacite=0.5    #C'est completement arbitraire. Possible de le changer si voulu
+        
+        #Sert a dire si on fait quelque chose. 0=faire rien, 1 on fait quelque chose
+        self.appliquer=1
+        
+        self.seed=seed
+        #numpy.random.seed(self.seed)
+        
+        f3 = open('/data/lisa/data/ift6266h10/echantillon_occlusion.ft')   #Doit etre sur le reseau DIRO.
+        #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/echantillon_occlusion.ft')
+        #Il faut arranger le path sinon
+        w=ft.read(f3)
+        f3.close()
+        
+        self.longueur=len(w)
+        self.d=(w.astype('float'))/255
+        
+        
+    def get_settings_names(self):
+        return ['haut','bas','gauche','droite','x_arrivee','y_arrivee','endroit','rajout','appliquer']
+    
+    def get_seed(self):
+        return self.seed
+
+    def regenerate_parameters(self, complexity):
+        """Draw new random occlusion settings and return them as a list.
+
+        complexity scales the mean half-size of the pasted patch; when it is
+        0 the transformation is disabled.  The returned list is ordered as in
+        get_settings_names().
+        """
+        # Half-extents of the crop around the centre of the sample, capped at 15.
+        self.haut=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
+        self.bas=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
+        self.gauche=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
+        self.droite=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
+        if self.haut+self.bas+self.gauche+self.droite==0:   # very unlikely
+            self.haut=self.bas=self.gauche=self.droite=1
+        # Paste offsets; transform_image clamps them so the patch stays in the image.
+        self.x_arrivee=int(numpy.abs(numpy.random.normal(0,2))) # complexity is not needed here
+        self.y_arrivee=int(numpy.random.normal(0,3))
+        # NOTE(review): randint excludes its upper bound, so the last sample is never drawn - confirm intent.
+        self.rajout=numpy.random.randint(0,self.longueur-1)  # which sample the patch is cut from
+        self.appliquer=numpy.random.binomial(1,0.4)    # occlude 40% of the time
+        if complexity == 0: # nothing is applied in this case
+            self.appliquer=0    # was misspelled "applique", so the flag was never cleared
+        self.endroit=numpy.random.randint(-1,2)    # -1, 0 or 1
+        return self._get_current_parameters()
+
+    def _get_current_parameters(self):
+        return [self.haut,self.bas,self.gauche,self.droite,self.x_arrivee,self.y_arrivee,self.endroit,self.rajout,self.appliquer]
+    
+    
+    def transform_image(self, image):
+        """Paste the current occlusion patch onto image and return it.
+
+        image is indexed as a 2-D 32x32 array and is modified in place.  The
+        patch is merged with a pixel-wise maximum on the left border
+        (endroit == -1), on the right border (endroit == 1) or, attenuated
+        by self.opacite, around the centre (endroit == 0).
+        """
+        if self.appliquer == 0: # nothing to do: hand the image back untouched
+            return image
+        # Cut the occluding patch around the centre of the chosen sample.
+        bruit=self.d[self.rajout].reshape((32,32))[15-self.haut:15+self.bas+1,15-self.gauche:15+self.droite+1]
+        largeur=self.gauche+self.droite    # patch width minus one
+        # The patch spans columns x_arrivee .. x_arrivee+largeur, so the last
+        # valid offset is 31-largeur (the previous ">32" test let a sum of 32
+        # index column 32 on the left or wrap to column -1 on the right).
+        if self.x_arrivee+largeur>31:
+            self.endroit*=-1    # switch side and stick the patch to the border
+            self.x_arrivee=0
+        if self.y_arrivee-self.haut <-16:
+            self.y_arrivee=self.haut-16 # stick the patch to the top
+        if self.y_arrivee+self.bas > 15:
+            self.y_arrivee=15-self.bas  # stick the patch to the bottom
+        lignes=slice(16+self.y_arrivee-self.haut,16+self.y_arrivee+self.bas+1)
+        if self.endroit==-1:    # left
+            cible=image[lignes,self.x_arrivee:self.x_arrivee+largeur+1]
+            cible[...]=numpy.maximum(cible,bruit)
+        elif self.endroit==1:   # right
+            cible=image[lignes,31-self.x_arrivee-largeur:32-self.x_arrivee]
+            cible[...]=numpy.maximum(cible,bruit)
+        elif self.endroit==0:   # centre, paler
+            cible=image[16-self.haut:17+self.bas,16-self.gauche:17+self.droite]
+            cible[...]=numpy.maximum(cible,bruit*self.opacite)
+        return image
+        
+#---TESTS---
+
+def _load_image():
+    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
+    d = ft.read(f)
+    w=numpy.asarray(d[numpy.random.randint(0,50)])
+    return (w/255.0).astype('float')
+
+def _test(complexite):
+    
+    transfo = Occlusion()
+    for i in xrange(0,20):
+        img = _load_image()
+        pylab.imshow(img.reshape((32,32)))
+        pylab.show()
+        print transfo.get_settings_names()
+        print transfo.regenerate_parameters(complexite)
+        
+        img_trans=transfo.transform_image(img.reshape((32,32)))
+        
+        print transfo.get_seed()
+        pylab.imshow(img_trans.reshape((32,32)))
+        pylab.show()
+    
+
+if __name__ == '__main__':
+    import pylab
+    import scipy
+    _test(0.5)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/PermutPixel.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/PermutPixel.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,114 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Randomly chosen pixels are swapped with one of their 4 neighbours
+(left, right, top or bottom).
+
+The number of pixels swapped is about complexity*1024/3 (a multiple of 4).
+
+The transformation is applied with probability 20%.
+
+Sylvain Pannetier Lebeuf, as part of IFT6266, winter 2010
+
+'''
+
+import numpy
+import random
+
+class PermutPixel():
+    
+    def __init__(self,seed=7152):
+        self.nombre=10 #Le nombre de pixels a permuter
+        self.proportion=0.3
+        self.effectuer=1    #1=on effectue, 0=rien faire
+        self.seed=seed
+        
+        #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
+        #numpy.random.seed(self.seed)
+        #random.seed(self.seed)
+        
+    def get_seed(self):
+        return self.seed
+        
+    def get_settings_names(self):
+        return ['effectuer']
+    
+    def get_settings_names_determined_by_complexity(self,complexity):
+        return ['nombre']
+
+    def regenerate_parameters(self, complexity):
+        self.proportion=float(complexity)/3
+        self.nombre=int(256*self.proportion)*4   #Par multiple de 4 (256=1024/4)
+        self.echantillon=random.sample(xrange(0,1024),self.nombre)  #Les pixels qui seront permutes
+        self.effectuer =numpy.random.binomial(1,0.2)    ##### On a 20% de faire un bruit #####
+        return self._get_current_parameters()
+
+    def _get_current_parameters(self):
+        return [self.effectuer]  
+    
+    def get_parameters_determined_by_complexity(self, complexity):
+        return [int(256*(float(complexity)/3))*4]    # must equal self.nombre from regenerate_parameters (the /3 was missing)
+    
+    def transform_image(self, image):
+        if self.effectuer==0:
+            return image
+        
+        image=image.reshape(1024,1)
+        temp=0  #variable temporaire
+
+        for i in xrange(0,self.nombre,4):   #Par bonds de 4
+            #gauche
+            if self.echantillon[i] > 0:
+                temp=image[self.echantillon[i]-1]
+                image[self.echantillon[i]-1]=image[self.echantillon[i]]
+                image[self.echantillon[i]]=temp
+            #droite
+            if self.echantillon[i+1] < 1023:
+                temp=image[self.echantillon[i+1]+1]
+                image[self.echantillon[i+1]+1]=image[self.echantillon[i+1]]
+                image[self.echantillon[i+1]]=temp
+            #haut
+            if self.echantillon[i+2] > 31:
+                temp=image[self.echantillon[i+2]-32]
+                image[self.echantillon[i+2]-32]=image[self.echantillon[i+2]]
+                image[self.echantillon[i+2]]=temp
+            #bas
+            if self.echantillon[i+3] < 992:
+                temp=image[self.echantillon[i+3]+32]
+                image[self.echantillon[i+3]+32]=image[self.echantillon[i+3]]
+                image[self.echantillon[i+3]]=temp
+            
+            
+        return image.reshape((32,32))
+
+
+#---TESTS---
+
+def _load_image():
+    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
+    d = ft.read(f)
+    w=numpy.asarray(d[random.randint(0,100)])
+    return (w/255.0).astype('float')
+
+def _test(complexite):
+    img=_load_image()
+    transfo = PermutPixel()
+    pylab.imshow(img.reshape((32,32)))
+    pylab.show()
+    print transfo.get_settings_names()
+    print transfo.regenerate_parameters(complexite)
+    
+    img_trans=transfo.transform_image(img)
+    
+    pylab.imshow(img_trans.reshape((32,32)))
+    pylab.show()
+    
+
+if __name__ == '__main__':
+    from pylearn.io import filetensor as ft
+    import pylab
+    for i in xrange(0,5):
+        _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/PoivreSel.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/PoivreSel.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,94 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout de bruit poivre et sel dans les donnees. Le bruit est distribue de facon 
+aleatoire tire d'une uniforme tout comme la clarte des bites changees.
+
+La proportion de bites aleatoires est definit par complexity/5.
+Lorsque cette valeur est a 1 ==> Plus reconnaissable et 0 ==> Rien ne se passe
+
+On a maintenant 25% de chance d'effectuer un bruitage.
+
+Ce fichier prend pour acquis que les images sont donnees une a la fois
+sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy
+import random
+
+class PoivreSel():
+    
+    def __init__(self,seed=9361):
+        self.proportion_bruit=0.08 #Le pourcentage des pixels qui seront bruites
+        self.nb_chng=10 #Le nombre de pixels changes. Seulement pour fin de calcul
+        self.effectuer=1    #Vaut 1 si on effectue et 0 sinon.
+        
+        self.seed=seed
+        #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
+        #numpy.random.seed(self.seed)
+        #random.seed(self.seed)
+        
+    def get_seed(self):
+        return self.seed
+        
+    def get_settings_names(self):
+        return ['effectuer']
+    
+    def get_settings_names_determined_by_complexity(self,complexity):
+        return ['proportion_bruit']
+
+    def regenerate_parameters(self, complexity):
+        self.proportion_bruit = float(complexity)/5
+        self.nb_chng=int(1024*self.proportion_bruit)
+        self.changements=random.sample(xrange(1024),self.nb_chng)   #Les pixels qui seront changes
+        self.effectuer =numpy.random.binomial(1,0.25)    ##### On a 25% de faire un bruit #####
+        return self._get_current_parameters()
+
+    def _get_current_parameters(self):
+        return [self.effectuer]
+    
+    def get_parameters_determined_by_complexity(self, complexity):
+        return [float(complexity)/5]
+    
+    def transform_image(self, image):
+        if self.effectuer == 0:
+            return image
+        
+        image=image.reshape(1024,1)
+        for j in xrange(0,self.nb_chng):
+            image[self.changements[j]]=numpy.random.random()    #On determine les nouvelles valeurs des pixels changes
+        return image.reshape(32,32)
+
+
+#---TESTS---
+
+def _load_image():
+    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
+    d = ft.read(f)
+    w=numpy.asarray(d[0])
+    return (w/255.0).astype('float')
+
+def _test(complexite):
+    img=_load_image()
+    transfo = PoivreSel()
+    pylab.imshow(img.reshape((32,32)))
+    pylab.show()
+    print transfo.get_settings_names()
+    print transfo.regenerate_parameters(complexite)
+    
+    img_trans=transfo.transform_image(img)
+    
+    pylab.imshow(img_trans.reshape((32,32)))
+    pylab.show()
+    
+
+if __name__ == '__main__':
+    from pylearn.io import filetensor as ft
+    import pylab
+    _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/Rature.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/Rature.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,255 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout d'une rature sur le caractère. La rature est en fait un 1 qui recoit une
+rotation et qui est ensuite appliqué sur le caractère. Un grossissement, puis deux
+erosions sont effectuees sur le 1 afin qu'il ne soit plus reconnaissable.
+Il y a des chances d'avoir plus d'une seule rature !
+
+Il y a 15% d'effectuer une rature.
+
+Ce fichier prend pour acquis que les images sont donnees une a la fois
+sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy, Image, random
+import scipy.ndimage.morphology
+from pylearn.io import filetensor as ft
+
+
+class Rature():
+   
+    def __init__(self,seed=1256):
+        self.angle=0 #Angle en degre de la rotation (entre 0 et 180)
+        self.numero=0 #Le numero du 1 choisi dans la banque de 1
+        self.gauche=-1   #Le numero de la colonne la plus a gauche contenant le 1
+        self.droite=-1
+        self.haut=-1
+        self.bas=-1
+        self.faire=1    #1=on effectue et 0=fait rien
+        
+        self.crop_haut=0
+        self.crop_gauche=0  #Ces deux valeurs sont entre 0 et 31 afin de definir
+                            #l'endroit ou sera pris le crop dans l'image du 1
+                            
+        self.largeur_bande=-1    #La largeur de la bande
+        self.smooth=-1   #La largeur de la matrice carree servant a l'erosion
+        self.nb_ratures=-1   #Le nombre de ratures appliques
+        self.fini=0 #1=fini de mettre toutes les couches 0=pas fini
+        self.complexity=0   #Pour garder en memoire la complexite si plusieurs couches sont necessaires
+        self.seed=seed
+        
+        #numpy.random.seed(self.seed)
+        
+        f3 = open('/data/lisa/data/ift6266h10/un_rature.ft')   #Doit etre sur le reseau DIRO.
+        #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/un_rature.ft')
+        #Il faut arranger le path sinon
+        w=ft.read(f3)
+        f3.close()
+        self.d=(w.astype('float'))/255
+        
+        self.patch=self.d[0].reshape((32,32)) #La patch de rature qui sera appliquee sur l'image
+
+    def get_settings_names(self):
+        return ['angle','numero','faire','crop_haut','crop_gauche','largeur_bande','smooth','nb_ratures']
+    
+    def get_seed(self):
+        return self.seed
+
+    def regenerate_parameters(self, complexity,next_rature = False):
+        
+        
+        self.numero=random.randint(0,4999)  #Ces bornes sont inclusives !
+        self.fini=0
+        self.complexity=complexity
+            
+        if float(complexity) > 0:
+            
+            self.gauche=self.droite=self.haut=self.bas=-1   #Remet tout a -1
+            
+            self.angle=int(numpy.random.normal(90,100*complexity))
+
+            self.faire=numpy.random.binomial(1,0.15)    ##### 15% d'effectuer une rature #####
+            if next_rature:
+                self.faire = 1
+            #self.faire=1 #Pour tester seulement
+            
+            self.crop_haut=random.randint(0,17)
+            self.crop_gauche=random.randint(0,17)
+            if complexity <= 0.25 :
+                self.smooth=6
+            elif complexity <= 0.5:
+                self.smooth=5
+            elif complexity <= 0.75:
+                self.smooth=4
+            else:
+                self.smooth=3
+            
+            p = numpy.random.rand()
+            if p < 0.5:
+                self.nb_ratures= 1
+            else:
+                if p < 0.8:
+                    self.nb_ratures = 2
+                else:
+                    self.nb_ratures = 3
+            
+            #Creation de la "patch" de rature qui sera appliquee sur l'image
+            if self.faire == 1:
+                self.get_size()
+                self.get_image_rot()    #On fait la "patch"
+            
+        else:
+            self.faire=0    #On ne fait rien si complexity=0 !!
+        
+        return self._get_current_parameters()
+    
+    
+    def get_image_rot(self):
+        """Rotate the selected "1", enlarge it to 50x50 and pass a 32x32 crop to get_patch."""
+        # get_size() stores inclusive bounds, hence the +1 to keep the last row/column.
+        image2=self.d[self.numero].reshape((32,32))[self.haut:self.bas+1,self.gauche:self.droite+1]
+        im = Image.fromarray(numpy.asarray(image2*255,dtype='uint8'))
+        # High-quality rotation and resize so that the stroke stays sharp.
+        im2 = im.rotate(self.angle,Image.BICUBIC,expand=False)
+        im3=im2.resize((50,50),Image.ANTIALIAS)
+        grosse=numpy.asarray(numpy.asarray(im3)/255.0,dtype='float32')
+        # Crop at the random (crop_haut, crop_gauche) offset drawn in 0..17; the
+        # bounding box (haut, gauche) used before could give less than 32x32.
+        crop=grosse[self.crop_haut:self.crop_haut+32,self.crop_gauche:self.crop_gauche+32]
+        self.get_patch(crop)
+        
+    def get_patch(self,crop):
+        """Erode crop twice, rescale it to [0, 1] and store it in self.patch.
+
+        The result is flipped upside down half of the time.  self.patch is
+        always rebound to a fresh array so that arrays it used to share memory
+        with (self.d[0] after __init__, or a caller's reference) are not
+        overwritten.
+        """
+        smooting = numpy.ones((self.smooth,self.smooth))
+        # Two erosions give a stroke that is neither too wide nor too thin.
+        trans=scipy.ndimage.morphology.grey_erosion\
+                    (crop,size=smooting.shape,structure=smooting,mode='wrap')
+        trans1=scipy.ndimage.morphology.grey_erosion\
+                    (trans,size=smooting.shape,structure=smooting,mode='wrap')
+        patch_img=Image.fromarray(numpy.asarray(trans1*255,dtype='uint8'))
+        patch_img2=patch_img.crop((4,4,28,28)).resize((32,32))  # counter the border effects
+        trans2=numpy.asarray(numpy.asarray(patch_img2)/255.0,dtype='float32')
+        # Bring everything back between 0 and 1.
+        trans2=trans2-trans2.min() # make everything non-negative
+        amplitude=trans2.max()
+        if amplitude > 0:   # a constant patch would otherwise become NaN (0/0)
+            trans2=trans2/amplitude
+        # The stroke is more likely at the bottom, or slanting up towards 10 o'clock.
+        if random.random() <= 0.5:  # flip the rows in this case
+            self.patch=trans2[::-1,:].copy()
+        else:
+            self.patch=trans2
+        
+    
+    
+    
+    def get_size(self):
+        image=self.d[self.numero].reshape((32,32))
+        
+        #haut
+        for i in xrange(0,32):
+            for j in xrange(0,32):
+                if(image[i,j]) != 0:
+                    if self.haut == -1:
+                        self.haut=i
+                        break
+            if self.haut > -1:
+                break
+        
+        #bas
+        for i in xrange(31,-1,-1):
+            for j in xrange(0,32):
+                if(image[i,j]) != 0:
+                    if self.bas == -1:
+                        self.bas=i
+                        break
+            if self.bas > -1:
+                break
+            
+        #gauche
+        for i in xrange(0,32):
+            for j in xrange(0,32):
+                if(image[j,i]) != 0:
+                    if self.gauche == -1:
+                        self.gauche=i
+                        break
+            if self.gauche > -1:
+                break
+            
+        #droite
+        for i in xrange(31,-1,-1):
+            for j in xrange(0,32):
+                if(image[j,i]) != 0:
+                    if self.droite == -1:
+                        self.droite=i
+                        break
+            if self.droite > -1:
+                break
+                
+
+    def _get_current_parameters(self):
+        return [self.angle,self.numero,self.faire,self.crop_haut,self.crop_gauche,self.largeur_bande,self.smooth,self.nb_ratures]
+
+    def transform_image(self, image):
+        """Overlay the scratch patch(es) on image with a pixel-wise max.
+
+        image is indexed as a 2-D 32x32 array; it is modified in place and
+        returned.  When nb_ratures > 1 the extra layers are generated here.
+        """
+        if self.faire == 0: # nothing to do
+            return image
+        if self.fini == 0:   # extra layers still have to be stacked
+            # Copy: regenerate_parameters() may rewrite self.patch in place and lose layers.
+            patch_temp=self.patch.copy()
+            for w in range(1,self.nb_ratures):
+                self.regenerate_parameters(self.complexity,1)
+                patch_temp=numpy.fmax(patch_temp,self.patch)
+            self.fini=1
+            self.patch=patch_temp
+        image[:32,:32]=numpy.fmax(image[:32,:32],self.patch)    # a NaN in the patch is ignored, as with the old max() loop
+        self.patch=numpy.zeros_like(self.patch)   # reset without zeroing arrays that alias the patch (e.g. self.d[0])
+        return image
+
+
+#---TESTS---
+
+def _load_image():
+    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
+    d = ft.read(f)
+    w=numpy.asarray(d[0:1000])
+    return (w/255.0).astype('float')
+
+def _test(complexite):
+    img=_load_image()
+    transfo = Rature()
+    for i in xrange(0,10):
+        img2=img[random.randint(0,len(img)-1)]  # randint is inclusive; the old bound 1000 overran the (at most) 1000 loaded images
+        pylab.imshow(img2.reshape((32,32)))
+        pylab.show()
+        print transfo.get_settings_names()
+        print transfo.regenerate_parameters(complexite)
+        img2=img2.reshape((32,32))
+        
+        img2_trans=transfo.transform_image(img2)
+        
+        pylab.imshow(img2_trans.reshape((32,32)))
+        pylab.show()
+    
+
+if __name__ == '__main__':
+    from pylearn.io import filetensor as ft
+    import pylab
+    _test(1)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/add_background_image.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/add_background_image.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,112 @@
+#!/usr/bin/python                                                                                 
+# -*- coding: iso-8859-1 -*-                                                                      
+
+'''
+    Implementation of random background adding to a specific image
+
+    Author: Guillaume Sicard
+'''
+
+import sys, os, random
+import cPickle
+import Image, numpy           
+
+class AddBackground():
+    def __init__(self, threshold = 128, complexity = 1):
+        self.h = 32
+        self.w = 32
+        self.threshold = 1  # NOTE(review): the `threshold` argument is ignored -- confirm intent
+        try:  # load the list of background images locally when it is available
+            self.bg_image_file = '/Tmp/image_net/'
+            f=open(self.bg_image_file+'filelist.pkl')
+        except IOError:  # no local copy: fall back to the shared location
+            self.bg_image_file = '/data/lisa/data/ift6266h10/image_net/'
+            f=open(self.bg_image_file+'filelist.pkl')
+        self.image_files = cPickle.load(f)
+        f.close()
+        self.regenerate_parameters(complexity)
+    
+    def get_current_parameters(self):
+        return [self.contrast]
+    # names of the values returned by get_current_parameters
+    def get_settings_names(self):
+        return ['contrast']
+    
+    # no need, except for testmod.py
+    def regenerate_parameters(self, complexity):
+        self.contrast = 1-numpy.random.rand()*complexity
+        return [self.contrast]
+
+    # load an image
+    def load_image(self,filename):
+        image = Image.open(filename).convert('L')
+        image = numpy.asarray(image)
+        image = (image / 255.0).astype(numpy.float32)
+        return image
+
+    # save an image
+    def save_image(self,array, filename):
+        # Convert to clipped 8-bit pixels so PIL builds a greyscale (mode 'L') image.
+        image = Image.fromarray(numpy.clip(array * 255.0, 0, 255).astype('uint8'))
+        if (filename != ''):
+            image.save(filename)
+        else:
+            image.show()
+
+    # make a random 32x32 crop of an image
+    def rand_crop(self,image):
+        i_w, i_h = image.shape
+        x, y = random.randint(0, i_w - self.w), random.randint(0, i_h - self.h)
+        return image[x:x + self.w, y:y + self.h]
+
+    # select a random background image from "bg_image_file" and crops it
+    def rand_bg_image(self,maximage):
+        i = random.randint(0, len(self.image_files) - 1)
+        image = self.load_image(self.bg_image_file + self.image_files[i])
+        self.bg_image = self.rand_crop(image)
+        maxbg = self.bg_image.max()
+        if maxbg > 0:  # an all-black crop stays black instead of turning into NaN
+            self.bg_image = self.bg_image / maxbg * ( max(maximage - self.contrast,0.0) )
+
+    # set "bg_image" as background to "image", based on a pixels threshold
+    def set_bg(self,image):
+        tensor = numpy.asarray([self.bg_image,image],dtype='float32')
+        return tensor.max(0)
+
+    # transform an image file and return an array
+    def transform_image_from_file(self, filename):
+        image = self.load_image(filename)
+        # rand_bg_image needs the image maximum to scale the background
+        self.rand_bg_image(image.max())
+        return self.set_bg(image)
+
+    # standard array to array transform
+    def transform_image(self, image):
+        self.rand_bg_image(image.max())
+        image = self.set_bg(image)
+        return image
+
+    # test method
+    def test(self,filename):
+        import time
+
+        sys.stdout.write('Starting addBackground test : loading image')
+        sys.stdout.flush()
+
+        image = self.load_image(filename)
+
+        t = 0
+        n = 500
+        for i in range(n):
+            t0 =  time.time()
+            image2 = self.transform_image(image)
+            t = ( i * t + (time.time() - t0) ) / (i + 1)
+            sys.stdout.write('.')
+            sys.stdout.flush()
+            
+        print "Done!\nAverage time : " + str(1000 * t) + " ms"
+
+if __name__ == '__main__':
+
+    myAddBackground = AddBackground()
+    myAddBackground.test('./images/0-LiberationSans-Italic.ttf.jpg')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/affine_transform.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/affine_transform.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,89 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Simple implementation of random affine transformations based on the Python 
+Imaging Module affine transformations.
+
+
+Author: Razvan Pascanu
+'''
+
+import numpy, Image
+
+
+
+class AffineTransformation():
+    def __init__( self, complexity = .5):
+        self.shape = (32,32)
+        self.complexity = complexity
+        params = numpy.random.uniform(size=6) -.5
+        self.a = 1. + params[0]*.6*complexity
+        self.b = 0. + params[1]*.6*complexity
+        self.c = params[2]*8.*complexity
+        self.d = 0. + params[3]*.6*complexity
+        self.e = 1. + params[4]*.6*complexity
+        self.f = params[5]*8.*complexity
+
+    
+    def _get_current_parameters(self):
+        return [self.a, self.b, self.c, self.d, self.e, self.f]
+
+    def get_settings_names(self):
+        return ['a','b','c','d','e','f']
+
+    def regenerate_parameters(self, complexity):
+        # Draw a new random affine transformation (a, b, c, d, e, f).
+        # With PIL's Image.AFFINE, the output pixel (x, y) is read from the
+        # position (x', y') of the input image, where:
+        #   x' = a*x + b*y + c
+        #   y' = d*x + e*y + f
+
+        # The ranges were chosen by hand so that the results look acceptable.
+ 
+        self.complexity = complexity
+        params = numpy.random.uniform(size=6) -.5
+        self.a = 1. + params[0]*.8*complexity
+        self.b = 0. + params[1]*.8*complexity
+        self.c = params[2]*9.*complexity
+        self.d = 0. + params[3]*.8*complexity
+        self.e = 1. + params[4]*.8*complexity
+        self.f = params[5]*9.*complexity
+        return self._get_current_parameters()
+
+      
+
+
+    def transform_image(self,NIST_image):
+    
+        im = Image.fromarray( \
+                numpy.asarray(\
+                       NIST_image.reshape(self.shape)*255.0, dtype='uint8'))
+        nwim = im.transform( (32,32), Image.AFFINE, [self.a,self.b,self.c,self.d,self.e,self.f])
+        return numpy.asarray(numpy.asarray(nwim)/255.0,dtype='float32')
+
+
+
+if __name__ =='__main__':
+    print 'random test'
+    
+    from pylearn.io import filetensor as ft
+    import pylab
+
+    datapath = '/data/lisa/data/nist/by_class/'
+
+    f = open(datapath+'digits/digits_train_data.ft')
+    d = ft.read(f)
+    f.close()
+
+
+    transformer = AffineTransformation()
+    id = numpy.random.randint(30)
+    
+    pylab.figure()
+    pylab.imshow(d[id].reshape((32,32)))
+    pylab.figure()
+    pylab.imshow(transformer.transform_image(d[id]).reshape((32,32)))
+
+    pylab.show()
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/contrast.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/contrast.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,137 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Simple implementation of random contrast. The polarity is inverted half of the time;
+a random contrast that depends on the complexity is then chosen. The mean of the maximum
+and minimum pixel values stays at 0.5 (to avoid important bias changes between examples).
+
+Author: Xavier Glorot
+'''
+
+import numpy as N
+import copy
+
+
+class Contrast():
+    def __init__(self,complexity = 1):
+        #---------- private attributes
+        self.__nx__ = 32 #xdim of the images
+        self.__ny__ = 32 #ydim of the images
+        self.__Pinvert__ = 0.5 #probability to switch polarity
+        self.__mincontrast__ = 0.15
+        self.__resolution__ = 256
+        self.__rangecontrastres__ = self.__resolution__ - N.int(self.__mincontrast__*self.__resolution__)
+        #------------------------------------------------
+        
+        #---------- generation parameters
+        self.regenerate_parameters(complexity)
+        #------------------------------------------------
+    
+    def _get_current_parameters(self):
+        return [self.invert,self.contrast]
+    
+    def get_settings_names(self):
+        return ['invert','contrast']
+    
+    def regenerate_parameters(self, complexity):
+        self.invert = (N.random.uniform() < self.__Pinvert__)
+        self.contrast = self.__resolution__ - N.random.randint(1 + self.__rangecontrastres__ * complexity)
+        return self._get_current_parameters()
+    
+    def transform_1_image(self,image): #the real transformation method
+        mini = image.min()
+        #a constant image has a zero range: divide by 1 instead to avoid NaN pixels
+        span = float(image.max() - mini) or 1.0
+        offset = (self.__resolution__- self.contrast) / (2 * float(self.__resolution__))
+        scaled = (image - mini) / span * self.contrast / float(self.__resolution__)
+        if self.invert:
+            newimage = 1 - offset - scaled
+        else:
+            newimage = offset + scaled
+        if image.dtype == 'uint8':
+            return N.asarray(newimage*255,dtype='uint8')
+        return N.asarray(newimage,dtype=image.dtype)
+    
+    def transform_image(self,image): #handling different format
+        if image.shape == (self.__nx__,self.__ny__):
+            return self.transform_1_image(image)
+        if image.ndim == 3:
+            newimage = copy.copy(image)
+            for i in range(image.shape[0]):
+                newimage[i,:,:] = self.transform_1_image(image[i,:,:])
+            return newimage
+        if image.ndim == 2: #batch of flattened images; copied so the caller's array is not overwritten
+            newimage = N.array(image).reshape((image.shape[0],self.__nx__,self.__ny__))
+            for i in range(image.shape[0]):
+                newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
+            return N.reshape(newimage,image.shape)
+        if image.ndim == 1:
+            newimage = N.reshape(image,(self.__nx__,self.__ny__))
+            newimage = self.transform_1_image(newimage)
+            return N.reshape(newimage,image.shape)
+        assert False #should never go there
+
+
+
+
+#test on NIST (you need pylearn and access to NIST to do that)
+
+if __name__ == '__main__':
+    
+    from pylearn.io import filetensor as ft
+    import copy
+    import pygame
+    import time
+    datapath = '/data/lisa/data/nist/by_class/'
+    f = open(datapath+'digits/digits_train_data.ft')
+    d = ft.read(f)
+    
+    pygame.surfarray.use_arraytype('numpy')
+    
+    pygame.display.init()
+    screen = pygame.display.set_mode((8*2*32,8*32),0,8)
+    anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
+    screen.set_palette(anglcolorpalette)
+    
+    MyContrast = Contrast()
+    
+    debut=time.time()
+    MyContrast.transform_image(d)
+    fin=time.time()
+    print '------------------------------------------------'
+    print d.shape[0],' images transformed in :', fin-debut, ' seconds'
+    print '------------------------------------------------'
+    print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
+    print '------------------------------------------------'
+    print MyContrast.get_settings_names()
+    print MyContrast._get_current_parameters()
+    print MyContrast.regenerate_parameters(0)
+    print MyContrast.regenerate_parameters(0.5)
+    print MyContrast.regenerate_parameters(1)
+    for i in range(10000):
+        a=d[i,:]
+        b=N.asarray(N.reshape(a,(32,32))).T
+        
+        new=pygame.surfarray.make_surface(b)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new.set_palette(anglcolorpalette)
+        screen.blit(new,(0,0))
+        
+        print MyContrast.get_settings_names(), MyContrast.regenerate_parameters(1)
+        c=MyContrast.transform_image(a)
+        b=N.asarray(N.reshape(c,(32,32))).T
+        
+        new=pygame.surfarray.make_surface(b)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new.set_palette(anglcolorpalette)
+        screen.blit(new,(8*32,0))
+        
+        pygame.display.update()
+        raw_input('Press Enter')
+    
+    pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/filetensor.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/filetensor.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,232 @@
+"""
+Read and write the matrix file format described at
+U{http://www.cs.nyu.edu/~ylclab/data/norb-v1.0/index.html}
+
+The format is for dense tensors:
+
+    - magic number indicating type and endianness - 4bytes
+    - rank of tensor - int32
+    - dimensions - int32, int32, int32, ...
+    - <data>
+
+The number of dimensions and rank is slightly tricky: 
+    - for scalar: rank=0, dimensions = [1, 1, 1]
+    - for vector: rank=1, dimensions = [?, 1, 1]
+    - for matrix: rank=2, dimensions = [?, ?, 1]
+
+For rank >= 3, the number of dimensions matches the rank exactly.
+
+
+@todo: add complex type support
+
+"""
+import sys
+import numpy
+
+def _prod(lst):
+    product = 1  # value of the empty product
+    for factor in lst:
+        product = product * factor
+    return product
+
+_magic_dtype = {
+        0x1E3D4C51 : ('float32', 4),
+        #0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix?
+        0x1E3D4C53 : ('float64', 8),
+        0x1E3D4C54 : ('int32', 4),
+        0x1E3D4C55 : ('uint8', 1),
+        0x1E3D4C56 : ('int16', 2),
+        }
+_dtype_magic = {
+        'float32': 0x1E3D4C51,
+        #'packed matrix': 0x1E3D4C52,
+        'float64': 0x1E3D4C53,
+        'int32': 0x1E3D4C54,
+        'uint8': 0x1E3D4C55,
+        'int16': 0x1E3D4C56
+        }
+
+def _read_int32(f):
+    """unpack a 4-byte integer from the current position in file f"""
+    s = f.read(4)
+    s_array = numpy.fromstring(s, dtype='int32')
+    return s_array.item()
+
+def _read_header(f, debug=False):
+    """
+    :returns: data type, element size, rank, shape, size
+    """
+    #what is the data type of this matrix?
+    #magic_s = f.read(4)
+    #magic = numpy.fromstring(magic_s, dtype='int32')
+    magic = _read_int32(f)
+    magic_t, elsize = _magic_dtype[magic]
+    if debug: 
+        print 'header magic', magic, magic_t, elsize
+    if magic_t == 'packed matrix':
+        raise NotImplementedError('packed matrix not supported')
+
+    #what is the rank of the tensor?
+    ndim = _read_int32(f)
+    if debug: print 'header ndim', ndim
+
+    #what are the dimensions of the tensor?
+    dim = numpy.fromfile(f, dtype='int32', count=max(ndim,3))[:ndim]
+    dim_size = _prod(dim)
+    if debug: print 'header dim', dim, dim_size
+
+    return magic_t, elsize, ndim, dim, dim_size
+
+class arraylike(object):
+    """Provide an array-like interface to the filetensor in f.
+
+    The rank parameter to __init__ controls how this object interprets the underlying tensor.
+    Its behaviour should be clear from the following example.
+    Suppose the underlying tensor is MxNxK.
+
+    - If rank is 0, self[i] will be a scalar and len(self) == M*N*K.
+
+    - If rank is 1, self[i] is a vector of length K, and len(self) == M*N.
+
+    - If rank is 3, self[i] is a 3D tensor of size MxNxK, and len(self)==1.
+
+    - If rank is 5, self[i] is a 5D tensor of size 1x1xMxNxK, and len(self) == 1.
+
+
+    :note: Objects of this class generally require exclusive use of the underlying file handle, because
+    they call seek() every time you access an element.
+    """
+
+    f = None 
+    """File-like object"""
+
+    magic_t = None
+    """numpy data type of array"""
+
+    elsize = None
+    """number of bytes per scalar element"""
+
+    ndim = None
+    """Rank of underlying tensor"""
+
+    dim = None
+    """tuple of array dimensions (aka shape)"""
+
+    dim_size = None
+    """number of scalars in the tensor (prod of dim)"""
+
+    f_start = None
+    """The file position of the first element of the tensor"""
+
+    readshape = None
+    """tuple of array dimensions of the block that we read"""
+
+    readsize = None
+    """number of elements we must read for each block"""
+    
+    def __init__(self, f, rank=0, debug=False):
+        self.f = f
+        self.magic_t, self.elsize, self.ndim, self.dim, self.dim_size = _read_header(f,debug)
+        self.f_start = f.tell()
+
+        if rank <= self.ndim:
+          self.readshape = tuple(self.dim[self.ndim-rank:])
+        else:
+          self.readshape = tuple(self.dim)
+
+        #self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim)
+
+        if rank <= self.ndim:
+          padding = tuple()
+        else:
+          padding = (1,) * (rank - self.ndim)
+
+        #padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim)
+        self.returnshape = padding + self.readshape
+        self.readsize = _prod(self.readshape)
+        if debug: print 'READ PARAM', self.readshape, self.returnshape, self.readsize
+
+    def __len__(self):
+        return _prod(self.dim[:self.ndim-len(self.readshape)])
+
+    def __getitem__(self, idx):
+        if idx < 0:  # count from the end, as regular sequences do
+            idx += len(self)
+        if not 0 <= idx < len(self):
+            raise IndexError(idx)
+        self.f.seek(self.f_start + idx * self.elsize * self.readsize)
+        return numpy.fromfile(self.f, dtype=self.magic_t, count=self.readsize).reshape(self.returnshape)
+
+
+#
+# TODO: implement item selection:
+#  e.g. load('some mat', subtensor=(:6, 2:5))
+#
+#  This function should be memory efficient by:
+#  - allocating an output matrix at the beginning
+#  - seeking through the file, reading subtensors from multiple places
+def read(f, subtensor=None, debug=False):
+    """Load all or part of file 'f' into a numpy ndarray
+
+    @param f: file from which to read
+    @type f: file-like object
+
+    If subtensor is not None, it should be like the argument to
+    numpy.ndarray.__getitem__.  The following two expressions should return
+    equivalent ndarray objects, but the one on the left may be faster and more
+    memory efficient if the underlying file f is big.
+
+        read(f, subtensor) <===> read(f)[*subtensor]
+
+    Only a slice of the first dimension with a step of None or 1 is supported;
+    any other kind of subtensor raises NotImplementedError.
+
+    """
+    magic_t, elsize, ndim, dim, dim_size = _read_header(f,debug)
+    f_start = f.tell()
+
+    if subtensor is None:
+        rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
+    elif isinstance(subtensor, slice):
+        if subtensor.step not in (None, 1):
+            raise NotImplementedError('slice with step', subtensor.step)
+        # slice.indices() resolves None, negative and out-of-range bounds
+        start, stop, _ = subtensor.indices(int(dim[0]))
+        if start:
+            f.seek(f_start + start * _prod(dim[1:]) * elsize)
+        dim[0] = max(stop - start, 0)
+        rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
+    else:
+        raise NotImplementedError('subtensor access not written yet:', subtensor) 
+
+    return rval
+
+def write(f, mat):
+    """Write a numpy.ndarray to file.
+
+    @param f: file into which to write
+    @type f: file-like object
+
+    @param mat: array to write to file
+    @type mat: numpy ndarray or compatible
+
+    """
+    def _write_int32(f, i):
+        i_array = numpy.asarray(i, dtype='int32')
+        if 0: print 'writing int32', i, i_array
+        i_array.tofile(f)
+
+    try:
+        _write_int32(f, _dtype_magic[str(mat.dtype)])
+    except KeyError:
+        raise TypeError('Invalid ndarray dtype for filetensor format', mat.dtype)
+
+    _write_int32(f, len(mat.shape))
+    shape = mat.shape
+    if len(shape) < 3:
+        shape = list(shape) + [1] * (3 - len(shape))
+    if 0: print 'writing shape =', shape
+    for sh in shape:
+        _write_int32(f, sh)
+    mat.tofile(f)
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/gimp_script.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/gimp_script.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+'''
+Filtres GIMP sous Python
+Auteur: Nicolas Boulanger-Lewandowski
+Date: Hiver 2010
+
+run with: gimp -i --batch-interpreter python-fu-eval --batch - < gimp_script.py
+end with: pdb.gimp_quit(0)
+
+Implémente le motionblur et le pinch
+'''
+
+from gimpfu import *
+import numpy
+
+img = gimp.Image(32, 32, GRAY)
+img.disable_undo()
+layer1 = gimp.Layer(img, "layer1", 32, 32, GRAY_IMAGE, 100, NORMAL_MODE)
+img.add_layer(layer1, 0)
+dest_rgn = layer1.get_pixel_rgn(0, 0, 32, 32, True)
+
+def setpix(image):
+    dest_rgn[:,:] = (image.T*255).astype(numpy.uint8).tostring()
+    layer1.flush()
+    layer1.update(0, 0, 32, 32)
+
+def getpix():
+    return numpy.fromstring(dest_rgn[:,:], 'UInt8').astype(numpy.float32).reshape((32,32)).T / 255.0
+
+class GIMP1():
+    def get_settings_names(self):
+        return ['mblur_length', 'mblur_angle', 'pinch']
+    
+    def regenerate_parameters(self, complexity):
+        if complexity:
+            self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity))))
+        else:
+            self.mblur_length = 0
+        self.mblur_angle =  int(round(numpy.random.uniform(0,360)))
+        self.pinch = numpy.random.uniform(-complexity, 0.7*complexity)
+
+        return [self.mblur_length, self.mblur_angle, self.pinch]
+
+    def transform_image(self, image):
+        if self.mblur_length or self.pinch:
+            setpix(image)
+            if self.mblur_length:
+                pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0)
+            if self.pinch:        
+                pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0)
+            image = getpix()
+
+        return image
+
+# test
+if __name__ == '__main__':
+    import Image
+    im = numpy.asarray(Image.open("a.bmp").convert("L")) / 255.0
+
+    test = GIMP1()
+    print test.get_settings_names(), '=', test.regenerate_parameters(1)
+    #for i in range(1000):
+    im = test.transform_image(im)
+
+    import pylab
+    pylab.imshow(im, pylab.matplotlib.cm.Greys_r)
+    pylab.show()
+
+    pdb.gimp_quit(0)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/image_tiling.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/image_tiling.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,86 @@
+"""
+Illustrate filters (or data) in a grid of small image-shaped tiles.
+
+Note: taken from the pylearn codebase on Feb 4, 2010 (fsavard)
+"""
+
+import numpy
+from PIL import Image
+
+def scale_to_unit_interval(ndar,eps=1e-8):
+    ndar = ndar.astype(ndar.dtype if ndar.dtype.kind == 'f' else 'float64')  # float copy
+    ndar -= ndar.min()
+    ndar *= 1.0 / (ndar.max()+eps)
+    return ndar
+
+def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0,0),
+        scale_rows_to_unit_interval=True, 
+        output_pixel_vals=True
+        ):
+    """
+    Transform an array with one flattened image per row, into an array in which images are
+    reshaped and laid out like tiles on a floor.
+
+    This function is useful for visualizing datasets whose rows are images, and also columns of
+    matrices for transforming those rows (such as the first layer of a neural net).
+
+    :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can be 2-D ndarrays or None
+    :param X: a 2-D array in which every row is a flattened image.
+    :type img_shape: tuple; (height, width)
+    :param img_shape: the original shape of each image
+    :type tile_shape: tuple; (rows, cols)
+    :param tile_shape: the number of images to tile (rows, cols)
+
+    :returns: array suitable for viewing as an image.  (See:`PIL.Image.fromarray`.)
+    :rtype: 2-d array (3-d with 4 channels for a tuple X); uint8 if output_pixel_vals, else the dtype of X.
+
+    """
+    assert len(img_shape) == 2
+    assert len(tile_shape) == 2
+    assert len(tile_spacing) == 2
+
+    out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp 
+        in zip(img_shape, tile_shape, tile_spacing)]
+
+    if isinstance(X, tuple):
+        assert len(X) == 4
+        if output_pixel_vals:
+            out_dtype = 'uint8'
+        else:
+            # the tuple itself has no dtype: use the dtype of the first channel that is given
+            given = [x.dtype for x in X if x is not None]
+            out_dtype = given[0] if given else 'float64'
+        out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype=out_dtype)
+        #colors default to 0, alpha defaults to 1 (opaque)
+        if output_pixel_vals:
+            channel_defaults = [0,0,0,255]
+        else:
+            channel_defaults = [0.,0.,0.,1.]
+
+        for i in xrange(4):
+            if X[i] is None:
+                out_array[:,:,i] = channel_defaults[i]
+            else:
+                out_array[:,:,i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
+        return out_array
+
+    else:
+        H, W = img_shape
+        Hs, Ws = tile_spacing
+
+        out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
+        for tile_row in xrange(tile_shape[0]):
+            for tile_col in xrange(tile_shape[1]):
+                if tile_row * tile_shape[1] + tile_col < X.shape[0]:
+                    if scale_rows_to_unit_interval:
+                        this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
+                    else:
+                        this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
+                    out_array[
+                        tile_row * (H+Hs):tile_row*(H+Hs)+H,
+                        tile_col * (W+Ws):tile_col*(W+Ws)+W
+                        ] \
+                        = this_img * (255 if output_pixel_vals else 1)
+        return out_array
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/local_elastic_distortions.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/local_elastic_distortions.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,456 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Implementation of elastic distortions as described in
+Simard, Steinkraus, Platt, "Best Practices for Convolutional
+    Neural Networks Applied to Visual Document Analysis", 2003
+
+Author: François Savard
+Date: Fall 2009, revised Winter 2010
+
+Usage: create a LocalElasticDistorter (alpha and sigma follow from complexity).
+    Then each time you want to change the distortion field applied,
+    call regenerate_parameters(complexity), then transform_image(image).
+
+    (The point behind this is that regeneration takes some time,
+    so we better reuse the fields a few times)
+'''
+
+import sys
+import math
+import numpy
+import numpy.random
+import scipy.signal # convolve2d
+
+_TEST_DIR = "/u/savardf/ift6266/debug_images/"
+
+def _raw_zeros(size):  # (rows, cols) grid of int zeros as nested lists, one distinct list per row
+    return [[0] * size[1] for _row in range(size[0])]
+
+class ElasticDistortionParams():  # one distortion field: alpha, sigma and its lookup tables
+    def __init__(self, image_size=(32,32), alpha=0.0, sigma=0.0):
+        self.image_size = image_size
+        self.alpha = alpha
+        self.sigma = sigma
+
+        h,w = self.image_size
+        # per output pixel: row/col index of each of the 4 source corners (tl, tr, bl, br)
+        self.matrix_tl_corners_rows = _raw_zeros((h,w))
+        self.matrix_tl_corners_cols = _raw_zeros((h,w))
+
+        self.matrix_tr_corners_rows = _raw_zeros((h,w))
+        self.matrix_tr_corners_cols = _raw_zeros((h,w))
+
+        self.matrix_bl_corners_rows = _raw_zeros((h,w))
+        self.matrix_bl_corners_cols = _raw_zeros((h,w))
+
+        self.matrix_br_corners_rows = _raw_zeros((h,w))
+        self.matrix_br_corners_cols = _raw_zeros((h,w))
+
+        # weights of the 4 corners in the bilinear interpolation
+        # (all tables are filled in by LocalElasticDistorter._generate_fields)
+        self.matrix_tl_multiply = numpy.zeros((h,w))
+        self.matrix_tr_multiply = numpy.zeros((h,w))
+        self.matrix_bl_multiply = numpy.zeros((h,w))
+        self.matrix_br_multiply = numpy.zeros((h,w))
+
+    def alpha_sigma(self):
+        return [self.alpha, self.sigma]
+
+class LocalElasticDistorter():
+    def __init__(self, image_size=(32,32)):
+        self.image_size = image_size
+        # complexity bucket index (0..9) and its floored value (bucket / 10.0)
+        self.current_complexity_10 = 0
+        self.current_complexity = 0
+
+        # number of precomputed fields
+        # (principle: as complexity doesn't change often, we can
+        # precompute a certain number of fields for a given complexity,
+        # each with its own parameters. That way, we have good
+        # randomization, but we're much faster).
+        self.to_precompute_per_complexity = 50
+
+        # Both hold ElasticDistortionParams; precomputed_params has one list per bucket
+        self.current_params = None
+        self.precomputed_params = [[] for i in range(10)]
+
+        # Gaussian blur kernel and its size, both set by _gen_gaussian_kernel
+        self.kernel_size = None
+        self.kernel = None
+
+        # generate a first field so that current_params is set right away
+        self.regenerate_parameters(0.0)
+
+    def get_settings_names(self):
+        return []
+
+    def _floor_complexity(self, complexity):
+        return self._to_complexity_10(complexity) / 10.0
+
+    def _to_complexity_10(self, complexity):
+        return min(9, max(0, int(complexity * 10)))
+
+    def regenerate_parameters(self, complexity):
+        '''Select the distortion field used by subsequent transform_image() calls.
+
+        complexity is bucketed into tenths (0..9). Until a bucket holds
+        to_precompute_per_complexity fields, a new one is generated and cached;
+        afterwards a cached one is picked at random. Always returns [].
+        '''
+        complexity_10 = self._to_complexity_10(complexity)
+        if complexity_10 != self.current_complexity_10:
+            self.current_complexity_10 = complexity_10
+            self.current_complexity = self._floor_complexity(complexity)
+
+        bucket = self.precomputed_params[complexity_10]
+        if len(bucket) < self.to_precompute_per_complexity:
+            # bucket not full yet ("<", so it stops at exactly the configured size):
+            # generate one more field and cache it
+            self.current_params = self._generate_fields(self._initialize_new_params())
+            bucket.append(self.current_params)
+        else:
+            # enough precomputed fields: reuse one picked uniformly at random
+            self.current_params = bucket[numpy.random.randint(0, len(bucket))]
+        # don't return anything, to avoid storing deterministic parameters
+        return [] # self.current_params.alpha_sigma()
+
+    def get_parameters_determined_by_complexity(self, complexity):
+        # [alpha, sigma] as computed by _initialize_new_params for the floored complexity
+        return self._initialize_new_params(self._floor_complexity(complexity)).alpha_sigma()
+
+    def get_settings_names_determined_by_complexity(self, complexity):
+        return ['alpha', 'sigma']
+
+    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
+    def _gen_gaussian_kernel(self, sigma):
+        # the kernel size can change DRAMATICALLY the time 
+        # for the blur operation... so even though results are better
+        # with a bigger kernel, we need to compromise here
+        # 1*s is very different from 2*s, but there's not much difference
+        # between 2*s and 4*s
+        ks = self.kernel_size
+        s = sigma
+        target_ks = (1.5*s, 1.5*s)
+        if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
+            # kernel size is good, ok, no need to regenerate
+            return
+        self.kernel_size = target_ks
+        h,w = self.kernel_size
+        a,b = h/2.0, w/2.0
+        y,x = numpy.ogrid[0:w, 0:h]
+        gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
+        # Normalize so we don't reduce image intensity
+        self.kernel = gauss/gauss.sum()
+
+    def _gen_distortion_field(self, params):
+        '''Return an image_size array of Gaussian-blurred uniform noise times params.alpha.'''
+        self._gen_gaussian_kernel(params.sigma)
+        # kernel_size may be fractional (1.5*sigma): truncate before using it as a size/index.
+        # The noise is padded by kernel_size in total, i.e. about half a kernel per side,
+        # so the zero-filled borders of the 'same' convolution fall outside the crop.
+        ks0 = int(self.kernel_size[0])
+        ks1 = int(self.kernel_size[1])
+        sz0 = self.image_size[0] + ks0
+        sz1 = self.image_size[1] + ks1
+        field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
+        field = scipy.signal.convolve2d(field, self.kernel, mode='same')
+
+        # crop image_size from the middle (the old crop started at ks, i.e. bottom-right corner)
+        field = field[ks0//2:ks0//2+self.image_size[0], ks1//2:ks1//2+self.image_size[1]]
+        return params.alpha * field
+        
+
+    def _initialize_new_params(self, complexity=None):
+        '''Return new ElasticDistortionParams for complexity (None: self.current_complexity).'''
+        # "is None" rather than truthiness, so that an explicit 0.0 is honoured
+        if complexity is None:
+            complexity = self.current_complexity
+        params = ElasticDistortionParams(self.image_size)
+
+        # cube root: makes the complexity grow a bit faster
+        # while keeping the extremes 0.0 and 1.0 unchanged
+        complexity = complexity ** (1./3.)
+
+        # alpha scales the displacement: the smaller it is, the closer to
+        # their own position pixels are fetched; a max of 10 is reasonable
+        params.alpha = complexity * 10.0
+
+        # the bigger the sigma, the smoother the distortion (and the bigger
+        # the blur kernel, hence the slower the field generation);
+        # sigma goes from 10 at complexity 0 down to 3 at complexity 1
+        params.sigma = 10.0 - (7.0 * complexity)
+
+        return params
+
+    def _generate_fields(self, params):
+        '''
+        Here's how the code works:
+        - We first generate "distortion fields" for x and y with these steps:
+            - Uniform noise over [-1, 1] in a matrix of size (h,w)
+            - Blur with a Gaussian kernel of spread sigma
+            - Multiply by alpha
+        - Then (conceptually) to compose the distorted image, we loop over each pixel
+            of the new image and use the corresponding x and y distortions
+            (from the matrices generated above) to identify pixels
+            of the old image from which we fetch color data. As the
+            coordinates are not integer, we interpolate between the
+            4 nearby pixels (top left, top right etc.).
+        - That's just conceptually. Here I'm using matrix operations
+            to speed up the computation. I first identify the 4 nearby
+            pixels in the old image for each pixel in the distorted image.
+            I can then use them as "fancy indices" to extract the proper
+            pixels for each new pixel.
+        - Then I multiply those extracted nearby points by precomputed
+            ratios for the bilinear interpolation.
+        '''
+
+        p = params
+
+        dist_fields = [None, None]
+        dist_fields[0] = self._gen_distortion_field(params)
+        dist_fields[1] = self._gen_distortion_field(params)
+
+        #pylab.imshow(dist_fields[0])
+        #pylab.show()
+
+        # regenerate distortion index matrices
+        # "_rows" are row indices
+        # "_cols" are column indices
+        # (separated due to the way fancy indexing works in numpy)
+        h,w = p.image_size
+
+        for y in range(h):
+            for x in range(w): 
+                distort_x = dist_fields[0][y,x]
+                distort_y = dist_fields[1][y,x]
+
+                # the "target" is the coordinate we fetch color data from
+                # (in the original image)
+                # target_left and _top are the rounded coordinate on the
+                # left/top of this target (float) coordinate
+                target_pixel = (y+distort_y, x+distort_x)
+
+                target_left = int(math.floor(x + distort_x))
+                target_top = int(math.floor(y + distort_y))
+
+                index_tl = [target_top, target_left]
+                index_tr = [target_top, target_left+1]
+                index_bl = [target_top+1, target_left]
+                index_br = [target_top+1, target_left+1]
+
+                # x_ratio is the ratio of importance of left pixels
+                # y_ratio is the """" of top pixels
+                # (in bilinear combination)
+                y_ratio = 1.0 - (target_pixel[0] - target_top)
+                x_ratio = 1.0 - (target_pixel[1] - target_left)
+
+                # We use a default background color of 0 for displacements
+                # outside of boundaries of the image.
+
+                # if top left outside bounds
+                if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w: 
+                    p.matrix_tl_corners_rows[y][x] = 0
+                    p.matrix_tl_corners_cols[y][x] = 0
+                    p.matrix_tl_multiply[y,x] = 0
+                else:
+                    p.matrix_tl_corners_rows[y][x] = index_tl[0]
+                    p.matrix_tl_corners_cols[y][x] = index_tl[1]
+                    p.matrix_tl_multiply[y,x] = x_ratio*y_ratio
+
+                # if top right outside bounds
+                if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
+                    p.matrix_tr_corners_rows[y][x] = 0
+                    p.matrix_tr_corners_cols[y][x] = 0
+                    p.matrix_tr_multiply[y,x] = 0
+                else:
+                    p.matrix_tr_corners_rows[y][x] = index_tr[0]
+                    p.matrix_tr_corners_cols[y][x] = index_tr[1]
+                    p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
+
+                # if bottom left outside bounds
+                if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
+                    p.matrix_bl_corners_rows[y][x] = 0
+                    p.matrix_bl_corners_cols[y][x] = 0
+                    p.matrix_bl_multiply[y,x] = 0
+                else:
+                    p.matrix_bl_corners_rows[y][x] = index_bl[0]
+                    p.matrix_bl_corners_cols[y][x] = index_bl[1]
+                    p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
+
+                # if bottom right outside bounds
+                if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
+                    p.matrix_br_corners_rows[y][x] = 0
+                    p.matrix_br_corners_cols[y][x] = 0
+                    p.matrix_br_multiply[y,x] = 0
+                else:
+                    p.matrix_br_corners_rows[y][x] = index_br[0]
+                    p.matrix_br_corners_cols[y][x] = index_br[1]
+                    p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
+
+        # not really necessary, but anyway
+        return p
+
+    def transform_image(self, image):
+        p = self.current_params
+
+        # index pixels to get the 4 corners for bilinear combination
+        tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
+        tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
+        bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
+        br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]
+
+        # bilinear ratios, elemwise multiply
+        tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
+        tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
+        bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
+        br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)
+
+        # sum to finish bilinear combination
+        return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0).astype(numpy.float32)
+
+# TESTS ----------------------------------------------------------------------
+
+def _load_image(filepath):
+    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
+    img = Image.open(filepath)
+    img = numpy.asarray(img)
+    if len(img.shape) > 2:
+        img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
+    return (img / 255.0).astype('float')
+
+def _specific_test():
+    imgpath = os.path.join(_TEST_DIR, "d.png")
+    img = _load_image(imgpath)
+    dist = LocalElasticDistorter((32,32))
+    print dist.regenerate_parameters(0.5)
+    img = dist.transform_image(img)
+    print dist.get_parameters_determined_by_complexity(0.4)
+    pylab.imshow(img)
+    pylab.show()
+
+def _complexity_tests():
+    imgpath = os.path.join(_TEST_DIR, "d.png")
+    dist = LocalElasticDistorter((32,32))
+    orig_img = _load_image(imgpath)
+    html_content = '''<html><body>Original:<br/><img src='d.png'>'''
+    for complexity in numpy.arange(0.0, 1.1, 0.1):
+        html_content += '<br/>Complexity: ' + str(complexity) + '<br/>'
+        for i in range(10):
+            t1 = time.time()
+            dist.regenerate_parameters(complexity)
+            t2 = time.time()
+            print "diff", t2-t1
+            img = dist.transform_image(orig_img)
+            filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
+            new_path = os.path.join(_TEST_DIR, filename)
+            _save_image(img, new_path)
+            html_content += '<img src="' + filename + '">'
+    html_content += "</body></html>"
+    html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
+    html_file.write(html_content)
+    html_file.close()
+    
+def _complexity_benchmark():
+    imgpath = os.path.join(_TEST_DIR, "d.png")
+    dist = LocalElasticDistorter((32,32))
+    orig_img = _load_image(imgpath)
+
+    for cpx in (0.21, 0.35):
+        # time the first 10
+        t1 = time.time()
+        for i in range(10):
+            dist.regenerate_parameters(cpx)
+            img = dist.transform_image(orig_img)
+        t2 = time.time()
+
+        print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10
+
+        # time the next 40
+        t1 = time.time()
+        for i in range(40):
+            dist.regenerate_parameters(cpx)
+            img = dist.transform_image(orig_img)
+        t2 = time.time()
+       
+        print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40
+
+        # time the next 50
+        t1 = time.time()
+        for i in range(50):
+            dist.regenerate_parameters(cpx)
+            img = dist.transform_image(orig_img)
+        t2 = time.time()
+       
+        print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50
+
+        # time the next 1000 
+        t1 = time.time()
+        for i in range(1000):
+            dist.regenerate_parameters(cpx)
+            img = dist.transform_image(orig_img)
+        t2 = time.time()
+       
+        print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
+
+    # time the next 1000 with old complexity
+    t1 = time.time()
+    for i in range(1000):
+        dist.regenerate_parameters(0.21)
+        img = dist.transform_image(orig_img)
+    t2 = time.time()
+   
+    print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
+
+
+
+
+def _save_image(img, path):
+    img2 = Image.fromarray((img * 255).astype('uint8'), "L")
+    img2.save(path)
+
+# TODO: rework to follow the new class API... it is a function of complexity now
+'''
+def _distorter_tests():
+    #import pylab
+    #pylab.imshow(img)
+    #pylab.show()
+
+    for letter in ("d", "a", "n", "o"):
+        img = _load_image("tests/" + letter + ".png")
+        for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
+            for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
+                id = LocalElasticDistorter((32,32))
+                img2 = id.distort_image(img)
+                img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
+                img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
+'''
+
+def _benchmark():  # times 10000 transform_image() calls on one fixed field
+    img = _load_image("tests/d.png")
+    dist = LocalElasticDistorter((32,32))
+    dist.regenerate_parameters(0.0)
+    import time
+    t1 = time.time()
+    for i in range(10000):
+        if i % 1000 == 0:
+            print "-"
+        dist.transform_image(img) # was distort_image(), which the class does not define
+    t2 = time.time()
+    print "t2-t1", t2-t1
+    print "avg", 10000/(t2-t1)
+
+if __name__ == '__main__':
+    import time
+    import pylab
+    import Image
+    import os.path
+    #_distorter_tests()
+    #_benchmark()
+    #_specific_test()
+    #_complexity_tests()
+    _complexity_benchmark()
+    
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pipeline.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pipeline.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,391 @@
+#!/usr/bin/python
+# coding: utf-8
+
+from __future__ import with_statement
+
+# This is intended to be run as a GIMP script
+#from gimpfu import *
+
+import sys, os, getopt
+import numpy
+import filetensor as ft
+import random
+
+# To debug locally, also call with -s 100 (to stop after ~100)
+# (otherwise we allocate all needed memory, might be loonnng and/or crash
+# if, lucky like me, you have an age-old laptop creaking from everywhere)
+DEBUG = False
+DEBUG_X = False
+if DEBUG:
+    DEBUG_X = False # Debug under X (pylab.show())
+
+DEBUG_IMAGES_PATH = None
+if DEBUG:
+    # UNTESTED YET
+    # To avoid loading NIST if you don't have it handy
+    # (use with debug_images_iterator(), see main())
+    # To use NIST, leave as = None
+    DEBUG_IMAGES_PATH = None#'/home/francois/Desktop/debug_images'
+
+# Directory where to dump images to visualize results
+# (create it, otherwise it'll crash)
+DEBUG_OUTPUT_DIR = 'debug_out'
+
+DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft'
+DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft'
+DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
+DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft'
+ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE']
+
+# PARSE COMMAND LINE ARGUMENTS
+def get_argv():
+    with open(ARGS_FILE) as f:
+        args = [l.rstrip() for l in f.readlines()]
+    return args
+
+def usage():
+    print '''
+Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...]
+    -m, --max-complexity: max complexity to generate for an image
+    -z, --probability-zero: probability of using complexity=0 for an image
+    -o, --output-file: full path to file to use for output of images
+    -p, --params-output-file: path to file to output params to
+    -x, --labels-output-file: path to file to output labels to
+    -f, --data-file: path to filetensor (.ft) data file (NIST)
+    -l, --label-file: path to filetensor (.ft) labels file (NIST labels)
+    -c, --ocr-file: path to filetensor (.ft) data file (OCR)
+    -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels)
+    -a, --prob-font: probability of using a raw font image
+    -b, --prob-captcha: probability of using a captcha image
+    -g, --prob-ocr: probability of using an ocr image
+    -y, --seed: the job seed
+    '''
+
+try:
+    opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", 
+"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="])
+except getopt.GetoptError, err:
+        # print help information and exit:
+        print str(err) # will print something like "option -a not recognized"
+        usage()
+        pdb.gimp_quit(0)
+        sys.exit(2)
+
+for o, a in opts:
+    if o in ('-y','--seed'):
+        random.seed(int(a))
+        numpy.random.seed(int(a))
+
+if DEBUG_X:
+    import pylab
+    pylab.ion()
+
+from PoivreSel import PoivreSel
+from thick import Thick
+from BruitGauss import BruitGauss
+from DistorsionGauss import DistorsionGauss
+from PermutPixel import PermutPixel
+from gimp_script import GIMP1
+from Rature import Rature
+from contrast import Contrast
+from local_elastic_distortions import LocalElasticDistorter
+from slant import Slant
+from Occlusion import Occlusion
+from add_background_image import AddBackground
+from affine_transform import AffineTransformation
+from ttf2jpg import ttf2jpg
+from Facade import generateCaptcha
+
+if DEBUG:
+    from visualizer import Visualizer
+    # Either put the visualizer as in the MODULES_INSTANCES list
+    # after each module you want to visualize, or in the
+    # AFTER_EACH_MODULE_HOOK list (but not both, it's redundant)
+    VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR,  on_screen=False)
+
+###---------------------order of transformation module
+MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+
+# These should have a "after_transform_callback(self, image)" method
+# (called after each call to transform_image in a module)
+AFTER_EACH_MODULE_HOOK = []
+if DEBUG:
+    AFTER_EACH_MODULE_HOOK = [VISUALIZER]
+
+# These should have a "end_transform_callback(self, final_image)" method
+# (called after all modules have been called)
+END_TRANSFORM_HOOK = []
+if DEBUG:
+    END_TRANSFORM_HOOK = [VISUALIZER]
+
+class Pipeline():
+    def __init__(self, modules, num_img, image_size=(32,32)):
+        self.modules = modules
+        self.num_img = num_img
+        self.num_params_stored = 0
+        self.image_size = image_size
+
+        self.init_memory()
+
+    def init_num_params_stored(self):
+        # just a dummy call to regenerate_parameters() to get the
+        # real number of params (only those which are stored)
+        self.num_params_stored = 0
+        for m in self.modules:
+            self.num_params_stored += len(m.regenerate_parameters(0.0))
+
+    def init_memory(self):
+        self.init_num_params_stored()
+
+        total = self.num_img
+        num_px = self.image_size[0] * self.image_size[1]
+        # buffers are numpy.empty (uninitialised); run() fills one row per image
+        self.res_data = numpy.empty((total, num_px), dtype=numpy.uint8)
+        # params row = one complexity per module, followed by the stored module params
+        self.params = numpy.empty((total, self.num_params_stored+len(self.modules)))
+        self.res_labels = numpy.empty(total, dtype=numpy.int32)
+
+    def run(self, img_iterator, complexity_iterator):
+        img_size = self.image_size
+
+        should_hook_after_each = len(AFTER_EACH_MODULE_HOOK) != 0
+        should_hook_at_the_end = len(END_TRANSFORM_HOOK) != 0
+
+        for img_no, (img, label) in enumerate(img_iterator):
+            sys.stdout.flush()
+            
+            global_idx = img_no
+
+            img = img.reshape(img_size)
+
+            param_idx = 0
+            mod_idx = 0
+            for mod in self.modules:
+                # This used to be done _per batch_,
+                # ie. out of the "for img" loop
+                complexity = complexity_iterator.next() 
+                #better to do a complexity sampling for each transformations in order to have more variability
+                #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time))
+                #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
+                #complexity
+                self.params[global_idx, mod_idx] = complexity
+                mod_idx += 1
+                 
+                p = mod.regenerate_parameters(complexity)
+                self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p
+                param_idx += len(p)
+
+                img = mod.transform_image(img)
+
+                if should_hook_after_each:
+                    for hook in AFTER_EACH_MODULE_HOOK:
+                        hook.after_transform_callback(img)
+
+            self.res_data[global_idx] = \
+                    img.reshape((img_size[0] * img_size[1],))*255
+            self.res_labels[global_idx] = label
+
+            if should_hook_at_the_end:
+                for hook in END_TRANSFORM_HOOK:
+                    hook.end_transform_callback(img)
+
+    def write_output(self, output_file_path, params_output_file_path, labels_output_file_path):
+        with open(output_file_path, 'wb') as f:
+            ft.write(f, self.res_data)
+
+        numpy.save(params_output_file_path, self.params)
+
+        with open(labels_output_file_path, 'wb') as f:
+            ft.write(f, self.res_labels)
+                
+
+##############################################################################
+# COMPLEXITY ITERATORS
+# They're called once per module for every img, to get the complexity to use for that module
+# they must be infinite (should never throw StopIteration when calling next())
+
+# probability of generating 0 complexity, otherwise
+# uniform over 0.0-max_complexity
+def range_complexity_iterator(probability_zero, max_complexity):
+    '''Yield forever: 0.0 with probability probability_zero, else uniform(0, max_complexity).'''
+    assert max_complexity <= 1.0
+    while True:
+        if numpy.random.uniform(0.0, 1.0) < probability_zero:  # redrawn for every value
+            yield 0.0
+        else:
+            yield numpy.random.uniform(0.0, max_complexity)
+
+##############################################################################
+# DATA ITERATORS
+# They can be used to interleave different data sources etc.
+
+'''
+# Following code (DebugImages and iterator) is untested
+
+def load_image(filepath):
+    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
+    img = Image.open(filepath)
+    img = numpy.asarray(img)
+    if len(img.shape) > 2:
+        img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
+    return (img / 255.0).astype('float')
+
+class DebugImages():
+    def __init__(self, images_dir_path):
+        import glob, os.path
+        self.filelist = glob.glob(os.path.join(images_dir_path, "*.png"))
+
+def debug_images_iterator(debug_images):
+    for path in debug_images.filelist:
+        yield load_image(path)
+'''
+
+class NistData():
+    def __init__(self, nist_path, label_path, ocr_path, ocrlabel_path):
+        self.train_data = open(nist_path, 'rb')
+        self.train_labels = open(label_path, 'rb')
+        self.dim = tuple(ft._read_header(self.train_data)[3])
+        # in order to seek to the beginning of the file
+        self.train_data.close()
+        self.train_data = open(nist_path, 'rb')
+        self.ocr_data = open(ocr_path, 'rb')
+        self.ocr_labels = open(ocrlabel_path, 'rb')
+
+# this iterator loads everything into RAM
+def nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img):
+    img = ft.read(nist.train_data)
+    labels = ft.read(nist.train_labels)
+    if prob_ocr:
+        ocr_img = ft.read(nist.ocr_data)
+        ocr_labels = ft.read(nist.ocr_labels)
+    ttf = ttf2jpg()
+    L = [chr(ord('0')+x) for x in range(10)] + [chr(ord('A')+x) for x in range(26)] + [chr(ord('a')+x) for x in range(26)]
+
+    for i in xrange(num_img):
+        r = numpy.random.rand()
+        if r <= prob_font:
+            yield ttf.generate_image()
+        elif r <=prob_font + prob_captcha:
+            (arr, charac) = generateCaptcha(0,1)
+            yield arr.astype(numpy.float32)/255, L.index(charac[0])
+        elif r <= prob_font + prob_captcha + prob_ocr:
+            j = numpy.random.randint(len(ocr_labels))
+            yield ocr_img[j].astype(numpy.float32)/255, ocr_labels[j]
+        else:
+            j = numpy.random.randint(len(labels))
+            yield img[j].astype(numpy.float32)/255, labels[j]
+
+
+# Mostly for debugging, for the moment, just to see if we can
+# reload the images and parameters.
+def reload(output_file_path, params_output_file_path):
+    '''Print the dimensions of a saved images file and the saved params array.'''
+    # "with" so the images file is closed again (it used to be left open)
+    with open(output_file_path, 'rb') as images_ft:
+        images_ft_dim = tuple(ft._read_header(images_ft)[3])
+    print "Images dimensions: ", images_ft_dim
+
+    params = numpy.load(params_output_file_path)
+    print "Params dimensions: ", params.shape
+    print params
+    
+
+##############################################################################
+# MAIN
+
+
+# Might be called locally or through dbidispatch. In all cases it should be
+# passed to the GIMP executable to be able to use GIMP filters.
+# Ex: 
+def _main():
+    #global DEFAULT_NIST_PATH, DEFAULT_LABEL_PATH, DEFAULT_OCR_PATH, DEFAULT_OCRLABEL_PATH
+    #global getopt, get_argv
+
+    max_complexity = 0.5 # default
+    probability_zero = 0.1 # default
+    output_file_path = None
+    params_output_file_path = None
+    labels_output_file_path = None
+    nist_path = DEFAULT_NIST_PATH
+    label_path = DEFAULT_LABEL_PATH
+    ocr_path = DEFAULT_OCR_PATH
+    ocrlabel_path = DEFAULT_OCRLABEL_PATH
+    prob_font = 0.0
+    prob_captcha = 0.0
+    prob_ocr = 0.0
+    stop_after = None
+    reload_mode = False
+
+    for o, a in opts:
+        if o in ('-m', '--max-complexity'):
+            max_complexity = float(a)
+            assert max_complexity >= 0.0 and max_complexity <= 1.0
+        elif o in ('-r', '--reload'):
+            reload_mode = True
+        elif o in ("-z", "--probability-zero"):
+            probability_zero = float(a)
+            assert probability_zero >= 0.0 and probability_zero <= 1.0
+        elif o in ("-o", "--output-file"):
+            output_file_path = a
+        elif o in ('-p', "--params-output-file"):
+            params_output_file_path = a
+        elif o in ('-x', "--labels-output-file"):
+            labels_output_file_path = a
+        elif o in ('-s', "--stop-after"):
+            stop_after = int(a)
+        elif o in ('-f', "--data-file"):
+            nist_path = a
+        elif o in ('-l', "--label-file"):
+            label_path = a
+        elif o in ('-c', "--ocr-file"):
+            ocr_path = a
+        elif o in ('-d', "--ocrlabel-file"):
+            ocrlabel_path = a
+        elif o in ('-a', "--prob-font"):
+            prob_font = float(a)
+        elif o in ('-b', "--prob-captcha"):
+            prob_captcha = float(a)
+        elif o in ('-g', "--prob-ocr"):
+            prob_ocr = float(a)
+        elif o in ('-y', "--seed"):
+            pass
+        else:
+            assert False, "unhandled option"
+
+    if output_file_path == None or params_output_file_path == None or labels_output_file_path == None:
+        print "Must specify the three output files."
+        usage()
+        pdb.gimp_quit(0)
+        sys.exit(2)
+
+    if reload_mode:
+        reload(output_file_path, params_output_file_path)
+    else:
+        if DEBUG_IMAGES_PATH:
+            '''
+            # This code is yet untested
+            debug_images = DebugImages(DEBUG_IMAGES_PATH)
+            num_img = len(debug_images.filelist)
+            pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
+            img_it = debug_images_iterator(debug_images)
+            '''
+        else:
+            nist = NistData(nist_path, label_path, ocr_path, ocrlabel_path)
+            num_img = 819200 # 800 Mb file
+            if stop_after:
+                num_img = stop_after
+            pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
+            img_it = nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img)
+
+        cpx_it = range_complexity_iterator(probability_zero, max_complexity)
+        pl.run(img_it, cpx_it)
+        pl.write_output(output_file_path, params_output_file_path, labels_output_file_path)
+
+_main()
+
+if DEBUG_X:
+    pylab.ioff()
+    pylab.show()
+
+pdb.gimp_quit(0)
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/.DS_Store
Binary file data_generation/transformations/pycaptcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/BUGS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/BUGS	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,7 @@
+Known bugs:
+
+- PersistentFactory() is almost certainly horrible at concurrent access
+- Tests are never invalidated with PersistentStorage(), as they aren't written back to the database
+- All files in Captcha/data are installed, including silly things like .svn directories and *~
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/COPYING
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/COPYING	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,19 @@
+Copyright (c) 2004 Micah Dowty
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of 
+this software and associated documentation files (the "Software"), to deal in 
+the Software without restriction, including without limitation the rights to 
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+of the Software, and to permit persons to whom the Software is furnished to do 
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+SOFTWARE. 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Base.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Base.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,127 @@
+""" Captcha.Base
+
+Base class for all types of CAPTCHA tests. All tests have one or
+more solutions, determined when the test is generated. Solutions
+can be any Python object.
+
+All tests can be solved by presenting at least some preset number
+of correct solutions. Some tests may only have one solution and require
+one solution, but other tests may require N correct solutions of M
+possible solutions.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+import random, string, time, shelve
+
+__all__ = ["BaseCaptcha", "Factory", "PersistentFactory"]
+
+
+def randomIdentifier(alphabet=string.ascii_letters + string.digits, length=24):
+    """Return a random string of `length` characters drawn from `alphabet`."""
+    return "".join(map(random.choice, [alphabet] * length))
+
+
+class BaseCaptcha(object):
+    """Base class for all CAPTCHA tests"""
+    # Subclasses can override these to set the solution criteria
+    minCorrectSolutions = 1
+    maxIncorrectSolutions = 0
+
+    def __init__(self):
+        self.solutions = []
+        self.valid = True
+
+        # Each test has a unique identifier, used to refer to that test
+        # later, and a creation time so it can expire later.
+        self.id = randomIdentifier()
+        self.creationTime = time.time()
+
+    def addSolution(self, solution):
+        self.solutions.append(solution)
+
+    def testSolutions(self, solutions):
+        """Test whether the given solutions are sufficient for this CAPTCHA.
+           A given CAPTCHA can only be tested once, after that it is invalid
+           and always returns False. This makes random guessing much less effective.
+           """
+        if not self.valid:
+            return False
+        self.valid = False
+
+        numCorrect = 0
+        numIncorrect = 0
+
+        for solution in solutions:
+            if solution in self.solutions:
+                numCorrect += 1
+            else:
+                numIncorrect += 1
+
+        return numCorrect >= self.minCorrectSolutions and \
+               numIncorrect <= self.maxIncorrectSolutions
+
+
+class Factory(object):
+    """Creates BaseCaptcha instances on demand, and tests solutions.
+       CAPTCHAs expire after a given amount of time, given in seconds.
+       The default is 15 minutes.
+       """
+    def __init__(self, lifetime=60*15):
+        self.lifetime = lifetime
+        self.storedInstances = {}
+
+    def new(self, cls, *args, **kwargs):
+        """Create a new instance of our assigned BaseCaptcha subclass, passing
+           it any extra arguments we're given. This stores the result for
+           later testing.
+           """
+        self.clean()
+        inst = cls(*args, **kwargs)
+        self.storedInstances[inst.id] = inst
+        return inst
+
+    def get(self, id):
+        """Retrieve the CAPTCHA with the given ID, or None if it is unknown or
+           has expired. A typical web application will new() a CAPTCHA when
+           generating an html page, then get() it to render images or sounds.
+           """
+        self.clean()   # purge first so an expired test really yields None
+        return self.storedInstances.get(id)
+
+    def clean(self):
+        """Remove expired tests"""
+        now = time.time()
+        # Collect ids first: deleting while iterating would break the iterator
+        expiredIds = [inst.id for inst in self.storedInstances.itervalues()
+                      if inst.creationTime + self.lifetime < now]
+        for id in expiredIds:
+            del self.storedInstances[id]
+
+    def test(self, id, solutions):
+        """Test the given list of solutions against the BaseCaptcha instance
+           created earlier with the given id. Returns True if the test passed,
+           False on failure or for an invalid id. The test is invalidated.
+           """
+        self.clean()
+        inst = self.storedInstances.get(id)
+        if inst is None:
+            return False
+        result = inst.testSolutions(solutions)
+        # Write back: a shelve-backed mapping only persists reassigned objects
+        self.storedInstances[id] = inst
+        return result
+
+
+class PersistentFactory(Factory):
+    """A simple persistent factory, for use in CGI or multi-process environments
+       where the state must remain across python interpreter sessions.
+       This implementation uses the 'shelve' module.
+       """
+    def __init__(self, filename, lifetime=60*15):
+        Factory.__init__(self, lifetime)
+        self.storedInstances = shelve.open(filename)   # replaces the plain dict
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/File.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/File.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,53 @@
+""" Captcha.File
+
+Utilities for finding and picking random files from our 'data' directory
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+import os, random
+
+# Determine the data directory. This can be overridden after import-time if needed.
+dataDir = os.path.join(os.path.split(os.path.abspath(__file__))[0], "data")
+
+
+class RandomFileFactory(object):
+    """Picks a random file out of a list of files and/or directories.
+       Directories are scanned for files matching one of `extensions`; all names
+       are relative to our data directory plus the subclass-specified basePath."""
+    extensions = []
+    basePath = "."
+
+    def __init__(self, *fileList):
+        self._fullPaths = None
+        self.fileList = fileList
+
+    def _checkExtension(self, name):
+        """Tell whether the file name ends with one of our extensions"""
+        matched = False
+        for suffix in self.extensions:
+            matched = matched or name.endswith(suffix)
+        return matched
+
+    def _findFullPaths(self):
+        """Expand our file list into a flat list of full paths to files"""
+        found = []
+        for entry in self.fileList:
+            full = os.path.join(dataDir, self.basePath, entry)
+            if not os.path.isdir(full):
+                # Not a directory: keep the path itself
+                found.append(full)
+                continue
+            found.extend([os.path.join(full, child) for child in os.listdir(full)
+                          if self._checkExtension(child)])
+        return found
+
+    def pick(self):
+        """Return a random path; the file list is expanded on first use"""
+        if self._fullPaths is None:
+            self._fullPaths = self._findFullPaths()
+        return random.choice(self._fullPaths)
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Backgrounds.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Backgrounds.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,95 @@
+""" Captcha.Visual.Backgrounds
+
+Background layers for visual CAPTCHAs
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+from Captcha.Visual import Layer, Pictures
+import random, os
+import ImageDraw, Image
+
+
+class SolidColor(Layer):
+    """A solid color background. Very weak on its own, but good
+       to combine with other backgrounds.
+       """
+    def __init__(self, color="white"):
+        self.color = color
+
+    def render(self, image):
+        image.paste(self.color)
+
+
+class Grid(Layer):
+    """A grid of lines, with a given foreground color.
+       The size is given in pixels. The background is transparent,
+       so another layer (like SolidColor) should be put behind it.
+       """
+    def __init__(self, size=16, foreground="black"):
+        self.size = size
+        self.foreground = foreground
+        self.offset = (random.uniform(0, self.size),
+                       random.uniform(0, self.size))
+
+    def render(self, image):
+        draw = ImageDraw.Draw(image)
+        # Vertical lines: one per grid column across the image width
+        for i in xrange(image.size[0] // self.size + 1):
+            draw.line( (i*self.size+self.offset[0], 0,
+                        i*self.size+self.offset[0], image.size[1]), fill=self.foreground)
+        # Horizontal lines: one per grid row, so count against the image height
+        for i in xrange(image.size[1] // self.size + 1):
+            draw.line( (0, i*self.size+self.offset[1],
+                        image.size[0], i*self.size+self.offset[1]), fill=self.foreground)
+
+
+class TiledImage(Layer):
+    """Pick a random image and a random offset, and tile the rendered image with it"""
+    def __init__(self, imageFactory=Pictures.abstract):
+        self.tileName = imageFactory.pick()
+        self.offset = (random.uniform(0, 1),
+                       random.uniform(0, 1))
+
+    def render(self, image):
+        tile = Image.open(self.tileName)
+        for j in xrange(-1, int(image.size[1] / tile.size[1]) + 1):
+            for i in xrange(-1, int(image.size[0] / tile.size[0]) + 1):
+                dest = (int((self.offset[0] + i) * tile.size[0]),
+                        int((self.offset[1] + j) * tile.size[1]))
+                image.paste(tile, dest)
+
+
+class CroppedImage(Layer):
+    """Pick a random image, cropped randomly. Source images should be larger than the CAPTCHA."""
+    def __init__(self, imageFactory=Pictures.nature):
+        self.imageName = imageFactory.pick()
+        self.align = (random.uniform(0,1),
+                      random.uniform(0,1))
+
+    def render(self, image):
+        i = Image.open(self.imageName)
+        image.paste(i, (int(self.align[0] * (image.size[0] - i.size[0])),
+                        int(self.align[1] * (image.size[1] - i.size[1]))))
+
+
+class RandomDots(Layer):
+    """Draw random colored dots"""
+    def __init__(self, colors=("white", "black"), dotSize=4, numDots=400):
+        self.colors = colors
+        self.dotSize = dotSize
+        self.numDots = numDots
+        self.seed = random.random()
+
+    def render(self, image):
+        r = random.Random(self.seed)   # private generator seeded per instance
+        for i in xrange(self.numDots):
+            bx = int(r.uniform(0, image.size[0]-self.dotSize))
+            by = int(r.uniform(0, image.size[1]-self.dotSize))
+            image.paste(r.choice(self.colors), (bx, by,
+                                                bx+self.dotSize-1,
+                                                by+self.dotSize-1))
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Base.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Base.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,70 @@
+""" Captcha.Visual.Base
+
+Base classes for visual CAPTCHAs. We use the Python Imaging Library
+to manipulate these images.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+import Captcha
+import Image
+
+__all__ = ['ImageCaptcha', 'Layer']
+
+
+class ImageCaptcha(Captcha.BaseCaptcha):
+    """Base class for image-based CAPTCHA tests.
+       The render() function generates the CAPTCHA image at the given size by
+       combining Layer instances from self.layers, which should be created by
+       the subclass-defined getLayers().
+       """
+    defaultSize = (32,32)
+    # formerly defaultSize = (256,96)
+    _image = None   # cache filled lazily by getImage()
+    def __init__(self, *args, **kwargs):
+        Captcha.BaseCaptcha.__init__(self)
+        self._layers = self.getLayers(*args, **kwargs)
+
+    def getImage(self):
+        """Get a PIL image representing this CAPTCHA test, creating it if necessary"""
+        if self._image is None:
+            self._image = self.render()
+        return self._image
+
+    def getLayers(self):
+        """Subclasses must override this to return a list of Layer instances to render.
+           Lists within the list of layers are recursively rendered.
+           """
+        return []
+
+    def render(self, size=None):
+        """Render this CAPTCHA, returning a PIL image"""
+        if size is None:
+            size = self.defaultSize
+        # Greyscale ("L") canvas; an "RGB" canvas was used previously.
+        return self._renderList(self._layers, Image.new("L", size))
+
+    def _renderList(self, l, img):
+        for i in l:
+            if isinstance(i, (tuple, list)):
+                img = self._renderList(i, img)
+            else:
+                img = i.render(img) or img
+        return img
+
+
+class Layer(object):
+    """A renderable object representing part of a CAPTCHA.
+       The render() function should return approximately the same result, regardless
+       of the image size. This means any randomization must occur in the constructor.
+
+       If the render() function returns something non-None, it is taken as an image to
+       replace the current image with. This can be used to implement transformations
+       that result in a separate image without having to copy the results back to the first.
+       """
+    def render(self, img):
+        pass
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Distortions.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Distortions.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,117 @@
+""" Captcha.Visual.Distortions
+
+Distortion layers for visual CAPTCHAs
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+from Captcha.Visual import Layer
+import ImageDraw, Image
+import random, math
+
+
+class WigglyBlocks(Layer):
+    """Randomly select and shift blocks of the image"""
+    def __init__(self, blockSize=3, sigma=0.01, iterations=300):
+        self.blockSize = blockSize
+        self.sigma = sigma
+        self.iterations = iterations
+        self.seed = random.random()
+
+    def render(self, image):
+        r = random.Random(self.seed)
+        for i in xrange(self.iterations):
+            # Select a block
+            bx = int(r.uniform(0, image.size[0]-self.blockSize))
+            by = int(r.uniform(0, image.size[1]-self.blockSize))
+            block = image.crop((bx, by, bx+self.blockSize-1, by+self.blockSize-1))
+
+            # Figure out how much to move it. Note that floor() rounds toward
+            # -inf (only a bare int() would truncate toward 0), so any negative
+            # draw, however small, moves the block by at least one pixel.
+            mx = int(math.floor(r.normalvariate(0, self.sigma)))
+            my = int(math.floor(r.normalvariate(0, self.sigma)))
+
+            # Now actually move the block
+            image.paste(block, (bx+mx, by+my))
+
+
+class WarpBase(Layer):
+    """Abstract base class for image warping. Subclasses define a
+       function that maps points in the output image to points in the input image.
+       This warping engine runs a grid of points through this transform and uses
+       PIL's mesh transform to warp the image.
+       """
+    filtering = Image.BILINEAR
+    resolution = 10
+
+    def getTransform(self, image):
+        """Return a transformation function, subclasses should override this"""
+        return lambda x, y: (x, y)
+
+    def render(self, image):
+        r = self.resolution
+        xPoints = image.size[0] / r + 2
+        yPoints = image.size[1] / r + 2
+        f = self.getTransform(image)
+
+        # Create a list of arrays with transformed points
+        xRows = []
+        yRows = []
+        for j in xrange(yPoints):
+            xRow = []
+            yRow = []
+            for i in xrange(xPoints):
+                x, y = f(i*r, j*r)
+
+                # Clamp the edges so we don't get black undefined areas
+                x = max(0, min(image.size[0]-1, x))
+                y = max(0, min(image.size[1]-1, y))
+
+                xRow.append(x)
+                yRow.append(y)
+            xRows.append(xRow)
+            yRows.append(yRow)
+
+        # Create the mesh list, with a transformation for
+        # each square between points on the grid
+        mesh = []
+        for j in xrange(yPoints-1):
+            for i in xrange(xPoints-1):
+                mesh.append((
+                    # Destination rectangle
+                    (i*r, j*r,
+                     (i+1)*r, (j+1)*r),
+                    # Source quadrilateral
+                    (xRows[j  ][i  ], yRows[j  ][i  ],
+                     xRows[j+1][i  ], yRows[j+1][i  ],
+                     xRows[j+1][i+1], yRows[j+1][i+1],
+                     xRows[j  ][i+1], yRows[j  ][i+1]),
+                    ))
+
+        return image.transform(image.size, Image.MESH, mesh, self.filtering)
+
+
+class SineWarp(WarpBase):
+    """Warp the image using a random composition of sine waves"""
+
+    def __init__(self,
+                 amplitudeRange = (3, 6.5),
+                 periodRange    = (0.04, 0.1),
+                 ):
+        self.amplitude = random.uniform(*amplitudeRange)
+        self.period = random.uniform(*periodRange)
+        self.offset = (random.uniform(0, math.pi * 2 / self.period),
+                       random.uniform(0, math.pi * 2 / self.period))
+
+    def getTransform(self, image):
+        return (lambda x, y,
+                a = self.amplitude,
+                p = self.period,
+                o = self.offset:
+                (math.sin( (y+o[0])*p )*a + x,
+                 math.sin( (x+o[1])*p )*a + y))
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Pictures.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Pictures.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,23 @@
+""" Captcha.Visual.Pictures
+
+Random collections of images
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+from Captcha import File
+import Image
+
+
+class ImageFactory(File.RandomFileFactory):
+    """A factory that generates random images from a list"""
+    extensions = [".png", ".jpeg"]
+    basePath = "pictures"
+
+
+abstract = ImageFactory("abstract")
+nature = ImageFactory("nature")
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Tests.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Tests.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,65 @@
+""" Captcha.Visual.Tests
+
+Visual CAPTCHA tests
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+from Captcha.Visual import Text, Backgrounds, Distortions, ImageCaptcha
+from Captcha import Words
+import random
+
+__all__ = ["PseudoGimpy", "AngryGimpy", "AntiSpam"]
+
+
+class PseudoGimpy(ImageCaptcha):
+    """A relatively easy CAPTCHA that's somewhat easy on the eyes"""
+    def getLayers(self):
+        word = Words.defaultWordList.pick()
+        self.addSolution(word)
+        return [
+           # random.choice([
+           #     Backgrounds.CroppedImage(),
+           #     Backgrounds.TiledImage(),
+           # ]),
+            Text.TextLayer(word, borderSize=1),
+            Distortions.SineWarp(),
+            ]
+
+
+class AngryGimpy(ImageCaptcha):
+    """A harder but less visually pleasing CAPTCHA"""
+    def getLayers(self):
+        word = Words.defaultWordList.pick()
+        self.addSolution(word)
+        return [
+           # background layers removed
+           # Backgrounds.TiledImage(),
+           # Backgrounds.RandomDots(),
+            Text.TextLayer(word, borderSize=1),
+           # Distortions.SineWarp(periodRange    = (0.04, 0.07))
+            Distortions.WigglyBlocks(),
+              ]
+
+
+class AntiSpam(ImageCaptcha):
+    """A fixed-solution CAPTCHA that can be used to hide email addresses or URLs from bots"""
+    fontFactory = Text.FontFactory(20, "vera/VeraBd.ttf")
+    defaultSize = (512,50)
+
+    def getLayers(self, solution="murray@example.com"):
+        self.addSolution(solution)
+
+        textLayer = Text.TextLayer(solution,
+                                   borderSize = 2,
+                                   fontFactory = self.fontFactory)
+
+        return [
+            Backgrounds.CroppedImage(),
+            textLayer,
+            Distortions.SineWarp(amplitudeRange = (3, 5)),
+            ]
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Text.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Text.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,101 @@
+""" Captcha.Visual.Text
+
+Text generation for visual CAPTCHAs.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+import random, os
+from Captcha import Visual, File
+import ImageFont, ImageDraw
+
+
+class FontFactory(File.RandomFileFactory):
+    """Picks random fonts and/or sizes from a given list.
+       'sizes' can be a single size or a (min,max) tuple or list.
+       If any of the given files are directories, all *.ttf found
+       in that directory will be added.
+       """
+    extensions = [".ttf", ".TTF"]
+    basePath = "fonts"
+
+    # The variable arguments (fileNames) are what to change to point at the fonts.
+    def __init__(self, sizes, *fileNames):
+        File.RandomFileFactory.__init__(self, *fileNames)
+
+        if isinstance(sizes, (tuple, list)):
+            self.minSize = sizes[0]
+            self.maxSize = sizes[1]
+        else:
+            self.minSize = sizes
+            self.maxSize = sizes
+
+    def pick(self):
+        """Returns a (fileName, size) tuple that can be passed to ImageFont.truetype()"""
+        fileName = File.RandomFileFactory.pick(self)
+        size = int(random.uniform(self.minSize, self.maxSize) + 0.5)
+        return (fileName, size)
+
+# Predefined font factories
+defaultFontFactory = FontFactory(25, "allfonts")
+#defaultFontFactory = FontFactory((30, 40), "vera")
+
+class TextLayer(Visual.Layer):
+    """Represents a piece of text rendered within the image.
+       Alignment is given such that (0,0) places the text in the
+       top-left corner and (1,1) places it in the bottom-right.
+
+       The font and alignment are optional, if not specified one is
+       chosen randomly. If no font factory is specified, the default is used.
+       """
+    def __init__(self, text,
+                 alignment   = None,
+                 font        = None,
+                 fontFactory = None,
+                 textColor   = "white",
+                 borderSize  = 0,
+                 borderColor = None,
+                 ):
+        if fontFactory is None:
+            # Looked up at call time so a rebound module default is honoured
+            fontFactory = defaultFontFactory
+
+        if font is None:
+            font = fontFactory.pick()
+
+        if alignment is None:
+            alignment = (random.uniform(0,1),
+                         random.uniform(0,1))
+
+        self.text        = text
+        self.alignment   = alignment
+        self.font        = font
+        self.textColor   = textColor
+        self.borderSize  = borderSize
+        self.borderColor = borderColor
+
+    def render(self, img):
+        font = ImageFont.truetype(*self.font)
+        textSize = font.getsize(self.text)
+        draw = ImageDraw.Draw(img)
+
+        # Find the text's origin given our alignment and current image size
+        x = int((img.size[0] - textSize[0] - self.borderSize*2) * self.alignment[0] + 0.5)
+        y = int((img.size[1] - textSize[1] - self.borderSize*2) * self.alignment[1] + 0.5)
+
+        # Draw the border if we need one, as copies of the text at the four
+        # diagonal offsets. Slow and ugly, but PIL offers no better way.
+        if self.borderSize > 0:
+            for bx in (-1,0,1):
+                for by in (-1,0,1):
+                    if bx and by:
+                        draw.text((x + bx * self.borderSize,
+                                   y + by * self.borderSize),
+                                  self.text, font=font, fill=self.borderColor)
+
+        # And the text itself...
+        draw.text((x,y), self.text, font=font, fill=self.textColor)
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/__init__.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,14 @@
+""" Captcha.Visual
+
+This package contains functionality specific to visual CAPTCHA tests.
+
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+# Convenience imports
+from Base import *
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Words.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Words.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,58 @@
+""" Captcha.Words
+
+Utilities for managing word lists and finding random words
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+import random, os
+import File
+
+
+class WordList(object):
+    """A class representing a word list read from disk lazily.
+       Blank lines and comment lines starting with '#' are ignored.
+       Any number of words per line may be used. The list can
+       optionally ignore words not within a given length range."""
+    def __init__(self, fileName, minLength=None, maxLength=None):
+        self.words = None
+        self.fileName = fileName
+        self.minLength = minLength
+        self.maxLength = maxLength
+
+    def read(self):
+        """Read words from disk"""
+        words = []
+        f = open(os.path.join(File.dataDir, "words", self.fileName))
+        try:
+            for line in f:
+                line = line.strip()
+                if not line or line[0] == '#':
+                    continue   # blank or comment line
+                for word in line.split():
+                    if self.minLength is not None and len(word) < self.minLength:
+                        continue
+                    if self.maxLength is not None and len(word) > self.maxLength:
+                        continue
+                    words.append(word)
+        finally:
+            f.close()   # always release the file handle
+        self.words = words   # published only after a complete read
+
+    def pick(self):
+        """Pick a random word from the list, reading it in if necessary"""
+        if self.words is None:
+            self.read()
+        return random.choice(self.words)
+
+
+# Define several shared word lists that are read from disk on demand
+basic_english            = WordList("basic-english")
+basic_english_restricted = WordList("basic-english", minLength=5, maxLength=8)
+characters = WordList("characters")
+defaultWordList = characters
+
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/__init__.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,41 @@
+""" Captcha
+
+This is the PyCAPTCHA package, a collection of Python modules
+implementing CAPTCHAs: automated tests that humans should pass,
+but current computer programs can't. These tests are often
+used for security.
+
+See  http://www.captcha.net for more information and examples.
+
+This project was started because the CIA project, written in
+Python, needed a CAPTCHA to automate its user creation process
+safely. All existing implementations the author could find were
+written in Java or for the .NET framework, so a simple Python
+alternative was needed.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
+#
+
+__version__ = "0.3-pre"
+
+
+# Check the python version here before we proceed further
+requiredPythonVersion = (2,2,1)
+def checkVersion():
+    """Raise an Exception if the interpreter is older than requiredPythonVersion."""
+    import sys
+    if sys.version_info < requiredPythonVersion:
+        raise Exception("%s requires at least Python %s, found %s instead." % (
+            __name__, ".".join(map(str, requiredPythonVersion)),
+            ".".join(map(str, sys.version_info))))
+checkVersion()
+
+
+# Convenience imports
+from Base import *
+import File
+import Words
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/data/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/allfonts
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/allfonts	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,1 @@
+/Tmp/allfonts
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._atari-small.bdf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._atari-small.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._cursive.bdf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._cursive.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/CIDFnmap
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/CIDFnmap	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,10 @@
+/Dotum-Bold (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /Adobe-Korea1-Unicode ;
+/ZenHei (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-GB1-Unicode ;
+/Batang-Regular (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /Adobe-Korea1-Unicode ;
+/VL-PGothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan1-Unicode ;
+/Dotum-Regular (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /Adobe-Korea1-Unicode ;
+/VL-Gothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan2-Unicode ;
+/VL-Gothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan1-Unicode ;
+/VL-PGothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan2-Unicode ;
+/ZenHei-CNS (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-CNS1-Unicode ;
+/Batang-Bold (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /Adobe-Korea1-Unicode ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/FAPIfontmap
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/FAPIfontmap	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,155 @@
+/Garuda-Oblique << /Path (/usr/share/fonts/truetype/thai/Garuda-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstOne << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOne.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Vemana2000 << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Vemana.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-BoldItalic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-Oblique << /Path (/usr/share/fonts/truetype/thai/Umpush-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Malige << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Malige-b.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma-Oblique << /Path (/usr/share/fonts/truetype/thai/Loma-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstBook << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstBook.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Serif << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstOffice << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOffice.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree-Oblique << /Path (/usr/share/fonts/truetype/thai/Waree-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstFarsi << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstFarsi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Garuda-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Garuda-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/utkal << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/utkal.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-Italic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee-BoldOblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmex10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmex10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-Bold << /Path (/usr/share/fonts/truetype/thai/Norasi-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma << /Path (/usr/share/fonts/truetype/thai/Loma.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/wasy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/wasy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstNaskh << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstNaskh.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree << /Path (/usr/share/fonts/truetype/thai/Waree.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Garuda << /Path (/usr/share/fonts/truetype/thai/Garuda.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmsy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmsy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee-Bold << /Path (/usr/share/fonts/truetype/thai/SawasdeeBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Purisa << /Path (/usr/share/fonts/truetype/thai/Purisa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstPoster << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstPoster.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Punjabi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_pa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Waree-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Garuda-Bold << /Path (/usr/share/fonts/truetype/thai/Garuda-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/RachanaMedium << /Path (/usr/share/fonts/truetype/ttf-malayalam-fonts/Rachana_04.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstArt << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstArt.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstDecorative << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDecorative.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Hindi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_hi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-LightOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-LightOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/mry_KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/mry_KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstDigital << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDigital.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans-Mono-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Gujarati << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_gu.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstLetter << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstLetter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo << /Path (/usr/share/fonts/truetype/thai/TlwgTypo.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/msbm10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msbm10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans-Mono << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-Italic << /Path (/usr/share/fonts/truetype/thai/Norasi-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstTitleL << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitleL.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-Oblique << /Path (/usr/share/fonts/truetype/thai/Norasi-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Phetsarath << /Path (/usr/share/fonts/truetype/ttf-lao/Phetsarath_OT.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/mukti << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrow.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee-Oblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmr10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmr10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-Light << /Path (/usr/share/fonts/truetype/thai/Umpush-Light.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-Bold << /Path (/usr/share/fonts/truetype/thai/Umpush-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Serif-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstTitle << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitle.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi << /Path (/usr/share/fonts/truetype/thai/Norasi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-Oblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/muktinarrow << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrowBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-Italic << /Path (/usr/share/fonts/truetype/thai/Kinnari-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/kacstPen << /Path (/usr/share/fonts/truetype/ttf-kacst/kacstPen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush << /Path (/usr/share/fonts/truetype/thai/Umpush.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee << /Path (/usr/share/fonts/truetype/thai/Sawasdee.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono << /Path (/usr/share/fonts/truetype/thai/TlwgMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari << /Path (/usr/share/fonts/truetype/thai/Kinnari.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstScreen << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstScreen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSansBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/msam10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msam10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmmi10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmmi10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Tamil << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_ta.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist << /Path (/usr/share/fonts/truetype/thai/TlwgTypist.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree-Bold << /Path (/usr/share/fonts/truetype/thai/Waree-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-Bold << /Path (/usr/share/fonts/truetype/thai/Kinnari-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma-Bold << /Path (/usr/share/fonts/truetype/thai/Loma-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Loma-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Palatino-Italic /URWPalladioL-Ital ; 
+/Palatino-Bold /URWPalladioL-Bold ; 
+/AvantGarde-BookOblique /URWGothicL-BookObli ; 
+/Times-Bold /NimbusRomNo9L-Medi ; 
+/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ; 
+/Times-Roman /NimbusRomNo9L-Regu ; 
+/NewCenturySchlbk-Italic /CenturySchL-Ital ; 
+/HelveticaNarrow /NimbusSanL-ReguCond ; 
+/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ; 
+/Bookman-Light /URWBookmanL-Ligh ; 
+/Palatino-BoldItalic /URWPalladioL-BoldItal ; 
+/Traditional /KacstBook ; 
+/Times-BoldItalic /NimbusRomNo9L-MediItal ; 
+/AvantGarde-Book /URWGothicL-Book ; 
+/AvantGarde-DemiOblique /URWGothicL-DemiObli ; 
+/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ; 
+/Helvetica-Bold /NimbusSanL-Bold ; 
+/Courier-Oblique /NimbusMonL-ReguObli ; 
+/Times-Italic /NimbusRomNo9L-ReguItal ; 
+/Courier /NimbusMonL-Regu ; 
+/Bookman-Demi /URWBookmanL-DemiBold ; 
+/Helvetica-BoldOblique /NimbusSanL-BoldItal ; 
+/Helvetica-Oblique /NimbusSanL-ReguItal ; 
+/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ; 
+/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ; 
+/Courier-BoldOblique /NimbusMonL-BoldObli ; 
+/HelveticaNarrow-Bold /NimbusSanL-BoldCond ; 
+/AvantGarde-Demi /URWGothicL-Demi ; 
+/Bookman-LightItalic /URWBookmanL-LighItal ; 
+/ZapfDingbats /Dingbats ; 
+/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ; 
+/ZapfChancery-MediumItalic /URWChanceryL-MediItal ; 
+/Helvetica /NimbusSanL-Regu ; 
+/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ; 
+/Palatino-Roman /URWPalladioL-Roma ; 
+/NewCenturySchlbk-Bold /CenturySchL-Bold ; 
+/NewCenturySchlbk-Roman /CenturySchL-Roma ; 
+/Courier-Bold /NimbusMonL-Bold ; 
+/Arabic /KacstBook ; 
+/Helvetica-Narrow /NimbusSanL-ReguCond ; 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/Fontmap
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/Fontmap	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,116 @@
+/LMTypewriter10-CapsOblique (lmtcso10.pfb) ;
+/Dingbats (d050000l.pfb) ;
+/URWBookmanL-DemiBoldItal (b018035l.pfb) ;
+/LMSansQuotation8-Bold (lmssqbx8.pfb) ;
+/Symbol (Symbol.pfb) ;
+/LMTypewriterVarWd10-DarkOblique (lmvtko10.pfb) ;
+/LMRoman10-Demi (lmb10.pfb) ;
+/URWPalladioL-Ital (p052023l.pfb) ;
+/LMTypewriter10-DarkOblique (lmtko10.pfb) ;
+/NimbusSanL-Regu (n019003l.pfb) ;
+/LMTypewriter10-Italic (lmtti10.pfb) ;
+/LMSansQuotation8-BoldOblique (lmssqbo8.pfb) ;
+/URWPalladioL-Roma (p052003l.pfb) ;
+/LMTypewriterVarWd10-Light (lmvtl10.pfb) ;
+/NimbusRomNo9L-Medi (n021004l.pfb) ;
+/NimbusSanL-ReguItal (n019023l.pfb) ;
+/NimbusMonL-Regu (n022003l.pfb) ;
+/LMSans10-Bold (lmssbx10.pfb) ;
+/LMRoman10-CapsOblique (lmcsco10.pfb) ;
+/CenturySchL-Roma (c059013l.pfb) ;
+/URWGothicL-BookObli (a010033l.pfb) ;
+/LMTypewriter10-LightCondensedOblique (lmtlco10.pfb) ;
+/LMSans10-DemiCondensedOblique (lmssdo10.pfb) ;
+/LMRoman10-CapsRegular (lmcsc10.pfb) ;
+/CenturySchL-BoldItal (c059036l.pfb) ;
+/LMRoman10-DemiOblique (lmbo10.pfb) ;
+/LMRoman10-Unslanted (lmu10.pfb) ;
+/LMRoman10-Bold (lmbx10.pfb) ;
+/LMSans10-DemiCondensed (lmssdc10.pfb) ;
+/URWChanceryL-MediItal (z003034l.pfb) ;
+/URWGothicL-DemiObli (a010035l.pfb) ;
+/LMTypewriterVarWd10-Oblique (lmvtto10.pfb) ;
+/NimbusMonL-Bold (n022004l.pfb) ;
+/LMTypewriter10-Oblique (lmtto10.pfb) ;
+/LMRoman10-BoldItalic (lmbxi10.pfb) ;
+/NimbusSanL-ReguCond (n019043l.pfb) ;
+/CenturySchL-Bold (c059016l.pfb) ;
+/LMTypewriterVarWd10-Regular (lmvtt10.pfb) ;
+/URWBookmanL-Ligh (b018012l.pfb) ;
+/LMSansQuotation8-Regular (lmssq8.pfb) ;
+/LMSans10-Regular (lmss10.pfb) ;
+/LMSans10-Oblique (lmsso10.pfb) ;
+/NimbusSanL-BoldCond (n019044l.pfb) ;
+/LMRoman10-Regular (lmr10.pfb) ;
+/LMTypewriter10-LightCondensed (lmtlc10.pfb) ;
+/LMTypewriterVarWd10-Dark (lmvtk10.pfb) ;
+/LMTypewriter10-CapsRegular (lmtcsc10.pfb) ;
+/LMSansQuotation8-Oblique (lmssqo8.pfb) ;
+/StandardSymL (s050000l.pfb) ;
+/NimbusRomNo9L-Regu (n021003l.pfb) ;
+/LMTypewriterVarWd10-LightOblique (lmvtlo10.pfb) ;
+/URWPalladioL-BoldItal (p052024l.pfb) ;
+/CenturySchL-Ital (c059033l.pfb) ;
+/LMRoman10-Dunhill (lmdunh10.pfb) ;
+/URWPalladioL-Bold (p052004l.pfb) ;
+/URWGothicL-Book (a010013l.pfb) ;
+/LMTypewriter10-Dark (lmtk10.pfb) ;
+/NimbusSanL-BoldItal (n019024l.pfb) ;
+/URWGothicL-Demi (a010015l.pfb) ;
+/LMTypewriter10-LightOblique (lmtlo10.pfb) ;
+/LMTypewriter10-Light (lmtl10.pfb) ;
+/NimbusSanL-BoldCondItal (n019064l.pfb) ;
+/LMRoman10-Italic (lmri10.pfb) ;
+/LMRoman10-DunhillOblique (lmduno10.pfb) ;
+/NimbusMonL-ReguObli (n022023l.pfb) ;
+/LMRoman10-Oblique (lmro10.pfb) ;
+/NimbusSanL-ReguCondItal (n019063l.pfb) ;
+/NimbusRomNo9L-MediItal (n021024l.pfb) ;
+/LMRoman10-BoldOblique (lmbxo10.pfb) ;
+/URWBookmanL-DemiBold (b018015l.pfb) ;
+/NimbusSanL-Bold (n019004l.pfb) ;
+/LMSans10-BoldOblique (lmssbo10.pfb) ;
+/URWBookmanL-LighItal (b018032l.pfb) ;
+/NimbusMonL-BoldObli (n022024l.pfb) ;
+/NimbusRomNo9L-ReguItal (n021023l.pfb) ;
+/LMTypewriter10-Regular (lmtt10.pfb) ;
+/Palatino-Italic /URWPalladioL-Ital ; 
+/Palatino-Bold /URWPalladioL-Bold ; 
+/AvantGarde-BookOblique /URWGothicL-BookObli ; 
+/Times-Bold /NimbusRomNo9L-Medi ; 
+/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ; 
+/Times-Roman /NimbusRomNo9L-Regu ; 
+/NewCenturySchlbk-Italic /CenturySchL-Ital ; 
+/HelveticaNarrow /NimbusSanL-ReguCond ; 
+/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ; 
+/Bookman-Light /URWBookmanL-Ligh ; 
+/Palatino-BoldItalic /URWPalladioL-BoldItal ; 
+/Traditional /KacstBook ; 
+/Times-BoldItalic /NimbusRomNo9L-MediItal ; 
+/AvantGarde-Book /URWGothicL-Book ; 
+/AvantGarde-DemiOblique /URWGothicL-DemiObli ; 
+/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ; 
+/Helvetica-Bold /NimbusSanL-Bold ; 
+/Courier-Oblique /NimbusMonL-ReguObli ; 
+/Times-Italic /NimbusRomNo9L-ReguItal ; 
+/Courier /NimbusMonL-Regu ; 
+/Bookman-Demi /URWBookmanL-DemiBold ; 
+/Helvetica-BoldOblique /NimbusSanL-BoldItal ; 
+/Helvetica-Oblique /NimbusSanL-ReguItal ; 
+/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ; 
+/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ; 
+/Courier-BoldOblique /NimbusMonL-BoldObli ; 
+/HelveticaNarrow-Bold /NimbusSanL-BoldCond ; 
+/AvantGarde-Demi /URWGothicL-Demi ; 
+/Bookman-LightItalic /URWBookmanL-LighItal ; 
+/ZapfDingbats /Dingbats ; 
+/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ; 
+/ZapfChancery-MediumItalic /URWChanceryL-MediItal ; 
+/Helvetica /NimbusSanL-Regu ; 
+/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ; 
+/Palatino-Roman /URWPalladioL-Roma ; 
+/NewCenturySchlbk-Bold /CenturySchL-Bold ; 
+/NewCenturySchlbk-Roman /CenturySchL-Roma ; 
+/Courier-Bold /NimbusMonL-Bold ; 
+/Arabic /KacstBook ; 
+/Helvetica-Narrow /NimbusSanL-ReguCond ; 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/cidfmap
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/cidfmap	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,10 @@
+/Dotum-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
+/ZenHei << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(GB1) 0] >> ;
+/Batang-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
+/VL-PGothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
+/Dotum-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
+/VL-Gothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
+/VL-Gothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
+/VL-PGothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
+/ZenHei-CNS << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(CNS1) 0] >> ;
+/Batang-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,124 @@
+Bitstream Vera Fonts Copyright
+
+The fonts have a generous copyright, allowing derivative works (as
+long as "Bitstream" or "Vera" are not in the names), and full
+redistribution (so long as they are not *sold* by themselves). They
+can be bundled, redistributed and sold with any software.
+
+The fonts are distributed under the following copyright:
+
+Copyright
+=========
+
+Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream
+Vera is a trademark of Bitstream, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the fonts accompanying this license ("Fonts") and associated
+documentation files (the "Font Software"), to reproduce and distribute
+the Font Software, including without limitation the rights to use,
+copy, merge, publish, distribute, and/or sell copies of the Font
+Software, and to permit persons to whom the Font Software is furnished
+to do so, subject to the following conditions:
+
+The above copyright and trademark notices and this permission notice
+shall be included in all copies of one or more of the Font Software
+typefaces.
+
+The Font Software may be modified, altered, or added to, and in
+particular the designs of glyphs or characters in the Fonts may be
+modified and additional glyphs or characters may be added to the
+Fonts, only if the fonts are renamed to names not containing either
+the words "Bitstream" or the word "Vera".
+
+This License becomes null and void to the extent applicable to Fonts
+or Font Software that has been modified and is distributed under the
+"Bitstream Vera" names.
+
+The Font Software may be sold as part of a larger software package but
+no copy of one or more of the Font Software typefaces may be sold by
+itself.
+
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
+BITSTREAM OR THE GNOME FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL,
+OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT
+SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.
+
+Except as contained in this notice, the names of Gnome, the Gnome
+Foundation, and Bitstream Inc., shall not be used in advertising or
+otherwise to promote the sale, use or other dealings in this Font
+Software without prior written authorization from the Gnome Foundation
+or Bitstream Inc., respectively. For further information, contact:
+fonts at gnome dot org.
+
+Copyright FAQ
+=============
+
+   1. I don't understand the resale restriction... What gives?
+
+      Bitstream is giving away these fonts, but wishes to ensure its
+      competitors can't just drop the fonts as is into a font sale system
+      and sell them as is. It seems fair that if Bitstream can't make money
+      from the Bitstream Vera fonts, their competitors should not be able to
+      do so either. You can sell the fonts as part of any software package,
+      however.
+
+   2. I want to package these fonts separately for distribution and
+      sale as part of a larger software package or system.  Can I do so?
+
+      Yes. A RPM or Debian package is a "larger software package" to begin 
+      with, and you aren't selling them independently by themselves. 
+      See 1. above.
+
+   3. Are derivative works allowed?
+      Yes!
+
+   4. Can I change or add to the font(s)?
+      Yes, but you must change the name(s) of the font(s).
+
+   5. Under what terms are derivative works allowed?
+
+      You must change the name(s) of the fonts. This is to ensure the
+      quality of the fonts, both to protect Bitstream and Gnome. We want to
+      ensure that if an application has opened a font specifically of these
+      names, it gets what it expects (though of course, using fontconfig,
+      substitutions could still have occurred during font
+      opening). You must include the Bitstream copyright. Additional
+      copyrights can be added, as per copyright law. Happy Font Hacking!
+
+   6. If I have improvements for Bitstream Vera, is it possible they might get 
+       adopted in future versions?
+
+      Yes. The contract between the Gnome Foundation and Bitstream has
+      provisions for working with Bitstream to ensure quality additions to
+      the Bitstream Vera font family. Please contact us if you have such
+      additions. Note, that in general, we will want such additions for the
+      entire family, not just a single font, and that you'll have to keep
+      both Gnome and Jim Lyles, Vera's designer, happy! To make sense to add
+      glyphs to the font, they must be stylistically in keeping with Vera's
+      design. Vera cannot become a "ransom note" font. Jim Lyles will be
+      providing a document describing the design elements used in Vera, as a
+      guide and aid for people interested in contributing to Vera.
+
+   7. I want to sell a software package that uses these fonts: Can I do so?
+
+      Sure. Bundle the fonts with your software and sell your software
+      with the fonts. That is the intent of the copyright.
+
+   8. If applications have built the names "Bitstream Vera" into them, 
+      can I override this somehow to use fonts of my choosing?
+
+      This depends on exact details of the software. Most open source
+      systems and software (e.g., Gnome, KDE, etc.) are now converting to
+      use fontconfig (see www.fontconfig.org) to handle font configuration,
+      selection and substitution; it has provisions for overriding font
+      names and substituting alternatives. An example is provided by the
+      supplied local.conf file, which chooses the family Bitstream Vera for
+      "sans", "serif" and "monospace".  Other software (e.g., the XFree86
+      core server) has other mechanisms for font substitution.
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/README.TXT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/README.TXT	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,11 @@
+Contained herein is the Bitstream Vera font family.
+
+The Copyright information is found in the COPYRIGHT.TXT file (along
+with being incorporated into the fonts themselves).
+
+The release notes are found in the file "RELEASENOTES.TXT".
+
+We hope you enjoy Vera!
+
+                        Bitstream, Inc.
+			The Gnome Project
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,162 @@
+Bitstream Vera Fonts - April 16, 2003
+=====================================
+
+The version number of these fonts is 1.10 to distinguish them from the
+beta test fonts.
+
+Note that the Vera copyright is incorporated in the fonts themselves.
+The License field in the fonts contains the copyright license as it
+appears below. The TrueType copyright field is not large enough to
+contain the full license, so the license is incorporated (as you might
+think if you thought about it) into the license field, which
+unfortunately can be obscure to find.  (In pfaedit, see: Element->Font
+Info->TTFNames->License).
+
+Our apologies for it taking longer to complete the fonts than planned.
+Beta testers requested a tighter line spacing (less leading) and Jim
+Lyles redesigned Vera's accents to bring its line spacing to more
+typical of other fonts.  This took additional time and effort.  Our
+thanks to Jim for this effort above and beyond the call of duty.
+
+There are four monospace and sans faces (normal, oblique, bold, bold
+oblique) and two serif faces (normal and bold). Fontconfig/Xft2 (see
+www.fontconfig.org) can artificially oblique the serif faces for you:
+this loses hinting and distorts the faces slightly, but is visibly
+different than normal and bold, and reasonably pleasing.
+
+On systems with fontconfig 2.0 or 2.1 installed, making your sans,
+serif and monospace fonts default to these fonts is very easy.  Just
+drop the file local.conf into your /etc/fonts directory.  This will
+make the Bitstream fonts your default fonts for all applications using
+fontconfig (if sans, serif, or monospace names are used, as they often
+are as default values in many desktops). The XML in local.conf may
+need modification to enable subpixel decimation, if appropriate,
+however, the commented out phrase does so for XFree86 4.3, in the case
+that the server does not have sufficient information to identify the
+use of a flat panel.  Fontconfig 2.2 adds Vera to the list of font
+families and will, by default use it as the default sans, serif and
+monospace fonts.
+
+During the testing of the final Vera fonts, we learned that screen
+fonts in general are only typically hinted to work correctly at
+integer pixel sizes.  Vera is coded internally for integer sizes only.
+We need to investigate further to see if there are commonly used fonts
+that are hinted to be rounded but are not rounded to integer sizes due
+to oversights in their coding.
+
+Most fonts work best at 8 pixels and below if anti-aliased only, as
+the amount of work required to hint well at smaller and smaller sizes
+becomes astronomical.  GASP tables are typically used to control
+whether hinting is used or not, but Freetype/Xft does not currently
+support GASP tables (which are present in Vera).
+
+To mitigate this problem, both for Vera and other fonts, there will be
+(very shortly) a new fontconfig 2.2 release that will, by default not
+apply hints if the size is below 8 pixels. If you should have a font
+that in fact has been hinted more aggressively, you can use fontconfig
+to note this exception. We believe this should improve many hinted
+fonts in addition to Vera, though implementing GASP support is likely
+the right long term solution.
+
+Font rendering in Gnome or KDE is the combination of algorithms in
+Xft2 and Freetype, along with hinting in the fonts themselves. It is
+vital to have sufficient information to disentangle problems that you
+may observe.
+
+Note that having your font rendering system set up correctly is vital
+to proper judgement of problems of the fonts:
+
+    * Freetype may or may not be configured in ways that may
+      implement execution of possibly patented (in some parts of the world)
+      TrueType hinting algorithms, particularly at small sizes.  Best
+      results are obtained while using these algorithms.
+
+    * The freetype autohinter (used when the possibly patented
+      algorithms are not used) continues to improve with each release. If
+      you are using the autohinter, please ensure you are using an up to
+      date version of freetype before reporting problems.
+
+    * Please identify what version of freetype you are using in any
+      bug reports, and how your freetype is configured.
+
+    * Make sure you are not using the freetype version included in
+      XFree86 4.3, as it has bugs that significantly degrade most fonts,
+      including Vera. If you build XFree86 4.3 from source yourself, you may
+      have installed this broken version without intending it (as I
+      did). Vera was verified with the recently released Freetype 2.1.4. On
+      many systems, "ldd" can be used to see which freetype shared library
+      is actually being used.
+
+    * Xft/X Render does not (yet) implement gamma correction.  This
+      causes significant problems rendering white text on a black background
+      (causing partial pixels to be insufficiently shaded) if the gamma of
+      your monitor has not been compensated for, and minor problems with
+      black text on a white background.  The program "xgamma" can be used to
+      set a gamma correction value in the X server's color palette. Most
+      monitors have a gamma near 2.
+
+    * Note that the Vera family uses minimal delta hinting. Your
+      results on other systems when not used anti-aliased may not be
+      entirely satisfying. We are primarily interested in reports of
+      problems on open source systems implementing Xft2/fontconfig/freetype
+      (which implements antialiasing and hinting adjustments, and
+      sophisticated subpixel decimation on flatpanels).  Also, the
+      algorithms used by Xft2 adjust the hints to integer widths and the
+      results are crisper on open source systems than on Windows or
+      Macintosh.
+
+    * Your fontconfig may (probably does) predate the release of
+      fontconfig 2.2, and you may see artifacts not present when the font is
+      used at very small sizes with hinting enabled. "fc-list -V" can be
+      used to see what version you have installed.
+
+We believe and hope that these fonts will resolve the problems
+reported during beta test.  The largest change is the reduction of
+leading (interline spacing), which had annoyed a number of people, and
+reduced Vera's utility for some applications.  The Vera monospace font
+should also now make '0' and 'O' and '1' and 'l' more clearly
+distinguishable.
+
+The version of these fonts is version 1.10.  Fontconfig should be
+choosing the new version of the fonts if both the released fonts and
+beta test fonts are installed (though please discard them: they have
+names of form tt20[1-12]gn.ttf).  Note that older versions of
+fontconfig sometimes did not rebuild their cache correctly when new
+fonts are installed: please upgrade to fontconfig 2.2. "fc-cache -f"
+can be used to force rebuilding fontconfig's cache files.
+
+If you note problems, please send them to fonts at gnome dot org, with
+exactly which face and size and unicode point you observe the problem
+at. The xfd utility from XFree86 CVS may be useful for this (e.g. "xfd
+-fa sans").  A possibly more useful program to examine fonts at a
+variety of sizes is the "waterfall" program found in Keith Packard's
+CVS.
+
+        $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS login
+        Logging in to :pserver:anoncvs@keithp.com:2401/local/src/CVS
+        CVS password: <hit return>
+        $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS co waterfall
+        $ cd waterfall
+        $ xmkmf -a
+        $ make
+        # make install
+        # make install.man
+
+Again, please make sure you are running an up-to-date freetype, and
+that you are only examining integer sizes.
+
+Reporting Problems
+==================
+
+Please send problem reports to fonts at gnome dot org, with the following
+information:
+
+   1. Version of Freetype, Xft2 and fontconfig
+   2. Whether TT hinting is being used, or the autohinter
+   3. Application being used
+   4. Character/Unicode code point that has problems (if applicable)
+   5. Version of which operating system
+   6. Please include a screenshot, when possible.
+
+Please check the fonts list archives before reporting problems to cut
+down on duplication.
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/Vera.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/Vera.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/local.conf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/local.conf	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
+<!-- /etc/fonts.conf file to configure system font access -->
+<fontconfig>
+        <!--  Enable sub-pixel rendering
+        <match target="font">
+                <test qual="all" name="rgba">
+                        <const>unknown</const>
+                </test>
+                <edit name="rgba" mode="assign"><const>rgb</const></edit>
+        </match>
+         -->
+
+        <alias>
+                <family>serif</family>
+                <prefer>
+                        <family>Bitstream Vera Serif</family>
+                </prefer>
+        </alias>
+        <alias>
+                <family>sans-serif</family>
+                <prefer>
+                        <family>Bitstream Vera Sans</family>
+                </prefer>
+        </alias>
+        <alias>
+                <family>monospace</family>
+                <prefer>
+                        <family>Bitstream Vera Sans Mono</family>
+                </prefer>
+        </alias>
+</fontconfig>
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/1.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/1.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/10.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/10.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/11.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/11.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/12.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/12.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/2.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/2.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/3.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/3.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/4.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/4.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/5.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/5.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/6.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/6.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/7.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/7.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/8.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/8.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/9.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/9.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/README	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,3 @@
+These images were created by the author with Fyre, expressly for PyCAPTCHA.
+
+Copyright (c) 2004 Micah Dowty
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/README	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,2 @@
+These are uncopyrighted images gathered from various sources,
+including the author's family and national park service web sites.
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/words/README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/words/README	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,4 @@
+These word lists are from various sources:
+
+basic-english:
+   http://simple.wikipedia.org/wiki/Basic_English_Alphabetical_Wordlist
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/words/basic-english
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/words/basic-english	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,852 @@
+a
+able
+about
+account
+acid
+across
+act
+addition
+adjustment
+advertisement
+agreement
+after
+again
+against
+air
+all
+almost
+among
+amount
+amusement
+and
+angle
+angry
+animal
+answer
+ant
+any
+apparatus
+apple
+approval
+arch
+argument
+arm
+army
+art
+as
+at
+attack
+attempt
+attention
+attraction
+authority
+automatic
+awake
+baby
+back
+bad
+bag
+balance
+ball
+band
+base
+basin
+basket
+bath
+be
+beautiful
+because
+bed
+bee
+before
+behavior
+belief
+bell
+bent
+berry
+between
+bird
+birth
+bit
+bite
+bitter
+black
+blade
+blood
+blow
+blue
+board
+boat
+body
+boiling
+bone
+book
+boot
+bottle
+box
+boy
+brain
+brake
+branch
+brass
+bread
+breath
+brick
+bridge
+bright
+broken
+brother
+brown
+brush
+bucket
+building
+bulb
+burn
+burst
+business
+but
+butter
+button
+by
+cake
+camera
+canvas
+card
+care
+carriage
+cart
+cat
+cause
+certain
+chain
+chalk
+chance
+change
+cheap
+cheese
+chemical
+chest
+chief
+chin
+church
+circle
+clean
+clear
+clock
+cloth
+cloud
+coal
+coat
+cold
+collar
+color
+comb
+come
+comfort
+committee
+common
+company
+comparison
+competition
+complete
+complex
+condition
+connection
+conscious
+control
+cook
+copper
+copy
+cord
+cork
+cotton
+cough
+country
+cover
+cow
+crack
+credit
+crime
+cruel
+crush
+cry
+cup
+current
+curtain
+curve
+cushion
+cut
+damage
+danger
+dark
+daughter
+day
+dead
+dear
+death
+debt
+decision
+deep
+degree
+delicate
+dependent
+design
+desire
+destruction
+detail
+development
+different
+digestion
+direction
+dirty
+discovery
+discussion
+disease
+disgust
+distance
+distribution
+division
+do
+dog
+door
+down
+doubt
+drain
+drawer
+dress
+drink
+driving
+drop
+dry
+dust
+ear
+early
+earth
+east
+edge
+education
+effect
+egg
+elastic
+electric
+end
+engine
+enough
+equal
+error
+even
+event
+ever
+every
+example
+exchange
+existence
+expansion
+experience
+expert
+eye
+face
+fact
+fall
+false
+family
+far
+farm
+fat
+father
+fear
+feather
+feeble
+feeling
+female
+fertile
+fiction
+field
+fight
+finger
+fire
+first
+fish
+fixed
+flag
+flame
+flat
+flight
+floor
+flower
+fly
+fold
+food
+foolish
+foot
+for
+force
+fork
+form
+forward
+fowl
+frame
+free
+frequent
+friend
+from
+front
+fruit
+full
+future
+garden
+general
+get
+girl
+give
+glass
+glove
+go
+goat
+gold
+good
+government
+grain
+grass
+great
+green
+grey/gray
+grip
+group
+growth
+guide
+gun
+hair
+hammer
+hand
+hanging
+happy
+harbor
+hard
+harmony
+hat
+hate
+have
+he
+head
+healthy
+hearing
+heart
+heat
+help
+here
+high
+history
+hole
+hollow
+hook
+hope
+horn
+horse
+hospital
+hour
+house
+how
+humor
+ice
+idea
+if
+ill
+important
+impulse
+in
+increase
+industry
+ink
+insect
+instrument
+insurance
+interest
+invention
+iron
+island
+jelly
+jewel
+join
+journey
+judge
+jump
+keep
+kettle
+key
+kick
+kind
+kiss
+knee
+knife
+knot
+knowledge
+land
+language
+last
+late
+laugh
+law
+lead
+leaf
+learning
+leather
+left
+leg
+let
+letter
+level
+library
+lift
+light
+like
+limit
+line
+linen
+lip
+liquid
+list
+little
+less
+least
+living
+lock
+long
+loose
+loss
+loud
+love
+low
+machine
+make
+male
+man
+manager
+map
+mark
+market
+married
+match
+material
+mass
+may
+meal
+measure
+meat
+medical
+meeting
+memory
+metal
+middle
+military
+milk
+mind
+mine
+minute
+mist
+mixed
+money
+monkey
+month
+moon
+morning
+mother
+motion
+mountain
+mouth
+move
+much
+more
+most
+muscle
+music
+nail
+name
+narrow
+nation
+natural
+near
+necessary
+neck
+need
+needle
+nerve
+net
+new
+news
+night
+no
+noise
+normal
+north
+nose
+not
+note
+now
+number
+nut
+observation
+of
+off
+offer
+office
+oil
+old
+on
+only
+open
+operation
+opposite
+opinion
+other
+or
+orange
+order
+organization
+ornament
+out
+oven
+over
+owner
+page
+pain
+paint
+paper
+parallel
+parcel
+part
+past
+paste
+payment
+peace
+pen
+pencil
+person
+physical
+picture
+pig
+pin
+pipe
+place
+plane
+plant
+plate
+play
+please
+pleasure
+plough/plow
+pocket
+point
+poison
+polish
+political
+poor
+porter
+position
+possible
+pot
+potato
+powder
+power
+present
+price
+print
+prison
+private
+probable
+process
+produce
+profit
+property
+prose
+protest
+public
+pull
+pump
+punishment
+purpose
+push
+put
+quality
+question
+quick
+quiet
+quite
+rail
+rain
+range
+rat
+rate
+ray
+reaction
+red
+reading
+ready
+reason
+receipt
+record
+regret
+regular
+relation
+religion
+representative
+request
+respect
+responsible
+rest
+reward
+rhythm
+rice
+right
+ring
+river
+road
+rod
+roll
+roof
+room
+root
+rough
+round
+rub
+rule
+run
+sad
+safe
+sail
+salt
+same
+sand
+say
+scale
+school
+science
+scissors
+screw
+sea
+seat
+second
+secret
+secretary
+see
+seed
+selection
+self
+send
+seem
+sense
+separate
+serious
+servant
+sex
+shade
+shake
+shame
+sharp
+sheep
+shelf
+ship
+shirt
+shock
+shoe
+short
+shut
+side
+sign
+silk
+silver
+simple
+sister
+size
+skin
+skirt
+sky
+sleep
+slip
+slope
+slow
+small
+smash
+smell
+smile
+smoke
+smooth
+snake
+sneeze
+snow
+so
+soap
+society
+sock
+soft
+solid
+some
+son
+song
+sort
+sound
+south
+soup
+space
+spade
+special
+sponge
+spoon
+spring
+square
+stamp
+stage
+star
+start
+statement
+station
+steam
+stem
+steel
+step
+stick
+still
+stitch
+stocking
+stomach
+stone
+stop
+store
+story
+strange
+street
+stretch
+sticky
+stiff
+straight
+strong
+structure
+substance
+sugar
+suggestion
+summer
+support
+surprise
+such
+sudden
+sun
+sweet
+swim
+system
+table
+tail
+take
+talk
+tall
+taste
+tax
+teaching
+tendency
+test
+than
+that
+the
+then
+theory
+there
+thick
+thin
+thing
+this
+thought
+thread
+throat
+though
+through
+thumb
+thunder
+ticket
+tight
+tired
+till
+time
+tin
+to
+toe
+together
+tomorrow
+tongue
+tooth
+top
+touch
+town
+trade
+train
+transport
+tray
+tree
+trick
+trousers
+true
+trouble
+turn
+twist
+umbrella
+under
+unit
+use
+up
+value
+verse
+very
+vessel
+view
+violent
+voice
+walk
+wall
+waiting
+war
+warm
+wash
+waste
+watch
+water
+wave
+wax
+way
+weather
+week
+weight
+well
+west
+wet
+wheel
+when
+where
+while
+whip
+whistle
+white
+who
+why
+wide
+will
+wind
+window
+wine
+wing
+winter
+wire
+wise
+with
+woman
+wood
+wool
+word
+work
+worm
+wound
+writing
+wrong
+year
+yellow
+yes
+yesterday
+you
+young
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/words/characters
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/words/characters	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,62 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Facade.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Facade.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+
+
+from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
+import numpy
+
+# Simple helper that generates one CAPTCHA image.
+# ease: difficulty selector. ease == 1 uses AngryGimpy (hard); any other
+#      value, including the default 0, uses PseudoGimpy (easy).
+# solution: 0 returns only the numpy array representing the image; any other
+#      value returns a tuple containing the array and the captcha solutions.
+#
+# Additional fonts can be added to the folder pyCaptcha/Captcha/data/fonts/others.
+# The program picks a font at random from that folder as well as from the vera folder.
+
+
+def generateCaptcha (ease=0, solution=0):
+    # Pick the CAPTCHA test class according to the requested difficulty.
+    if ease == 1:
+        g = AngryGimpy()
+
+    else:
+        g = PseudoGimpy()
+    # Render the test and convert the rendered result to a numpy array.
+    i = g.render()
+    a = numpy.asarray(i)
+
+    if solution == 0:
+       return a
+
+    else :
+        return (a, g.solutions)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/README	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,51 @@
+======================
+Python CAPTCHA package
+======================
+
+About
+-----
+
+This is the PyCAPTCHA package, a collection of Python modules
+implementing CAPTCHAs: automated tests that humans should pass,
+but current computer programs can't. These tests are often
+used for security.
+
+See  http://www.captcha.net for more information and examples.
+
+This project was started because the CIA project, written in
+Python, needed a CAPTCHA to automate its user creation process
+safely. All existing implementations the author could find were
+written in Java or for the .NET framework, so a simple Python
+alternative was needed.
+
+Examples
+--------
+
+Included are several example programs:
+
+  - simple_example.py is a bare-bones example that just generates
+    and displays an image.
+
+  - http_example.py is a longer example that uses BaseHTTPServer
+    to simulate a CAPTCHA's use in a web environment. Running this
+    example and connecting to it from your web browser is a quick
+    and easy way to see PyCAPTCHA in action.
+
+  - modpython_example.py is a version of http_example that runs
+    from an Apache server equipped with a properly configured
+    mod_python.
+
+
+Dependencies
+------------
+
+- Python 2.2.1 or later
+- the Python Imaging Library, required for visual CAPTCHAs
+
+
+Contacts
+--------
+
+Micah Dowty <micah@navi.cx>
+
+'scanline' on irc.freenode.net
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/http_example.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/http_example.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+#
+# An example that presents CAPTCHA tests in a web environment
+# and gives the user a chance to solve them. Run it, optionally
+# specifying a port number on the command line, then point your web
+# browser at the given URL.
+#
+
+from Captcha.Visual import Tests
+from Captcha import Factory
+import BaseHTTPServer, urlparse, sys
+
+
+class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):  # demo handler; main() sets captchaFactory on this class
+    def do_GET(self):  # Route a GET: "/" root page, "/images/<id>" image, "/solutions/<id>?word=..." grading
+        scheme, host, path, parameters, query, fragment = urlparse.urlparse(self.path)
+
+        # Split the path into segments
+        pathSegments = path.split('/')[1:]
+
+        # Split the query into key-value pairs
+        args = {}
+        for pair in query.split("&"):
+            if pair.find("=") >= 0:
+                key, value = pair.split("=", 1)
+                args.setdefault(key, []).append(value)  # each key maps to a list of its values
+            else:
+                args[pair] = []  # a bare key without "=" gets an empty value list
+
+        # A hack so it works with a proxy configured for VHostMonster :)
+        if pathSegments[0] == "vhost":
+            pathSegments = pathSegments[3:]
+        # NOTE(review): an empty pathSegments list here makes the [0] lookups below raise IndexError.
+        if pathSegments[0] == "":
+            self.handleRootPage(args.get('test', Tests.__all__)[0])  # no "test" argument: first name in Tests.__all__
+
+        elif pathSegments[0] == "images":
+            self.handleImagePage(pathSegments[1])
+
+        elif pathSegments[0] == "solutions":
+            self.handleSolutionPage(pathSegments[1], args['word'][0])  # NOTE(review): KeyError if "word" is absent
+
+        else:
+            self.handle404()
+
+    def handle404(self):  # Send a 404 status with a short HTML body
+        self.send_response(404)
+        self.send_header("Content-Type", "text/html")
+        self.end_headers()
+        self.wfile.write("<html><body><h1>No such resource</h1></body></html>")
+
+    def handleRootPage(self, testName):  # Create a new test of class testName and send the page presenting it
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html")
+        self.end_headers()
+        # NOTE(review): testName comes from the query string; an unknown name makes getattr raise AttributeError.
+        test = self.captchaFactory.new(getattr(Tests, testName))
+
+        # Make a list of tests other than the one we're using
+        others = []
+        for t in Tests.__all__:
+            if t != testName:
+                others.append('<li><a href="/?test=%s">%s</a></li>' % (t,t))
+        others = "\n".join(others)
+
+        self.wfile.write("""<html>
+<head>
+<title>PyCAPTCHA Example</title>
+</head>
+<body>
+<h1>PyCAPTCHA Example</h1>
+<p>
+  <b>%s</b>:
+  %s
+</p>
+
+<p><img src="/images/%s"/></p>
+<p>
+  <form action="/solutions/%s" method="get">
+    Enter the word shown:
+    <input type="text" name="word"/>
+  </form>
+</p>
+
+<p>
+Or try...
+<ul>
+%s
+</ul>
+</p>
+
+</body>
+</html>
+""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others))
+
+    def handleImagePage(self, id):  # Send the rendered JPEG for the test with this id, or a 404 page
+        test = self.captchaFactory.get(id)
+        if not test:
+            return self.handle404()
+
+        self.send_response(200)
+        self.send_header("Content-Type", "image/jpeg")
+        self.end_headers()
+        test.render().save(self.wfile, "JPEG")  # the image is written straight to the response stream
+
+    def handleSolutionPage(self, id, word):  # Grade the guessed word for test id and send the result page
+        test = self.captchaFactory.get(id)
+        if not test:
+            return self.handle404()
+
+        if not test.valid:
+            # Invalid tests will always return False, to prevent
+            # random trial-and-error attacks. This could be confusing to a user...
+            result = "Test invalidated, try another test"
+        elif test.testSolutions([word]):
+            result = "Correct"
+        else:
+            result = "Incorrect"
+        # NOTE(review): word is taken from the query string and inserted into the HTML below without escaping.
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html")
+        self.end_headers()
+        self.wfile.write("""<html>
+<head>
+<title>PyCAPTCHA Example</title>
+</head>
+<body>
+<h1>PyCAPTCHA Example</h1>
+<h2>%s</h2>
+<p><img src="/images/%s"/></p>
+<p><b>%s</b></p>
+<p>You guessed: %s</p>
+<p>Possible solutions: %s</p>
+<p><a href="/">Try again</a></p>
+</body>
+</html>
+""" % (test.__class__.__name__, test.id, result, word, ", ".join(test.solutions)))
+
+
+def main(port):  # Run the demo HTTP server on the given port; serve_forever() does not return normally
+    print "Starting server at http://localhost:%d/" % port
+    handler = RequestHandler
+    handler.captchaFactory = Factory()  # set on the class, so every handler instance sees the same factory
+    BaseHTTPServer.HTTPServer(('', port), RequestHandler).serve_forever()  # server address uses an empty host string
+
+if __name__ == "__main__":
+    # The port number can be specified on the command line, default is 8080
+    if len(sys.argv) >= 2:
+        port = int(sys.argv[1])  # int() raises ValueError for a non-numeric argument
+    else:
+        port = 8080
+    main(port)
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/modpython_example.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/modpython_example.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,113 @@
+#
+# An example that presents CAPTCHA tests in a web environment
+# and gives the user a chance to solve them.
+#
+# This example is for use with Apache using mod_python and its
+# Publisher handler. For example, if your apache configuration
+# included something like:
+#
+#   AddHandler python-program .py
+#   PythonHandler mod_python.publisher
+#
+# You could place this script anywhere in your web space to see
+# the demo.
+#
+# --Micah <micah@navi.cx>
+#
+
+from Captcha.Visual import Tests
+import Captcha
+from mod_python import apache
+
+
+def _getFactory(req):  # Build a PersistentFactory on a /tmp path named after the request's interpreter.
+    return Captcha.PersistentFactory("/tmp/pycaptcha_%s" % req.interpreter)
+
+
+def test(req, name=Tests.__all__[0]):
+    """Show a newly generated CAPTCHA of the given class (default: Tests.__all__[0]).
+       `name` is a request parameter; a name not listed in Tests.__all__ gets a 404."""
+    if name not in Tests.__all__:  # never getattr() an arbitrary client-chosen attribute name
+        raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
+    test = _getFactory(req).new(getattr(Tests, name))
+    # Make a list of tests other than the one we're using
+    others = []
+    for t in Tests.__all__:
+        if t != name:
+            others.append('<li><a href="?name=%s">%s</a></li>' % (t,t))
+    others = "\n".join(others)
+
+    return """<html>
+<head>
+<title>PyCAPTCHA Example</title>
+</head>
+<body>
+<h1>PyCAPTCHA Example (for mod_python)</h1>
+<p>
+  <b>%s</b>:
+  %s
+</p>
+
+<p><img src="image?id=%s"/></p>
+<p>
+  <form action="solution" method="get">
+    Enter the word shown:
+    <input type="text" name="word"/>
+    <input type="hidden" name="id" value="%s"/>
+  </form>
+</p>
+
+<p>
+Or try...
+<ul>
+%s
+</ul>
+</p>
+
+</body>
+</html>
+""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others)
+
+
+def image(req, id):
+    """Generate an image for the CAPTCHA with the given ID string"""
+    test = _getFactory(req).get(id)
+    if not test:
+        raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND  # unknown id -> 404
+    req.content_type = "image/jpeg"
+    test.render().save(req, "JPEG")  # the request object itself is used as the output file
+    return apache.OK
+
+
+def solution(req, id, word):
+    """Grade a CAPTCHA given a solution word"""
+    test = _getFactory(req).get(id)
+    if not test:
+        raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
+    from xml.sax.saxutils import escape  # local import; escapes &, < and > in the echoed guess
+    if not test.valid:
+        # Invalid tests will always return False, to prevent
+        # random trial-and-error attacks. This could be confusing to a user...
+        result = "Test invalidated, try another test"
+    elif test.testSolutions([word]):
+        result = "Correct"
+    else:
+        result = "Incorrect"
+    # `word` is client-supplied: HTML-escape it so it cannot inject markup (reflected XSS).
+    return """<html>
+<head>
+<title>PyCAPTCHA Example</title>
+</head>
+<body>
+<h1>PyCAPTCHA Example</h1>
+<h2>%s</h2>
+<p><img src="image?id=%s"/></p>
+<p><b>%s</b></p>
+<p>You guessed: %s</p>
+<p>Possible solutions: %s</p>
+<p><a href="test">Try again</a></p>
+</body>
+</html>
+""" % (test.__class__.__name__, test.id, result, escape(word), ", ".join(test.solutions))
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/output.png
Binary file data_generation/transformations/pycaptcha/output.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/setup.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/setup.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+from distutils.core import setup
+from setup.my_install_data import *
+
+setup (name = "PyCAPTCHA",
+       version = "0.4",
+       description = "A Python framework for CAPTCHA tests",
+       maintainer = "Micah Dowty",
+       maintainer_email = "micah@navi.cx",
+       license = "MIT",
+       packages = [
+           'Captcha',
+           'Captcha.Visual',
+       ],
+       cmdclass = {
+           'install_data': my_install_data,  # use the Data_Files-aware install_data command
+       },
+       data_files = [Data_Files(
+           preserve_path = 1,                # keep the remaining sub-path of each file
+           base_dir      = 'install_lib',    # name of the 'install' command attribute used as base directory
+           copy_to       = 'Captcha/data',   # destination, relative to the base directory
+           strip_dirs    = 2,                # drop the first two path components of each source path
+           template      = [
+               'graft Captcha/data',         # MANIFEST.in-style template line
+           ],
+       )],
+       )
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/setup/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/setup/__init__.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,1 @@
+# Extra modules for use with distutils
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/setup/my_install_data.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/setup/my_install_data.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,191 @@
+"""my_install_data.py
+
+Provides a more sophisticated facility to install data files
+than distutils' install_data does.
+You can specify your files as a template like in MANIFEST.in
+and you have more control over the copy process.
+
+Copyright 2000 by Rene Liebscher, Germany.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Note:
+This licence is only for this file.
+PyOpenGL has its own licence. (it is almost identical.)
+"""
+
+# created 2000/08/01, Rene Liebscher <R.Liebscher@gmx.de>
+
+###########################################################################
+# import some modules we need
+
+import os,sys,string
+from types import StringType,TupleType,ListType
+from distutils.util import change_root
+from distutils.filelist import FileList
+from distutils.command.install_data import install_data
+
+###########################################################################
+# a container class for our more sophisticated install mechanism
+
+class Data_Files:
+    """ container for list of data files.
+        supports alternate base_dirs e.g. 'install_lib','install_header',...
+        supports a directory where to copy files
+        supports templates as in MANIFEST.in
+        supports preserving of paths in filenames
+            eg. foo/xyz is copied to base_dir/foo/xyz
+        supports stripping of leading dirs of source paths
+            eg. foo/bar1/xyz, foo/bar2/abc can be copied to bar1/xyz, bar2/abc
+    """
+    # All constructor arguments are stored unchanged; nothing is expanded until finalize().
+    def __init__(self,base_dir=None,files=None,copy_to=None,template=None,preserve_path=0,strip_dirs=0):
+        self.base_dir = base_dir
+        self.files = files
+        self.copy_to = copy_to
+        self.template = template
+        self.preserve_path = preserve_path
+        self.strip_dirs = strip_dirs
+        self.finalized = 0  # set to 1 by finalize()
+    # Write a warning to stderr, prefixed with "warning: install_data:".
+    def warn (self, msg):
+        sys.stderr.write ("warning: %s: %s\n" %
+                          ("install_data", msg))
+
+    def debug_print (self, msg):
+        """Print 'msg' to stdout if the global DEBUG (taken from the
+        DISTUTILS_DEBUG environment variable) flag is true.
+        """
+        from distutils.core import DEBUG
+        if DEBUG:
+            print msg
+
+
+    def finalize(self):
+        """ complete the files list by processing the given template """
+        if self.finalized:
+            return  # already processed; a second call does nothing
+        if self.files == None:
+            self.files = []
+        if self.template != None:
+            if type(self.template) == StringType:
+                self.template = string.split(self.template,";")  # one string holds ';'-separated template lines
+            filelist = FileList(self.warn,self.debug_print)
+            for line in self.template:
+                filelist.process_template_line(string.strip(line))
+            filelist.sort()
+            filelist.remove_duplicates()
+            self.files.extend(filelist.files)  # template matches are appended to the explicit files
+        self.finalized = 1
+
+# end class Data_Files
+
+###########################################################################
+# a more sophisticated install routine than distutils install_data
+
+class my_install_data (install_data):
+    # install_data command that understands Data_Files entries (old-style entries are converted).
+    def check_data(self,d):
+        """ check if data are in new format, if not create a suitable object.
+            returns finalized data object; raises DistutilsSetupError for malformed entries """
+        from distutils.errors import DistutilsSetupError  # raised below but never imported by this file
+        if not isinstance(d, Data_Files):
+            self.warn(("old-style data files list found "
+                        "-- please convert to Data_Files instance"))
+            if type(d) is TupleType:
+                if len(d) != 2 or  not (type(d[1]) is ListType):
+                        raise DistutilsSetupError(
+                            "each element of 'data_files' option must be an "
+                            "Data File instance, a string or 2-tuple (string,[strings])")
+                d = Data_Files(copy_to=d[0],files=d[1])
+            else:
+                if not (type(d) is StringType):
+                        raise DistutilsSetupError(
+                            "each element of 'data_files' option must be an "
+                            "Data File instance, a string or 2-tuple (string,[strings])")
+                d = Data_Files(files=[d])
+        d.finalize()
+        return d
+
+    def run(self):
+        """Copy every data file to its destination directory and return the list of written files."""
+        self.outfiles = []
+        install_cmd = self.get_finalized_command('install')
+        for d in self.data_files:
+            d = self.check_data(d)
+
+            install_dir = self.install_dir
+            # alternative base dir given => overwrite install_dir
+            if d.base_dir != None:
+                install_dir = getattr(install_cmd,d.base_dir)
+
+            # copy to another directory
+            if d.copy_to != None:
+                if not os.path.isabs(d.copy_to):
+                    # relative path to install_dir
+                    dir = os.path.join(install_dir, d.copy_to)
+                elif install_cmd.root:
+                    # absolute path and alternative root set
+                    dir = change_root(self.root,d.copy_to)
+                else:
+                    # absolute path
+                    dir = d.copy_to
+            else:
+                # simply copy to install_dir
+                dir = install_dir
+                # warn if necessary
+                self.warn("setup script did not provide a directory to copy files to "
+                          " -- installing right in '%s'" % install_dir)
+
+            dir=os.path.normpath(dir)
+            # create path
+            self.mkpath(dir)
+
+            # copy all files
+            for src in d.files:
+                if d.strip_dirs > 0:
+                    dst = string.join(string.split(src,os.sep)[d.strip_dirs:],os.sep)
+                else:
+                    dst = src
+                if d.preserve_path:
+                    # preserve path in filename
+                    self.mkpath(os.path.dirname(os.path.join(dir,dst)))
+                    out = self.copy_file(src, os.path.join(dir,dst))
+                else:
+                    out = self.copy_file(src, dir)
+                if type(out) is TupleType:
+                    out = out[0]  # copy_file may return a tuple; only its first element is recorded
+                self.outfiles.append(out)
+
+        return self.outfiles
+
+    def get_inputs (self):
+        inputs = []
+        for d in self.data_files:
+            d = self.check_data(d)
+            inputs.append(d.files)  # NOTE(review): yields a list of lists, one per entry -- confirm callers expect that
+        return inputs
+
+    def get_outputs (self):
+         return self.outfiles
+
+
+###########################################################################
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/simple_example.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/simple_example.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+#
+# A very simple example that creates a random image from the
+# PseudoGimpy CAPTCHA, saves and shows it, and prints the list
+# of solutions. Normally you would call testSolutions rather
+# than reading this list yourself.
+#
+from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
+import numpy
+#from numpy import *
+
+#g = AngryGimpy()
+#i = g.render()
+#a = numpy.asarray(i)
+#b = numpy.zeros((2, 2), numpy.int8)
+#c = a == b
+#print c
+#i.save("output.png")
+#i.show()
+#print a
+#print g.solutions
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/test.png
Binary file data_generation/transformations/pycaptcha/test.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/transformations.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/transformations.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,25 @@
+
+import Numeric, Image
+    # Convert a PIL image into an array object (Numeric here) and vice versa
+
+
+def image2array(im):
+    # Convert a single-band PIL image (mode "L" or "F") into a 2-D Numeric array.
+    if im.mode not in ("L", "F"):
+        raise ValueError, "can only convert single-layer images"
+    if im.mode == "L":
+        a = Numeric.fromstring(im.tostring(), Numeric.UnsignedInt8)
+    else:
+        a = Numeric.fromstring(im.tostring(), Numeric.Float32)
+    a.shape = im.size[1], im.size[0]  # PIL size is (width, height), so the shape becomes (height, width)
+    return a
+
+def array2image(a):
+    # Convert a 2-D Numeric array (UnsignedInt8 or Float32) into a PIL image of mode "L" or "F".
+    if a.typecode() == Numeric.UnsignedInt8:
+        mode = "L"
+    elif a.typecode() == Numeric.Float32:
+        mode = "F"
+    else:
+        raise ValueError, "unsupported image mode"
+    return Image.fromstring(mode, (a.shape[1], a.shape[0]), a.tostring())  # size is passed as (columns, rows)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/slant.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/slant.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,104 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Author: Youssouf
+
+This module adds a slant effect to the image.
+
+To obtain the slant effect, each row of the array is shifted proportionately by a step controlled by the complexity.
+
+'''
+
+import numpy
+
+
+class Slant():
+    """Slant an image: row i is shifted cyclically by round(i*direction*angle) pixels,
+    then the whole image is shifted horizontally to re-center it."""
+    def __init__(self, complexity=1):
+        #---------- private attributes
+        self.direction = 1
+        self.angle = 0
+
+        #---------- generation parameters
+        self.regenerate_parameters(complexity)
+
+    def _get_current_parameters(self):
+        return [self.angle, self.direction]
+
+    def get_settings_names(self):
+        return ['angle', 'direction']
+
+    def regenerate_parameters(self, complexity):
+        """Draw angle uniformly from [0, complexity) and direction from {-1, +1}; return [angle, direction]."""
+        self.angle = numpy.random.uniform(0.0, complexity)
+        P = numpy.random.uniform()
+        self.direction = 1
+        if P < 0.5:
+            self.direction = -1
+        return self._get_current_parameters()
+
+    def transform_image(self,image):
+        """Return a slanted copy of the 2-D array `image` (the input itself when angle == 0)."""
+        if self.angle == 0:
+            return image
+
+        ysize, xsize = image.shape
+        slant = self.direction*self.angle
+
+        output = image.copy()
+
+        # shift all the rows
+        for i in range(ysize):
+            line = image[i]
+            # round() returns a float in Python 2, but slice bounds must be integers
+            delta = int(round(i*slant)) % xsize
+            line1 = line[:xsize-delta]
+            line2 = line[xsize-delta:xsize]
+
+            output[i][delta:xsize] = line1
+            output[i][0:delta] = line2
+
+        #correction to center the image
+        correction = (self.direction)*int(round(self.angle*ysize/2))
+        correction = (xsize - correction) % xsize
+
+        # center the region
+        line1 = output[0:ysize,0:xsize-correction].copy()
+        line2 = output[0:ysize,xsize-correction:xsize].copy()
+        output[0:ysize,correction:xsize] = line1
+        output[0:ysize,0:correction] = line2
+        return output
+            
+
+# Test function
+# Load an image in local and create several samples of the effect on the
+# original image with different parameter. All the samples are saved in a single image, the 1st image being the original.
+
+def test_slant():
+    import scipy
+    img_name = "test_img/mnist_0.png"
+    dest_img_name = "test_img/slanted.png"
+    nb_samples = 10
+    im = Image.open(img_name)  # NOTE: Image is only imported under the __main__ guard of this file
+    im = im.convert("L")
+    image = numpy.asarray(im)
+    # Start with the original image, then append one freshly slanted sample per iteration.
+    image_final = image
+    slant = Slant()
+    for i in range(nb_samples):
+        slant.regenerate_parameters(1)
+        image_slant = slant.transform_image(image)
+        image_final = scipy.hstack((image_final,image_slant))
+    # Save the horizontal strip (original first) as one greyscale image.
+    im = Image.fromarray(image_final.astype('uint8'), "L")
+    im.save(dest_img_name)
+
+# Test
+if __name__ == '__main__':  
+    import sys, os, fnmatch
+    import Image
+
+    test_slant()
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/testmod.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/testmod.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,130 @@
+# This script is to test your modules to see if they conform to the module API
+# defined on the wiki.
+import random, numpy, gc, time, math, sys
+
+# this is an example module that does stupid image value shifting
+
+class DummyModule(object):  # Example module for the API: adds one random offset to every pixel.
+    def get_settings_names(self):
+        return ['value']
+    # Draw the offset from a Gaussian with mean 0 and standard deviation 0.5*complexity.
+    def regenerate_parameters(self, complexity):
+        self._value = random.gauss(0, 0.5*complexity)
+        return [self._value]
+    # Add the offset to the image and clip the result to [0, 1].
+    def transform_image(self, image):
+        return numpy.clip(image+self._value, 0, 1)
+    
+#import <your module>
+
+# instantiate your class here (rather than DummyModule)
+mod = DummyModule()
+
+def error(msg):  # Report a fatal API violation and stop the script with exit status 1.
+    print "ERROR:", msg
+    sys.exit(1)
+
+def warn(msg):  # Report a non-fatal problem; the script keeps running.
+    print "WARNING:", msg
+
+def timeit(f, lbl):
+    # Time f(): one calibration call picks a power-of-ten loop count, then the mean time per call is printed.
+    gc.disable()
+    t = time.time()
+    f()
+    est = max(time.time() - t, 1e-6)  # clamp: a 0.0 reading would raise ZeroDivisionError in 10/est
+    gc.enable()
+
+    loops = max(1, int(10**math.floor(math.log(10/est, 10))))
+
+    gc.disable()
+    t = time.time()
+    for _ in xrange(loops):
+        f()
+
+    print lbl, "(", loops, "loops ):", (time.time() - t)/loops, "s"
+    gc.enable()
+
+########################
+# get_settings_names() #
+########################
+
+print "Testing get_settings_names()"
+
+names = mod.get_settings_names()
+
+if type(names) is not list:
+    error("Must return a list")
+
+if not all(type(e) is str for e in names):
+    warn("The elements of the list should be strings")
+
+###########################
+# regenerate_parameters() #
+###########################
+
+print "Testing regenerate_parameters()"
+
+params = mod.regenerate_parameters(0.2)
+
+if type(params) is not list:
+    error("Must return a list")
+
+if len(params) != len(names):
+    error("the returned parameter list must have the same length as the number of parameters")
+
+params2 = mod.regenerate_parameters(0.2)
+if len(names) != 0 and params == params2:
+    error("the complexity parameter determines the distribution of the parameters, not their value")
+
+mod.regenerate_parameters(0.0)
+mod.regenerate_parameters(1.0)
+    
+mod.regenerate_parameters(0.5)
+
+#####################
+# transform_image() #
+#####################
+
+print "Testing transform_image()"
+
+imgr = numpy.random.random_sample((32, 32)).astype(numpy.float32)
+img1 = numpy.ones((32, 32), dtype=numpy.float32)
+img0 = numpy.zeros((32, 32), dtype=numpy.float32)
+
+resr = mod.transform_image(imgr)
+
+if type(resr) is not numpy.ndarray:
+    error("Must return an ndarray")
+
+if resr.shape != (32, 32):
+    error("Must return 32x32 array")
+
+if resr.dtype != numpy.float32:
+    error("Must return float32 array")
+
+res1 = mod.transform_image(img1)
+res0 = mod.transform_image(img0)
+
+if res1.max() > 1.0 or res0.max() > 1.0:
+    error("Must keep array values between 0 and 1")
+
+if res1.min() < 0.0 or res0.min() < 0.0:
+    error("Must keep array values between 0 and 1")
+
+mod.regenerate_parameters(0.0)
+mod.transform_image(imgr)
+mod.regenerate_parameters(1.0)
+mod.transform_image(imgr)
+
+print "Bonus Stage: timings"
+
+timeit(lambda: None, "empty")
+timeit(lambda: mod.regenerate_parameters(0.5), "regenerate_parameters()")
+timeit(lambda: mod.transform_image(imgr), "tranform_image()")
+
+def f():
+    mod.regenerate_parameters(0.2)
+    mod.transform_image(imgr)
+
+timeit(f, "regen and transform")
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/testtransformations.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/testtransformations.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+
+
+from pylearn.io import filetensor as ft
+import copy
+import pygame
+import time
+import numpy as N
+
+from ttf2jpg import ttf2jpg
+
+#from gimpfu import *
+
+
+from PoivreSel import PoivreSel
+from thick import Thick
+from BruitGauss import BruitGauss
+from DistorsionGauss import DistorsionGauss
+from PermutPixel import PermutPixel
+from gimp_script import GIMP1
+from Rature import Rature
+from contrast import Contrast
+from local_elastic_distortions import LocalElasticDistorter
+from slant import Slant
+from Occlusion import Occlusion
+from add_background_image import AddBackground
+from affine_transform import AffineTransformation
+
+###---------------------order of transformation module
+MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+
+###---------------------complexity associated to each of them
+complexity = 0.7
+#complexity = [0.5]*len(MODULE_INSTANCES)
+#complexity = [0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.]
+n=100
+
+def createimage(path,d):  # Render the first n rows of d through every module and save PNG overviews under `path`.
+    for i in range(n):
+        screen.fill(0)  # `screen` and `exemple` are module-level pygame surfaces created further down
+        a=d[i,:]
+        off1=4*32
+        off2=0
+        for u in range(n):  # n independently randomised renderings of the same source row
+            b=N.asarray(N.reshape(a,(32,32)))
+            c=N.asarray([N.reshape(a*255.0,(32,32))]*3).T
+            new=pygame.surfarray.make_surface(c)
+            new=pygame.transform.scale2x(new)
+            new=pygame.transform.scale2x(new)
+            #new.set_palette(anglcolorpalette)
+            screen.blit(new,(0,0))
+            exemple.blit(new,(0,0))
+            # the untransformed image sits at the top-left corner of both surfaces
+            offset = 4*32
+            offset2 = 0
+            ct = 0
+            ctmp =  N.random.rand()*complexity
+            print u
+            for j in MODULE_INSTANCES:
+                # draw a fresh complexity in [0, complexity) for each module
+                #ctmp = N.random.rand()*complexity[ct]
+                ctmp = N.random.rand()*complexity
+                #print j.get_settings_names(), j.regenerate_parameters(ctmp)
+                th=j.regenerate_parameters(ctmp)
+                # modules are chained: each one transforms the output of the previous one
+                b=j.transform_image(b)
+                c=N.asarray([b*255]*3).T
+                new=pygame.surfarray.make_surface(c)
+                new=pygame.transform.scale2x(new)
+                new=pygame.transform.scale2x(new)
+                if u==0:  # only the first rendering is drawn step by step on `screen`
+                    #new.set_palette(anglcolorpalette)
+                    screen.blit(new,(offset,offset2))
+                    font = pygame.font.SysFont('liberationserif',18)
+                    text = font.render('%s '%(int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
+                    #if  j.__module__ == 'Rature':
+                    #     text = font.render('%s,%s'%(th[-1],int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
+                    screen.blit(text,(offset,offset2+4*32))
+                    if ct == len(MODULE_INSTANCES)/2-1:  # wrap to a second row of tiles
+                        offset = 0
+                        offset2 = 4*32+20
+                    else:
+                        offset += 4*32
+                    ct+=1
+            exemple.blit(new,(off1,off2))  # final result of this rendering goes into the sample grid
+            if off1 != 9*4*32:  # ten tiles per grid row
+                off1+=4*32
+            else:
+                off1=0
+                off2+=4*32
+        pygame.image.save(exemple,path+'/perimages/%s.PNG'%i)
+        pygame.image.save(screen,path+'/exemples/%s.PNG'%i)
+ 
+
+
+
+nbmodule = len(MODULE_INSTANCES)
+
+pygame.surfarray.use_arraytype('numpy')
+
+#pygame.display.init()
+screen = pygame.Surface((4*(nbmodule+1)/2*32,2*(4*32+20)),depth=32)
+exemple = pygame.Surface((N.ceil(N.sqrt(n))*4*32,N.ceil(N.sqrt(n))*4*32),depth=32)
+
+anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
+#pygame.Surface.set_palette(anglcolorpalette)
+#screen.set_palette(anglcolorpalette)
+
+pygame.font.init()
+
+d = N.zeros((n,1024))
+
+datapath = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
+f = open(datapath)
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/OCR',d)
+
+
+
+datapath = '/data/lisa/data/nist/by_class/'
+f = open(datapath+'digits_reshuffled/digits_reshuffled_train_data.ft')
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/NIST_digits',d)
+
+
+
+datapath = '/data/lisa/data/nist/by_class/'
+f = open(datapath+'upper/upper_train_data.ft')
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/NIST_upper',d)
+
+from Facade import *
+
+for i in range(n):
+    d[i,:]=N.asarray(N.reshape(generateCaptcha(0.8,0),(1,1024))/255.0,dtype='float32')
+
+createimage('/u/glorotxa/transf/capcha',d)
+
+
+for i in range(n):
+    myttf2jpg = ttf2jpg()
+    d[i,:]=N.reshape(myttf2jpg.generate_image()[0],(1,1024))
+createimage('/u/glorotxa/transf/fonts',d)
+
+datapath = '/data/lisa/data/nist/by_class/'
+f = open(datapath+'lower/lower_train_data.ft')
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/NIST_lower',d)
+
+
+#pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/thick.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/thick.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,198 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Simple implementation of random thickness deformation using morphological
+operation of scipy.
+Only one morphological operation applied (dilation or erosion), the kernel is random
+out of a list of 12 symmetric kernels. (only 5 to be chosen for erosion because it can
+hurt the recognizability of the character and 12 for dilation).
+
+Author: Xavier Glorot
+
+'''
+
+import scipy.ndimage.morphology
+import numpy as N
+
+
+class Thick():  # Random thickness change: grey dilation or erosion with a randomly chosen kernel.
+    def __init__(self,complexity = 1):
+        #---------- private attributes
+        self.__nx__ = 32 #xdim of the images
+        self.__ny__ = 32 #ydim of the images
+        self.__erodemax__ = 5 #nb of index max of erode structuring elements
+        self.__dilatemax__ = 9 #nb of index max of dilation structuring elements
+        self.__structuring_elements__ = [N.asarray([[1,1]]),N.asarray([[1],[1]]),\
+                                        N.asarray([[1,1],[1,1]]),N.asarray([[0,1,0],[1,1,1],[0,1,0]]),\
+                                        N.asarray([[1,1,1],[1,1,1]]),N.asarray([[1,1],[1,1],[1,1]]),\
+                                        N.asarray([[1,1,1],[1,1,1],[1,1,1]]),\
+                                        N.asarray([[1,1,1,1],[1,1,1,1],[1,1,1,1]]),\
+                                        N.asarray([[1,1,1],[1,1,1],[1,1,1],[1,1,1]]),\
+                                        N.asarray([[0,0,1,0,0],[0,1,1,1,0],[1,1,1,1,1],[0,1,1,1,0],[0,0,1,0,0]]),\
+                                        N.asarray([[1,1,1,1],[1,1,1,1]]),N.asarray([[1,1],[1,1],[1,1],[1,1]])]
+        #------------------------------------------------
+
+        #---------- generation parameters
+        self.regenerate_parameters(complexity)
+        #------------------------------------------------
+
+    def _get_current_parameters(self):
+        return [self.thick_param]
+
+    def get_settings_names(self):
+        return ['thick_param']
+    # Draw the operation (dilation with prob. Pdilate, erosion with prob. Perode, else none) and a kernel index.
+    def regenerate_parameters(self, complexity):
+        self.erodenb = int(N.ceil(complexity * self.__erodemax__))  # int: randint() needs an integer bound
+        self.dilatenb = int(N.ceil(complexity * self.__dilatemax__))
+        self.Perode = self.erodenb / (self.dilatenb + self.erodenb + 1.0)
+        self.Pdilate = self.dilatenb / (self.dilatenb   + self.erodenb + 1.0)
+        assert (self.Perode + self.Pdilate <= 1) & (self.Perode + self.Pdilate >= 0)
+        assert (complexity >= 0) & (complexity <= 1)
+        P = N.random.uniform()
+        if P>1-(self.Pdilate+self.Perode):
+            if P>1-(self.Pdilate+self.Perode)+self.Perode:
+                self.meth = 1
+                self.nb=N.random.randint(self.dilatenb)
+            else:
+                self.meth = -1
+                self.nb=N.random.randint(self.erodenb)
+        else:
+            self.meth = 0
+            self.nb = -1
+        self.thick_param = self.meth*self.nb
+        return self._get_current_parameters()
+    # Apply the drawn operation to one 2-D image and rescale the result; meth == 0 returns the input unchanged.
+    def transform_1_image(self,image): #the real transformation method
+        if self.meth!=0:
+            maxi = float(N.max(image))
+            mini = float(N.min(image))
+
+            imagenorm=image/maxi
+
+            if self.meth==1:
+                trans=scipy.ndimage.morphology.grey_dilation\
+                    (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
+            else:
+                trans=scipy.ndimage.morphology.grey_erosion\
+                    (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
+
+            #------renormalizing
+            maxit = N.max(trans)
+            minit = N.min(trans)
+            trans= N.asarray((trans - (minit+mini)) / (maxit - (minit+mini)) * maxi,dtype=image.dtype)
+            #--------
+            return trans
+        else:
+            return image
+    # Dispatch on layout: one (nx,ny) image, a 3-D stack, a 2-D matrix of flattened images, or one flattened image.
+    def transform_image(self,image): #handling different format
+        if image.shape == (self.__nx__,self.__ny__):
+            return self.transform_1_image(image)
+        if image.ndim == 3:
+            newimage = image.copy()  # was copy.copy(image), but `copy` is only imported under the __main__ guard
+            for i in range(image.shape[0]):
+                newimage[i,:,:] = self.transform_1_image(image[i,:,:])
+            return newimage
+        if image.ndim == 2 and image.shape != (self.__nx__,self.__ny__):
+            newimage = N.reshape(image,(image.shape[0],self.__nx__,self.__ny__))  # NOTE(review): may be a view, so `image` could be modified in place
+            for i in range(image.shape[0]):
+                newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
+            return N.reshape(newimage,image.shape)
+        if image.ndim == 1:
+            newimage = N.reshape(image,(self.__nx__,self.__ny__))
+            newimage = self.transform_1_image(newimage)
+            return N.reshape(newimage,image.shape)
+        assert False #should never go there
+
+
+
+
+#test on NIST (you need pylearn and access to NIST to do that)
+
+if __name__ == '__main__':
+    
+    from pylearn.io import filetensor as ft
+    import copy
+    import pygame
+    import time
+    datapath = '/data/lisa/data/nist/by_class/'
+    f = open(datapath+'digits/digits_train_data.ft')
+    d = ft.read(f)
+    
+    pygame.surfarray.use_arraytype('numpy')
+    
+    pygame.display.init()
+    screen = pygame.display.set_mode((8*4*32,8*32),0,8)
+    anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
+    screen.set_palette(anglcolorpalette)
+    
+    MyThick = Thick()
+    
+    #debut=time.time()
+    #MyThick.transform_image(d)
+    #fin=time.time()
+    #print '------------------------------------------------'
+    #print d.shape[0],' images transformed in :', fin-debut, ' seconds'
+    #print '------------------------------------------------'
+    #print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
+    #print '------------------------------------------------'
+    #print MyThick.get_settings_names()
+    #print MyThick._get_current_parameters()
+    #print MyThick.regenerate_parameters(0)
+    #print MyThick.regenerate_parameters(0.5)
+    #print MyThick.regenerate_parameters(1)
+    for i in range(10000):
+        a=d[i,:]
+        b=N.asarray(N.reshape(a,(32,32))).T
+        
+        new=pygame.surfarray.make_surface(b)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new.set_palette(anglcolorpalette)
+        screen.blit(new,(0,0))
+        
+        #max dilation
+        MyThick.meth=1
+        MyThick.nb=MyThick.__dilatemax__
+        c=MyThick.transform_image(a)
+        b=N.asarray(N.reshape(c,(32,32))).T
+        
+        new=pygame.surfarray.make_surface(b)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new.set_palette(anglcolorpalette)
+        screen.blit(new,(8*32,0))
+        
+        #max erosion
+        MyThick.meth=-1
+        MyThick.nb=MyThick.__erodemax__
+        c=MyThick.transform_image(a)
+        b=N.asarray(N.reshape(c,(32,32))).T
+        
+        new=pygame.surfarray.make_surface(b)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new.set_palette(anglcolorpalette)
+        screen.blit(new,(8*2*32,0))
+        
+        #random
+        print MyThick.get_settings_names(), MyThick.regenerate_parameters(1)
+        c=MyThick.transform_image(a)
+        b=N.asarray(N.reshape(c,(32,32))).T
+        
+        new=pygame.surfarray.make_surface(b)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new=pygame.transform.scale2x(new)
+        new.set_palette(anglcolorpalette)
+        screen.blit(new,(8*3*32,0))
+        
+        pygame.display.update()
+        raw_input('Press Enter')
+    
+    pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/ttf2jpg.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/ttf2jpg.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,121 @@
+#!/usr/bin/python                                                                                 
+# -*- coding: iso-8859-1 -*-                                                                      
+
+'''
+    Implementation of font image generator
+    download fonts from http://www.dafont.com for example
+
+    Author: Guillaume Sicard
+'''
+
+import sys, os, fnmatch, random
+import Image, ImageFont, ImageDraw, numpy
+
+class ttf2jpg():
+    def __init__(self, font_file = ''):
+        self.w = 32
+        self.h = 32
+        self.font_dir = '/Tmp/allfonts/'
+        self.font_file = font_file
+        self.image_dir = './images/'
+        self.pattern = '*.ttf'
+        self.char_list = []
+        for i in range(0,10):
+            self.char_list.append(chr(ord('0') + i) )
+        for i in range(0,26):
+            self.char_list.append(chr(ord('A') + i) )
+        for i in range(0,26):
+            self.char_list.append(chr(ord('a') + i) )
+        files = os.listdir(self.font_dir)
+        self.font_files = fnmatch.filter(files, '*.ttf') + fnmatch.filter(files, '*.TTF')
+
+    # get font name
+    def get_settings_names(self):
+        return [self.font_file]
+
+    # save an image
+    def save_image(self,array, filename = ''):
+        image = (array * 255.0).astype('int')
+        image = Image.fromarray(image).convert('L')
+        if (filename != ''):
+            image.save(filename)
+        else:
+            image.show()
+
+    # set a random font for character generation
+    def set_random_font(self):
+        i = random.randint(0, len(self.font_files) - 1)
+        self.font_file = self.font_dir + self.font_files[i]
+
+    # return a picture array of "text" with font "font_file"
+    def create_image(self, text):
+        # Render `text` centred on a black self.w x self.h canvas using the
+        # font at self.font_file; returns a float32 array (pixels / 255.0).
+        image = Image.new('L', (self.w, self.h), 'Black')
+        draw = ImageDraw.Draw(image)
+
+        # load the font with the right size
+        font = ImageFont.truetype(self.font_file, 28)
+        d_w,d_h =  draw.textsize(text, font=font)
+
+        # centre on the actual canvas size (offsets were hard-coded to 32)
+        draw.text(((self.w - d_w) / 2, (self.h - d_h) / 2), text, font=font, fill='White')
+
+        image = numpy.asarray(image)
+        image = (image / 255.0).astype(numpy.float32)
+        return image
+
+    # write all the letters and numbers into pictures
+    def process_font(self):
+        for i in range(0, len(self.char_list) ):
+            image = self.create_image(self.char_list[i])
+            self.save_image(image, self.image_dir + self.char_list[i] + '-' + os.path.basename(self.font_file) + '.jpg')
+            sys.stdout.write('.')
+            sys.stdout.flush()
+        return (len(self.char_list))
+
+    # generate the character from the font_file and returns a numpy array
+    def generate_image_from_char(self, character, font_file = ''):
+        if (font_file != ''):
+            self.font_file = font_file
+
+        return self.create_image(character)
+
+    # generate random character from random font file as a numpy array
+    def generate_image(self):
+        self.set_random_font()
+        i = random.randint(0, len(self.char_list) - 1)
+        return self.generate_image_from_char(self.char_list[i]), i
+
+    # test method, create character images for all fonts in "font_dir" in dir "image_dir"
+    def test(self):
+        # Render every character of char_list for each font in font_dir that
+        # matches self.pattern, saving the images as jpg files in image_dir.
+        import time
+        # look for ttf files
+        files = os.listdir(self.font_dir)
+        font_files = fnmatch.filter(files, self.pattern)
+
+        # create "image_dir" if it doesn't exist
+        if not os.path.isdir(self.image_dir):
+            os.mkdir(self.image_dir)
+
+        sys.stdout.write( str(len(font_files)) + ' fonts found, generating jpg images in folder ' + self.image_dir )
+        sys.stdout.flush()
+
+        # main loop
+        t =  time.time()
+        n = 0
+        for font_file in font_files:
+            self.font_file = self.font_dir + font_file
+            n += self.process_font()
+        t = time.time() - t
+        # guard the average: n is 0 when no font matched (was ZeroDivisionError)
+        sys.stdout.write('\nall done!\n' + str(n) + ' images generated in ' + str(t) + 's (average : ' + str(1000 * t / max(n, 1)) + ' ms/im)\n')
+
+if __name__ == '__main__':
+
+    myttf2jpg = ttf2jpg()
+    #myttf2jpg.test()
+    image, i = myttf2jpg.generate_image()
+    myttf2jpg.save_image(image, '')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/visualizer.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/visualizer.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+
+import numpy
+import Image
+from image_tiling import tile_raster_images
+import pylab
+import time
+
+class Visualizer():
+    def __init__(self, num_columns=10, image_size=(32,32), to_dir=None, on_screen=False):
+        self.list = []
+        self.image_size = image_size
+        self.num_columns = num_columns
+
+        self.on_screen = on_screen
+        self.to_dir = to_dir
+
+        self.cur_grid_image = None
+
+        self.cur_index = 0
+
+    def visualize_stop_and_flush(self):
+        self.make_grid_image()
+
+        if self.on_screen:
+            self.visualize()
+        if self.to_dir:
+            self.dump_to_disk()
+
+        self.stop_and_wait()
+        self.flush()
+
+        self.cur_index += 1
+
+    def make_grid_image(self):
+        # Tile the collected images into a grid that is num_columns wide.
+        # Ceiling division: a partially filled last row still needs its own row.
+        num_rows = (len(self.list) + self.num_columns - 1) // self.num_columns
+        grid_shape = (num_rows, self.num_columns)
+        self.cur_grid_image = tile_raster_images(numpy.array(self.list), self.image_size, grid_shape, tile_spacing=(5,5), output_pixel_vals=False)
+
+    def visualize(self):
+        pylab.imshow(self.cur_grid_image)
+        pylab.draw()
+
+    def dump_to_disk(self):
+        gi = Image.fromarray((self.cur_grid_image * 255).astype('uint8'), "L")
+        gi.save(self.to_dir + "/grid_" + str(self.cur_index) + ".png")
+        
+    def stop_and_wait(self):
+        # can't raw_input under gimp, so sleep)
+        print "New image generated, sleeping 5 secs"
+        time.sleep(5)
+
+    def flush(self):
+        self.list = []
+    
+    def get_parameters_names(self):
+        return []
+
+    def regenerate_parameters(self):
+        return []
+
+    def after_transform_callback(self, image):
+        self.transform_image(image)
+
+    def end_transform_callback(self, final_image):
+        self.visualize_stop_and_flush()
+
+    def transform_image(self, image):
+        sz = self.image_size
+        self.list.append(image.copy().reshape((sz[0] * sz[1])))
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/convolutional_dae/stacked_convolutional_dae.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,415 @@
+import numpy
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import theano.sandbox.softsign
+
+from theano.tensor.signal import downsample
+from theano.tensor.nnet import conv 
+import gzip
+import cPickle
+ 
+ 
+class LogisticRegression(object):
+ 
+    def __init__(self, input, n_in, n_out):
+ 
+        self.W = theano.shared( value=numpy.zeros((n_in,n_out),
+                                            dtype = theano.config.floatX) )
+
+        self.b = theano.shared( value=numpy.zeros((n_out,),
+                                            dtype = theano.config.floatX) )
+
+        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+        
+
+        self.y_pred=T.argmax(self.p_y_given_x, axis=1)
+ 
+        self.params = [self.W, self.b]
+ 
+    def negative_log_likelihood(self, y):
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+ 
+    def MSE(self, y):
+        return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2)
+
+    def errors(self, y):
+        # Symbolic mean of (y_pred != y); TypeError on ndim mismatch, and
+        # NotImplementedError when y does not have an integer dtype.
+        if y.ndim != self.y_pred.ndim:
+            raise TypeError('y should have the same shape as self.y_pred',
+                ('y', y.type, 'y_pred', self.y_pred.type))  # was undefined `target`
+        if y.dtype.startswith('int'):
+            return T.mean(T.neq(self.y_pred, y))
+        else:
+            raise NotImplementedError()
+ 
+ 
+class SigmoidalLayer(object):
+    def __init__(self, rng, input, n_in, n_out):
+
+        self.input = input
+ 
+        W_values = numpy.asarray( rng.uniform( \
+              low = -numpy.sqrt(6./(n_in+n_out)), \
+              high = numpy.sqrt(6./(n_in+n_out)), \
+              size = (n_in, n_out)), dtype = theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+ 
+        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+ 
+        self.output = T.tanh(T.dot(input, self.W) + self.b)
+        self.params = [self.W, self.b]
+ 
+class dA_conv(object):
+ 
+  def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\
+                   shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)):
+
+    theano_rng = RandomStreams()
+    
+    fan_in = numpy.prod(filter_shape[1:])
+    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
+
+    center = theano.shared(value = 1, name="center")
+    scale = theano.shared(value = 2, name="scale")
+
+    if shared_W != None and shared_b != None :
+        self.W = shared_W
+        self.b = shared_b
+    else:
+        initial_W = numpy.asarray( numpy.random.uniform( \
+              low = -numpy.sqrt(6./(fan_in+fan_out)), \
+              high = numpy.sqrt(6./(fan_in+fan_out)), \
+              size = filter_shape), dtype = theano.config.floatX)
+        initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
+    
+    
+        self.W = theano.shared(value = initial_W, name = "W")
+        self.b = theano.shared(value = initial_b, name = "b")
+    
+ 
+    initial_b_prime= numpy.zeros((filter_shape[1],))
+        
+    self.W_prime=T.dtensor4('W_prime')
+
+    self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
+ 
+    self.x = input
+
+    self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
+
+    conv1_out = conv.conv2d(self.tilde_x, self.W, \
+                             filter_shape=filter_shape, \
+                                image_shape=image_shape, border_mode='valid')
+
+    
+    self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+
+    
+    da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
+                       filter_shape[3] ]
+    da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \
+                         image_shape[3]-filter_shape[3]+1 ]
+    initial_W_prime =  numpy.asarray( numpy.random.uniform( \
+              low = -numpy.sqrt(6./(fan_in+fan_out)), \
+              high = numpy.sqrt(6./(fan_in+fan_out)), \
+              size = da_filter_shape), dtype = theano.config.floatX)
+    self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
+
+    #import pdb;pdb.set_trace()
+
+    conv2_out = conv.conv2d(self.y, self.W_prime, \
+                               filter_shape = da_filter_shape, image_shape = da_image_shape ,\
+                                border_mode='full')
+
+    self.z =  (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
+
+    scaled_x = (self.x + center) / scale
+
+    self.L = - T.sum( scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1 )
+
+    self.cost = T.mean(self.L)
+
+    self.params = [ self.W, self.b, self.b_prime ] 
+ 
+ 
+
+class LeNetConvPoolLayer(object):
+    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
+        assert image_shape[1]==filter_shape[1]
+        self.input = input
+  
+        W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
+        self.W = theano.shared(value = W_values)
+ 
+        b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
+        self.b = theano.shared(value= b_values)
+ 
+        conv_out = conv.conv2d(input, self.W,
+                filter_shape=filter_shape, image_shape=image_shape)
+ 
+
+        fan_in = numpy.prod(filter_shape[1:])
+        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
+
+        W_bound = numpy.sqrt(6./(fan_in + fan_out))
+        self.W.value = numpy.asarray(
+                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
+                dtype = theano.config.floatX)
+  
+
+        pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True)
+ 
+        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+        self.params = [self.W, self.b]
+ 
+
+class SdA():
+    def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \
+                     conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \
+                     rng, n_out, pretrain_lr, finetune_lr):
+
+        self.layers = []
+        self.pretrain_functions = []
+        self.params = []
+        self.conv_n_layers = len(conv_hidden_layers_sizes)
+        self.mlp_n_layers = len(mlp_hidden_layers_sizes)
+         
+        index = T.lscalar() # index to a [mini]batch
+        self.x = T.dmatrix('x') # the data is presented as rasterized images
+        self.y = T.ivector('y') # the labels are presented as 1D vector of
+        
+ 
+        
+        for i in xrange( self.conv_n_layers ):
+
+            filter_shape=conv_hidden_layers_sizes[i][0]
+            image_shape=conv_hidden_layers_sizes[i][1]
+            max_poolsize=conv_hidden_layers_sizes[i][2]
+                
+            if i == 0 :
+                layer_input=self.x.reshape((batch_size,1,28,28))
+            else:
+                layer_input=self.layers[-1].output
+
+            layer = LeNetConvPoolLayer(rng, input=layer_input, \
+                                image_shape=image_shape, \
+                                filter_shape=filter_shape,poolsize=max_poolsize)
+            print 'Convolutional layer '+str(i+1)+' created'
+                
+            self.layers += [layer]
+            self.params += layer.params
+                
+            da_layer = dA_conv(corruption_level = corruption_levels[0],\
+                                  input = layer_input, \
+                                  shared_W = layer.W, shared_b = layer.b,\
+                                  filter_shape = filter_shape , image_shape = image_shape )
+                
+                
+            gparams = T.grad(da_layer.cost, da_layer.params)
+                
+            updates = {}
+            for param, gparam in zip(da_layer.params, gparams):
+                    updates[param] = param - gparam * pretrain_lr
+                    
+                
+            update_fn = theano.function([index], da_layer.cost, \
+                                        updates = updates,
+                                        givens = {
+                    self.x : train_set_x[index*batch_size:(index+1)*batch_size]} )
+             
+            self.pretrain_functions += [update_fn]
+
+        for i in xrange( self.mlp_n_layers ): 
+            if i == 0 :
+                input_size = n_ins_mlp
+            else:
+                input_size = mlp_hidden_layers_sizes[i-1]
+
+            if i == 0 :
+                if len( self.layers ) == 0 :
+                    layer_input=self.x
+                else :
+                    layer_input = self.layers[-1].output.flatten(2)
+            else:
+                layer_input = self.layers[-1].output
+     
+            layer = SigmoidalLayer(rng, layer_input, input_size,
+                                        mlp_hidden_layers_sizes[i] )
+              
+            self.layers += [layer]
+            self.params += layer.params
+            
+
+            print 'MLP layer '+str(i+1)+' created'
+            
+        self.logLayer = LogisticRegression(input=self.layers[-1].output, \
+                                                     n_in=mlp_hidden_layers_sizes[-1], n_out=n_out)
+        self.params += self.logLayer.params
+
+        cost = self.logLayer.negative_log_likelihood(self.y)
+
+        gparams = T.grad(cost, self.params)
+        updates = {}
+
+        for param,gparam in zip(self.params, gparams):
+            updates[param] = param - gparam*finetune_lr
+            
+        self.finetune = theano.function([index], cost,
+                updates = updates,
+                givens = {
+                  self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+ 
+
+        self.errors = self.logLayer.errors(self.y)
+ 
+ 
+ 
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
+                            pretrain_lr = 0.01, training_epochs = 1000, \
+                            dataset='mnist.pkl.gz'):
+
+    f = gzip.open(dataset,'rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    f.close()
+ 
+ 
+    def shared_dataset(data_xy):
+        data_x, data_y = data_xy
+        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
+        return shared_x, T.cast(shared_y, 'int32')
+ 
+
+    test_set_x, test_set_y = shared_dataset(test_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    train_set_x, train_set_y = shared_dataset(train_set)
+ 
+    batch_size = 500 # size of the minibatch
+ 
+
+    n_train_batches = train_set_x.value.shape[0] / batch_size
+    n_valid_batches = valid_set_x.value.shape[0] / batch_size
+    n_test_batches = test_set_x.value.shape[0] / batch_size
+ 
+    # allocate symbolic variables for the data
+    index = T.lscalar() # index to a [mini]batch
+    x = T.matrix('x') # the data is presented as rasterized images
+    y = T.ivector('y') # the labels are presented as 1d vector of
+                           # [int] labels
+    layer0_input = x.reshape((batch_size,1,28,28))
+    
+
+    # Setup the convolutional layers with their DAs(add as many as you want)
+    corruption_levels = [ 0.2, 0.2, 0.2]
+    rng = numpy.random.RandomState(1234)
+    ker1=2
+    ker2=2
+    conv_layers=[]
+    conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ])
+    conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ])
+
+    # Setup the MLP layers of the network
+    mlp_layers=[500]
+  
+    network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \
+                      train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size,
+                      conv_hidden_layers_sizes = conv_layers,  \
+                      mlp_hidden_layers_sizes = mlp_layers, \
+                      corruption_levels = corruption_levels , n_out = 10, \
+                      rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate )
+
+    test_model = theano.function([index], network.errors,
+             givens = {
+                network.x: test_set_x[index*batch_size:(index+1)*batch_size],
+                network.y: test_set_y[index*batch_size:(index+1)*batch_size]})
+ 
+    validate_model = theano.function([index], network.errors,
+           givens = {
+                network.x: valid_set_x[index*batch_size:(index+1)*batch_size],
+                network.y: valid_set_y[index*batch_size:(index+1)*batch_size]})
+
+
+
+    start_time = time.clock()
+    for i in xrange(len(network.layers)-len(mlp_layers)):
+        for epoch in xrange(pretraining_epochs):
+            for batch_index in xrange(n_train_batches):
+                c = network.pretrain_functions[i](batch_index)
+            print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c
+
+    patience = 10000 # look as this many examples regardless
+    patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS
+                                  # FOUND
+    improvement_threshold = 0.995 # a relative improvement of this much is
+
+    validation_frequency = min(n_train_batches, patience/2)
+ 
+ 
+    best_params = None
+    best_validation_loss = float('inf')
+    test_score = 0.
+    start_time = time.clock()
+ 
+    done_looping = False
+    epoch = 0
+ 
+    while (epoch < training_epochs) and (not done_looping):
+      epoch = epoch + 1
+      for minibatch_index in xrange(n_train_batches):
+ 
+        cost_ij = network.finetune(minibatch_index)
+        iter = epoch * n_train_batches + minibatch_index
+ 
+        if (iter+1) % validation_frequency == 0:
+            
+            validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
+            this_validation_loss = numpy.mean(validation_losses)
+            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                   (epoch, minibatch_index+1, n_train_batches, \
+                    this_validation_loss*100.))
+ 
+ 
+            # if we got the best validation score until now
+            if this_validation_loss < best_validation_loss:
+ 
+                #improve patience if loss improvement is good enough
+                if this_validation_loss < best_validation_loss * \
+                       improvement_threshold :
+                    patience = max(patience, iter * patience_increase)
+ 
+                # save best validation score and iteration number
+                best_validation_loss = this_validation_loss
+                best_iter = iter
+ 
+                # test it on the test set
+                test_losses = [test_model(i) for i in xrange(n_test_batches)]
+                test_score = numpy.mean(test_losses)
+                print((' epoch %i, minibatch %i/%i, test error of best '
+                      'model %f %%') %
+                             (epoch, minibatch_index+1, n_train_batches,
+                              test_score*100.))
+ 
+ 
+        if patience <= iter :
+                done_looping = True
+                break
+ 
+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%,'
+           'with test performance %f %%') %
+                 (best_validation_loss * 100., test_score*100.))
+    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+ 
+ 
+ 
+ 
+ 
+ 
+if __name__ == '__main__':
+    sgd_optimization_mnist()
+ 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/__init__.py
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/mnist_sda.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/mnist_sda.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+# coding: utf-8
+
+# Parameterize call to sgd_optimization for MNIST
+
+import numpy 
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+from sgd_optimization import SdaSgdOptimizer
+import cPickle, gzip
+from jobman import DD
+
+MNIST_LOCATION = '/u/savardf/datasets/mnist.pkl.gz'
+
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 2, \
+                            pretrain_lr = 0.1, training_epochs = 5, \
+                            dataset='mnist.pkl.gz'):
+    # Load the dataset 
+    f = gzip.open(dataset,'rb')
+    # this gives us train, valid, test (each with .x, .y)
+    dataset = cPickle.load(f)
+    f.close()
+
+    n_ins = 28*28
+    n_outs = 10
+
+    hyperparameters = DD({'finetuning_lr':learning_rate,
+                       'pretraining_lr':pretrain_lr,
+                       'pretraining_epochs_per_layer':pretraining_epochs,
+                       'max_finetuning_epochs':training_epochs,
+                       'hidden_layers_sizes':[100],
+                       'corruption_levels':[0.2],
+                       'minibatch_size':20})
+
+    optimizer = SdaSgdOptimizer(dataset, hyperparameters, n_ins, n_outs)
+    optimizer.pretrain()
+    optimizer.finetune()
+
+if __name__ == '__main__':
+    sgd_optimization_mnist(dataset=MNIST_LOCATION)
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/nist_sda.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/nist_sda.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,264 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import numpy 
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+import sys
+import os.path
+
+from sgd_optimization import SdaSgdOptimizer
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_croise_jobs
+
+TEST_CONFIG = False
+
+NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
+
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
+REDUCE_TRAIN_TO = None
+MAX_FINETUNING_EPOCHS = 1000
+if TEST_CONFIG:
+    JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
+    REDUCE_TRAIN_TO = 1000
+    MAX_FINETUNING_EPOCHS = 2
+
+JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
+JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
+EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"
+
+# There used to be
+# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
+# and
+#  'num_hidden_layers':[1,2,3]
+# but this is now handled by a special mechanism in SgdOptimizer
+# to reuse intermediate results (for the same training of lower layers,
+# we can test many finetuning_lr)
+JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
+        'pretraining_epochs_per_layer': [10,20],
+        'hidden_layers_sizes': [300,800],
+        'corruption_levels': [0.1,0.2],
+        'minibatch_size': [20],
+        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
+FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
+NUM_HIDDEN_LAYERS_VALS = [1,2,3]
+
+# Just useful for tests... minimal number of epochs
+DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
+                       'pretraining_lr':0.01,
+                       'pretraining_epochs_per_layer':1,
+                       'max_finetuning_epochs':1,
+                       'hidden_layers_sizes':[1000],
+                       'corruption_levels':[0.2],
+                       'minibatch_size':20})
+
+def jobman_entrypoint(state, channel):
+    state = copy.copy(state)
+
+    print "Will load NIST"
+    nist = NIST(20)
+    print "NIST loaded"
+
+    rtt = None
+    if state.has_key('reduce_train_to'):
+        rtt = state['reduce_train_to']
+    elif REDUCE_TRAIN_TO:
+        rtt = REDUCE_TRAIN_TO
+
+    if rtt:
+        print "Reducing training set to ", rtt, " examples"
+        nist.reduce_train_set(rtt)
+
+    train,valid,test = nist.get_tvt()
+    dataset = (train,valid,test)
+
+    n_ins = 32*32
+    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+
+    db = jobman.sql.db(JOBDB_RESULTS)
+    optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\
+                    input_divider=255.0, job_tree=True, results_db=db, \
+                    experiment=EXPERIMENT_PATH, \
+                    finetuning_lr_to_try=FINETUNING_LR_VALS, \
+                    num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
+    optimizer.train()
+
+    return channel.COMPLETE
+
+def estimate_pretraining_time(job):
+    # Estimate the pretraining time of one job (presumably in minutes).
+    job = DD(job)
+    # time spent on pretraining estimated as O(n^2) where n=num hiddens
+    # no need to multiply by num_hidden_layers, as results from num=1
+    # is reused for num=2, or 3, so in the end we get the same time
+    # as if we were training 3 times a single layer
+    # constants:
+    # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
+    # - 12 mins to finetune (per 1 epoch)
+    # the job_tree trick gives a 5x speedup on pretraining time (reuse across
+    # finetuning_lr) and a further x2 for reusing lower layers across
+    # num_hidden_layers; the key is 'hidden_layers_sizes', as in JOB_VALS
+    return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \
+            * job.hidden_layers_sizes * job.hidden_layers_sizes)
+
+def estimate_total_time():
+    # Print rough total-time estimates (hours) with and without job-tree reuse.
+    # NOTE(review): 12*20 looks like finetune mins/epoch * epochs; old comment said 30 - confirm.
+    jobs = produit_croise_jobs(JOB_VALS)
+    sumtime = 0.0
+    sum_without = 0.0
+    for job in jobs:
+        sumtime += estimate_pretraining_time(job)
+    sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
+    sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
+    print "num jobs=", len(jobs)
+    print "estimate", sumtime/60, " hours"
+    print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without
+
+def jobman_insert_nist():
+    jobs = produit_croise_jobs(JOB_VALS)
+
+    db = jobman.sql.db(JOBDB_JOBS)
+    for job in jobs:
+        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
+        jobman.sql.insert_dict(job, db)
+
+    print "inserted"
+
+class NIST:
+    def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
+        global NIST_ALL_LOCATION
+
+        self.minibatch_size = minibatch_size
+        self.basepath = basepath and basepath or NIST_ALL_LOCATION
+
+        self.set_filenames()
+
+        # arrays of 2 elements: .x, .y
+        self.train = [None, None]
+        self.test = [None, None]
+
+        self.load_train_test()
+
+        self.valid = [[], []]
+        self.split_train_valid()
+        if reduce_train_to:
+            self.reduce_train_set(reduce_train_to)
+
+    def get_tvt(self):
+        return self.train, self.valid, self.test
+
+    def set_filenames(self):
+        self.train_files = ['all_train_data.ft',
+                                'all_train_labels.ft']
+
+        self.test_files = ['all_test_data.ft',
+                            'all_test_labels.ft']
+
+    def load_train_test(self):
+        self.load_data_labels(self.train_files, self.train)
+        self.load_data_labels(self.test_files, self.test)
+
+    def load_data_labels(self, filenames, pair):
+        for i, fn in enumerate(filenames):
+            f = open(os.path.join(self.basepath, fn))
+            pair[i] = filetensor.read(f)
+            f.close()
+
+    def reduce_train_set(self, max):
+        self.train[0] = self.train[0][:max]
+        self.train[1] = self.train[1][:max]
+
+        if max < len(self.test[0]):
+            for ar in (self.test, self.valid):
+                ar[0] = ar[0][:max]
+                ar[1] = ar[1][:max]
+
+    def split_train_valid(self):
+        # Hold out the last len(test) training examples as the validation set.
+        # An explicit, clamped split index is used because [:-0] would empty
+        # the training set when the test set is empty (assumes len(x) == len(y)).
+        split = max(len(self.train[0]) - len(self.test[0]), 0)
+
+        self.valid[0] = self.train[0][split:]
+        self.valid[1] = self.train[1][split:]
+
+        self.train[0] = self.train[0][:split]
+        self.train[1] = self.train[1][:split]
+
+def test_load_nist():
+    print "Will load NIST"
+
+    import time
+    t1 = time.time()
+    nist = NIST(20)
+    t2 = time.time()
+
+    print "NIST loaded. time delta = ", t2-t1
+
+    tr,v,te = nist.get_tvt()
+
+    print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
+
+    raw_input("Press any key")
+
+# hp for hyperparameters
+def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
+    global DEFAULT_HP_NIST
+    hp = hp and hp or DEFAULT_HP_NIST
+
+    print "Will load NIST"
+
+    import time
+    t1 = time.time()
+    nist = NIST(20, reduce_train_to=100)
+    t2 = time.time()
+
+    print "NIST loaded. time delta = ", t2-t1
+
+    train,valid,test = nist.get_tvt()
+    dataset = (train,valid,test)
+
+    print train[0][15]
+    print type(train[0][1])
+
+
+    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
+
+    n_ins = 32*32
+    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+
+    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
+    optimizer.train()
+
+if __name__ == '__main__':
+    # Command-line dispatch: the first argument selects the action; with no
+    # (or an unknown) argument, sgd_optimization_nist() is run.
+    import sys
+    args = sys.argv[1:]
+    if len(args) > 0 and args[0] == 'load_nist':
+        test_load_nist()
+    elif len(args) > 0 and args[0] == 'jobman_insert':
+        jobman_insert_nist()
+    elif len(args) > 0 and args[0] == 'test_job_tree':
+        # dont forget to comment out sql.inserts and make reduce_train_to=100
+        print "TESTING JOB TREE"
+        # jobman_entrypoint returns channel.COMPLETE (attribute access), which
+        # a plain dict does not support; DD allows it.
+        chanmock = DD({'COMPLETE':0})
+        hp = copy.copy(DEFAULT_HP_NIST)
+        hp.update({'reduce_train_to':100})
+        jobman_entrypoint(hp, chanmock)
+    elif len(args) > 0 and args[0] == 'estimate':
+        estimate_total_time()
+    else:
+        sgd_optimization_nist()
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/sgd_optimization.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/sgd_optimization.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,270 @@
+#!/usr/bin/python
+# coding: utf-8
+
+# Generic SdA optimization loop, adapted from the deeplearning.net tutorial
+
+import numpy 
+import theano
+import time
+import theano.tensor as T
+import copy
+import sys
+
+from jobman import DD
+import jobman, jobman.sql
+
+from stacked_dae import SdA
+
+def shared_dataset(data_xy):
+    data_x, data_y = data_xy
+    # Wrap both arrays in Theano shared variables exactly as given. An earlier
+    # variant (now disabled) converted them to theano.config.floatX and cast
+    # the labels to 'int32'; no dtype conversion is applied here.
+    shared_x = theano.shared(data_x)
+    shared_y = theano.shared(data_y)
+    return shared_x, shared_y
+
+class SdaSgdOptimizer:
+    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\
+                job_tree=False, results_db=None,\
+                experiment="",\
+                num_hidden_layers_to_try=[1,2,3], \
+                finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]):
+
+        self.dataset = dataset
+        self.hp = copy.copy(hyperparameters)
+        self.n_ins = n_ins
+        self.n_outs = n_outs
+        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
+
+        self.job_tree = job_tree
+        self.results_db = results_db
+        self.experiment = experiment
+        if self.job_tree:
+            assert(not results_db is None)
+            # these hp should not be there, so we insert default values
+            # we use 3 hidden layers as we'll iterate through 1,2,3
+            self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway
+            cl = self.hp.corruption_levels
+            nh = self.hp.hidden_layers_sizes
+            self.hp.corruption_levels = [cl,cl,cl]
+            self.hp.hidden_layers_sizes = [nh,nh,nh]
+            
+        self.num_hidden_layers_to_try = num_hidden_layers_to_try
+        self.finetuning_lr_to_try = finetuning_lr_to_try
+
+        self.printout_frequency = 1000
+
+        self.rng = numpy.random.RandomState(1234)
+
+        self.init_datasets()
+        self.init_classifier()
+     
+    def init_datasets(self):
+        print "init_datasets"
+        train_set, valid_set, test_set = self.dataset
+        self.test_set_x, self.test_set_y = shared_dataset(test_set)
+        self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
+        self.train_set_x, self.train_set_y = shared_dataset(train_set)
+
+        # compute number of minibatches for training, validation and testing
+        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
+        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
+        self.n_test_batches  = self.test_set_x.value.shape[0]  / self.hp.minibatch_size
+
+    def init_classifier(self):
+        print "Constructing classifier"
+        # construct the stacked denoising autoencoder class
+        self.classifier = SdA( \
+                          train_set_x= self.train_set_x, \
+                          train_set_y = self.train_set_y,\
+                          batch_size = self.hp.minibatch_size, \
+                          n_ins= self.n_ins, \
+                          hidden_layers_sizes = self.hp.hidden_layers_sizes, \
+                          n_outs = self.n_outs, \
+                          corruption_levels = self.hp.corruption_levels,\
+                          rng = self.rng,\
+                          pretrain_lr = self.hp.pretraining_lr, \
+                          finetune_lr = self.hp.finetuning_lr,\
+                          input_divider = self.input_divider )
+
+    def train(self):
+        self.pretrain()
+        if not self.job_tree:
+            # if job_tree is True, finetuning was already performed
+            self.finetune()
+
+    def pretrain(self):
+        print "STARTING PRETRAINING"
+
+        printout_acc = 0.0
+        last_error = 0.0
+
+        start_time = time.clock()  
+        ## Pre-train layer-wise 
+        for i in xrange(self.classifier.n_layers):
+            # go through pretraining epochs 
+            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
+                # go through the training set
+                for batch_index in xrange(self.n_train_batches):
+                    c = self.classifier.pretrain_functions[i](batch_index)
+
+                    printout_acc += c / self.printout_frequency
+                    if (batch_index+1) % self.printout_frequency == 0:
+                        print batch_index, "reconstruction cost avg=", printout_acc
+                        last_error = printout_acc
+                        printout_acc = 0.0
+                        
+                print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
+
+            self.job_splitter(i+1, time.clock()-start_time, last_error)
+     
+        end_time = time.clock()
+
+        print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
+
+    # Save time by reusing intermediate results
+    def job_splitter(self, current_pretraining_layer, pretraining_time, last_error):
+
+        state_copy = None
+        original_classifier = None
+
+        if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try:
+            for lr in self.finetuning_lr_to_try:
+                sys.stdout.flush()
+                sys.stderr.flush()
+
+                state_copy = copy.copy(self.hp)
+
+                self.hp.update({'num_hidden_layers':current_pretraining_layer, \
+                            'finetuning_lr':lr,\
+                            'pretraining_time':pretraining_time,\
+                            'last_reconstruction_error':last_error})
+
+                original_classifier = self.classifier
+                print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means()
+                self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr)
+                
+                self.finetune()
+            
+                self.insert_finished_job()
+
+                print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means()
+                print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means()
+                self.classifier = original_classifier
+                self.hp = state_copy
+
+    def insert_finished_job(self):
+        job = copy.copy(self.hp)
+        job[jobman.sql.STATUS] = jobman.sql.DONE
+        job[jobman.sql.EXPERIMENT] = self.experiment
+
+        # don't try to store arrays in db
+        job['hidden_layers_sizes'] = job.hidden_layers_sizes[0]
+        job['corruption_levels'] = job.corruption_levels[0]
+
+        print "Will insert finished job", job
+        jobman.sql.insert_dict(jobman.flatten(job), self.results_db)
+
+    def finetune(self):
+        print "STARTING FINETUNING"
+
+        index   = T.lscalar()    # index to a [mini]batch 
+        minibatch_size = self.hp.minibatch_size
+
+        # create a function to compute the mistakes that are made by the model
+        # on the validation set, or testing set
+        test_model = theano.function([index], self.classifier.errors,
+                 givens = {
+                   self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
+                   self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+
+        validate_model = theano.function([index], self.classifier.errors,
+                givens = {
+                   self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
+                   self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+
+
+        # early-stopping parameters
+        patience              = 10000 # look as this many examples regardless
+        patience_increase     = 2.    # wait this much longer when a new best is 
+                                      # found
+        improvement_threshold = 0.995 # a relative improvement of this much is 
+                                      # considered significant
+        validation_frequency  = min(self.n_train_batches, patience/2)
+                                      # go through this many 
+                                      # minibatches before checking the network
+                                      # on the validation set; in this case we 
+                                      # check every epoch 
+
+        best_params          = None
+        best_validation_loss = float('inf')
+        test_score           = 0.
+        start_time = time.clock()
+
+        done_looping = False
+        epoch = 0
+
+        printout_acc = 0.0
+
+        if not self.hp.has_key('max_finetuning_epochs'):
+            self.hp.max_finetuning_epochs = 1000
+
+        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
+            epoch = epoch + 1
+            for minibatch_index in xrange(self.n_train_batches):
+
+                cost_ij = self.classifier.finetune(minibatch_index)
+                iter    = epoch * self.n_train_batches + minibatch_index
+
+                printout_acc += cost_ij / float(self.printout_frequency * minibatch_size)
+                if (iter+1) % self.printout_frequency == 0:
+                    print iter, "cost avg=", printout_acc
+                    printout_acc = 0.0
+
+                if (iter+1) % validation_frequency == 0: 
+                    
+                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
+                    this_validation_loss = numpy.mean(validation_losses)
+                    print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                           (epoch, minibatch_index+1, self.n_train_batches, \
+                            this_validation_loss*100.))
+
+
+                    # if we got the best validation score until now
+                    if this_validation_loss < best_validation_loss:
+
+                        #improve patience if loss improvement is good enough
+                        if this_validation_loss < best_validation_loss *  \
+                               improvement_threshold :
+                            patience = max(patience, iter * patience_increase)
+
+                        # save best validation score and iteration number
+                        best_validation_loss = this_validation_loss
+                        best_iter = iter
+
+                        # test it on the test set
+                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
+                        test_score = numpy.mean(test_losses)
+                        print(('     epoch %i, minibatch %i/%i, test error of best '
+                              'model %f %%') % 
+                                     (epoch, minibatch_index+1, self.n_train_batches,
+                                      test_score*100.))
+
+
+            if patience <= iter :
+                done_looping = True
+                break
+
+        end_time = time.clock()
+        self.hp.update({'finetuning_time':end_time-start_time,\
+                    'best_validation_error':best_validation_loss,\
+                    'test_score':test_score,
+                    'num_finetuning_epochs':epoch})
+        print(('Optimization complete with best validation score of %f %%,'
+               'with test performance %f %%') %  
+                     (best_validation_loss * 100., test_score*100.))
+        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/stacked_dae.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/stacked_dae.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,287 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import numpy 
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+from utils import update_locals
+
+class LogisticRegression(object):
+    """Softmax output layer: p(y|x) = softmax(dot(input, W) + b)."""
+    def __init__(self, input, n_in, n_out):
+        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
+        self.W = theano.shared( value=numpy.zeros((n_in,n_out),
+                                            dtype = theano.config.floatX) )
+        # initialize the biases b as a vector of n_out 0s
+        self.b = theano.shared( value=numpy.zeros((n_out,),
+                                            dtype = theano.config.floatX) )
+        # compute vector of class-membership probabilities in symbolic form
+        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+
+        # symbolic prediction: the class whose probability is maximal
+        self.y_pred=T.argmax(self.p_y_given_x, axis=1)
+
+        self.params = [self.W, self.b]  # parameters of this layer
+
+    def negative_log_likelihood(self, y):
+        """Return the mean negative log-probability assigned to labels y."""
+        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+
+    def errors(self, y):
+        """Return the mean 0/1 error of y_pred against integer labels y."""
+        if y.ndim != self.y_pred.ndim:
+            raise TypeError('y should have the same shape as self.y_pred',
+                ('y', y.type, 'y_pred', self.y_pred.type))
+
+        # check if y is of the correct datatype
+        if y.dtype.startswith('int'):
+            # the T.neq operator returns a vector of 0s and 1s, where 1
+            # represents a mistake in prediction
+            return T.mean(T.neq(self.y_pred, y))
+        else:
+            raise NotImplementedError()
+
+
+class SigmoidalLayer(object):
+    """Fully connected layer computing sigmoid(dot(input, W) + b)."""
+    def __init__(self, rng, input, n_in, n_out):
+        self.input = input
+
+        # W ~ U(-bound, bound) with bound = sqrt(6 / (n_in + n_out)), as floatX
+        bound = numpy.sqrt(6./(n_in+n_out))
+        drawn = rng.uniform(low = -bound, high = bound, size = (n_in, n_out))
+        self.W = theano.shared(value = numpy.asarray(drawn, dtype = theano.config.floatX))
+        # the bias starts at zero, one entry per output unit
+        zeros = numpy.zeros((n_out,), dtype= theano.config.floatX)
+        self.b = theano.shared(value= zeros)
+
+        self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
+        self.params = [self.W, self.b]
+
+
+
+class dA(object):
+  """Denoising autoencoder with tied weights; W and b may be shared with a layer."""
+  def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\
+               input = None, shared_W = None, shared_b = None):
+    self.n_visible = n_visible
+    self.n_hidden  = n_hidden
+    # create a Theano random generator that gives symbolic random values
+    theano_rng = RandomStreams()
+
+    if shared_W is not None and shared_b is not None:
+        self.W = shared_W
+        self.b = shared_b
+    else:
+        # initial values for weights and biases
+        # note : W' was written as `W_prime` and b' as `b_prime`
+
+        # W is initialized with `initial_W` which is uniformly sampled
+        # between -sqrt(6./(n_visible+n_hidden)) and sqrt(6./(n_visible+n_hidden));
+        # the output of uniform is converted using asarray to dtype
+        # theano.config.floatX so that the code is runnable on GPU
+        initial_W = numpy.asarray( numpy.random.uniform( \
+              low = -numpy.sqrt(6./(n_hidden+n_visible)), \
+              high = numpy.sqrt(6./(n_hidden+n_visible)), \
+              size = (n_visible, n_hidden)), dtype = theano.config.floatX)
+        initial_b       = numpy.zeros(n_hidden, dtype = theano.config.floatX)
+
+
+        # theano shared variables for weights and biases
+        self.W       = theano.shared(value = initial_W,       name = "W")
+        self.b       = theano.shared(value = initial_b,       name = "b")
+
+    # b_prime is created as floatX, matching the dtype used for W and b
+    initial_b_prime= numpy.zeros(n_visible, dtype = theano.config.floatX)
+    # tied weights, therefore W_prime is W transpose
+    self.W_prime = self.W.T
+    self.b_prime = theano.shared(value = initial_b_prime, name = "b'")
+
+    # if no input is given, generate a variable representing the input
+    if input is None:
+        # we use a matrix because we expect a minibatch of several examples,
+        # each example being a row
+        self.x = T.dmatrix(name = 'input')
+    else:
+        self.x = input
+    # Equation (1)
+    # keep a fraction 1 - corruption_level of the inputs and zero-out the rest
+    # note : first argument of theano.rng.binomial is the shape(size) of
+    #        random numbers that it should produce
+    #        second argument is the number of trials
+    #        third argument is the probability of success of any trial
+    #
+    #        this will produce an array of 0s and 1s where 1 has a
+    #        probability of 1 - ``corruption_level`` and 0 with
+    #        ``corruption_level``
+    self.tilde_x  = theano_rng.binomial( self.x.shape,  1,  1 - corruption_level) * self.x
+    # Equation (2)
+    # note  : y is stored as an attribute of the class so that it can be
+    #         used later when stacking dAs.
+    self.y   = T.nnet.sigmoid(T.dot(self.tilde_x, self.W      ) + self.b)
+    # Equation (3)
+    self.z   = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+    # Equation (4)
+    # note : we sum over the size of a datapoint; if we are using minibatches,
+    #        L will  be a vector, with one entry per example in minibatch
+    self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+    # note : L is now a vector, where each element is the cross-entropy cost
+    #        of the reconstruction of the corresponding example of the
+    #        minibatch. We need to compute the average of all these to get
+    #        the cost of the minibatch
+    self.cost = T.mean(self.L)
+
+    self.params = [ self.W, self.b, self.b_prime ]
+
+
+
+
+class SdA(object):
+    def __init__(self, train_set_x, train_set_y, batch_size, n_ins, 
+                 hidden_layers_sizes, n_outs, 
+                 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
+        """Build the sigmoid layers, a dA pretraining function per layer
+        (self.pretrain_functions), the logistic output layer and self.finetune.
+
+        Raises ValueError when hidden_layers_sizes is empty."""
+        update_locals(self, locals())
+        self.layers             = []
+        self.pretrain_functions = []
+        self.params             = []
+        self.n_layers           = len(hidden_layers_sizes)
+
+        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
+
+        if len(hidden_layers_sizes) < 1 :
+            raise ValueError(' You must have at least one hidden layer ')
+        # allocate symbolic variables for the data
+        index   = T.lscalar()    # index to a [mini]batch 
+        self.x  = T.matrix('x')  # the data is presented as rasterized images
+        self.y  = T.ivector('y') # the labels are presented as 1D vector of 
+                                 # [int] labels
+
+        for i in xrange( self.n_layers ):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of 
+            # the layer below or the input size if we are on the first layer
+            if i == 0 :
+                input_size = n_ins
+            else:
+                input_size = hidden_layers_sizes[i-1]
+
+            # the input to this layer is either the activation of the hidden
+            # layer below or the input of the SdA if you are on the first
+            # layer
+            if i == 0 : 
+                layer_input = self.x
+            else:
+                layer_input = self.layers[-1].output
+
+            layer = SigmoidalLayer(rng, layer_input, input_size, 
+                                   hidden_layers_sizes[i] )
+            # add the layer to the list of layers and collect its parameters
+            self.layers += [layer]
+            self.params += layer.params
+
+            # Construct a denoising autoencoder sharing W and b with this layer
+            # NOTE(review): corruption_levels[0] is used for every layer -- confirm
+            dA_layer = dA(input_size, hidden_layers_sizes[i], \
+                          corruption_level = corruption_levels[0],\
+                          input = layer_input, \
+                          shared_W = layer.W, shared_b = layer.b)
+
+            # Construct a function that trains this dA
+            # compute gradients of layer parameters
+            gparams = T.grad(dA_layer.cost, dA_layer.params)
+            # compute the list of updates
+            updates = {}
+            for param, gparam in zip(dA_layer.params, gparams):
+                updates[param] = param - gparam * pretrain_lr
+
+            # create a function that trains the dA
+            update_fn = theano.function([index], dA_layer.cost, \
+                  updates = updates,
+                  givens = { 
+                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
+            # collect this function into a list
+            self.pretrain_functions += [update_fn]
+
+        # We now need to add a logistic layer on top of the MLP
+        self.logLayer = LogisticRegression(\
+                         input = self.layers[-1].output,\
+                         n_in = hidden_layers_sizes[-1], n_out = n_outs)
+
+        self.params += self.logLayer.params
+        # construct a function that implements one step of finetuning
+
+        # compute the cost, defined as the negative log likelihood 
+        cost = self.logLayer.negative_log_likelihood(self.y)
+        # compute the gradients with respect to the model parameters
+        gparams = T.grad(cost, self.params)
+        # compute list of updates
+        updates = {}
+        for param,gparam in zip(self.params, gparams):
+            updates[param] = param - gparam*finetune_lr
+
+        self.finetune = theano.function([index], cost, 
+                updates = updates,
+                givens = {
+                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
+                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+
+        self.errors = self.logLayer.errors(self.y)
+
+    @classmethod
+    def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None):
+        """Build a new SdA with obj's first num_hidden_layers layers and copy their values."""
+        assert(num_hidden_layers <= obj.n_layers)
+        if new_finetuning_lr is None:  # only None means "reuse obj's rate"
+            new_finetuning_lr = obj.finetune_lr
+
+        new_sda = cls(train_set_x= obj.train_set_x, \
+                      train_set_y = obj.train_set_y,\
+                      batch_size = obj.batch_size, \
+                      n_ins= obj.n_ins, \
+                      hidden_layers_sizes = obj.hidden_layers_sizes[:num_hidden_layers], \
+                      n_outs = obj.n_outs, \
+                      corruption_levels = obj.corruption_levels[:num_hidden_layers],\
+                      rng = obj.rng,\
+                      pretrain_lr = obj.pretrain_lr, \
+                      finetune_lr = new_finetuning_lr, \
+                      input_divider = obj.input_divider )
+
+        # new_sda.layers contains only the hidden layers actually
+        for i, layer in enumerate(new_sda.layers):
+            original_layer = obj.layers[i]
+            for p1,p2 in zip(layer.params, original_layer.params):
+                p1.value = p2.value.copy()
+
+        return new_sda
+
+    def get_params_copy(self):
+        return copy.deepcopy(self.params)
+
+    def set_params_from_copy(self, copy):
+        # We don't want to replace the var, as the functions have pointers in there
+        # We only want to replace values.
+        for i, p in enumerate(self.params):
+            p.value = copy[i].value
+
+    def get_params_means(self):
+        """Return a list with numpy.mean of each parameter's current value."""
+        # one entry per variable in self.params, in the same order
+        means = [numpy.mean(param.value) for param in self.params]
+        return means
+
+if __name__ == '__main__':
+    import sys
+    args = sys.argv[1:]
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/utils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/utils.py	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+
+from jobman import DD
+
+# from pylearn codebase
+def update_locals(obj, dct):
+    """Set every entry of dct except 'self' as an attribute on obj."""
+    # filter into a new dict so the caller's mapping is left unmodified
+    obj.__dict__.update(dict((k, v) for k, v in dct.items() if k != 'self'))
+
+def produit_croise_jobs(val_dict):
+    """Build one job per combination of the candidate values in val_dict."""
+    jobs = [DD()]
+
+    for name in val_dict.keys():
+        # extend every partial job built so far with each value of `name`;
+        # values vary in the outer loop, partial jobs in the inner one
+        grown = []
+        for candidate in val_dict[name]:
+            for partial in jobs:
+                extended = partial.copy()
+                extended.update({name: candidate})
+                grown.append(extended)
+        jobs = grown
+    return jobs
+
+def test_produit_croise_jobs():
+    vals = {'a': [1,2], 'b': [3,4,5]}
+    print produit_croise_jobs(vals)
+
+
+# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
+"""Simple module for getting amount of memory used by a specified user's
+processes on a UNIX system.
+It uses UNIX ps utility to get the memory usage for a specified username and
+pipe it to awk for summing up per application memory usage and return the total.
+Python's Popen() from subprocess module is used for spawning ps and awk.
+
+"""
+
+import subprocess
+
+class MemoryMonitor(object):
+
+    def __init__(self, username):
+        """Create new MemoryMonitor instance."""
+        self.username = username
+
+    def usage(self):
+        """Return int containing memory used by user's processes."""
+        self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
+                                        shell=True,
+                                        stdout=subprocess.PIPE,
+                                        )
+        self.stdout_list = self.process.communicate()[0].split('\n')
+        return int(self.stdout_list[0])
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/.DS_Store
Binary file pycaptcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/BUGS
--- a/pycaptcha/BUGS	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-Known bugs:
-
-- PersistentFactory() is almost certainly horrible at concurrent access
-- Tests are never invalidated with PersistentStorage(), as they aren't written back to the database
-- All files in Captcha/data are installed, including silly things like .svn directories and *~
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/COPYING
--- a/pycaptcha/COPYING	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-Copyright (c) 2004 Micah Dowty
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of 
-this software and associated documentation files (the "Software"), to deal in 
-the Software without restriction, including without limitation the rights to 
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
-of the Software, and to permit persons to whom the Software is furnished to do 
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all 
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
-SOFTWARE. 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/.DS_Store
Binary file pycaptcha/Captcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Base.py
--- a/pycaptcha/Captcha/Base.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-""" Captcha.Base
-
-Base class for all types of CAPTCHA tests. All tests have one or
-more solution, determined when the test is generated. Solutions
-can be any python object,
-
-All tests can be solved by presenting at least some preset number
-of correct solutions. Some tests may only have one solution and require
-one solution, but other tests may require N correct solutions of M
-possible solutions.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-import random, string, time, shelve
-
-__all__ = ["BaseCaptcha", "Factory", "PersistentFactory"]
-
-
-def randomIdentifier(alphabet = string.ascii_letters + string.digits,
-                     length = 24):
-    return "".join([random.choice(alphabet) for i in xrange(length)])
-
-
-class BaseCaptcha(object):
-    """Base class for all CAPTCHA tests"""
-    # Subclasses can override these to set the solution criteria
-    minCorrectSolutions = 1
-    maxIncorrectSolutions = 0
-
-    def __init__(self):
-        self.solutions = []
-        self.valid = True
-
-        # Each test has a unique identifier, used to refer to that test
-        # later, and a creation time so it can expire later.
-        self.id = randomIdentifier()
-        self.creationTime = time.time()
-
-    def addSolution(self, solution):
-        self.solutions.append(solution)
-
-    def testSolutions(self, solutions):
-        """Test whether the given solutions are sufficient for this CAPTCHA.
-           A given CAPTCHA can only be tested once, after that it is invalid
-           and always returns False. This makes random guessing much less effective.
-           """
-        if not self.valid:
-            return False
-        self.valid = False
-
-        numCorrect = 0
-        numIncorrect = 0
-
-        for solution in solutions:
-            if solution in self.solutions:
-                numCorrect += 1
-            else:
-                numIncorrect += 1
-
-        return numCorrect >= self.minCorrectSolutions and \
-               numIncorrect <= self.maxIncorrectSolutions
-
-
-class Factory(object):
-    """Creates BaseCaptcha instances on demand, and tests solutions.
-       CAPTCHAs expire after a given amount of time, given in seconds.
-       The default is 15 minutes.
-       """
-    def __init__(self, lifetime=60*15):
-        self.lifetime = lifetime
-        self.storedInstances = {}
-
-    def new(self, cls, *args, **kwargs):
-        """Create a new instance of our assigned BaseCaptcha subclass, passing
-           it any extra arguments we're given. This stores the result for
-           later testing.
-           """
-        self.clean()
-        inst = cls(*args, **kwargs)
-        self.storedInstances[inst.id] = inst
-        return inst
-
-    def get(self, id):
-        """Retrieve the CAPTCHA with the given ID. If it's expired already,
-           this will return None. A typical web application will need to
-           new() a CAPTCHA when generating an html page, then get() it later
-           when its images or sounds must be rendered.
-           """
-        return self.storedInstances.get(id)
-
-    def clean(self):
-        """Removed expired tests"""
-        expiredIds = []
-        now = time.time()
-        for inst in self.storedInstances.itervalues():
-            if inst.creationTime + self.lifetime < now:
-                expiredIds.append(inst.id)
-        for id in expiredIds:
-            del self.storedInstances[id]
-
-    def test(self, id, solutions):
-        """Test the given list of solutions against the BaseCaptcha instance
-           created earlier with the given id. Returns True if the test passed,
-           False on failure. In either case, the test is invalidated. Returns
-           False in the case of an invalid id.
-           """
-        self.clean()
-        inst = self.storedInstances.get(id)
-        if not inst:
-            return False
-        result = inst.testSolutions(solutions)
-        return result
-
-
-class PersistentFactory(Factory):
-    """A simple persistent factory, for use in CGI or multi-process environments
-       where the state must remain across python interpreter sessions.
-       This implementation uses the 'shelve' module.
-       """
-    def __init__(self, filename, lifetime=60*15):
-        Factory.__init__(self, lifetime)
-	self.storedInstances = shelve.open(filename)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/File.py
--- a/pycaptcha/Captcha/File.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-""" Captcha.File
-
-Utilities for finding and picking random files from our 'data' directory
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-import os, random
-
-# Determine the data directory. This can be overridden after import-time if needed.
-dataDir = os.path.join(os.path.split(os.path.abspath(__file__))[0], "data")
-
-
-class RandomFileFactory(object):
-    """Given a list of files and/or directories, this picks a random file.
-       Directories are searched for files matching any of a list of extensions.
-       Files are relative to our data directory plus a subclass-specified base path.
-       """
-    extensions = []
-    basePath = "."
-
-    def __init__(self, *fileList):
-        self.fileList = fileList
-        self._fullPaths = None
-
-    def _checkExtension(self, name):
-        """Check the file against our given list of extensions"""
-        for ext in self.extensions:
-            if name.endswith(ext):
-                return True
-        return False
-
-    def _findFullPaths(self):
-        """From our given file list, find a list of full paths to files"""
-        paths = []
-        for name in self.fileList:
-            path = os.path.join(dataDir, self.basePath, name)
-            if os.path.isdir(path):
-                for content in os.listdir(path):
-                    if self._checkExtension(content):
-                        paths.append(os.path.join(path, content))
-            else:
-                paths.append(path)
-        return paths
-
-    def pick(self):
-        if self._fullPaths is None:
-            self._fullPaths = self._findFullPaths()
-        return random.choice(self._fullPaths)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Backgrounds.py
--- a/pycaptcha/Captcha/Visual/Backgrounds.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-""" Captcha.Visual.Backgrounds
-
-Background layers for visual CAPTCHAs
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-from Captcha.Visual import Layer, Pictures
-import random, os
-import ImageDraw, Image
-
-
-class SolidColor(Layer):
-    """A solid color background. Very weak on its own, but good
-       to combine with other backgrounds.
-       """
-    def __init__(self, color="white"):
-        self.color = color
-
-    def render(self, image):
-        image.paste(self.color)
-
-
-class Grid(Layer):
-    """A grid of lines, with a given foreground color.
-       The size is given in pixels. The background is transparent,
-       so another layer (like SolidColor) should be put behind it.
-       """
-    def __init__(self, size=16, foreground="black"):
-        self.size = size
-        self.foreground = foreground
-        self.offset = (random.uniform(0, self.size),
-                       random.uniform(0, self.size))
-
-    def render(self, image):
-        draw = ImageDraw.Draw(image)
-
-        for i in xrange(image.size[0] / self.size + 1):
-            draw.line( (i*self.size+self.offset[0], 0,
-                        i*self.size+self.offset[0], image.size[1]), fill=self.foreground)
-
-        for i in xrange(image.size[0] / self.size + 1):
-            draw.line( (0, i*self.size+self.offset[1],
-                        image.size[0], i*self.size+self.offset[1]), fill=self.foreground)
-
-
-class TiledImage(Layer):
-    """Pick a random image and a random offset, and tile the rendered image with it"""
-    def __init__(self, imageFactory=Pictures.abstract):
-        self.tileName = imageFactory.pick()
-        self.offset = (random.uniform(0, 1),
-                       random.uniform(0, 1))
-
-    def render(self, image):
-        tile = Image.open(self.tileName)
-        for j in xrange(-1, int(image.size[1] / tile.size[1]) + 1):
-            for i in xrange(-1, int(image.size[0] / tile.size[0]) + 1):
-                dest = (int((self.offset[0] + i) * tile.size[0]),
-                        int((self.offset[1] + j) * tile.size[1]))
-                image.paste(tile, dest)
-
-
-class CroppedImage(Layer):
-    """Pick a random image, cropped randomly. Source images should be larger than the CAPTCHA."""
-    def __init__(self, imageFactory=Pictures.nature):
-        self.imageName = imageFactory.pick()
-        self.align = (random.uniform(0,1),
-                      random.uniform(0,1))
-
-    def render(self, image):
-        i = Image.open(self.imageName)
-        image.paste(i, (int(self.align[0] * (image.size[0] - i.size[0])),
-                        int(self.align[1] * (image.size[1] - i.size[1]))))
-
-
-class RandomDots(Layer):
-    """Draw random colored dots"""
-    def __init__(self, colors=("white", "black"), dotSize=4, numDots=400):
-        self.colors = colors
-        self.dotSize = dotSize
-        self.numDots = numDots
-	self.seed = random.random()
-
-    def render(self, image):
-        r = random.Random(self.seed)
-        for i in xrange(self.numDots):
-            bx = int(r.uniform(0, image.size[0]-self.dotSize))
-            by = int(r.uniform(0, image.size[1]-self.dotSize))
-            image.paste(r.choice(self.colors), (bx, by,
-                                                bx+self.dotSize-1,
-                                                by+self.dotSize-1))
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Base.py
--- a/pycaptcha/Captcha/Visual/Base.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-""" Captcha.Visual.BAse
-
-Base classes for visual CAPTCHAs. We use the Python Imaging Library
-to manipulate these images.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-import Captcha
-import Image
-
-__all__ = ['ImageCaptcha', 'Layer']
-
-
-class ImageCaptcha(Captcha.BaseCaptcha):
-    """Base class for image-based CAPTCHA tests.
-       The render() function generates the CAPTCHA image at the given size by
-       combining Layer instances from self.layers, which should be created by
-       the subclass-defined getLayers().
-       """
-    defaultSize = (32,32)
-    # anciennement a defaultSize(256,96)
-    def __init__(self, *args, **kwargs):
-        Captcha.BaseCaptcha.__init__(self)
-        self._layers = self.getLayers(*args, **kwargs)
-
-    def getImage(self):
-        """Get a PIL image representing this CAPTCHA test, creating it if necessary"""
-        if not self._image:
-            self._image = self.render()
-        return self._image
-
-    def getLayers(self):
-        """Subclasses must override this to return a list of Layer instances to render.
-           Lists within the list of layers are recursively rendered.
-           """
-        return []
-
-    def render(self, size=None):
-        """Render this CAPTCHA, returning a PIL image"""
-        if size is None:
-            size = self.defaultSize
-        img = Image.new("L", size)
-       # img = Image.new("RGB", size)
-        return self._renderList(self._layers, Image.new("L", size))
-
-    def _renderList(self, l, img):
-        for i in l:
-            if type(i) == tuple or type(i) == list:
-                img = self._renderList(i, img)
-            else:
-                img = i.render(img) or img
-        return img
-
-
-class Layer(object):
-    """A renderable object representing part of a CAPTCHA.
-       The render() function should return approximately the same result, regardless
-       of the image size. This means any randomization must occur in the constructor.
-
-       If the render() function returns something non-None, it is taken as an image to
-       replace the current image with. This can be used to implement transformations
-       that result in a separate image without having to copy the results back to the first.
-       """
-    def render(self, img):
-        pass
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Distortions.py
--- a/pycaptcha/Captcha/Visual/Distortions.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-""" Captcha.Visual.Distortions
-
-Distortion layers for visual CAPTCHAs
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-from Captcha.Visual import Layer
-import ImageDraw, Image
-import random, math
-
-
-class WigglyBlocks(Layer):
-    """Randomly select and shift blocks of the image"""
-    def __init__(self, blockSize=3, sigma=0.01, iterations=300):
-        self.blockSize = blockSize
-        self.sigma = sigma
-        self.iterations = iterations
-        self.seed = random.random()
-
-    def render(self, image):
-        r = random.Random(self.seed)
-        for i in xrange(self.iterations):
-            # Select a block
-            bx = int(r.uniform(0, image.size[0]-self.blockSize))
-            by = int(r.uniform(0, image.size[1]-self.blockSize))
-            block = image.crop((bx, by, bx+self.blockSize-1, by+self.blockSize-1))
-
-            # Figure out how much to move it.
-            # The call to floor() is important so we always round toward
-            # 0 rather than to -inf. Just int() would bias the block motion.
-            mx = int(math.floor(r.normalvariate(0, self.sigma)))
-            my = int(math.floor(r.normalvariate(0, self.sigma)))
-
-            # Now actually move the block
-            image.paste(block, (bx+mx, by+my))
-
-
-class WarpBase(Layer):
-    """Abstract base class for image warping. Subclasses define a
-       function that maps points in the output image to points in the input image.
-       This warping engine runs a grid of points through this transform and uses
-       PIL's mesh transform to warp the image.
-       """
-    filtering = Image.BILINEAR
-    resolution = 10
-
-    def getTransform(self, image):
-        """Return a transformation function, subclasses should override this"""
-        return lambda x, y: (x, y)
-
-    def render(self, image):
-        r = self.resolution
-        xPoints = image.size[0] / r + 2
-        yPoints = image.size[1] / r + 2
-        f = self.getTransform(image)
-
-        # Create a list of arrays with transformed points
-        xRows = []
-        yRows = []
-        for j in xrange(yPoints):
-            xRow = []
-            yRow = []
-            for i in xrange(xPoints):
-                x, y = f(i*r, j*r)
-
-                # Clamp the edges so we don't get black undefined areas
-                x = max(0, min(image.size[0]-1, x))
-                y = max(0, min(image.size[1]-1, y))
-
-                xRow.append(x)
-                yRow.append(y)
-            xRows.append(xRow)
-            yRows.append(yRow)
-
-        # Create the mesh list, with a transformation for
-        # each square between points on the grid
-        mesh = []
-        for j in xrange(yPoints-1):
-            for i in xrange(xPoints-1):
-                mesh.append((
-                    # Destination rectangle
-                    (i*r, j*r,
-                     (i+1)*r, (j+1)*r),
-                    # Source quadrilateral
-                    (xRows[j  ][i  ], yRows[j  ][i  ],
-                     xRows[j+1][i  ], yRows[j+1][i  ],
-                     xRows[j+1][i+1], yRows[j+1][i+1],
-                     xRows[j  ][i+1], yRows[j  ][i+1]),
-                    ))
-
-        return image.transform(image.size, Image.MESH, mesh, self.filtering)
-
-
-class SineWarp(WarpBase):
-    """Warp the image using a random composition of sine waves"""
-
-    def __init__(self,
-                 amplitudeRange = (3, 6.5),
-                 periodRange    = (0.04, 0.1),
-                 ):
-        self.amplitude = random.uniform(*amplitudeRange)
-        self.period = random.uniform(*periodRange)
-        self.offset = (random.uniform(0, math.pi * 2 / self.period),
-                       random.uniform(0, math.pi * 2 / self.period))
-
-    def getTransform(self, image):
-        return (lambda x, y,
-                a = self.amplitude,
-                p = self.period,
-                o = self.offset:
-                (math.sin( (y+o[0])*p )*a + x,
-                 math.sin( (x+o[1])*p )*a + y))
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Pictures.py
--- a/pycaptcha/Captcha/Visual/Pictures.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-""" Captcha.Visual.Pictures
-
-Random collections of images
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-from Captcha import File
-import Image
-
-
-class ImageFactory(File.RandomFileFactory):
-    """A factory that generates random images from a list"""
-    extensions = [".png", ".jpeg"]
-    basePath = "pictures"
-
-
-abstract = ImageFactory("abstract")
-nature = ImageFactory("nature")
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Tests.py
--- a/pycaptcha/Captcha/Visual/Tests.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-""" Captcha.Visual.Tests
-
-Visual CAPTCHA tests
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-from Captcha.Visual import Text, Backgrounds, Distortions, ImageCaptcha
-from Captcha import Words
-import random
-
-__all__ = ["PseudoGimpy", "AngryGimpy", "AntiSpam"]
-
-
-class PseudoGimpy(ImageCaptcha):
-    """A relatively easy CAPTCHA that's somewhat easy on the eyes"""
-    def getLayers(self):
-        word = Words.defaultWordList.pick()
-        self.addSolution(word)
-        return [
-           # random.choice([
-           #     Backgrounds.CroppedImage(),
-           #     Backgrounds.TiledImage(),
-           # ]),
-            Text.TextLayer(word, borderSize=1),
-            Distortions.SineWarp(),
-            ]
-
-
-class AngryGimpy(ImageCaptcha):
-    """A harder but less visually pleasing CAPTCHA"""
-    def getLayers(self):
-        word = Words.defaultWordList.pick()
-        self.addSolution(word)
-        return [
-           # suppression du background 
-           # Backgrounds.TiledImage(),
-           # Backgrounds.RandomDots(),
-            Text.TextLayer(word, borderSize=1),
-	   # Distortions.SineWarp(periodRange    = (0.04, 0.07))
-            Distortions.WigglyBlocks(),
-              ]
-
-
-class AntiSpam(ImageCaptcha):
-    """A fixed-solution CAPTCHA that can be used to hide email addresses or URLs from bots"""
-    fontFactory = Text.FontFactory(20, "vera/VeraBd.ttf")
-    defaultSize = (512,50)
-
-    def getLayers(self, solution="murray@example.com"):
-        self.addSolution(solution)
-
-        textLayer = Text.TextLayer(solution,
-                                   borderSize = 2,
-                                   fontFactory = self.fontFactory)
-
-        return [
-            Backgrounds.CroppedImage(),
-            textLayer,
-            Distortions.SineWarp(amplitudeRange = (3, 5)),
-            ]
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Text.py
--- a/pycaptcha/Captcha/Visual/Text.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-""" Captcha.Visual.Text
-
-Text generation for visual CAPTCHAs.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-import random, os
-from Captcha import Visual, File
-import ImageFont, ImageDraw
-
-
-class FontFactory(File.RandomFileFactory):
-    """Picks random fonts and/or sizes from a given list.
-       'sizes' can be a single size or a (min,max) tuple.
-       If any of the given files are directories, all *.ttf found
-       in that directory will be added.
-       """
-    extensions = [".ttf", ".TTF"]
-    basePath = "fonts"
-
-# arguments variables a modifier pour mettre le chemin vers les fontes.
-    def __init__(self, sizes, *fileNames):
-        File.RandomFileFactory.__init__(self, *fileNames)
-
-        if type(sizes) is tuple:			
-            self.minSize = sizes[0]
-            self.maxSize = sizes[1]
-        else:
-            self.minSize = sizes
-            self.maxSize = sizes
-
-    def pick(self):
-        """Returns a (fileName, size) tuple that can be passed to ImageFont.truetype()"""
-        fileName = File.RandomFileFactory.pick(self)
-        size = int(random.uniform(self.minSize, self.maxSize) + 0.5)
-        return (fileName, size)
-
-# Predefined font factories
-defaultFontFactory = FontFactory(25, "allfonts")
-#defaultFontFactory = FontFactory((30, 40), "vera")
-
-class TextLayer(Visual.Layer):
-    """Represents a piece of text rendered within the image.
-       Alignment is given such that (0,0) places the text in the
-       top-left corner and (1,1) places it in the bottom-left.
-
-       The font and alignment are optional, if not specified one is
-       chosen randomly. If no font factory is specified, the default is used.
-       """
-    def __init__(self, text,
-                 alignment   = None,
-                 font        = None,
-                 fontFactory = None,
-                 textColor   = "white",
-                 borderSize  = 0,
-                 borderColor = None,
-                 ):
-        if fontFactory is None:
-            global defaultFontFactory
-            fontFactory = defaultFontFactory
-
-        if font is None:
-            font = fontFactory.pick()
-
-        if alignment is None:
-            alignment = (random.uniform(0,1),
-                         random.uniform(0,1))
-
-        self.text        = text
-        self.alignment   = alignment
-        self.font        = font
-        self.textColor   = textColor
-        self.borderSize  = borderSize
-        self.borderColor = borderColor
-
-    def render(self, img):
-        font = ImageFont.truetype(*self.font)
-    	textSize = font.getsize(self.text)
-        draw = ImageDraw.Draw(img)
-
-        # Find the text's origin given our alignment and current image size
-        x = int((img.size[0] - textSize[0] - self.borderSize*2) * self.alignment[0] + 0.5)
-        y = int((img.size[1] - textSize[1] - self.borderSize*2) * self.alignment[1] + 0.5)
-
-        # Draw the border if we need one. This is slow and ugly, but there doesn't
-        # seem to be a better way with PIL.
-        if self.borderSize > 0:
-            for bx in (-1,0,1):
-                for by in (-1,0,1):
-                    if bx and by:
-                        draw.text((x + bx * self.borderSize,
-                                   y + by * self.borderSize),
-                                  self.text, font=font, fill=self.borderColor)
-
-        # And the text itself...
-        draw.text((x,y), self.text, font=font, fill=self.textColor)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/__init__.py
--- a/pycaptcha/Captcha/Visual/__init__.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-""" Captcha.Visual
-
-This package contains functionality specific to visual CAPTCHA tests.
-
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-# Convenience imports
-from Base import *
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Words.py
--- a/pycaptcha/Captcha/Words.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-""" Captcha.Words
-
-Utilities for managing word lists and finding random words
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-import random, os
-import File
-
-
-class WordList(object):
-    """A class representing a word list read from disk lazily.
-       Blank lines and comment lines starting with '#' are ignored.
-       Any number of words per line may be used. The list can
-       optionally ingore words not within a given length range.
-       """
-    def __init__(self, fileName, minLength=None, maxLength=None):
-        self.words = None
-        self.fileName = fileName
-        self.minLength = minLength
-        self.maxLength = maxLength
-
-    def read(self):
-        """Read words from disk"""
-        f = open(os.path.join(File.dataDir, "words", self.fileName))
-
-        self.words = []
-        for line in f.xreadlines():
-            line = line.strip()
-            if not line:
-                continue
-            if line[0] == '#':
-                continue
-            for word in line.split():
-                if self.minLength is not None and len(word) < self.minLength:
-                    continue
-                if self.maxLength is not None and len(word) > self.maxLength:
-                    continue
-                self.words.append(word)
-
-    def pick(self):
-        """Pick a random word from the list, reading it in if necessary"""
-        if self.words is None:
-            self.read()
-        return random.choice(self.words)
-
-
-# Define several shared word lists that are read from disk on demand
-basic_english            = WordList("basic-english")
-basic_english_restricted = WordList("basic-english", minLength=5, maxLength=8)
-characters = WordList("characters")
-defaultWordList = characters
-
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/__init__.py
--- a/pycaptcha/Captcha/__init__.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-""" Captcha
-
-This is the PyCAPTCHA package, a collection of Python modules
-implementing CAPTCHAs: automated tests that humans should pass,
-but current computer programs can't. These tests are often
-used for security.
-
-See  http://www.captcha.net for more information and examples.
-
-This project was started because the CIA project, written in
-Python, needed a CAPTCHA to automate its user creation process
-safely. All existing implementations the author could find were
-written in Java or for the .NET framework, so a simple Python
-alternative was needed.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty <micah@navi.cx>
-#
-
-__version__ = "0.3-pre"
-
-
-# Check the python version here before we proceed further
-requiredPythonVersion = (2,2,1)
-def checkVersion():
-    import sys, string
-    if sys.version_info < requiredPythonVersion:
-        raise Exception("%s requires at least Python %s, found %s instead." % (
-            name,
-            string.join(map(str, requiredPythonVersion), "."),
-            string.join(map(str, sys.version_info), ".")))
-checkVersion()
-
-
-# Convenience imports
-from Base import *
-import File
-import Words
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/.DS_Store
Binary file pycaptcha/Captcha/data/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/.DS_Store
Binary file pycaptcha/Captcha/data/fonts/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/allfonts
--- a/pycaptcha/Captcha/data/fonts/allfonts	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-/Tmp/allfonts
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/._atari-small.bdf
Binary file pycaptcha/Captcha/data/fonts/others/._atari-small.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/._cursive.bdf
Binary file pycaptcha/Captcha/data/fonts/others/._cursive.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf
Binary file pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/CIDFnmap
--- a/pycaptcha/Captcha/data/fonts/others/CIDFnmap	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-/Dotum-Bold (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /Adobe-Korea1-Unicode ;
-/ZenHei (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-GB1-Unicode ;
-/Batang-Regular (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /Adobe-Korea1-Unicode ;
-/VL-PGothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan1-Unicode ;
-/Dotum-Regular (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /Adobe-Korea1-Unicode ;
-/VL-Gothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan2-Unicode ;
-/VL-Gothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan1-Unicode ;
-/VL-PGothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan2-Unicode ;
-/ZenHei-CNS (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-CNS1-Unicode ;
-/Batang-Bold (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /Adobe-Korea1-Unicode ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/FAPIfontmap
--- a/pycaptcha/Captcha/data/fonts/others/FAPIfontmap	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-/Garuda-Oblique << /Path (/usr/share/fonts/truetype/thai/Garuda-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstOne << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOne.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Vemana2000 << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Vemana.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-BoldItalic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-Oblique << /Path (/usr/share/fonts/truetype/thai/Umpush-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Malige << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Malige-b.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma-Oblique << /Path (/usr/share/fonts/truetype/thai/Loma-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstBook << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstBook.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Serif << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstOffice << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOffice.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree-Oblique << /Path (/usr/share/fonts/truetype/thai/Waree-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstFarsi << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstFarsi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Garuda-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Garuda-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/utkal << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/utkal.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-Italic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee-BoldOblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmex10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmex10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-Bold << /Path (/usr/share/fonts/truetype/thai/Norasi-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma << /Path (/usr/share/fonts/truetype/thai/Loma.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/wasy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/wasy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstNaskh << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstNaskh.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree << /Path (/usr/share/fonts/truetype/thai/Waree.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Garuda << /Path (/usr/share/fonts/truetype/thai/Garuda.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmsy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmsy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee-Bold << /Path (/usr/share/fonts/truetype/thai/SawasdeeBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Purisa << /Path (/usr/share/fonts/truetype/thai/Purisa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstPoster << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstPoster.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Punjabi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_pa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Waree-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Garuda-Bold << /Path (/usr/share/fonts/truetype/thai/Garuda-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/RachanaMedium << /Path (/usr/share/fonts/truetype/ttf-malayalam-fonts/Rachana_04.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstArt << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstArt.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstDecorative << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDecorative.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Hindi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_hi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-LightOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-LightOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/mry_KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/mry_KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstDigital << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDigital.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans-Mono-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Gujarati << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_gu.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstLetter << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstLetter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo << /Path (/usr/share/fonts/truetype/thai/TlwgTypo.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/msbm10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msbm10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans-Mono << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-Italic << /Path (/usr/share/fonts/truetype/thai/Norasi-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstTitleL << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitleL.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-Oblique << /Path (/usr/share/fonts/truetype/thai/Norasi-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Phetsarath << /Path (/usr/share/fonts/truetype/ttf-lao/Phetsarath_OT.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/mukti << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrow.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee-Oblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmr10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmr10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-Light << /Path (/usr/share/fonts/truetype/thai/Umpush-Light.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-Bold << /Path (/usr/share/fonts/truetype/thai/Umpush-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Serif-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstTitle << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitle.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi << /Path (/usr/share/fonts/truetype/thai/Norasi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-Oblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/muktinarrow << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrowBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-Italic << /Path (/usr/share/fonts/truetype/thai/Kinnari-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/kacstPen << /Path (/usr/share/fonts/truetype/ttf-kacst/kacstPen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush << /Path (/usr/share/fonts/truetype/thai/Umpush.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee << /Path (/usr/share/fonts/truetype/thai/Sawasdee.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono << /Path (/usr/share/fonts/truetype/thai/TlwgMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari << /Path (/usr/share/fonts/truetype/thai/Kinnari.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstScreen << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstScreen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSansBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/msam10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msam10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmmi10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmmi10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Tamil << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_ta.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist << /Path (/usr/share/fonts/truetype/thai/TlwgTypist.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree-Bold << /Path (/usr/share/fonts/truetype/thai/Waree-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-Bold << /Path (/usr/share/fonts/truetype/thai/Kinnari-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma-Bold << /Path (/usr/share/fonts/truetype/thai/Loma-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Loma-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Palatino-Italic /URWPalladioL-Ital ; 
-/Palatino-Bold /URWPalladioL-Bold ; 
-/AvantGarde-BookOblique /URWGothicL-BookObli ; 
-/Times-Bold /NimbusRomNo9L-Medi ; 
-/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ; 
-/Times-Roman /NimbusRomNo9L-Regu ; 
-/NewCenturySchlbk-Italic /CenturySchL-Ital ; 
-/HelveticaNarrow /NimbusSanL-ReguCond ; 
-/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ; 
-/Bookman-Light /URWBookmanL-Ligh ; 
-/Palatino-BoldItalic /URWPalladioL-BoldItal ; 
-/Traditional /KacstBook ; 
-/Times-BoldItalic /NimbusRomNo9L-MediItal ; 
-/AvantGarde-Book /URWGothicL-Book ; 
-/AvantGarde-DemiOblique /URWGothicL-DemiObli ; 
-/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ; 
-/Helvetica-Bold /NimbusSanL-Bold ; 
-/Courier-Oblique /NimbusMonL-ReguObli ; 
-/Times-Italic /NimbusRomNo9L-ReguItal ; 
-/Courier /NimbusMonL-Regu ; 
-/Bookman-Demi /URWBookmanL-DemiBold ; 
-/Helvetica-BoldOblique /NimbusSanL-BoldItal ; 
-/Helvetica-Oblique /NimbusSanL-ReguItal ; 
-/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ; 
-/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ; 
-/Courier-BoldOblique /NimbusMonL-BoldObli ; 
-/HelveticaNarrow-Bold /NimbusSanL-BoldCond ; 
-/AvantGarde-Demi /URWGothicL-Demi ; 
-/Bookman-LightItalic /URWBookmanL-LighItal ; 
-/ZapfDingbats /Dingbats ; 
-/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ; 
-/ZapfChancery-MediumItalic /URWChanceryL-MediItal ; 
-/Helvetica /NimbusSanL-Regu ; 
-/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ; 
-/Palatino-Roman /URWPalladioL-Roma ; 
-/NewCenturySchlbk-Bold /CenturySchL-Bold ; 
-/NewCenturySchlbk-Roman /CenturySchL-Roma ; 
-/Courier-Bold /NimbusMonL-Bold ; 
-/Arabic /KacstBook ; 
-/Helvetica-Narrow /NimbusSanL-ReguCond ; 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/Fontmap
--- a/pycaptcha/Captcha/data/fonts/others/Fontmap	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-/LMTypewriter10-CapsOblique (lmtcso10.pfb) ;
-/Dingbats (d050000l.pfb) ;
-/URWBookmanL-DemiBoldItal (b018035l.pfb) ;
-/LMSansQuotation8-Bold (lmssqbx8.pfb) ;
-/Symbol (Symbol.pfb) ;
-/LMTypewriterVarWd10-DarkOblique (lmvtko10.pfb) ;
-/LMRoman10-Demi (lmb10.pfb) ;
-/URWPalladioL-Ital (p052023l.pfb) ;
-/LMTypewriter10-DarkOblique (lmtko10.pfb) ;
-/NimbusSanL-Regu (n019003l.pfb) ;
-/LMTypewriter10-Italic (lmtti10.pfb) ;
-/LMSansQuotation8-BoldOblique (lmssqbo8.pfb) ;
-/URWPalladioL-Roma (p052003l.pfb) ;
-/LMTypewriterVarWd10-Light (lmvtl10.pfb) ;
-/NimbusRomNo9L-Medi (n021004l.pfb) ;
-/NimbusSanL-ReguItal (n019023l.pfb) ;
-/NimbusMonL-Regu (n022003l.pfb) ;
-/LMSans10-Bold (lmssbx10.pfb) ;
-/LMRoman10-CapsOblique (lmcsco10.pfb) ;
-/CenturySchL-Roma (c059013l.pfb) ;
-/URWGothicL-BookObli (a010033l.pfb) ;
-/LMTypewriter10-LightCondensedOblique (lmtlco10.pfb) ;
-/LMSans10-DemiCondensedOblique (lmssdo10.pfb) ;
-/LMRoman10-CapsRegular (lmcsc10.pfb) ;
-/CenturySchL-BoldItal (c059036l.pfb) ;
-/LMRoman10-DemiOblique (lmbo10.pfb) ;
-/LMRoman10-Unslanted (lmu10.pfb) ;
-/LMRoman10-Bold (lmbx10.pfb) ;
-/LMSans10-DemiCondensed (lmssdc10.pfb) ;
-/URWChanceryL-MediItal (z003034l.pfb) ;
-/URWGothicL-DemiObli (a010035l.pfb) ;
-/LMTypewriterVarWd10-Oblique (lmvtto10.pfb) ;
-/NimbusMonL-Bold (n022004l.pfb) ;
-/LMTypewriter10-Oblique (lmtto10.pfb) ;
-/LMRoman10-BoldItalic (lmbxi10.pfb) ;
-/NimbusSanL-ReguCond (n019043l.pfb) ;
-/CenturySchL-Bold (c059016l.pfb) ;
-/LMTypewriterVarWd10-Regular (lmvtt10.pfb) ;
-/URWBookmanL-Ligh (b018012l.pfb) ;
-/LMSansQuotation8-Regular (lmssq8.pfb) ;
-/LMSans10-Regular (lmss10.pfb) ;
-/LMSans10-Oblique (lmsso10.pfb) ;
-/NimbusSanL-BoldCond (n019044l.pfb) ;
-/LMRoman10-Regular (lmr10.pfb) ;
-/LMTypewriter10-LightCondensed (lmtlc10.pfb) ;
-/LMTypewriterVarWd10-Dark (lmvtk10.pfb) ;
-/LMTypewriter10-CapsRegular (lmtcsc10.pfb) ;
-/LMSansQuotation8-Oblique (lmssqo8.pfb) ;
-/StandardSymL (s050000l.pfb) ;
-/NimbusRomNo9L-Regu (n021003l.pfb) ;
-/LMTypewriterVarWd10-LightOblique (lmvtlo10.pfb) ;
-/URWPalladioL-BoldItal (p052024l.pfb) ;
-/CenturySchL-Ital (c059033l.pfb) ;
-/LMRoman10-Dunhill (lmdunh10.pfb) ;
-/URWPalladioL-Bold (p052004l.pfb) ;
-/URWGothicL-Book (a010013l.pfb) ;
-/LMTypewriter10-Dark (lmtk10.pfb) ;
-/NimbusSanL-BoldItal (n019024l.pfb) ;
-/URWGothicL-Demi (a010015l.pfb) ;
-/LMTypewriter10-LightOblique (lmtlo10.pfb) ;
-/LMTypewriter10-Light (lmtl10.pfb) ;
-/NimbusSanL-BoldCondItal (n019064l.pfb) ;
-/LMRoman10-Italic (lmri10.pfb) ;
-/LMRoman10-DunhillOblique (lmduno10.pfb) ;
-/NimbusMonL-ReguObli (n022023l.pfb) ;
-/LMRoman10-Oblique (lmro10.pfb) ;
-/NimbusSanL-ReguCondItal (n019063l.pfb) ;
-/NimbusRomNo9L-MediItal (n021024l.pfb) ;
-/LMRoman10-BoldOblique (lmbxo10.pfb) ;
-/URWBookmanL-DemiBold (b018015l.pfb) ;
-/NimbusSanL-Bold (n019004l.pfb) ;
-/LMSans10-BoldOblique (lmssbo10.pfb) ;
-/URWBookmanL-LighItal (b018032l.pfb) ;
-/NimbusMonL-BoldObli (n022024l.pfb) ;
-/NimbusRomNo9L-ReguItal (n021023l.pfb) ;
-/LMTypewriter10-Regular (lmtt10.pfb) ;
-/Palatino-Italic /URWPalladioL-Ital ; 
-/Palatino-Bold /URWPalladioL-Bold ; 
-/AvantGarde-BookOblique /URWGothicL-BookObli ; 
-/Times-Bold /NimbusRomNo9L-Medi ; 
-/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ; 
-/Times-Roman /NimbusRomNo9L-Regu ; 
-/NewCenturySchlbk-Italic /CenturySchL-Ital ; 
-/HelveticaNarrow /NimbusSanL-ReguCond ; 
-/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ; 
-/Bookman-Light /URWBookmanL-Ligh ; 
-/Palatino-BoldItalic /URWPalladioL-BoldItal ; 
-/Traditional /KacstBook ; 
-/Times-BoldItalic /NimbusRomNo9L-MediItal ; 
-/AvantGarde-Book /URWGothicL-Book ; 
-/AvantGarde-DemiOblique /URWGothicL-DemiObli ; 
-/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ; 
-/Helvetica-Bold /NimbusSanL-Bold ; 
-/Courier-Oblique /NimbusMonL-ReguObli ; 
-/Times-Italic /NimbusRomNo9L-ReguItal ; 
-/Courier /NimbusMonL-Regu ; 
-/Bookman-Demi /URWBookmanL-DemiBold ; 
-/Helvetica-BoldOblique /NimbusSanL-BoldItal ; 
-/Helvetica-Oblique /NimbusSanL-ReguItal ; 
-/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ; 
-/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ; 
-/Courier-BoldOblique /NimbusMonL-BoldObli ; 
-/HelveticaNarrow-Bold /NimbusSanL-BoldCond ; 
-/AvantGarde-Demi /URWGothicL-Demi ; 
-/Bookman-LightItalic /URWBookmanL-LighItal ; 
-/ZapfDingbats /Dingbats ; 
-/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ; 
-/ZapfChancery-MediumItalic /URWChanceryL-MediItal ; 
-/Helvetica /NimbusSanL-Regu ; 
-/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ; 
-/Palatino-Roman /URWPalladioL-Roma ; 
-/NewCenturySchlbk-Bold /CenturySchL-Bold ; 
-/NewCenturySchlbk-Roman /CenturySchL-Roma ; 
-/Courier-Bold /NimbusMonL-Bold ; 
-/Arabic /KacstBook ; 
-/Helvetica-Narrow /NimbusSanL-ReguCond ; 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/cidfmap
--- a/pycaptcha/Captcha/data/fonts/others/cidfmap	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-/Dotum-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
-/ZenHei << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(GB1) 0] >> ;
-/Batang-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
-/VL-PGothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
-/Dotum-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
-/VL-Gothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
-/VL-Gothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
-/VL-PGothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
-/ZenHei-CNS << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(CNS1) 0] >> ;
-/Batang-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT
--- a/pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-Bitstream Vera Fonts Copyright
-
-The fonts have a generous copyright, allowing derivative works (as
-long as "Bitstream" or "Vera" are not in the names), and full
-redistribution (so long as they are not *sold* by themselves). They
-can be be bundled, redistributed and sold with any software.
-
-The fonts are distributed under the following copyright:
-
-Copyright
-=========
-
-Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream
-Vera is a trademark of Bitstream, Inc.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of the fonts accompanying this license ("Fonts") and associated
-documentation files (the "Font Software"), to reproduce and distribute
-the Font Software, including without limitation the rights to use,
-copy, merge, publish, distribute, and/or sell copies of the Font
-Software, and to permit persons to whom the Font Software is furnished
-to do so, subject to the following conditions:
-
-The above copyright and trademark notices and this permission notice
-shall be included in all copies of one or more of the Font Software
-typefaces.
-
-The Font Software may be modified, altered, or added to, and in
-particular the designs of glyphs or characters in the Fonts may be
-modified and additional glyphs or characters may be added to the
-Fonts, only if the fonts are renamed to names not containing either
-the words "Bitstream" or the word "Vera".
-
-This License becomes null and void to the extent applicable to Fonts
-or Font Software that has been modified and is distributed under the
-"Bitstream Vera" names.
-
-The Font Software may be sold as part of a larger software package but
-no copy of one or more of the Font Software typefaces may be sold by
-itself.
-
-THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
-OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
-BITSTREAM OR THE GNOME FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL,
-OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT
-SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.
-
-Except as contained in this notice, the names of Gnome, the Gnome
-Foundation, and Bitstream Inc., shall not be used in advertising or
-otherwise to promote the sale, use or other dealings in this Font
-Software without prior written authorization from the Gnome Foundation
-or Bitstream Inc., respectively. For further information, contact:
-fonts at gnome dot org.
-
-Copyright FAQ
-=============
-
-   1. I don't understand the resale restriction... What gives?
-
-      Bitstream is giving away these fonts, but wishes to ensure its
-      competitors can't just drop the fonts as is into a font sale system
-      and sell them as is. It seems fair that if Bitstream can't make money
-      from the Bitstream Vera fonts, their competitors should not be able to
-      do so either. You can sell the fonts as part of any software package,
-      however.
-
-   2. I want to package these fonts separately for distribution and
-      sale as part of a larger software package or system.  Can I do so?
-
-      Yes. A RPM or Debian package is a "larger software package" to begin 
-      with, and you aren't selling them independently by themselves. 
-      See 1. above.
-
-   3. Are derivative works allowed?
-      Yes!
-
-   4. Can I change or add to the font(s)?
-      Yes, but you must change the name(s) of the font(s).
-
-   5. Under what terms are derivative works allowed?
-
-      You must change the name(s) of the fonts. This is to ensure the
-      quality of the fonts, both to protect Bitstream and Gnome. We want to
-      ensure that if an application has opened a font specifically of these
-      names, it gets what it expects (though of course, using fontconfig,
-      substitutions could still could have occurred during font
-      opening). You must include the Bitstream copyright. Additional
-      copyrights can be added, as per copyright law. Happy Font Hacking!
-
-   6. If I have improvements for Bitstream Vera, is it possible they might get 
-       adopted in future versions?
-
-      Yes. The contract between the Gnome Foundation and Bitstream has
-      provisions for working with Bitstream to ensure quality additions to
-      the Bitstream Vera font family. Please contact us if you have such
-      additions. Note, that in general, we will want such additions for the
-      entire family, not just a single font, and that you'll have to keep
-      both Gnome and Jim Lyles, Vera's designer, happy! To make sense to add
-      glyphs to the font, they must be stylistically in keeping with Vera's
-      design. Vera cannot become a "ransom note" font. Jim Lyles will be
-      providing a document describing the design elements used in Vera, as a
-      guide and aid for people interested in contributing to Vera.
-
-   7. I want to sell a software package that uses these fonts: Can I do so?
-
-      Sure. Bundle the fonts with your software and sell your software
-      with the fonts. That is the intent of the copyright.
-
-   8. If applications have built the names "Bitstream Vera" into them, 
-      can I override this somehow to use fonts of my choosing?
-
-      This depends on exact details of the software. Most open source
-      systems and software (e.g., Gnome, KDE, etc.) are now converting to
-      use fontconfig (see www.fontconfig.org) to handle font configuration,
-      selection and substitution; it has provisions for overriding font
-      names and subsituting alternatives. An example is provided by the
-      supplied local.conf file, which chooses the family Bitstream Vera for
-      "sans", "serif" and "monospace".  Other software (e.g., the XFree86
-      core server) has other mechanisms for font substitution.
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/README.TXT
--- a/pycaptcha/Captcha/data/fonts/vera/README.TXT	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-Contained herin is the Bitstream Vera font family.
-
-The Copyright information is found in the COPYRIGHT.TXT file (along
-with being incoporated into the fonts themselves).
-
-The releases notes are found in the file "RELEASENOTES.TXT".
-
-We hope you enjoy Vera!
-
-                        Bitstream, Inc.
-			The Gnome Project
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT
--- a/pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-Bitstream Vera Fonts - April 16, 2003
-=====================================
-
-The version number of these fonts is 1.10 to distinguish them from the
-beta test fonts.
-
-Note that the Vera copyright is incorporated in the fonts themselves.
-The License field in the fonts contains the copyright license as it
-appears below. The TrueType copyright field is not large enough to
-contain the full license, so the license is incorporated (as you might
-think if you thought about it) into the license field, which
-unfortunately can be obscure to find.  (In pfaedit, see: Element->Font
-Info->TTFNames->License).
-
-Our apologies for it taking longer to complete the fonts than planned.
-Beta testers requested a tighter line spacing (less leading) and Jim
-Lyles redesigned Vera's accents to bring its line spacing to more
-typical of other fonts.  This took additional time and effort.  Our
-thanks to Jim for this effort above and beyond the call of duty.
-
-There are four monospace and sans faces (normal, oblique, bold, bold
-oblique) and two serif faces (normal and bold). Fontconfig/Xft2 (see
-www.fontconfig.org) can artificially oblique the serif faces for you:
-this loses hinting and distorts the faces slightly, but is visibly
-different than normal and bold, and reasonably pleasing.
-
-On systems with fontconfig 2.0 or 2.1 installed, making your sans,
-serif and monospace fonts default to these fonts is very easy.  Just
-drop the file local.conf into your /etc/fonts directory.  This will
-make the Bitstream fonts your default fonts for all applications using
-fontconfig (if sans, serif, or monospace names are used, as they often
-are as default values in many desktops). The XML in local.conf may
-need modification to enable subpixel decimation, if appropriate,
-however, the commented out phrase does so for XFree86 4.3, in the case
-that the server does not have sufficient information to identify the
-use of a flat panel.  Fontconfig 2.2 adds Vera to the list of font
-families and will, by default use it as the default sans, serif and
-monospace fonts.
-
-During the testing of the final Vera fonts, we learned that screen
-fonts in general are only typically hinted to work correctly at
-integer pixel sizes.  Vera is coded internally for integer sizes only.
-We need to investigate further to see if there are commonly used fonts
-that are hinted to be rounded but are not rounded to integer sizes due
-to oversights in their coding.
-
-Most fonts work best at 8 pixels and below if anti-aliased only, as
-the amount of work required to hint well at smaller and smaller sizes
-becomes astronomical.  GASP tables are typically used to control
-whether hinting is used or not, but Freetype/Xft does not currently
-support GASP tables (which are present in Vera).
-
-To mitigate this problem, both for Vera and other fonts, there will be
-(very shortly) a new fontconfig 2.2 release that will, by default not
-apply hints if the size is below 8 pixels. if you should have a font
-that in fact has been hinted more agressively, you can use fontconfig
-to note this exception. We believe this should improve many hinted
-fonts in addition to Vera, though implemeting GASP support is likely
-the right long term solution.
-
-Font rendering in Gnome or KDE is the combination of algorithms in
-Xft2 and Freetype, along with hinting in the fonts themselves. It is
-vital to have sufficient information to disentangle problems that you
-may observe.
-
-Note that having your font rendering system set up correctly is vital
-to proper judgement of problems of the fonts:
-
-    * Freetype may or may not be configured to in ways that may
-      implement execution of possibly patented (in some parts of the world)
-      TrueType hinting algorithms, particularly at small sizes.  Best
-      results are obtained while using these algorithms.
-
-    * The freetype autohinter (used when the possibly patented
-      algorithms are not used) continues to improve with each release. If
-      you are using the autohinter, please ensure you are using an up to
-      date version of freetype before reporting problems.
-
-    * Please identify what version of freetype you are using in any
-      bug reports, and how your freetype is configured.
-
-    * Make sure you are not using the freetype version included in
-      XFree86 4.3, as it has bugs that significantly degrade most fonts,
-      including Vera. if you build XFree86 4.3 from source yourself, you may
-      have installed this broken version without intending it (as I
-      did). Vera was verified with the recently released Freetype 2.1.4. On
-      many systems, 'ldd" can be used to see which freetype shared library
-      is actually being used.
-
-    * Xft/X Render does not (yet) implement gamma correction.  This
-      causes significant problems rendering white text on a black background
-      (causing partial pixels to be insufficiently shaded) if the gamma of
-      your monitor has not been compensated for, and minor problems with
-      black text on a while background.  The program "xgamma" can be used to
-      set a gamma correction value in the X server's color pallette. Most
-      monitors have a gamma near 2.
-
-    * Note that the Vera family uses minimal delta hinting. Your
-      results on other systems when not used anti-aliased may not be
-      entirely satisfying. We are primarily interested in reports of
-      problems on open source systems implementing Xft2/fontconfig/freetype
-      (which implements antialiasing and hinting adjustements, and
-      sophisticated subpixel decimation on flatpanels).  Also, the
-      algorithms used by Xft2 adjust the hints to integer widths and the
-      results are crisper on open source systems than on Windows or
-      MacIntosh.
-
-    * Your fontconfig may (probably does) predate the release of
-      fontconfig 2.2, and you may see artifacts not present when the font is
-      used at very small sizes with hinting enabled. "vc-list -V" can be
-      used to see what version you have installed.
-
-We believe and hope that these fonts will resolve the problems
-reported during beta test.  The largest change is the reduction of
-leading (interline spacing), which had annoyed a number of people, and
-reduced Vera's utility for some applcations.  The Vera monospace font
-should also now make '0' and 'O' and '1' and 'l' more clearly
-distinguishable.
-
-The version of these fonts is version 1.10.  Fontconfig should be
-choosing the new version of the fonts if both the released fonts and
-beta test fonts are installed (though please discard them: they have
-names of form tt20[1-12]gn.ttf).  Note that older versions of
-fontconfig sometimes did not rebuild their cache correctly when new
-fonts are installed: please upgrade to fontconfig 2.2. "fc-cache -f"
-can be used to force rebuilding fontconfig's cache files.
-
-If you note problems, please send them to fonts at gnome dot org, with
-exactly which face and size and unicode point you observe the problem
-at. The xfd utility from XFree86 CVS may be useful for this (e.g. "xfd
--fa sans").  A possibly more useful program to examine fonts at a
-variety of sizes is the "waterfall" program found in Keith Packard's
-CVS.
-
-        $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS login
-        Logging in to :pserver:anoncvs@keithp.com:2401/local/src/CVS
-        CVS password: <hit return>
-        $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS co waterfall
-        $ cd waterfall
-        $ xmkmf -a
-        $ make
-        # make install
-        # make install.man
-
-Again, please make sure you are running an up-to-date freetype, and
-that you are only examining integer sizes.
-
-Reporting Problems
-==================
-
-Please send problem reports to fonts at gnome org, with the following
-information:
-
-   1. Version of Freetype, Xft2 and fontconfig
-   2. Whether TT hinting is being used, or the autohinter
-   3. Application being used
-   4. Character/Unicode code point that has problems (if applicable)
-   5. Version of which operating system
-   6. Please include a screenshot, when possible.
-
-Please check the fonts list archives before reporting problems to cut
-down on duplication.
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/Vera.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/Vera.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/local.conf
--- a/pycaptcha/Captcha/data/fonts/vera/local.conf	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
-<!-- /etc/fonts.conf file to configure system font access -->
-<fontconfig>
-        <!--  Enable sub-pixel rendering
-        <match target="font">
-                <test qual="all" name="rgba">
-                        <const>unknown</const>
-                </test>
-                <edit name="rgba" mode="assign"><const>rgb</const></edit>
-        </match>
-         -->
-
-        <alias>
-                <family>serif</family>
-                <prefer>
-                        <family>Bitstream Vera Serif</family>
-                </prefer>
-        </alias>
-        <alias>
-                <family>sans-serif</family>
-                <prefer>
-                        <family>Bitstream Vera Sans</family>
-                </prefer>
-        </alias>
-        <alias>
-                <family>monospace</family>
-                <prefer>
-                        <family>Bitstream Vera Sans Mono</family>
-                </prefer>
-        </alias>
-</fontconfig>
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/.DS_Store
Binary file pycaptcha/Captcha/data/pictures/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/1.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/1.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/10.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/10.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/11.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/11.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/12.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/12.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/2.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/2.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/3.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/3.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/4.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/4.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/5.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/5.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/6.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/6.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/7.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/7.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/8.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/8.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/9.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/9.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/README
--- a/pycaptcha/Captcha/data/pictures/abstract/README	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-These images were created by the author with Fyre, expressly for PyCAPTCHA.
-
-Copyright (c) 2004 Micah Dowty
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg
Binary file pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg
Binary file pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg
Binary file pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/README
--- a/pycaptcha/Captcha/data/pictures/nature/README	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-These are uncopyrighted images gathered from various sources,
-including the author's family and national park service web sites.
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/words/README
--- a/pycaptcha/Captcha/data/words/README	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-These word lists are from various sources:
-
-basic-english:
-   http://simple.wikipedia.org/wiki/Basic_English_Alphabetical_Wordlist
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/words/basic-english
--- a/pycaptcha/Captcha/data/words/basic-english	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,852 +0,0 @@
-a
-able
-about
-account
-acid
-across
-act
-addition
-adjustment
-advertisement
-agreement
-after
-again
-against
-air
-all
-almost
-among
-amount
-amusement
-and
-angle
-angry
-animal
-answer
-ant
-any
-apparatus
-apple
-approval
-arch
-argument
-arm
-army
-art
-as
-at
-attack
-attempt
-attention
-attraction
-authority
-automatic
-awake
-baby
-back
-bad
-bag
-balance
-ball
-band
-base
-basin
-basket
-bath
-be
-beautiful
-because
-bed
-bee
-before
-behavior
-belief
-bell
-bent
-berry
-between
-bird
-birth
-bit
-bite
-bitter
-black
-blade
-blood
-blow
-blue
-board
-boat
-body
-boiling
-bone
-book
-boot
-bottle
-box
-boy
-brain
-brake
-branch
-brass
-bread
-breath
-brick
-bridge
-bright
-broken
-brother
-brown
-brush
-bucket
-building
-bulb
-burn
-burst
-business
-but
-butter
-button
-by
-cake
-camera
-canvas
-card
-care
-carriage
-cart
-cat
-cause
-certain
-chain
-chalk
-chance
-change
-cheap
-cheese
-chemical
-chest
-chief
-chin
-church
-circle
-clean
-clear
-clock
-cloth
-cloud
-coal
-coat
-cold
-collar
-color
-comb
-come
-comfort
-committee
-common
-company
-comparison
-competition
-complete
-complex
-condition
-connection
-conscious
-control
-cook
-copper
-copy
-cord
-cork
-cotton
-cough
-country
-cover
-cow
-crack
-credit
-crime
-cruel
-crush
-cry
-cup
-current
-curtain
-curve
-cushion
-cut
-damage
-danger
-dark
-daughter
-day
-dead
-dear
-death
-debt
-decision
-deep
-degree
-delicate
-dependent
-design
-desire
-destruction
-detail
-development
-different
-digestion
-direction
-dirty
-discovery
-discussion
-disease
-disgust
-distance
-distribution
-division
-do
-dog
-door
-down
-doubt
-drain
-drawer
-dress
-drink
-driving
-drop
-dry
-dust
-ear
-early
-earth
-east
-edge
-education
-effect
-egg
-elastic
-electric
-end
-engine
-enough
-equal
-error
-even
-event
-ever
-every
-example
-exchange
-existence
-expansion
-experience
-expert
-eye
-face
-fact
-fall
-false
-family
-far
-farm
-fat
-father
-fear
-feather
-feeble
-feeling
-female
-fertile
-fiction
-field
-fight
-finger
-fire
-first
-fish
-fixed
-flag
-flame
-flat
-flight
-floor
-flower
-fly
-fold
-food
-foolish
-foot
-for
-force
-fork
-form
-forward
-fowl
-frame
-free
-frequent
-friend
-from
-front
-fruit
-full
-future
-garden
-general
-get
-girl
-give
-glass
-glove
-go
-goat
-gold
-good
-government
-grain
-grass
-great
-green
-grey/gray
-grip
-group
-growth
-guide
-gun
-hair
-hammer
-hand
-hanging
-happy
-harbor
-hard
-harmony
-hat
-hate
-have
-he
-head
-healthy
-hearing
-heart
-heat
-help
-here
-high
-history
-hole
-hollow
-hook
-hope
-horn
-horse
-hospital
-hour
-house
-how
-humor
-ice
-idea
-if
-ill
-important
-impulse
-in
-increase
-industry
-ink
-insect
-instrument
-insurance
-interest
-invention
-iron
-island
-jelly
-jewel
-join
-journey
-judge
-jump
-keep
-kettle
-key
-kick
-kind
-kiss
-knee
-knife
-knot
-knowledge
-land
-language
-last
-late
-laugh
-law
-lead
-leaf
-learning
-leather
-left
-leg
-let
-letter
-level
-library
-lift
-light
-like
-limit
-line
-linen
-lip
-liquid
-list
-little
-less
-least
-living
-lock
-long
-loose
-loss
-loud
-love
-low
-machine
-make
-male
-man
-manager
-map
-mark
-market
-married
-match
-material
-mass
-may
-meal
-measure
-meat
-medical
-meeting
-memory
-metal
-middle
-military
-milk
-mind
-mine
-minute
-mist
-mixed
-money
-monkey
-month
-moon
-morning
-mother
-motion
-mountain
-mouth
-move
-much
-more
-most
-muscle
-music
-nail
-name
-narrow
-nation
-natural
-near
-necessary
-neck
-need
-needle
-nerve
-net
-new
-news
-night
-no
-noise
-normal
-north
-nose
-not
-note
-now
-number
-nut
-observation
-of
-off
-offer
-office
-oil
-old
-on
-only
-open
-operation
-opposite
-opinion
-other
-or
-orange
-order
-organization
-ornament
-out
-oven
-over
-owner
-page
-pain
-paint
-paper
-parallel
-parcel
-part
-past
-paste
-payment
-peace
-pen
-pencil
-person
-physical
-picture
-pig
-pin
-pipe
-place
-plane
-plant
-plate
-play
-please
-pleasure
-plough/plow
-pocket
-point
-poison
-polish
-political
-poor
-porter
-position
-possible
-pot
-potato
-powder
-power
-present
-price
-print
-prison
-private
-probable
-process
-produce
-profit
-property
-prose
-protest
-public
-pull
-pump
-punishment
-purpose
-push
-put
-quality
-question
-quick
-quiet
-quite
-rail
-rain
-range
-rat
-rate
-ray
-reaction
-red
-reading
-ready
-reason
-receipt
-record
-regret
-regular
-relation
-religion
-representative
-request
-respect
-responsible
-rest
-reward
-rhythm
-rice
-right
-ring
-river
-road
-rod
-roll
-roof
-room
-root
-rough
-round
-rub
-rule
-run
-sad
-safe
-sail
-salt
-same
-sand
-say
-scale
-school
-science
-scissors
-screw
-sea
-seat
-second
-secret
-secretary
-see
-seed
-selection
-self
-send
-seem
-sense
-separate
-serious
-servant
-sex
-shade
-shake
-shame
-sharp
-sheep
-shelf
-ship
-shirt
-shock
-shoe
-short
-shut
-side
-sign
-silk
-silver
-simple
-sister
-size
-skin
-skirt
-sky
-sleep
-slip
-slope
-slow
-small
-smash
-smell
-smile
-smoke
-smooth
-snake
-sneeze
-snow
-so
-soap
-society
-sock
-soft
-solid
-some
-son
-song
-sort
-sound
-south
-soup
-space
-spade
-special
-sponge
-spoon
-spring
-square
-stamp
-stage
-star
-start
-statement
-station
-steam
-stem
-steel
-step
-stick
-still
-stitch
-stocking
-stomach
-stone
-stop
-store
-story
-strange
-street
-stretch
-sticky
-stiff
-straight
-strong
-structure
-substance
-sugar
-suggestion
-summer
-support
-surprise
-such
-sudden
-sun
-sweet
-swim
-system
-table
-tail
-take
-talk
-tall
-taste
-tax
-teaching
-tendency
-test
-than
-that
-the
-then
-theory
-there
-thick
-thin
-thing
-this
-thought
-thread
-throat
-though
-through
-thumb
-thunder
-ticket
-tight
-tired
-till
-time
-tin
-to
-toe
-together
-tomorrow
-tongue
-tooth
-top
-touch
-town
-trade
-train
-transport
-tray
-tree
-trick
-trousers
-true
-trouble
-turn
-twist
-umbrella
-under
-unit
-use
-up
-value
-verse
-very
-vessel
-view
-violent
-voice
-walk
-wall
-waiting
-war
-warm
-wash
-waste
-watch
-water
-wave
-wax
-way
-weather
-week
-weight
-well
-west
-wet
-wheel
-when
-where
-while
-whip
-whistle
-white
-who
-why
-wide
-will
-wind
-window
-wine
-wing
-winter
-wire
-wise
-with
-woman
-wood
-wool
-word
-work
-worm
-wound
-writing
-wrong
-year
-yellow
-yes
-yesterday
-you
-young
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/words/characters
--- a/pycaptcha/Captcha/data/words/characters	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Facade.py
--- a/pycaptcha/Facade.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#!/usr/bin/env python
-
-
-
-from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
-import numpy
-
-# Une fonction simple pour generer un captcha
-# ease : represente la difficulte du captcha a generer 
-#      0 = facile et 1 (ou autre chose) = difficile 
-#solution : specifie si on veut en retour un array numpy representant 
-#l image ou un tuple contenant l'array et la solution du captcha.
-
-# Des fontes additionnelles peuvent etre ajoutees au dossier pyCaptcha/Captcha/data/fonts/others
-# Le programme choisit une fonte aleatoirement dans ce dossier ainsi que le dossir vera.
-
-
-def generateCaptcha (ease=0, solution=0):
-
-    if ease == 1:
-        g = AngryGimpy()
-
-    else:
-        g = PseudoGimpy()
-    
-    i = g.render()
-    a = numpy.asarray(i)
-
-    if solution == 0:
-       return a
-
-    else :
-        return (a, g.solutions)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/README
--- a/pycaptcha/README	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-======================
-Python CAPTCHA package
-======================
-
-About
------
-
-This is the PyCAPTCHA package, a collection of Python modules
-implementing CAPTCHAs: automated tests that humans should pass,
-but current computer programs can't. These tests are often
-used for security.
-
-See  http://www.captcha.net for more information and examples.
-
-This project was started because the CIA project, written in
-Python, needed a CAPTCHA to automate its user creation process
-safely. All existing implementations the author could find were
-written in Java or for the .NET framework, so a simple Python
-alternative was needed.
-
-Examples
---------
-
-Included are several example programs:
-
-  - simple_example.py is a bare-bones example that just generates
-    and displays an image.
-
-  - http_example.py is a longer example that uses BaseHTTPServer
-    to simulate a CAPTCHA's use in a web environment. Running this
-    example and connecting to it from your web browser is a quick
-    and easy way to see PyCAPTCHA in action
-
-  - modpython_example.py is a version of http_example that runs
-    from an Apache server equipped with a properly configured
-    mod_python.
-
-
-Dependencies
-------------
-
-- Python 2.2.1 or later
-- the Python Imaging Library, required for visual CAPTCHAs
-
-
-Contacts
---------
-
-Micah Dowty <micah@navi.cx>
-
-'scanline' on irc.freenode.net
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/http_example.py
--- a/pycaptcha/http_example.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-#!/usr/bin/env python
-#
-# An example that presents CAPTCHA tests in a web environment
-# and gives the user a chance to solve them. Run it, optionally
-# specifying a port number on the command line, then point your web
-# browser at the given URL.
-#
-
-from Captcha.Visual import Tests
-from Captcha import Factory
-import BaseHTTPServer, urlparse, sys
-
-
-class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
-    def do_GET(self):
-        scheme, host, path, parameters, query, fragment = urlparse.urlparse(self.path)
-
-        # Split the path into segments
-        pathSegments = path.split('/')[1:]
-
-        # Split the query into key-value pairs
-        args = {}
-        for pair in query.split("&"):
-            if pair.find("=") >= 0:
-                key, value = pair.split("=", 1)
-                args.setdefault(key, []).append(value)
-            else:
-                args[pair] = []
-
-        # A hack so it works with a proxy configured for VHostMonster :)
-        if pathSegments[0] == "vhost":
-            pathSegments = pathSegments[3:]
-
-        if pathSegments[0] == "":
-            self.handleRootPage(args.get('test', Tests.__all__)[0])
-
-        elif pathSegments[0] == "images":
-            self.handleImagePage(pathSegments[1])
-
-        elif pathSegments[0] == "solutions":
-            self.handleSolutionPage(pathSegments[1], args['word'][0])
-
-        else:
-            self.handle404()
-
-    def handle404(self):
-        self.send_response(404)
-        self.send_header("Content-Type", "text/html")
-        self.end_headers()
-        self.wfile.write("<html><body><h1>No such resource</h1></body></html>")
-
-    def handleRootPage(self, testName):
-        self.send_response(200)
-        self.send_header("Content-Type", "text/html")
-        self.end_headers()
-
-        test = self.captchaFactory.new(getattr(Tests, testName))
-
-        # Make a list of tests other than the one we're using
-        others = []
-        for t in Tests.__all__:
-            if t != testName:
-                others.append('<li><a href="/?test=%s">%s</a></li>' % (t,t))
-        others = "\n".join(others)
-
-        self.wfile.write("""<html>
-<head>
-<title>PyCAPTCHA Example</title>
-</head>
-<body>
-<h1>PyCAPTCHA Example</h1>
-<p>
-  <b>%s</b>:
-  %s
-</p>
-
-<p><img src="/images/%s"/></p>
-<p>
-  <form action="/solutions/%s" method="get">
-    Enter the word shown:
-    <input type="text" name="word"/>
-  </form>
-</p>
-
-<p>
-Or try...
-<ul>
-%s
-</ul>
-</p>
-
-</body>
-</html>
-""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others))
-
-    def handleImagePage(self, id):
-        test = self.captchaFactory.get(id)
-        if not test:
-            return self.handle404()
-
-        self.send_response(200)
-        self.send_header("Content-Type", "image/jpeg")
-        self.end_headers()
-        test.render().save(self.wfile, "JPEG")
-
-    def handleSolutionPage(self, id, word):
-        test = self.captchaFactory.get(id)
-        if not test:
-            return self.handle404()
-
-        if not test.valid:
-            # Invalid tests will always return False, to prevent
-            # random trial-and-error attacks. This could be confusing to a user...
-            result = "Test invalidated, try another test"
-        elif test.testSolutions([word]):
-            result = "Correct"
-        else:
-            result = "Incorrect"
-
-        self.send_response(200)
-        self.send_header("Content-Type", "text/html")
-        self.end_headers()
-        self.wfile.write("""<html>
-<head>
-<title>PyCAPTCHA Example</title>
-</head>
-<body>
-<h1>PyCAPTCHA Example</h1>
-<h2>%s</h2>
-<p><img src="/images/%s"/></p>
-<p><b>%s</b></p>
-<p>You guessed: %s</p>
-<p>Possible solutions: %s</p>
-<p><a href="/">Try again</a></p>
-</body>
-</html>
-""" % (test.__class__.__name__, test.id, result, word, ", ".join(test.solutions)))
-
-
-def main(port):
-    print "Starting server at http://localhost:%d/" % port
-    handler = RequestHandler
-    handler.captchaFactory = Factory()
-    BaseHTTPServer.HTTPServer(('', port), RequestHandler).serve_forever()
-
-if __name__ == "__main__":
-    # The port number can be specified on the command line, default is 8080
-    if len(sys.argv) >= 2:
-        port = int(sys.argv[1])
-    else:
-        port = 8080
-    main(port)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/modpython_example.py
--- a/pycaptcha/modpython_example.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-#
-# An example that presents CAPTCHA tests in a web environment
-# and gives the user a chance to solve them.
-#
-# This example is for use with Apache using mod_python and its
-# Publisher handler. For example, if your apache configuration
-# included something like:
-#
-#   AddHandler python-program .py
-#   PythonHandler mod_python.publisher
-#
-# You could place this script anywhere in your web space to see
-# the demo.
-#
-# --Micah <micah@navi.cx>
-#
-
-from Captcha.Visual import Tests
-import Captcha
-from mod_python import apache
-
-
-def _getFactory(req):
-    return Captcha.PersistentFactory("/tmp/pycaptcha_%s" % req.interpreter)
-
-
-def test(req, name=Tests.__all__[0]):
-    """Show a newly generated CAPTCHA of the given class.
-       Default is the first class name given in Tests.__all__
-       """
-    test = _getFactory(req).new(getattr(Tests, name))
-
-    # Make a list of tests other than the one we're using
-    others = []
-    for t in Tests.__all__:
-        if t != name:
-            others.append('<li><a href="?name=%s">%s</a></li>' % (t,t))
-    others = "\n".join(others)
-
-    return """<html>
-<head>
-<title>PyCAPTCHA Example</title>
-</head>
-<body>
-<h1>PyCAPTCHA Example (for mod_python)</h1>
-<p>
-  <b>%s</b>:
-  %s
-</p>
-
-<p><img src="image?id=%s"/></p>
-<p>
-  <form action="solution" method="get">
-    Enter the word shown:
-    <input type="text" name="word"/>
-    <input type="hidden" name="id" value="%s"/>
-  </form>
-</p>
-
-<p>
-Or try...
-<ul>
-%s
-</ul>
-</p>
-
-</body>
-</html>
-""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others)
-
-
-def image(req, id):
-    """Generate an image for the CAPTCHA with the given ID string"""
-    test = _getFactory(req).get(id)
-    if not test:
-        raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
-    req.content_type = "image/jpeg"
-    test.render().save(req, "JPEG")
-    return apache.OK
-
-
-def solution(req, id, word):
-    """Grade a CAPTCHA given a solution word"""
-    test = _getFactory(req).get(id)
-    if not test:
-        raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
-
-    if not test.valid:
-        # Invalid tests will always return False, to prevent
-        # random trial-and-error attacks. This could be confusing to a user...
-        result = "Test invalidated, try another test"
-    elif test.testSolutions([word]):
-        result = "Correct"
-    else:
-        result = "Incorrect"
-
-    return """<html>
-<head>
-<title>PyCAPTCHA Example</title>
-</head>
-<body>
-<h1>PyCAPTCHA Example</h1>
-<h2>%s</h2>
-<p><img src="image?id=%s"/></p>
-<p><b>%s</b></p>
-<p>You guessed: %s</p>
-<p>Possible solutions: %s</p>
-<p><a href="test">Try again</a></p>
-</body>
-</html>
-""" % (test.__class__.__name__, test.id, result, word, ", ".join(test.solutions))
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/output.png
Binary file pycaptcha/output.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/setup.py
--- a/pycaptcha/setup.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-from distutils.core import setup
-from setup.my_install_data import *
-
-setup (name = "PyCAPTCHA",
-       version = "0.4",
-       description = "A Python framework for CAPTCHA tests",
-       maintainer = "Micah Dowty",
-       maintainer_email = "micah@navi.cx",
-       license = "MIT",
-       packages = [
-           'Captcha',
-           'Captcha.Visual',
-       ],
-       cmdclass = {
-           'install_data': my_install_data,
-       },
-       data_files = [Data_Files(
-           preserve_path = 1,
-           base_dir      = 'install_lib',
-           copy_to       = 'Captcha/data',
-           strip_dirs    = 2,
-           template      = [
-               'graft Captcha/data',
-           ],
-       )],
-       )
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/setup/__init__.py
--- a/pycaptcha/setup/__init__.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-# Extra modules for use with distutils
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/setup/my_install_data.py
--- a/pycaptcha/setup/my_install_data.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-"""my_install_data.py
-
-Provides a more sophisticated facility to install data files
-than distutils' install_data does.
-You can specify your files as a template like in MANIFEST.in
-and you have more control over the copy process.
-
-Copyright 2000 by Rene Liebscher, Germany.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-Note:
-This licence is only for this file.
-PyOpenGL has its own licence. (it is almost identical.)
-"""
-
-# created 2000/08/01, Rene Liebscher <R.Liebscher@gmx.de>
-
-###########################################################################
-# import some modules we need
-
-import os,sys,string
-from types import StringType,TupleType,ListType
-from distutils.util import change_root
-from distutils.filelist import FileList
-from distutils.command.install_data import install_data
-
-###########################################################################
-# a container class for our more sophisticated install mechanism
-
-class Data_Files:
-    """ container for list of data files.
-        supports alternate base_dirs e.g. 'install_lib','install_header',...
-        supports a directory where to copy files
-        supports templates as in MANIFEST.in
-        supports preserving of paths in filenames
-            eg. foo/xyz is copied to base_dir/foo/xyz
-        supports stripping of leading dirs of source paths
-            eg. foo/bar1/xyz, foo/bar2/abc can be copied to bar1/xyz, bar2/abc
-    """
-
-    def __init__(self,base_dir=None,files=None,copy_to=None,template=None,preserve_path=0,strip_dirs=0):
-        self.base_dir = base_dir
-        self.files = files
-        self.copy_to = copy_to
-        self.template = template
-        self.preserve_path = preserve_path
-        self.strip_dirs = strip_dirs
-        self.finalized = 0
-
-    def warn (self, msg):
-        sys.stderr.write ("warning: %s: %s\n" %
-                          ("install_data", msg))
-
-    def debug_print (self, msg):
-        """Print 'msg' to stdout if the global DEBUG (taken from the
-        DISTUTILS_DEBUG environment variable) flag is true.
-        """
-        from distutils.core import DEBUG
-        if DEBUG:
-            print msg
-
-
-    def finalize(self):
-        """ complete the files list by processing the given template """
-        if self.finalized:
-            return
-        if self.files == None:
-            self.files = []
-        if self.template != None:
-            if type(self.template) == StringType:
-                self.template = string.split(self.template,";")
-            filelist = FileList(self.warn,self.debug_print)
-            for line in self.template:
-                filelist.process_template_line(string.strip(line))
-            filelist.sort()
-            filelist.remove_duplicates()
-            self.files.extend(filelist.files)
-        self.finalized = 1
-
-# end class Data_Files
-
-###########################################################################
-# a more sophisticated install routine than distutils install_data
-
-class my_install_data (install_data):
-
-    def check_data(self,d):
-        """ check if data are in new format, if not create a suitable object.
-            returns finalized data object
-        """
-        if not isinstance(d, Data_Files):
-            self.warn(("old-style data files list found "
-                        "-- please convert to Data_Files instance"))
-            if type(d) is TupleType:
-                if len(d) != 2 or  not (type(d[1]) is ListType):
-                        raise DistutilsSetupError, \
-                          ("each element of 'data_files' option must be an "
-                            "Data File instance, a string or 2-tuple (string,[strings])")
-                d = Data_Files(copy_to=d[0],files=d[1])
-            else:
-                if not (type(d) is StringType):
-                        raise DistutilsSetupError, \
-                          ("each element of 'data_files' option must be an "
-                           "Data File instance, a string or 2-tuple (string,[strings])")
-                d = Data_Files(files=[d])
-        d.finalize()
-        return d
-
-    def run(self):
-        self.outfiles = []
-        install_cmd = self.get_finalized_command('install')
-
-        for d in self.data_files:
-            d = self.check_data(d)
-
-            install_dir = self.install_dir
-            # alternative base dir given => overwrite install_dir
-            if d.base_dir != None:
-                install_dir = getattr(install_cmd,d.base_dir)
-
-            # copy to an other directory
-            if d.copy_to != None:
-                if not os.path.isabs(d.copy_to):
-                    # relatiev path to install_dir
-                    dir = os.path.join(install_dir, d.copy_to)
-                elif install_cmd.root:
-                    # absolute path and alternative root set
-                    dir = change_root(self.root,d.copy_to)
-                else:
-                    # absolute path
-                    dir = d.copy_to
-            else:
-                # simply copy to install_dir
-                dir = install_dir
-                # warn if necceassary
-                self.warn("setup script did not provide a directory to copy files to "
-                          " -- installing right in '%s'" % install_dir)
-
-            dir=os.path.normpath(dir)
-            # create path
-            self.mkpath(dir)
-
-            # copy all files
-            for src in d.files:
-                if d.strip_dirs > 0:
-                    dst = string.join(string.split(src,os.sep)[d.strip_dirs:],os.sep)
-                else:
-                    dst = src
-                if d.preserve_path:
-                    # preserve path in filename
-                    self.mkpath(os.path.dirname(os.path.join(dir,dst)))
-                    out = self.copy_file(src, os.path.join(dir,dst))
-                else:
-                    out = self.copy_file(src, dir)
-                if type(out) is TupleType:
-                    out = out[0]
-                self.outfiles.append(out)
-
-        return self.outfiles
-
-    def get_inputs (self):
-        inputs = []
-        for d in self.data_files:
-            d = self.check_data(d)
-            inputs.append(d.files)
-        return inputs
-
-    def get_outputs (self):
-         return self.outfiles
-
-
-###########################################################################
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/simple_example.py
--- a/pycaptcha/simple_example.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-#
-# A very simple example that creates a random image from the
-# PseudoGimpy CAPTCHA, saves and shows it, and prints the list
-# of solutions. Normally you would call testSolutions rather
-# than reading this list yourself.
-#
-from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
-import numpy
-#from numpy import *
-
-#g = AngryGimpy()
-#i = g.render()
-#a = numpy.asarray(i)
-#b = numpy.zeros((2, 2), numpy.int8)
-#c = a == b
-#print c
-#i.save("output.png")
-#i.show()
-#print a
-#print g.solutions
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/test.png
Binary file pycaptcha/test.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/transformations.py
--- a/pycaptcha/transformations.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-
-import Numeric, Image
-    #""" Transforme une image PIL en objet numpy.array et vice versa"""
-
-
-def image2array(im):
-    #""" image vers array numpy"""
-    if im.mode not in ("L", "F"):
-        raise ValueError, "can only convert single-layer images"
-    if im.mode == "L":
-        a = Numeric.fromstring(im.tostring(), Numeric.UnsignedInt8)
-    else:
-        a = Numeric.fromstring(im.tostring(), Numeric.Float32)
-    a.shape = im.size[1], im.size[0]
-    return a
-
-def array2image(a):
-    #""" array numpy vers image"""
-    if a.typecode() == Numeric.UnsignedInt8:
-        mode = "L"
-    elif a.typecode() == Numeric.Float32:
-        mode = "F"
-    else:
-        raise ValueError, "unsupported image mode"
-    return Image.fromstring(mode, (a.shape[1], a.shape[0]), a.tostring())
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/run_pipeline.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/run_pipeline.sh	Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# This is one _ugly_ hack, but I couldn't figure out how
+# to cleanly pass command line options to the script if
+# invoking using the "gimp --batch < script.py" syntax
+
+# Basically I create a temp file, put the args into it
+# (one per line), then the script gets the filename and
+# reads back the args
+
+export PIPELINE_ARGS_TMPFILE="$(mktemp)"
+
+for arg in "$@"
+do
+	printf '%s\n' "$arg" >> "$PIPELINE_ARGS_TMPFILE"
+done
+
+gimp -i --batch-interpreter python-fu-eval --batch - < pipeline.py
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/__init__.py
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/mnist_sda.py
--- a/scripts/stacked_dae/mnist_sda.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-# Parameterize call to sgd_optimization for MNIST
-
-import numpy 
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-
-from sgd_optimization import SdaSgdOptimizer
-import cPickle, gzip
-from jobman import DD
-
-MNIST_LOCATION = '/u/savardf/datasets/mnist.pkl.gz'
-
-def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 2, \
-                            pretrain_lr = 0.1, training_epochs = 5, \
-                            dataset='mnist.pkl.gz'):
-    # Load the dataset 
-    f = gzip.open(dataset,'rb')
-    # this gives us train, valid, test (each with .x, .y)
-    dataset = cPickle.load(f)
-    f.close()
-
-    n_ins = 28*28
-    n_outs = 10
-
-    hyperparameters = DD({'finetuning_lr':learning_rate,
-                       'pretraining_lr':pretrain_lr,
-                       'pretraining_epochs_per_layer':pretraining_epochs,
-                       'max_finetuning_epochs':training_epochs,
-                       'hidden_layers_sizes':[100],
-                       'corruption_levels':[0.2],
-                       'minibatch_size':20})
-
-    optimizer = SdaSgdOptimizer(dataset, hyperparameters, n_ins, n_outs)
-    optimizer.pretrain()
-    optimizer.finetune()
-
-if __name__ == '__main__':
-    sgd_optimization_mnist(dataset=MNIST_LOCATION)
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/nist_sda.py
--- a/scripts/stacked_dae/nist_sda.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,264 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-import numpy 
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-import copy
-
-import sys
-import os.path
-
-from sgd_optimization import SdaSgdOptimizer
-
-from jobman import DD
-import jobman, jobman.sql
-from pylearn.io import filetensor
-
-from utils import produit_croise_jobs
-
-TEST_CONFIG = False
-
-NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
-
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
-REDUCE_TRAIN_TO = None
-MAX_FINETUNING_EPOCHS = 1000
-if TEST_CONFIG:
-    JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
-    REDUCE_TRAIN_TO = 1000
-    MAX_FINETUNING_EPOCHS = 2
-
-JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
-JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
-EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"
-
-# There used to be
-# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
-# and
-#  'num_hidden_layers':[1,2,3]
-# but this is now handled by a special mechanism in SgdOptimizer
-# to reuse intermediate results (for the same training of lower layers,
-# we can test many finetuning_lr)
-JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
-        'pretraining_epochs_per_layer': [10,20],
-        'hidden_layers_sizes': [300,800],
-        'corruption_levels': [0.1,0.2],
-        'minibatch_size': [20],
-        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
-FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
-NUM_HIDDEN_LAYERS_VALS = [1,2,3]
-
-# Just useful for tests... minimal number of epochs
-DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
-                       'pretraining_lr':0.01,
-                       'pretraining_epochs_per_layer':1,
-                       'max_finetuning_epochs':1,
-                       'hidden_layers_sizes':[1000],
-                       'corruption_levels':[0.2],
-                       'minibatch_size':20})
-
-def jobman_entrypoint(state, channel):
-    state = copy.copy(state)
-
-    print "Will load NIST"
-    nist = NIST(20)
-    print "NIST loaded"
-
-    rtt = None
-    if state.has_key('reduce_train_to'):
-        rtt = state['reduce_train_to']
-    elif REDUCE_TRAIN_TO:
-        rtt = REDUCE_TRAIN_TO
-
-    if rtt:
-        print "Reducing training set to ", rtt, " examples"
-        nist.reduce_train_set(rtt)
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
-    n_ins = 32*32
-    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
-    db = jobman.sql.db(JOBDB_RESULTS)
-    optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\
-                    input_divider=255.0, job_tree=True, results_db=db, \
-                    experiment=EXPERIMENT_PATH, \
-                    finetuning_lr_to_try=FINETUNING_LR_VALS, \
-                    num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
-    optimizer.train()
-
-    return channel.COMPLETE
-
-def estimate_pretraining_time(job):
-    job = DD(job)
-    # time spent on pretraining estimated as O(n^2) where n=num hidens
-    # no need to multiply by num_hidden_layers, as results from num=1 
-    # is reused for num=2, or 3, so in the end we get the same time
-    # as if we were training 3 times a single layer
-    # constants:
-    # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
-    # - 12 mins to finetune (per 1 epoch)
-    # basically the job_tree trick gives us a 5 times speedup on the
-    # pretraining time due to reusing for finetuning_lr
-    # and gives us a second x2 speedup for reusing previous layers
-    # to explore num_hidden_layers
-    return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \
-            * job.hidden_layer_sizes * job.hidden_layer_sizes)
-
-def estimate_total_time():
-    jobs = produit_croise_jobs(JOB_VALS)
-    sumtime = 0.0
-    sum_without = 0.0
-    for job in jobs:
-        sumtime += estimate_pretraining_time(job)
-        # 12 mins per epoch * 30 epochs
-        # 5 finetuning_lr per pretraining combination
-    sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
-    sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
-    print "num jobs=", len(jobs)
-    print "estimate", sumtime/60, " hours"
-    print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without
-
-def jobman_insert_nist():
-    jobs = produit_croise_jobs(JOB_VALS)
-
-    db = jobman.sql.db(JOBDB_JOBS)
-    for job in jobs:
-        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
-        jobman.sql.insert_dict(job, db)
-
-    print "inserted"
-
-class NIST:
-    def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
-        global NIST_ALL_LOCATION
-
-        self.minibatch_size = minibatch_size
-        self.basepath = basepath and basepath or NIST_ALL_LOCATION
-
-        self.set_filenames()
-
-        # arrays of 2 elements: .x, .y
-        self.train = [None, None]
-        self.test = [None, None]
-
-        self.load_train_test()
-
-        self.valid = [[], []]
-        self.split_train_valid()
-        if reduce_train_to:
-            self.reduce_train_set(reduce_train_to)
-
-    def get_tvt(self):
-        return self.train, self.valid, self.test
-
-    def set_filenames(self):
-        self.train_files = ['all_train_data.ft',
-                                'all_train_labels.ft']
-
-        self.test_files = ['all_test_data.ft',
-                            'all_test_labels.ft']
-
-    def load_train_test(self):
-        self.load_data_labels(self.train_files, self.train)
-        self.load_data_labels(self.test_files, self.test)
-
-    def load_data_labels(self, filenames, pair):
-        for i, fn in enumerate(filenames):
-            f = open(os.path.join(self.basepath, fn))
-            pair[i] = filetensor.read(f)
-            f.close()
-
-    def reduce_train_set(self, max):
-        self.train[0] = self.train[0][:max]
-        self.train[1] = self.train[1][:max]
-
-        if max < len(self.test[0]):
-            for ar in (self.test, self.valid):
-                ar[0] = ar[0][:max]
-                ar[1] = ar[1][:max]
-
-    def split_train_valid(self):
-        test_len = len(self.test[0])
-        
-        new_train_x = self.train[0][:-test_len]
-        new_train_y = self.train[1][:-test_len]
-
-        self.valid[0] = self.train[0][-test_len:]
-        self.valid[1] = self.train[1][-test_len:]
-
-        self.train[0] = new_train_x
-        self.train[1] = new_train_y
-
-def test_load_nist():
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    tr,v,te = nist.get_tvt()
-
-    print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
-
-    raw_input("Press any key")
-
-# hp for hyperparameters
-def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
-    global DEFAULT_HP_NIST
-    hp = hp and hp or DEFAULT_HP_NIST
-
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20, reduce_train_to=100)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
-    print train[0][15]
-    print type(train[0][1])
-
-
-    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
-
-    n_ins = 32*32
-    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
-    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
-    optimizer.train()
-
-if __name__ == '__main__':
-
-    import sys
-
-    args = sys.argv[1:]
-
-    if len(args) > 0 and args[0] == 'load_nist':
-        test_load_nist()
-
-    elif len(args) > 0 and args[0] == 'jobman_insert':
-        jobman_insert_nist()
-    elif len(args) > 0 and args[0] == 'test_job_tree':
-        # dont forget to comment out sql.inserts and make reduce_train_to=100
-        print "TESTING JOB TREE"
-        chanmock = {'COMPLETE':0}
-        hp = copy.copy(DEFAULT_HP_NIST)
-        hp.update({'reduce_train_to':100})
-        jobman_entrypoint(hp, chanmock)
-    elif len(args) > 0 and args[0] == 'estimate':
-        estimate_total_time()
-    else:
-        sgd_optimization_nist()
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/sgd_optimization.py
--- a/scripts/stacked_dae/sgd_optimization.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,270 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-# Generic SdA optimization loop, adapted from the deeplearning.net tutorial
-
-import numpy 
-import theano
-import time
-import theano.tensor as T
-import copy
-import sys
-
-from jobman import DD
-import jobman, jobman.sql
-
-from stacked_dae import SdA
-
-def shared_dataset(data_xy):
-    data_x, data_y = data_xy
-    #shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-    #shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-    #shared_y = T.cast(shared_y, 'int32')
-    shared_x = theano.shared(data_x)
-    shared_y = theano.shared(data_y)
-    return shared_x, shared_y
-
-class SdaSgdOptimizer:
-    def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\
-                job_tree=False, results_db=None,\
-                experiment="",\
-                num_hidden_layers_to_try=[1,2,3], \
-                finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]):
-
-        self.dataset = dataset
-        self.hp = copy.copy(hyperparameters)
-        self.n_ins = n_ins
-        self.n_outs = n_outs
-        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
-
-        self.job_tree = job_tree
-        self.results_db = results_db
-        self.experiment = experiment
-        if self.job_tree:
-            assert(not results_db is None)
-            # these hp should not be there, so we insert default values
-            # we use 3 hidden layers as we'll iterate through 1,2,3
-            self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway
-            cl = self.hp.corruption_levels
-            nh = self.hp.hidden_layers_sizes
-            self.hp.corruption_levels = [cl,cl,cl]
-            self.hp.hidden_layers_sizes = [nh,nh,nh]
-            
-        self.num_hidden_layers_to_try = num_hidden_layers_to_try
-        self.finetuning_lr_to_try = finetuning_lr_to_try
-
-        self.printout_frequency = 1000
-
-        self.rng = numpy.random.RandomState(1234)
-
-        self.init_datasets()
-        self.init_classifier()
-     
-    def init_datasets(self):
-        print "init_datasets"
-        train_set, valid_set, test_set = self.dataset
-        self.test_set_x, self.test_set_y = shared_dataset(test_set)
-        self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
-        self.train_set_x, self.train_set_y = shared_dataset(train_set)
-
-        # compute number of minibatches for training, validation and testing
-        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
-        self.n_test_batches  = self.test_set_x.value.shape[0]  / self.hp.minibatch_size
-
-    def init_classifier(self):
-        print "Constructing classifier"
-        # construct the stacked denoising autoencoder class
-        self.classifier = SdA( \
-                          train_set_x= self.train_set_x, \
-                          train_set_y = self.train_set_y,\
-                          batch_size = self.hp.minibatch_size, \
-                          n_ins= self.n_ins, \
-                          hidden_layers_sizes = self.hp.hidden_layers_sizes, \
-                          n_outs = self.n_outs, \
-                          corruption_levels = self.hp.corruption_levels,\
-                          rng = self.rng,\
-                          pretrain_lr = self.hp.pretraining_lr, \
-                          finetune_lr = self.hp.finetuning_lr,\
-                          input_divider = self.input_divider )
-
-    def train(self):
-        self.pretrain()
-        if not self.job_tree:
-            # if job_tree is True, finetuning was already performed
-            self.finetune()
-
-    def pretrain(self):
-        print "STARTING PRETRAINING"
-
-        printout_acc = 0.0
-        last_error = 0.0
-
-        start_time = time.clock()  
-        ## Pre-train layer-wise 
-        for i in xrange(self.classifier.n_layers):
-            # go through pretraining epochs 
-            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
-                # go through the training set
-                for batch_index in xrange(self.n_train_batches):
-                    c = self.classifier.pretrain_functions[i](batch_index)
-
-                    printout_acc += c / self.printout_frequency
-                    if (batch_index+1) % self.printout_frequency == 0:
-                        print batch_index, "reconstruction cost avg=", printout_acc
-                        last_error = printout_acc
-                        printout_acc = 0.0
-                        
-                print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
-
-            self.job_splitter(i+1, time.clock()-start_time, last_error)
-     
-        end_time = time.clock()
-
-        print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
-
-    # Save time by reusing intermediate results
-    def job_splitter(self, current_pretraining_layer, pretraining_time, last_error):
-
-        state_copy = None
-        original_classifier = None
-
-        if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try:
-            for lr in self.finetuning_lr_to_try:
-                sys.stdout.flush()
-                sys.stderr.flush()
-
-                state_copy = copy.copy(self.hp)
-
-                self.hp.update({'num_hidden_layers':current_pretraining_layer, \
-                            'finetuning_lr':lr,\
-                            'pretraining_time':pretraining_time,\
-                            'last_reconstruction_error':last_error})
-
-                original_classifier = self.classifier
-                print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means()
-                self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr)
-                
-                self.finetune()
-            
-                self.insert_finished_job()
-
-                print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means()
-                print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means()
-                self.classifier = original_classifier
-                self.hp = state_copy
-
-    def insert_finished_job(self):
-        job = copy.copy(self.hp)
-        job[jobman.sql.STATUS] = jobman.sql.DONE
-        job[jobman.sql.EXPERIMENT] = self.experiment
-
-        # don,t try to store arrays in db
-        job['hidden_layers_sizes'] = job.hidden_layers_sizes[0]
-        job['corruption_levels'] = job.corruption_levels[0]
-
-        print "Will insert finished job", job
-        jobman.sql.insert_dict(jobman.flatten(job), self.results_db)
-
-    def finetune(self):
-        print "STARTING FINETUNING"
-
-        index   = T.lscalar()    # index to a [mini]batch 
-        minibatch_size = self.hp.minibatch_size
-
-        # create a function to compute the mistakes that are made by the model
-        # on the validation set, or testing set
-        test_model = theano.function([index], self.classifier.errors,
-                 givens = {
-                   self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
-                   self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-
-        validate_model = theano.function([index], self.classifier.errors,
-                givens = {
-                   self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
-                   self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-
-
-        # early-stopping parameters
-        patience              = 10000 # look as this many examples regardless
-        patience_increase     = 2.    # wait this much longer when a new best is 
-                                      # found
-        improvement_threshold = 0.995 # a relative improvement of this much is 
-                                      # considered significant
-        validation_frequency  = min(self.n_train_batches, patience/2)
-                                      # go through this many 
-                                      # minibatche before checking the network 
-                                      # on the validation set; in this case we 
-                                      # check every epoch 
-
-        best_params          = None
-        best_validation_loss = float('inf')
-        test_score           = 0.
-        start_time = time.clock()
-
-        done_looping = False
-        epoch = 0
-
-        printout_acc = 0.0
-
-        if not self.hp.has_key('max_finetuning_epochs'):
-            self.hp.max_finetuning_epochs = 1000
-
-        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
-            epoch = epoch + 1
-            for minibatch_index in xrange(self.n_train_batches):
-
-                cost_ij = self.classifier.finetune(minibatch_index)
-                iter    = epoch * self.n_train_batches + minibatch_index
-
-                printout_acc += cost_ij / float(self.printout_frequency * minibatch_size)
-                if (iter+1) % self.printout_frequency == 0:
-                    print iter, "cost avg=", printout_acc
-                    printout_acc = 0.0
-
-                if (iter+1) % validation_frequency == 0: 
-                    
-                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
-                    this_validation_loss = numpy.mean(validation_losses)
-                    print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                           (epoch, minibatch_index+1, self.n_train_batches, \
-                            this_validation_loss*100.))
-
-
-                    # if we got the best validation score until now
-                    if this_validation_loss < best_validation_loss:
-
-                        #improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss *  \
-                               improvement_threshold :
-                            patience = max(patience, iter * patience_increase)
-
-                        # save best validation score and iteration number
-                        best_validation_loss = this_validation_loss
-                        best_iter = iter
-
-                        # test it on the test set
-                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
-                        test_score = numpy.mean(test_losses)
-                        print(('     epoch %i, minibatch %i/%i, test error of best '
-                              'model %f %%') % 
-                                     (epoch, minibatch_index+1, self.n_train_batches,
-                                      test_score*100.))
-
-
-            if patience <= iter :
-                done_looping = True
-                break
-
-        end_time = time.clock()
-        self.hp.update({'finetuning_time':end_time-start_time,\
-                    'best_validation_error':best_validation_loss,\
-                    'test_score':test_score,
-                    'num_finetuning_epochs':epoch})
-        print(('Optimization complete with best validation score of %f %%,'
-               'with test performance %f %%') %  
-                     (best_validation_loss * 100., test_score*100.))
-        print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
-
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/stacked_convolutional_dae.py
--- a/scripts/stacked_dae/stacked_convolutional_dae.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,415 +0,0 @@
-import numpy
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-import theano.sandbox.softsign
-
-from theano.tensor.signal import downsample
-from theano.tensor.nnet import conv 
-import gzip
-import cPickle
- 
- 
-class LogisticRegression(object):
- 
-    def __init__(self, input, n_in, n_out):
- 
-        self.W = theano.shared( value=numpy.zeros((n_in,n_out),
-                                            dtype = theano.config.floatX) )
-
-        self.b = theano.shared( value=numpy.zeros((n_out,),
-                                            dtype = theano.config.floatX) )
-
-        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
-        
-
-        self.y_pred=T.argmax(self.p_y_given_x, axis=1)
- 
-        self.params = [self.W, self.b]
- 
-    def negative_log_likelihood(self, y):
-        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
- 
-    def MSE(self, y):
-        return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2)
-
-    def errors(self, y):
-        if y.ndim != self.y_pred.ndim:
-            raise TypeError('y should have the same shape as self.y_pred',
-                ('y', target.type, 'y_pred', self.y_pred.type))
- 
-
-        if y.dtype.startswith('int'):
-            return T.mean(T.neq(self.y_pred, y))
-        else:
-            raise NotImplementedError()
- 
- 
-class SigmoidalLayer(object):
-    def __init__(self, rng, input, n_in, n_out):
-
-        self.input = input
- 
-        W_values = numpy.asarray( rng.uniform( \
-              low = -numpy.sqrt(6./(n_in+n_out)), \
-              high = numpy.sqrt(6./(n_in+n_out)), \
-              size = (n_in, n_out)), dtype = theano.config.floatX)
-        self.W = theano.shared(value = W_values)
- 
-        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
-        self.b = theano.shared(value= b_values)
- 
-        self.output = T.tanh(T.dot(input, self.W) + self.b)
-        self.params = [self.W, self.b]
- 
-class dA_conv(object):
- 
-  def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\
-                   shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)):
-
-    theano_rng = RandomStreams()
-    
-    fan_in = numpy.prod(filter_shape[1:])
-    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
-
-    center = theano.shared(value = 1, name="center")
-    scale = theano.shared(value = 2, name="scale")
-
-    if shared_W != None and shared_b != None :
-        self.W = shared_W
-        self.b = shared_b
-    else:
-        initial_W = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(fan_in+fan_out)), \
-              high = numpy.sqrt(6./(fan_in+fan_out)), \
-              size = filter_shape), dtype = theano.config.floatX)
-        initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
-    
-    
-        self.W = theano.shared(value = initial_W, name = "W")
-        self.b = theano.shared(value = initial_b, name = "b")
-    
- 
-    initial_b_prime= numpy.zeros((filter_shape[1],))
-        
-    self.W_prime=T.dtensor4('W_prime')
-
-    self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
- 
-    self.x = input
-
-    self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
-
-    conv1_out = conv.conv2d(self.tilde_x, self.W, \
-                             filter_shape=filter_shape, \
-                                image_shape=image_shape, border_mode='valid')
-
-    
-    self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
-
-    
-    da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
-                       filter_shape[3] ]
-    da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \
-                         image_shape[3]-filter_shape[3]+1 ]
-    initial_W_prime =  numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(fan_in+fan_out)), \
-              high = numpy.sqrt(6./(fan_in+fan_out)), \
-              size = da_filter_shape), dtype = theano.config.floatX)
-    self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
-
-    #import pdb;pdb.set_trace()
-
-    conv2_out = conv.conv2d(self.y, self.W_prime, \
-                               filter_shape = da_filter_shape, image_shape = da_image_shape ,\
-                                border_mode='full')
-
-    self.z =  (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
-
-    scaled_x = (self.x + center) / scale
-
-    self.L = - T.sum( scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1 )
-
-    self.cost = T.mean(self.L)
-
-    self.params = [ self.W, self.b, self.b_prime ] 
- 
- 
-
-class LeNetConvPoolLayer(object):
-    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
-        assert image_shape[1]==filter_shape[1]
-        self.input = input
-  
-        W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
-        self.W = theano.shared(value = W_values)
- 
-        b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
-        self.b = theano.shared(value= b_values)
- 
-        conv_out = conv.conv2d(input, self.W,
-                filter_shape=filter_shape, image_shape=image_shape)
- 
-
-        fan_in = numpy.prod(filter_shape[1:])
-        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
-
-        W_bound = numpy.sqrt(6./(fan_in + fan_out))
-        self.W.value = numpy.asarray(
-                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
-                dtype = theano.config.floatX)
-  
-
-        pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True)
- 
-        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
-        self.params = [self.W, self.b]
- 
-
-class SdA():
-    def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \
-                     conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \
-                     rng, n_out, pretrain_lr, finetune_lr):
-
-        self.layers = []
-        self.pretrain_functions = []
-        self.params = []
-        self.conv_n_layers = len(conv_hidden_layers_sizes)
-        self.mlp_n_layers = len(mlp_hidden_layers_sizes)
-         
-        index = T.lscalar() # index to a [mini]batch
-        self.x = T.dmatrix('x') # the data is presented as rasterized images
-        self.y = T.ivector('y') # the labels are presented as 1D vector of
-        
- 
-        
-        for i in xrange( self.conv_n_layers ):
-
-            filter_shape=conv_hidden_layers_sizes[i][0]
-            image_shape=conv_hidden_layers_sizes[i][1]
-            max_poolsize=conv_hidden_layers_sizes[i][2]
-                
-            if i == 0 :
-                layer_input=self.x.reshape((batch_size,1,28,28))
-            else:
-                layer_input=self.layers[-1].output
-
-            layer = LeNetConvPoolLayer(rng, input=layer_input, \
-                                image_shape=image_shape, \
-                                filter_shape=filter_shape,poolsize=max_poolsize)
-            print 'Convolutional layer '+str(i+1)+' created'
-                
-            self.layers += [layer]
-            self.params += layer.params
-                
-            da_layer = dA_conv(corruption_level = corruption_levels[0],\
-                                  input = layer_input, \
-                                  shared_W = layer.W, shared_b = layer.b,\
-                                  filter_shape = filter_shape , image_shape = image_shape )
-                
-                
-            gparams = T.grad(da_layer.cost, da_layer.params)
-                
-            updates = {}
-            for param, gparam in zip(da_layer.params, gparams):
-                    updates[param] = param - gparam * pretrain_lr
-                    
-                
-            update_fn = theano.function([index], da_layer.cost, \
-                                        updates = updates,
-                                        givens = {
-                    self.x : train_set_x[index*batch_size:(index+1)*batch_size]} )
-             
-            self.pretrain_functions += [update_fn]
-
-        for i in xrange( self.mlp_n_layers ): 
-            if i == 0 :
-                input_size = n_ins_mlp
-            else:
-                input_size = mlp_hidden_layers_sizes[i-1]
-
-            if i == 0 :
-                if len( self.layers ) == 0 :
-                    layer_input=self.x
-                else :
-                    layer_input = self.layers[-1].output.flatten(2)
-            else:
-                layer_input = self.layers[-1].output
-     
-            layer = SigmoidalLayer(rng, layer_input, input_size,
-                                        mlp_hidden_layers_sizes[i] )
-              
-            self.layers += [layer]
-            self.params += layer.params
-            
-
-            print 'MLP layer '+str(i+1)+' created'
-            
-        self.logLayer = LogisticRegression(input=self.layers[-1].output, \
-                                                     n_in=mlp_hidden_layers_sizes[-1], n_out=n_out)
-        self.params += self.logLayer.params
-
-        cost = self.logLayer.negative_log_likelihood(self.y)
-
-        gparams = T.grad(cost, self.params)
-        updates = {}
-
-        for param,gparam in zip(self.params, gparams):
-            updates[param] = param - gparam*finetune_lr
-            
-        self.finetune = theano.function([index], cost,
-                updates = updates,
-                givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size],
-                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
- 
-
-        self.errors = self.logLayer.errors(self.y)
- 
- 
- 
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
-                            pretrain_lr = 0.01, training_epochs = 1000, \
-                            dataset='mnist.pkl.gz'):
-
-    f = gzip.open(dataset,'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
-    f.close()
- 
- 
-    def shared_dataset(data_xy):
-        data_x, data_y = data_xy
-        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
-        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
-        return shared_x, T.cast(shared_y, 'int32')
- 
-
-    test_set_x, test_set_y = shared_dataset(test_set)
-    valid_set_x, valid_set_y = shared_dataset(valid_set)
-    train_set_x, train_set_y = shared_dataset(train_set)
- 
-    batch_size = 500 # size of the minibatch
- 
-
-    n_train_batches = train_set_x.value.shape[0] / batch_size
-    n_valid_batches = valid_set_x.value.shape[0] / batch_size
-    n_test_batches = test_set_x.value.shape[0] / batch_size
- 
-    # allocate symbolic variables for the data
-    index = T.lscalar() # index to a [mini]batch
-    x = T.matrix('x') # the data is presented as rasterized images
-    y = T.ivector('y') # the labels are presented as 1d vector of
-                           # [int] labels
-    layer0_input = x.reshape((batch_size,1,28,28))
-    
-
-    # Setup the convolutional layers with their DAs(add as many as you want)
-    corruption_levels = [ 0.2, 0.2, 0.2]
-    rng = numpy.random.RandomState(1234)
-    ker1=2
-    ker2=2
-    conv_layers=[]
-    conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ])
-    conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ])
-
-    # Setup the MLP layers of the network
-    mlp_layers=[500]
-  
-    network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \
-                      train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size,
-                      conv_hidden_layers_sizes = conv_layers,  \
-                      mlp_hidden_layers_sizes = mlp_layers, \
-                      corruption_levels = corruption_levels , n_out = 10, \
-                      rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate )
-
-    test_model = theano.function([index], network.errors,
-             givens = {
-                network.x: test_set_x[index*batch_size:(index+1)*batch_size],
-                network.y: test_set_y[index*batch_size:(index+1)*batch_size]})
- 
-    validate_model = theano.function([index], network.errors,
-           givens = {
-                network.x: valid_set_x[index*batch_size:(index+1)*batch_size],
-                network.y: valid_set_y[index*batch_size:(index+1)*batch_size]})
-
-
-
-    start_time = time.clock()
-    for i in xrange(len(network.layers)-len(mlp_layers)):
-        for epoch in xrange(pretraining_epochs):
-            for batch_index in xrange(n_train_batches):
-                c = network.pretrain_functions[i](batch_index)
-            print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c
-
-    patience = 10000 # look as this many examples regardless
-    patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS
-                                  # FOUND
-    improvement_threshold = 0.995 # a relative improvement of this much is
-
-    validation_frequency = min(n_train_batches, patience/2)
- 
- 
-    best_params = None
-    best_validation_loss = float('inf')
-    test_score = 0.
-    start_time = time.clock()
- 
-    done_looping = False
-    epoch = 0
- 
-    while (epoch < training_epochs) and (not done_looping):
-      epoch = epoch + 1
-      for minibatch_index in xrange(n_train_batches):
- 
-        cost_ij = network.finetune(minibatch_index)
-        iter = epoch * n_train_batches + minibatch_index
- 
-        if (iter+1) % validation_frequency == 0:
-            
-            validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
-            this_validation_loss = numpy.mean(validation_losses)
-            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
-                   (epoch, minibatch_index+1, n_train_batches, \
-                    this_validation_loss*100.))
- 
- 
-            # if we got the best validation score until now
-            if this_validation_loss < best_validation_loss:
- 
-                #improve patience if loss improvement is good enough
-                if this_validation_loss < best_validation_loss * \
-                       improvement_threshold :
-                    patience = max(patience, iter * patience_increase)
- 
-                # save best validation score and iteration number
-                best_validation_loss = this_validation_loss
-                best_iter = iter
- 
-                # test it on the test set
-                test_losses = [test_model(i) for i in xrange(n_test_batches)]
-                test_score = numpy.mean(test_losses)
-                print((' epoch %i, minibatch %i/%i, test error of best '
-                      'model %f %%') %
-                             (epoch, minibatch_index+1, n_train_batches,
-                              test_score*100.))
- 
- 
-        if patience <= iter :
-                done_looping = True
-                break
- 
-    end_time = time.clock()
-    print(('Optimization complete with best validation score of %f %%,'
-           'with test performance %f %%') %
-                 (best_validation_loss * 100., test_score*100.))
-    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
- 
- 
- 
- 
- 
- 
-if __name__ == '__main__':
-    sgd_optimization_mnist()
- 
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/stacked_dae.py
--- a/scripts/stacked_dae/stacked_dae.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,287 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-import numpy 
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-import copy
-
-from utils import update_locals
-
-class LogisticRegression(object):
-    def __init__(self, input, n_in, n_out):
-        # initialize with 0 the weights W as a matrix of shape (n_in, n_out) 
-        self.W = theano.shared( value=numpy.zeros((n_in,n_out),
-                                            dtype = theano.config.floatX) )
-        # initialize the baises b as a vector of n_out 0s
-        self.b = theano.shared( value=numpy.zeros((n_out,), 
-                                            dtype = theano.config.floatX) )
-        # compute vector of class-membership probabilities in symbolic form
-        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
-        
-        # compute prediction as class whose probability is maximal in 
-        # symbolic form
-        self.y_pred=T.argmax(self.p_y_given_x, axis=1)
-
-        # list of parameters for this layer
-        self.params = [self.W, self.b]
-
-    def negative_log_likelihood(self, y):
-       return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
-
-    def errors(self, y):
-        # check if y has same dimension of y_pred 
-        if y.ndim != self.y_pred.ndim:
-            raise TypeError('y should have the same shape as self.y_pred', 
-                ('y', target.type, 'y_pred', self.y_pred.type))
-
-        # check if y is of the correct datatype        
-        if y.dtype.startswith('int'):
-            # the T.neq operator returns a vector of 0s and 1s, where 1
-            # represents a mistake in prediction
-            return T.mean(T.neq(self.y_pred, y))
-        else:
-            raise NotImplementedError()
-
-
-class SigmoidalLayer(object):
-    def __init__(self, rng, input, n_in, n_out):
-        self.input = input
-
-        W_values = numpy.asarray( rng.uniform( \
-              low = -numpy.sqrt(6./(n_in+n_out)), \
-              high = numpy.sqrt(6./(n_in+n_out)), \
-              size = (n_in, n_out)), dtype = theano.config.floatX)
-        self.W = theano.shared(value = W_values)
-
-        b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
-        self.b = theano.shared(value= b_values)
-
-        self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
-        self.params = [self.W, self.b]
-
-
-
-class dA(object):
-  def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\
-               input = None, shared_W = None, shared_b = None):
-    self.n_visible = n_visible
-    self.n_hidden  = n_hidden
-    
-    # create a Theano random generator that gives symbolic random values
-    theano_rng = RandomStreams()
-    
-    if shared_W != None and shared_b != None : 
-        self.W = shared_W
-        self.b = shared_b
-    else:
-        # initial values for weights and biases
-        # note : W' was written as `W_prime` and b' as `b_prime`
-
-        # W is initialized with `initial_W` which is uniformely sampled
-        # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible)
-        # the output of uniform if converted using asarray to dtype 
-        # theano.config.floatX so that the code is runable on GPU
-        initial_W = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(n_hidden+n_visible)), \
-              high = numpy.sqrt(6./(n_hidden+n_visible)), \
-              size = (n_visible, n_hidden)), dtype = theano.config.floatX)
-        initial_b       = numpy.zeros(n_hidden, dtype = theano.config.floatX)
-    
-    
-        # theano shared variables for weights and biases
-        self.W       = theano.shared(value = initial_W,       name = "W")
-        self.b       = theano.shared(value = initial_b,       name = "b")
-    
- 
-    initial_b_prime= numpy.zeros(n_visible)
-    # tied weights, therefore W_prime is W transpose
-    self.W_prime = self.W.T 
-    self.b_prime = theano.shared(value = initial_b_prime, name = "b'")
-
-    # if no input is given, generate a variable representing the input
-    if input == None : 
-        # we use a matrix because we expect a minibatch of several examples,
-        # each example being a row
-        self.x = T.dmatrix(name = 'input') 
-    else:
-        self.x = input
-    # Equation (1)
-    # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs
-    # note : first argument of theano.rng.binomial is the shape(size) of 
-    #        random numbers that it should produce
-    #        second argument is the number of trials 
-    #        third argument is the probability of success of any trial
-    #
-    #        this will produce an array of 0s and 1s where 1 has a 
-    #        probability of 1 - ``corruption_level`` and 0 with
-    #        ``corruption_level``
-    self.tilde_x  = theano_rng.binomial( self.x.shape,  1,  1 - corruption_level) * self.x
-    # Equation (2)
-    # note  : y is stored as an attribute of the class so that it can be 
-    #         used later when stacking dAs. 
-    self.y   = T.nnet.sigmoid(T.dot(self.tilde_x, self.W      ) + self.b)
-    # Equation (3)
-    self.z   = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
-    # Equation (4)
-    # note : we sum over the size of a datapoint; if we are using minibatches,
-    #        L will  be a vector, with one entry per example in minibatch
-    self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
-    # note : L is now a vector, where each element is the cross-entropy cost 
-    #        of the reconstruction of the corresponding example of the 
-    #        minibatch. We need to compute the average of all these to get 
-    #        the cost of the minibatch
-    self.cost = T.mean(self.L)
-
-    self.params = [ self.W, self.b, self.b_prime ]
-
-
-
-
-class SdA(object):
-    def __init__(self, train_set_x, train_set_y, batch_size, n_ins, 
-                 hidden_layers_sizes, n_outs, 
-                 corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
-        update_locals(self, locals())      
- 
-        self.layers             = []
-        self.pretrain_functions = []
-        self.params             = []
-        self.n_layers           = len(hidden_layers_sizes)
-
-        self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
-
-        if len(hidden_layers_sizes) < 1 :
-            raiseException (' You must have at least one hidden layer ')
-
-
-        # allocate symbolic variables for the data
-        index   = T.lscalar()    # index to a [mini]batch 
-        self.x  = T.matrix('x')  # the data is presented as rasterized images
-        self.y  = T.ivector('y') # the labels are presented as 1D vector of 
-                                 # [int] labels
-
-        for i in xrange( self.n_layers ):
-            # construct the sigmoidal layer
-
-            # the size of the input is either the number of hidden units of 
-            # the layer below or the input size if we are on the first layer
-            if i == 0 :
-                input_size = n_ins
-            else:
-                input_size = hidden_layers_sizes[i-1]
-
-            # the input to this layer is either the activation of the hidden
-            # layer below or the input of the SdA if you are on the first
-            # layer
-            if i == 0 : 
-                layer_input = self.x
-            else:
-                layer_input = self.layers[-1].output
-
-            layer = SigmoidalLayer(rng, layer_input, input_size, 
-                                   hidden_layers_sizes[i] )
-            # add the layer to the 
-            self.layers += [layer]
-            self.params += layer.params
-        
-            # Construct a denoising autoencoder that shared weights with this
-            # layer
-            dA_layer = dA(input_size, hidden_layers_sizes[i], \
-                          corruption_level = corruption_levels[0],\
-                          input = layer_input, \
-                          shared_W = layer.W, shared_b = layer.b)
-        
-            # Construct a function that trains this dA
-            # compute gradients of layer parameters
-            gparams = T.grad(dA_layer.cost, dA_layer.params)
-            # compute the list of updates
-            updates = {}
-            for param, gparam in zip(dA_layer.params, gparams):
-                updates[param] = param - gparam * pretrain_lr
-            
-            # create a function that trains the dA
-            update_fn = theano.function([index], dA_layer.cost, \
-                  updates = updates,
-                  givens = { 
-                     self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
-            # collect this function into a list
-            self.pretrain_functions += [update_fn]
-
-        
-        # We now need to add a logistic layer on top of the MLP
-        self.logLayer = LogisticRegression(\
-                         input = self.layers[-1].output,\
-                         n_in = hidden_layers_sizes[-1], n_out = n_outs)
-
-        self.params += self.logLayer.params
-        # construct a function that implements one step of finetunining
-
-        # compute the cost, defined as the negative log likelihood 
-        cost = self.logLayer.negative_log_likelihood(self.y)
-        # compute the gradients with respect to the model parameters
-        gparams = T.grad(cost, self.params)
-        # compute list of updates
-        updates = {}
-        for param,gparam in zip(self.params, gparams):
-            updates[param] = param - gparam*finetune_lr
-            
-        self.finetune = theano.function([index], cost, 
-                updates = updates,
-                givens = {
-                  self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
-                  self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
-
-        # symbolic variable that points to the number of errors made on the
-        # minibatch given by self.x and self.y
-
-        self.errors = self.logLayer.errors(self.y)
-
-    @classmethod
-    def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None):
-        assert(num_hidden_layers <= obj.n_layers)
-
-        if not new_finetuning_lr:
-            new_finetuning_lr = obj.finetune_lr
-
-        new_sda = cls(train_set_x= obj.train_set_x, \
-                      train_set_y = obj.train_set_y,\
-                      batch_size = obj.batch_size, \
-                      n_ins= obj.n_ins, \
-                      hidden_layers_sizes = obj.hidden_layers_sizes[:num_hidden_layers], \
-                      n_outs = obj.n_outs, \
-                      corruption_levels = obj.corruption_levels[:num_hidden_layers],\
-                      rng = obj.rng,\
-                      pretrain_lr = obj.pretrain_lr, \
-                      finetune_lr = new_finetuning_lr, \
-                      input_divider = obj.input_divider )
-
-        # new_sda.layers contains only the hidden layers actually
-        for i, layer in enumerate(new_sda.layers):
-            original_layer = obj.layers[i]
-            for p1,p2 in zip(layer.params, original_layer.params):
-                p1.value = p2.value.copy()
-
-        return new_sda
-
-    def get_params_copy(self):
-        return copy.deepcopy(self.params)
-
-    def set_params_from_copy(self, copy):
-        # We don't want to replace the var, as the functions have pointers in there
-        # We only want to replace values.
-        for i, p in enumerate(self.params):
-            p.value = copy[i].value
-
-    def get_params_means(self):
-        s = []
-        for p in self.params:
-            s.append(numpy.mean(p.value))
-        return s
-
-if __name__ == '__main__':
-    import sys
-    args = sys.argv[1:]
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/utils.py
--- a/scripts/stacked_dae/utils.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-#!/usr/bin/python
-
-from jobman import DD
-
-# from pylearn codebase
-def update_locals(obj, dct):
-    if 'self' in dct:
-        del dct['self']
-    obj.__dict__.update(dct)
-
-def produit_croise_jobs(val_dict):
-    job_list = [DD()]
-    all_keys = val_dict.keys()
-
-    for key in all_keys:
-        possible_values = val_dict[key]
-        new_job_list = []
-        for val in possible_values:
-            for job in job_list:
-                to_insert = job.copy()
-                to_insert.update({key: val})
-                new_job_list.append(to_insert)
-        job_list = new_job_list
-
-    return job_list
-
-def test_produit_croise_jobs():
-    vals = {'a': [1,2], 'b': [3,4,5]}
-    print produit_croise_jobs(vals)
-
-
-# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
-"""Simple module for getting amount of memory used by a specified user's
-processes on a UNIX system.
-It uses UNIX ps utility to get the memory usage for a specified username and
-pipe it to awk for summing up per application memory usage and return the total.
-Python's Popen() from subprocess module is used for spawning ps and awk.
-
-"""
-
-import subprocess
-
-class MemoryMonitor(object):
-
-    def __init__(self, username):
-        """Create new MemoryMonitor instance."""
-        self.username = username
-
-    def usage(self):
-        """Return int containing memory used by user's processes."""
-        self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
-                                        shell=True,
-                                        stdout=subprocess.PIPE,
-                                        )
-        self.stdout_list = self.process.communicate()[0].split('\n')
-        return int(self.stdout_list[0])
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/BruitGauss.py
--- a/transformations/BruitGauss.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,138 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout de bruit gaussien dans les donnees. A chaque iteration, un bruit poivre 
-et sel est ajoute, puis un lissage gaussien autour de ce point est ajoute.
-On fait un nombre d'iteration = 1024*complexity/25 ce qui equivaud
-a complexity/25 des points qui recoivent le centre du noyau gaussien.
-Il y en a beaucoup moins que le bruit poivre et sel, car la transformation
-est plutôt aggressive et touche beaucoup de pixels autour du centre 
-
-La grandeur de la gaussienne ainsi que son ecart type sont definit par complexity 
-et par une composante aleatoire normale.
-
-On a 25 % de chances d'effectuer le bruitage
-
-Ce fichier prend pour acquis que les images sont donnees une a la fois
-sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-#import random
-import scipy
-from scipy import ndimage
-
-class BruitGauss():
-    
-    def __init__(self,complexity=1,seed=6378):
-        self.nb_chngmax =10 #Le nombre de pixels changes. Seulement pour fin de calcul
-        self.grandeurmax = 20
-        self.sigmamax = 6.0
-        self.regenerate_parameters(complexity)
-        self.seed=seed
-        
-        #numpy.random.seed(self.seed)
-        
-    def get_seed(self):
-        return self.seed
-        
-    def get_settings_names(self):
-        return ['nb_chng','sigma_gauss','grandeur']
-
-    def regenerate_parameters(self, complexity):
-        self.effectuer =numpy.random.binomial(1,0.25)    ##### On a 25% de faire un bruit #####
-
-        
-        if self.effectuer and complexity > 0:
-            self.nb_chng=3+int(numpy.random.rand()*self.nb_chngmax*complexity)
-            self.sigma_gauss=2.0 + numpy.random.rand()*self.sigmamax*complexity
-            self.grandeur=12+int(numpy.random.rand()*self.grandeurmax*complexity)
-                        #creation du noyau gaussien
-            self.gauss=numpy.zeros((self.grandeur,self.grandeur))
-            x0 = y0 = self.grandeur/2.0
-            for i in xrange(self.grandeur):
-                for j in xrange(self.grandeur):
-                    self.gauss[i,j]=numpy.exp(-((i-x0)**2 + (j-y0)**2) / self.sigma_gauss**2)
-            #creation de la fenetre de moyennage
-            self.moy=numpy.zeros((self.grandeur,self.grandeur))
-            x0 = y0 = self.grandeur/2
-            for i in xrange(0,self.grandeur):
-                for j in xrange(0,self.grandeur):
-                    self.moy[i,j]=((numpy.sqrt(2*(self.grandeur/2.0)**2) -\
-                                 numpy.sqrt(numpy.abs(i-self.grandeur/2.0)**2+numpy.abs(j-self.grandeur/2.0)**2))/numpy.sqrt((self.grandeur/2.0)**2))**5
-        else:
-            self.sigma_gauss = 1 # eviter division par 0
-            self.grandeur=1
-            self.nb_chng = 0
-            self.effectuer = 0
-        
-        return self._get_current_parameters()
-
-    def _get_current_parameters(self):
-        return [self.nb_chng,self.sigma_gauss,self.grandeur]
-
-    
-    def transform_image(self, image):
-        if self.effectuer == 0:
-            return image
-        image=image.reshape((32,32))
-        filtered_image = ndimage.convolve(image,self.gauss,mode='constant')
-        assert image.shape == filtered_image.shape
-        filtered_image = (filtered_image - filtered_image.min() + image.min()) / (filtered_image.max() - filtered_image.min() + image.min()) * image.max()
-               
-        #construction of the moyennage Mask
-        Mask = numpy.zeros((32,32))
-        
-        for i in xrange(0,self.nb_chng):
-            x_bruit=int(numpy.random.randint(0,32))
-            y_bruit=int(numpy.random.randint(0,32))
-            offsetxmin = 0
-            offsetxmax = 0
-            offsetymin = 0
-            offsetymax = 0
-            if x_bruit < self.grandeur / 2:
-                offsetxmin = self.grandeur / 2 - x_bruit
-            if 32-x_bruit < numpy.ceil(self.grandeur / 2.0):
-                offsetxmax = numpy.ceil(self.grandeur / 2.0) - (32-x_bruit)
-            if y_bruit < self.grandeur / 2:
-                offsetymin = self.grandeur / 2 - y_bruit
-            if 32-y_bruit < numpy.ceil(self.grandeur / 2.0):
-                offsetymax = numpy.ceil(self.grandeur / 2.0) - (32-y_bruit)
-            Mask[x_bruit - self.grandeur/2 + offsetxmin : x_bruit + numpy.ceil(self.grandeur/2.0) - offsetxmax,\
-                    y_bruit - self.grandeur/2 + offsetymin : y_bruit + numpy.ceil(self.grandeur/2.0)-  offsetymax] +=\
-                        self.moy[offsetxmin:self.grandeur - offsetxmax,offsetymin:self.grandeur - offsetymax] 
-                    
-        return numpy.asarray((image + filtered_image*Mask)/(Mask+1),dtype='float32')
-
-#---TESTS---
-
-def _load_image():
-    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
-    d = ft.read(f)
-    w=numpy.asarray(d[0])
-    return (w/255.0).astype('float')
-
-def _test(complexite):
-    img=_load_image()
-    transfo = BruitGauss()
-    pylab.imshow(img.reshape((32,32)))
-    pylab.show()
-    print transfo.get_settings_names()
-    print transfo.regenerate_parameters(complexite)
-    
-    img_trans=transfo.transform_image(img)
-    
-    pylab.imshow(img_trans.reshape((32,32)))
-    pylab.show()
-    
-
-if __name__ == '__main__':
-    from pylearn.io import filetensor as ft
-    import pylab
-    _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/DistorsionGauss.py
--- a/transformations/DistorsionGauss.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout d'une composante aleatoire dans chaque pixel de l'image.
-C'est une distorsion gaussienne de moyenne 0 et d'écart type complexity/10
-
-Il y a 30% d'effectuer le bruitage
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-import random
-
-class DistorsionGauss():
-    
-    def __init__(self,seed=3459):
-        self.ecart_type=0.1 #L'ecart type de la gaussienne
-        self.effectuer=1    #1=on effectue et 0=rien faire
-        self.seed=seed
-        
-        #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
-        #numpy.random.seed(self.seed) 
-        #random.seed(self.seed)
-        
-    def get_settings_names(self):
-        return ['effectuer']
-    
-    def get_seed(self):
-        return self.seed
-    
-    def get_settings_names_determined_by_complexity(self,complexity):
-        return ['ecart_type']
-
-    def regenerate_parameters(self, complexity):
-        self.ecart_type=float(complexity)/10
-        self.effectuer =numpy.random.binomial(1,0.3)    ##### On a 30% de faire un bruit #####
-        return self._get_current_parameters()
-
-    def _get_current_parameters(self):
-        return [self.effectuer]
-    
-    def get_parameters_determined_by_complexity(self,complexity):
-        return [float(complexity)/10]
-    
-    def transform_image(self, image):
-        if self.effectuer == 0:
-            return image
-        
-        image=image.reshape(1024,1)
-        aleatoire=numpy.zeros((1024,1)).astype('float32')
-        for i in xrange(0,1024):
-            aleatoire[i]=float(random.gauss(0,self.ecart_type))
-        image=image+aleatoire
-        
-        
-        #Ramener tout entre 0 et 1. Ancienne facon de normaliser.
-        #Resultats moins interessant je trouve.
-##        if numpy.min(image) < 0:
-##            image-=numpy.min(image)
-##        if numpy.max(image) > 1:
-##            image/=numpy.max(image)
-            
-        for i in xrange(0,1024):
-            image[i]=min(1,max(0,image[i]))
-            
-        return image.reshape(32,32)
-
-
-#---TESTS---
-
-def _load_image():
-    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
-    d = ft.read(f)
-    w=numpy.asarray(d[random.randint(0,100)])
-    return (w/255.0).astype('float')
-
-def _test(complexite):
-    img=_load_image()
-    transfo = DistorsionGauss()
-    pylab.imshow(img.reshape((32,32)))
-    pylab.show()
-    print transfo.get_settings_names()
-    print transfo.regenerate_parameters(complexite)
-    
-    img_trans=transfo.transform_image(img)
-    
-    pylab.imshow(img_trans.reshape((32,32)))
-    pylab.show()
-    
-
-if __name__ == '__main__':
-    from pylearn.io import filetensor as ft
-    import pylab
-    for i in xrange(0,5):
-        _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/Occlusion.py
--- a/transformations/Occlusion.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout de bruit d'occlusion dans l'image originale.
-
-Le bruit provient d'un echantillon pris dans la seconde image puis rajoutee a
-gauche ou a droite de l'image originale. De plus, il se peut aussi que le
-bruit soit rajoute sur l'image originale, mais en plus pâle.
-
-Le fichier /data/lisa/data/ift6266h10/echantillon_occlusion.ft 
-(sur le reseau DIRO) est necessaire.
-
-Il y a 30% de chance d'avoir une occlusion quelconque.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-
-import numpy
-
-from pylearn.io import filetensor as ft
-
-class Occlusion():
-    
-    def __init__(self,seed=9854):
-        #Ces 4 variables representent la taille du "crop" sur l'image2
-        #Ce "crop" est pris a partie de image1[15,15], le milieu de l'image1
-        self.haut=2
-        self.bas=2
-        self.gauche=2
-        self.droite=2
-        
-        #Ces deux variables representent le deplacement en x et y par rapport
-        #au milieu du bord gauche ou droit
-        self.x_arrivee=0
-        self.y_arrivee=0
-        
-        #Cette variable =1 si l'image est mise a gauche et -1 si a droite
-        #et =0 si au centre, mais plus pale
-        self.endroit=-1
-        
-        #Cette variable determine l'opacite de l'ajout dans le cas ou on est au milieu
-        self.opacite=0.5    #C'est completement arbitraire. Possible de le changer si voulu
-        
-        #Sert a dire si on fait quelque chose. 0=faire rien, 1 on fait quelque chose
-        self.appliquer=1
-        
-        self.seed=seed
-        #numpy.random.seed(self.seed)
-        
-        f3 = open('/data/lisa/data/ift6266h10/echantillon_occlusion.ft')   #Doit etre sur le reseau DIRO.
-        #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/echantillon_occlusion.ft')
-        #Il faut arranger le path sinon
-        w=ft.read(f3)
-        f3.close()
-        
-        self.longueur=len(w)
-        self.d=(w.astype('float'))/255
-        
-        
-    def get_settings_names(self):
-        return ['haut','bas','gauche','droite','x_arrivee','y_arrivee','endroit','rajout','appliquer']
-    
-    def get_seed(self):
-        return self.seed
-
-    def regenerate_parameters(self, complexity):
-        self.haut=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
-        self.bas=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
-        self.gauche=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
-        self.droite=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
-        if self.haut+self.bas+self.gauche+self.droite==0:   #Tres improbable
-            self.haut=1
-            self.bas=1
-            self.gauche=1
-            self.droite=1
-        
-        #Ces deux valeurs seront controlees afin d'etre certain de ne pas depasser
-        self.x_arrivee=int(numpy.abs(numpy.random.normal(0,2))) #Complexity n'entre pas en jeu, pas besoin
-        self.y_arrivee=int(numpy.random.normal(0,3)) 
-        
-        self.rajout=numpy.random.randint(0,self.longueur-1)  #les bouts de quelle lettre
-        self.appliquer=numpy.random.binomial(1,0.4)    #####  40 % du temps, on met une occlusion #####
-        
-        if complexity == 0: #On ne fait rien dans ce cas
-            self.applique=0
-        
-        self.endroit=numpy.random.randint(-1,2) 
-
-        return self._get_current_parameters()
-
-    def _get_current_parameters(self):
-        return [self.haut,self.bas,self.gauche,self.droite,self.x_arrivee,self.y_arrivee,self.endroit,self.rajout,self.appliquer]
-    
-    
-    def transform_image(self, image):
-        if self.appliquer == 0: #Si on fait rien, on retourne tout de suite l'image
-            return image
-        
-        #Attrapper le bruit d'occlusion
-        bruit=self.d[self.rajout].reshape((32,32))[15-self.haut:15+self.bas+1,15-self.gauche:15+self.droite+1]
-        
-        if self.x_arrivee+self.gauche+self.droite>32:
-            self.endroit*=-1    #On change de bord et on colle sur le cote
-            self.x_arrivee=0
-        if self.y_arrivee-self.haut <-16:
-            self.y_arrivee=self.haut-16#On colle le morceau en haut
-        if self.y_arrivee+self.bas > 15:
-            self.y_arrivee=15-self.bas  #On colle le morceau en bas
-            
-        if self.endroit==-1:    #a gauche
-            for i in xrange(-self.haut,self.bas+1):
-                for j in xrange(0,self.gauche+self.droite+1):
-                    image[16+self.y_arrivee+i,self.x_arrivee+j]=\
-                    max(image[16+self.y_arrivee+i,self.x_arrivee+j],bruit[i+self.haut,j])
-            
-        elif self.endroit==1: #a droite
-            for i in xrange(-self.haut,self.bas+1):
-                for j in xrange(-self.gauche-self.droite,1):
-                    image[16+self.y_arrivee+i,31-self.x_arrivee+j]=\
-                    max(image[16+self.y_arrivee+i,31-self.x_arrivee+j],bruit[i+self.haut,j+self.gauche+self.droite])
-            
-        elif self.endroit==0:    #au milieu
-            for i in xrange(-self.haut,self.bas+1):
-                for j in xrange(-self.gauche,self.droite+1):
-                    image[16+i,16+j]=max(image[16+i,16+j],bruit[i+self.haut,j+self.gauche]*self.opacite)
-            
-        
-        return image
-        
-#---TESTS---
-
-def _load_image():
-    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
-    d = ft.read(f)
-    w=numpy.asarray(d[numpy.random.randint(0,50)])
-    return (w/255.0).astype('float')
-
-def _test(complexite):
-    
-    transfo = Occlusion()
-    for i in xrange(0,20):
-        img = _load_image()
-        pylab.imshow(img.reshape((32,32)))
-        pylab.show()
-        print transfo.get_settings_names()
-        print transfo.regenerate_parameters(complexite)
-        
-        img_trans=transfo.transform_image(img.reshape((32,32)))
-        
-        print transfo.get_seed()
-        pylab.imshow(img_trans.reshape((32,32)))
-        pylab.show()
-    
-
-if __name__ == '__main__':
-    import pylab
-    import scipy
-    _test(0.5)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/PermutPixel.py
--- a/transformations/PermutPixel.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,114 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Un echange de pixels est effectue entre certain pixels choisit aleatoirement
-et un de ses 4 voisins, tout aussi choisi aleatoirement.
-
-Le nombre de pixels permutes est definit pas complexity*1024
-
-Il y a proba 20% d'effectuer le bruitage
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-import random
-
-class PermutPixel():
-    
-    def __init__(self,seed=7152):
-        self.nombre=10 #Le nombre de pixels a permuter
-        self.proportion=0.3
-        self.effectuer=1    #1=on effectue, 0=rien faire
-        self.seed=seed
-        
-        #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
-        #numpy.random.seed(self.seed)
-        #random.seed(self.seed)
-        
-    def get_seed(self):
-        return self.seed
-        
-    def get_settings_names(self):
-        return ['effectuer']
-    
-    def get_settings_names_determined_by_complexity(self,complexity):
-        return ['nombre']
-
-    def regenerate_parameters(self, complexity):
-        self.proportion=float(complexity)/3
-        self.nombre=int(256*self.proportion)*4   #Par multiple de 4 (256=1024/4)
-        self.echantillon=random.sample(xrange(0,1024),self.nombre)  #Les pixels qui seront permutes
-        self.effectuer =numpy.random.binomial(1,0.2)    ##### On a 20% de faire un bruit #####
-        return self._get_current_parameters()
-
-    def _get_current_parameters(self):
-        return [self.effectuer]  
-    
-    def get_parameters_determined_by_complexity(self, complexity):
-        return [int(complexity*256)*4]
-    
-    def transform_image(self, image):
-        if self.effectuer==0:
-            return image
-        
-        image=image.reshape(1024,1)
-        temp=0  #variable temporaire
-
-        for i in xrange(0,self.nombre,4):   #Par bonds de 4
-            #gauche
-            if self.echantillon[i] > 0:
-                temp=image[self.echantillon[i]-1]
-                image[self.echantillon[i]-1]=image[self.echantillon[i]]
-                image[self.echantillon[i]]=temp
-            #droite
-            if self.echantillon[i+1] < 1023:
-                temp=image[self.echantillon[i+1]+1]
-                image[self.echantillon[i+1]+1]=image[self.echantillon[i+1]]
-                image[self.echantillon[i+1]]=temp
-            #haut
-            if self.echantillon[i+2] > 31:
-                temp=image[self.echantillon[i+2]-32]
-                image[self.echantillon[i+2]-32]=image[self.echantillon[i+2]]
-                image[self.echantillon[i+2]]=temp
-            #bas
-            if self.echantillon[i+3] < 992:
-                temp=image[self.echantillon[i+3]+32]
-                image[self.echantillon[i+3]+32]=image[self.echantillon[i+3]]
-                image[self.echantillon[i+3]]=temp
-            
-            
-        return image.reshape((32,32))
-
-
-#---TESTS---
-
-def _load_image():
-    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
-    d = ft.read(f)
-    w=numpy.asarray(d[random.randint(0,100)])
-    return (w/255.0).astype('float')
-
-def _test(complexite):
-    img=_load_image()
-    transfo = PermutPixel()
-    pylab.imshow(img.reshape((32,32)))
-    pylab.show()
-    print transfo.get_settings_names()
-    print transfo.regenerate_parameters(complexite)
-    
-    img_trans=transfo.transform_image(img)
-    
-    pylab.imshow(img_trans.reshape((32,32)))
-    pylab.show()
-    
-
-if __name__ == '__main__':
-    from pylearn.io import filetensor as ft
-    import pylab
-    for i in xrange(0,5):
-        _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/PoivreSel.py
--- a/transformations/PoivreSel.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout de bruit poivre et sel dans les donnees. Le bruit est distribue de facon 
-aleatoire tire d'une uniforme tout comme la clarte des bites changees.
-
-La proportion de bites aleatoires est definit par complexity/5.
-Lorsque cette valeur est a 1 ==> Plus reconnaissable et 0 ==> Rien ne se passe
-
-On a maintenant 25% de chance d'effectuer un bruitage.
-
-Ce fichier prend pour acquis que les images sont donnees une a la fois
-sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-import random
-
-class PoivreSel():
-    
-    def __init__(self,seed=9361):
-        self.proportion_bruit=0.08 #Le pourcentage des pixels qui seront bruites
-        self.nb_chng=10 #Le nombre de pixels changes. Seulement pour fin de calcul
-        self.effectuer=1    #Vaut 1 si on effectue et 0 sinon.
-        
-        self.seed=seed
-        #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
-        #numpy.random.seed(self.seed)
-        #random.seed(self.seed)
-        
-    def get_seed(self):
-        return self.seed
-        
-    def get_settings_names(self):
-        return ['effectuer']
-    
-    def get_settings_names_determined_by_complexity(self,complexity):
-        return ['proportion_bruit']
-
-    def regenerate_parameters(self, complexity):
-        self.proportion_bruit = float(complexity)/5
-        self.nb_chng=int(1024*self.proportion_bruit)
-        self.changements=random.sample(xrange(1024),self.nb_chng)   #Les pixels qui seront changes
-        self.effectuer =numpy.random.binomial(1,0.25)    ##### On a 25% de faire un bruit #####
-        return self._get_current_parameters()
-
-    def _get_current_parameters(self):
-        return [self.effectuer]
-    
-    def get_parameters_determined_by_complexity(self, complexity):
-        return [float(complexity)/5]
-    
-    def transform_image(self, image):
-        if self.effectuer == 0:
-            return image
-        
-        image=image.reshape(1024,1)
-        for j in xrange(0,self.nb_chng):
-            image[self.changements[j]]=numpy.random.random()    #On determine les nouvelles valeurs des pixels changes
-        return image.reshape(32,32)
-
-
-#---TESTS---
-
-def _load_image():
-    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
-    d = ft.read(f)
-    w=numpy.asarray(d[0])
-    return (w/255.0).astype('float')
-
-def _test(complexite):
-    img=_load_image()
-    transfo = PoivreSel()
-    pylab.imshow(img.reshape((32,32)))
-    pylab.show()
-    print transfo.get_settings_names()
-    print transfo.regenerate_parameters(complexite)
-    
-    img_trans=transfo.transform_image(img)
-    
-    pylab.imshow(img_trans.reshape((32,32)))
-    pylab.show()
-    
-
-if __name__ == '__main__':
-    from pylearn.io import filetensor as ft
-    import pylab
-    _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/Rature.py
--- a/transformations/Rature.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,255 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout d'une rature sur le caractère. La rature est en fait un 1 qui recoit une
-rotation et qui est ensuite appliqué sur le caractère. Un grossissement, puis deux
-erosions sont effectuees sur le 1 afin qu'il ne soit plus reconnaissable.
-Il y a des chances d'avoir plus d'une seule rature !
-
-Il y a 15% d'effectuer une rature.
-
-Ce fichier prend pour acquis que les images sont donnees une a la fois
-sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy, Image, random
-import scipy.ndimage.morphology
-from pylearn.io import filetensor as ft
-
-
-class Rature():
-   
-    def __init__(self,seed=1256):
-        self.angle=0 #Angle en degre de la rotation (entre 0 et 180)
-        self.numero=0 #Le numero du 1 choisi dans la banque de 1
-        self.gauche=-1   #Le numero de la colonne la plus a gauche contenant le 1
-        self.droite=-1
-        self.haut=-1
-        self.bas=-1
-        self.faire=1    #1=on effectue et 0=fait rien
-        
-        self.crop_haut=0
-        self.crop_gauche=0  #Ces deux valeurs sont entre 0 et 31 afin de definir
-                            #l'endroit ou sera pris le crop dans l'image du 1
-                            
-        self.largeur_bande=-1    #La largeur de la bande
-        self.smooth=-1   #La largeur de la matrice carree servant a l'erosion
-        self.nb_ratures=-1   #Le nombre de ratures appliques
-        self.fini=0 #1=fini de mettre toutes les couches 0=pas fini
-        self.complexity=0   #Pour garder en memoire la complexite si plusieurs couches sont necessaires
-        self.seed=seed
-        
-        #numpy.random.seed(self.seed)
-        
-        f3 = open('/data/lisa/data/ift6266h10/un_rature.ft')   #Doit etre sur le reseau DIRO.
-        #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/un_rature.ft')
-        #Il faut arranger le path sinon
-        w=ft.read(f3)
-        f3.close()
-        self.d=(w.astype('float'))/255
-        
-        self.patch=self.d[0].reshape((32,32)) #La patch de rature qui sera appliquee sur l'image
-
-    def get_settings_names(self):
-        return ['angle','numero','faire','crop_haut','crop_gauche','largeur_bande','smooth','nb_ratures']
-    
-    def get_seed(self):
-        return self.seed
-
-    def regenerate_parameters(self, complexity,next_rature = False):
-        
-        
-        self.numero=random.randint(0,4999)  #Ces bornes sont inclusives !
-        self.fini=0
-        self.complexity=complexity
-            
-        if float(complexity) > 0:
-            
-            self.gauche=self.droite=self.haut=self.bas=-1   #Remet tout a -1
-            
-            self.angle=int(numpy.random.normal(90,100*complexity))
-
-            self.faire=numpy.random.binomial(1,0.15)    ##### 15% d'effectuer une rature #####
-            if next_rature:
-                self.faire = 1
-            #self.faire=1 #Pour tester seulement
-            
-            self.crop_haut=random.randint(0,17)
-            self.crop_gauche=random.randint(0,17)
-            if complexity <= 0.25 :
-                self.smooth=6
-            elif complexity <= 0.5:
-                self.smooth=5
-            elif complexity <= 0.75:
-                self.smooth=4
-            else:
-                self.smooth=3
-            
-            p = numpy.random.rand()
-            if p < 0.5:
-                self.nb_ratures= 1
-            else:
-                if p < 0.8:
-                    self.nb_ratures = 2
-                else:
-                    self.nb_ratures = 3
-            
-            #Creation de la "patch" de rature qui sera appliquee sur l'image
-            if self.faire == 1:
-                self.get_size()
-                self.get_image_rot()    #On fait la "patch"
-            
-        else:
-            self.faire=0    #On ne fait rien si complexity=0 !!
-        
-        return self._get_current_parameters()
-    
-    
-    def get_image_rot(self):
-        image2=(self.d[self.numero].reshape((32,32))[self.haut:self.bas,self.gauche:self.droite])
-        
-        im = Image.fromarray(numpy.asarray(image2*255,dtype='uint8'))
-        
-        #La rotation et le resize sont de belle qualite afin d'avoir une image nette
-        im2 = im.rotate(self.angle,Image.BICUBIC,expand=False)
-        im3=im2.resize((50,50),Image.ANTIALIAS)
-        
-        grosse=numpy.asarray(numpy.asarray(im3)/255.0,dtype='float32')
-        crop=grosse[self.haut:self.haut+32,self.gauche:self.gauche+32]
-        
-        self.get_patch(crop)
-        
-    def get_patch(self,crop):
-        smooting = numpy.ones((self.smooth,self.smooth))
-        #Il y a deux erosions afin d'avoir un beau resultat. Pas trop large et
-        #pas trop mince
-        trans=scipy.ndimage.morphology.grey_erosion\
-                    (crop,size=smooting.shape,structure=smooting,mode='wrap')
-        trans1=scipy.ndimage.morphology.grey_erosion\
-                    (trans,size=smooting.shape,structure=smooting,mode='wrap')
-        
-               
-        patch_img=Image.fromarray(numpy.asarray(trans1*255,dtype='uint8'))
-        
-        patch_img2=patch_img.crop((4,4,28,28)).resize((32,32))  #Pour contrer les effets de bords !
-        
-        trans2=numpy.asarray(numpy.asarray(patch_img2)/255.0,dtype='float32')
-            
-            
-        #Tout ramener entre 0 et 1
-        trans2=trans2-trans2.min() #On remet tout positif
-        trans2=trans2/trans2.max()
-        
-        #La rayure a plus de chance d'etre en bas ou oblique le haut a 10h
-        if random.random() <= 0.5:  #On renverse la matrice dans ce cas
-            for i in xrange(0,32):
-                self.patch[i,:]=trans2[31-i,:]
-        else:
-            self.patch=trans2
-        
-    
-    
-    
-    def get_size(self):
-        image=self.d[self.numero].reshape((32,32))
-        
-        #haut
-        for i in xrange(0,32):
-            for j in xrange(0,32):
-                if(image[i,j]) != 0:
-                    if self.haut == -1:
-                        self.haut=i
-                        break
-            if self.haut > -1:
-                break
-        
-        #bas
-        for i in xrange(31,-1,-1):
-            for j in xrange(0,32):
-                if(image[i,j]) != 0:
-                    if self.bas == -1:
-                        self.bas=i
-                        break
-            if self.bas > -1:
-                break
-            
-        #gauche
-        for i in xrange(0,32):
-            for j in xrange(0,32):
-                if(image[j,i]) != 0:
-                    if self.gauche == -1:
-                        self.gauche=i
-                        break
-            if self.gauche > -1:
-                break
-            
-        #droite
-        for i in xrange(31,-1,-1):
-            for j in xrange(0,32):
-                if(image[j,i]) != 0:
-                    if self.droite == -1:
-                        self.droite=i
-                        break
-            if self.droite > -1:
-                break
-                
-
-    def _get_current_parameters(self):
-        return [self.angle,self.numero,self.faire,self.crop_haut,self.crop_gauche,self.largeur_bande,self.smooth,self.nb_ratures]
-
-    def transform_image(self, image):
-        if self.faire == 0: #Rien faire !!
-            return image
-        
-        if self.fini == 0:   #S'il faut rajouter des couches
-            patch_temp=self.patch
-            for w in xrange(1,self.nb_ratures):
-                self.regenerate_parameters(self.complexity,1)
-                for i in xrange(0,32):
-                    for j in xrange(0,32):
-                        patch_temp[i,j]=max(patch_temp[i,j],self.patch[i,j])
-            self.fini=1
-            self.patch=patch_temp
-            
-        for i in xrange(0,32):
-            for j in xrange(0,32):
-                image[i,j]=max(image[i,j],self.patch[i,j])
-        self.patch*=0   #Remise a zero de la patch (pas necessaire)
-        return image
-
-
-#---TESTS---
-
-def _load_image():
-    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft')  #Le jeu de donnees est en local. 
-    d = ft.read(f)
-    w=numpy.asarray(d[0:1000])
-    return (w/255.0).astype('float')
-
-def _test(complexite):
-    img=_load_image()
-    transfo = Rature()
-    for i in xrange(0,10):
-        img2=img[random.randint(0,1000)]
-        pylab.imshow(img2.reshape((32,32)))
-        pylab.show()
-        print transfo.get_settings_names()
-        print transfo.regenerate_parameters(complexite)
-        img2=img2.reshape((32,32))
-        
-        img2_trans=transfo.transform_image(img2)
-        
-        pylab.imshow(img2_trans.reshape((32,32)))
-        pylab.show()
-    
-
-if __name__ == '__main__':
-    from pylearn.io import filetensor as ft
-    import pylab
-    _test(1)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/add_background_image.py
--- a/transformations/add_background_image.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-#!/usr/bin/python                                                                                 
-# -*- coding: iso-8859-1 -*-                                                                      
-
-'''
-    Implementation of random background adding to a specific image
-
-    Author: Guillaume Sicard
-'''
-
-import sys, os, random
-import cPickle
-import Image, numpy           
-
-class AddBackground():
-    def __init__(self, threshold = 128, complexity = 1):
-        self.h = 32
-        self.w = 32
-        self.threshold = 1;
-        try: #in order to load locally if it is available
-            self.bg_image_file = '/Tmp/image_net/'
-            f=open(self.bg_image_file+'filelist.pkl')
-        except:
-            self.bg_image_file = '/data/lisa/data/ift6266h10/image_net/'
-            f=open(self.bg_image_file+'filelist.pkl')
-        self.image_files = cPickle.load(f)
-        f.close()
-        self.regenerate_parameters(complexity)
-    
-    def get_current_parameters(self):
-        return [self.contrast]
-    # get threshold value
-    def get_settings_names(self):
-        return ['contrast']
-    
-    # no need, except for testmod.py
-    def regenerate_parameters(self, complexity):
-        self.contrast = 1-numpy.random.rand()*complexity
-        return [self.contrast]
-
-    # load an image
-    def load_image(self,filename):
-        image = Image.open(filename).convert('L')
-        image = numpy.asarray(image)
-        image = (image / 255.0).astype(numpy.float32)
-        return image
-
-    # save an image
-    def save_image(self,array, filename):
-        image = (array * 255.0).astype('int')
-        image = Image.fromarray(image)
-        if (filename != ''):
-            image.save(filename)
-        else:
-            image.show()
-
-    # make a random 32x32 crop of an image
-    def rand_crop(self,image):
-        i_w, i_h = image.shape
-        x, y = random.randint(0, i_w - self.w), random.randint(0, i_h - self.h)
-        return image[x:x + self.w, y:y + self.h]
-
-    # select a random background image from "bg_image_file" and crops it
-    def rand_bg_image(self,maximage):
-        i = random.randint(0, len(self.image_files) - 1)
-
-        image = self.load_image(self.bg_image_file + self.image_files[i])
-        self.bg_image = self.rand_crop(image)
-        maxbg = self.bg_image.max()
-        self.bg_image = self.bg_image / maxbg * ( max(maximage - self.contrast,0.0) ) 
-
-    # set "bg_image" as background to "image", based on a pixels threshold
-    def set_bg(self,image):
-        tensor = numpy.asarray([self.bg_image,image],dtype='float32')
-        return tensor.max(0)
-
-    # transform an image file and return an array
-    def transform_image_from_file(self, filename):
-        self.rand_bg_image()
-        image = self.load_image(filename)
-        image = self.set_bg(image)
-        return image
-
-    # standard array to array transform
-    def transform_image(self, image):
-        self.rand_bg_image(image.max())
-        image = self.set_bg(image)
-        return image
-
-    # test method
-    def test(self,filename):
-        import time
-
-        sys.stdout.write('Starting addBackground test : loading image')
-        sys.stdout.flush()
-
-        image = self.load_image(filename)
-
-        t = 0
-        n = 500
-        for i in range(n):
-            t0 =  time.time()
-            image2 = self.transform_image(image)
-            t = ( i * t + (time.time() - t0) ) / (i + 1)
-            sys.stdout.write('.')
-            sys.stdout.flush()
-            
-        print "Done!\nAverage time : " + str(1000 * t) + " ms"
-
-if __name__ == '__main__':
-
-    myAddBackground = AddBackground()
-    myAddBackground.test('./images/0-LiberationSans-Italic.ttf.jpg')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/affine_transform.py
--- a/transformations/affine_transform.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Simple implementation of random affine transformations based on the Python 
-Imaging Module affine transformations.
-
-
-Author: Razvan Pascanu
-'''
-
-import numpy, Image
-
-
-
-class AffineTransformation():
-    def __init__( self, complexity = .5):
-        self.shape = (32,32)
-        self.complexity = complexity
-        params = numpy.random.uniform(size=6) -.5
-        self.a = 1. + params[0]*.6*complexity
-        self.b = 0. + params[1]*.6*complexity
-        self.c = params[2]*8.*complexity
-        self.d = 0. + params[3]*.6*complexity
-        self.e = 1. + params[4]*.6*complexity
-        self.f = params[5]*8.*complexity
-
-    
-    def _get_current_parameters(self):
-        return [self.a, self.b, self.c, self.d, self.e, self.f]
-
-    def get_settings_names(self):
-        return ['a','b','c','d','e','f']
-
-    def regenerate_parameters(self, complexity):
-        # generate random affine transformation
-        # a point (x',y') of the new image corresponds to (x,y) of the old
-        # image where : 
-        #   x' = params[0]*x + params[1]*y + params[2]
-        #   y' = params[3]*x + params[4]*y _ params[5]
-
-        # the ranges are set manually as to look acceptable
- 
-        self.complexity = complexity
-        params = numpy.random.uniform(size=6) -.5
-        self.a = 1. + params[0]*.8*complexity
-        self.b = 0. + params[1]*.8*complexity
-        self.c = params[2]*9.*complexity
-        self.d = 0. + params[3]*.8*complexity
-        self.e = 1. + params[4]*.8*complexity
-        self.f = params[5]*9.*complexity
-        return self._get_current_parameters()
-
-      
-
-
-    def transform_image(self,NIST_image):
-    
-        im = Image.fromarray( \
-                numpy.asarray(\
-                       NIST_image.reshape(self.shape)*255.0, dtype='uint8'))
-        nwim = im.transform( (32,32), Image.AFFINE, [self.a,self.b,self.c,self.d,self.e,self.f])
-        return numpy.asarray(numpy.asarray(nwim)/255.0,dtype='float32')
-
-
-
-if __name__ =='__main__':
-    print 'random test'
-    
-    from pylearn.io import filetensor as ft
-    import pylab
-
-    datapath = '/data/lisa/data/nist/by_class/'
-
-    f = open(datapath+'digits/digits_train_data.ft')
-    d = ft.read(f)
-    f.close()
-
-
-    transformer = AffineTransformation()
-    id = numpy.random.randint(30)
-    
-    pylab.figure()
-    pylab.imshow(d[id].reshape((32,32)))
-    pylab.figure()
-    pylab.imshow(transformer.transform_image(d[id]).reshape((32,32)))
-
-    pylab.show()
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/contrast.py
--- a/transformations/contrast.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Simple implementation of random contrast. This always switch half the time the polarity.
-then it decides of a random contrast dependant of the complexity, the mean of the maximum and minimum
-pixel value stays 0 (to avoid import bias change between exemples).
-
-Author: Xavier Glorot
-'''
-
-import numpy as N
-import copy
-
-
-class Contrast():
-    def __init__(self,complexity = 1):
-        #---------- private attributes
-        self.__nx__ = 32 #xdim of the images
-        self.__ny__ = 32 #ydim of the images
-        self.__Pinvert__ = 0.5 #probability to switch polarity
-        self.__mincontrast__ = 0.15
-        self.__resolution__ = 256
-        self.__rangecontrastres__ = self.__resolution__ - N.int(self.__mincontrast__*self.__resolution__)
-        #------------------------------------------------
-        
-        #---------- generation parameters
-        self.regenerate_parameters(complexity)
-        #------------------------------------------------
-    
-    def _get_current_parameters(self):
-        return [self.invert,self.contrast]
-    
-    def get_settings_names(self):
-        return ['invert','contrast']
-    
-    def regenerate_parameters(self, complexity):
-        self.invert = (N.random.uniform() < self.__Pinvert__)
-        self.contrast = self.__resolution__ - N.random.randint(1 + self.__rangecontrastres__ * complexity)
-        return self._get_current_parameters()
-    
-    def transform_1_image(self,image): #the real transformation method
-        maxi = image.max()
-        mini = image.min()
-        if self.invert:
-            newimage = 1 - (self.__resolution__- self.contrast) / (2 * float(self.__resolution__)) -\
-                        (image - mini) / float(maxi - mini) * self.contrast / float(self.__resolution__)
-        else:
-            newimage = (self.__resolution__- self.contrast) / (2 * float(self.__resolution__)) +\
-                        (image - mini) / float(maxi - mini) * self.contrast / float(self.__resolution__)
-        if image.dtype == 'uint8':
-            return N.asarray(newimage*255,dtype='uint8')
-        else:
-            return N.asarray(newimage,dtype=image.dtype)
-    
-    def transform_image(self,image): #handling different format
-        if image.shape == (self.__nx__,self.__ny__):
-            return self.transform_1_image(image)
-        if image.ndim == 3:
-            newimage = copy.copy(image)
-            for i in range(image.shape[0]):
-                newimage[i,:,:] = self.transform_1_image(image[i,:,:])
-            return newimage
-        if image.ndim == 2 and image.shape != (self.__nx__,self.__ny__):
-            newimage = N.reshape(image,(image.shape[0],self.__nx__,self.__ny__))
-            for i in range(image.shape[0]):
-                newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
-            return N.reshape(newimage,image.shape)
-        if image.ndim == 1:
-            newimage = N.reshape(image,(self.__nx__,self.__ny__))
-            newimage = self.transform_1_image(newimage)
-            return N.reshape(newimage,image.shape)
-        assert False #should never go there
-
-
-
-
-#test on NIST (you need pylearn and access to NIST to do that)
-
-if __name__ == '__main__':
-    
-    from pylearn.io import filetensor as ft
-    import copy
-    import pygame
-    import time
-    datapath = '/data/lisa/data/nist/by_class/'
-    f = open(datapath+'digits/digits_train_data.ft')
-    d = ft.read(f)
-    
-    pygame.surfarray.use_arraytype('numpy')
-    
-    pygame.display.init()
-    screen = pygame.display.set_mode((8*2*32,8*32),0,8)
-    anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
-    screen.set_palette(anglcolorpalette)
-    
-    MyContrast = Contrast()
-    
-    debut=time.time()
-    MyContrast.transform_image(d)
-    fin=time.time()
-    print '------------------------------------------------'
-    print d.shape[0],' images transformed in :', fin-debut, ' seconds'
-    print '------------------------------------------------'
-    print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
-    print '------------------------------------------------'
-    print MyContrast.get_settings_names()
-    print MyContrast._get_current_parameters()
-    print MyContrast.regenerate_parameters(0)
-    print MyContrast.regenerate_parameters(0.5)
-    print MyContrast.regenerate_parameters(1)
-    for i in range(10000):
-        a=d[i,:]
-        b=N.asarray(N.reshape(a,(32,32))).T
-        
-        new=pygame.surfarray.make_surface(b)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new.set_palette(anglcolorpalette)
-        screen.blit(new,(0,0))
-        
-        print MyContrast.get_settings_names(), MyContrast.regenerate_parameters(1)
-        c=MyContrast.transform_image(a)
-        b=N.asarray(N.reshape(c,(32,32))).T
-        
-        new=pygame.surfarray.make_surface(b)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new.set_palette(anglcolorpalette)
-        screen.blit(new,(8*32,0))
-        
-        pygame.display.update()
-        raw_input('Press Enter')
-    
-    pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/filetensor.py
--- a/transformations/filetensor.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,232 +0,0 @@
-"""
-Read and write the matrix file format described at
-U{http://www.cs.nyu.edu/~ylclab/data/norb-v1.0/index.html}
-
-The format is for dense tensors:
-
-    - magic number indicating type and endianness - 4bytes
-    - rank of tensor - int32
-    - dimensions - int32, int32, int32, ...
-    - <data>
-
-The number of dimensions and rank is slightly tricky: 
-    - for scalar: rank=0, dimensions = [1, 1, 1]
-    - for vector: rank=1, dimensions = [?, 1, 1]
-    - for matrix: rank=2, dimensions = [?, ?, 1]
-
-For rank >= 3, the number of dimensions matches the rank exactly.
-
-
-@todo: add complex type support
-
-"""
-import sys
-import numpy
-
-def _prod(lst):
-    p = 1
-    for l in lst:
-        p *= l
-    return p
-
-_magic_dtype = {
-        0x1E3D4C51 : ('float32', 4),
-        #0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix?
-        0x1E3D4C53 : ('float64', 8),
-        0x1E3D4C54 : ('int32', 4),
-        0x1E3D4C55 : ('uint8', 1),
-        0x1E3D4C56 : ('int16', 2),
-        }
-_dtype_magic = {
-        'float32': 0x1E3D4C51,
-        #'packed matrix': 0x1E3D4C52,
-        'float64': 0x1E3D4C53,
-        'int32': 0x1E3D4C54,
-        'uint8': 0x1E3D4C55,
-        'int16': 0x1E3D4C56
-        }
-
-def _read_int32(f):
-    """unpack a 4-byte integer from the current position in file f"""
-    s = f.read(4)
-    s_array = numpy.fromstring(s, dtype='int32')
-    return s_array.item()
-
-def _read_header(f, debug=False):
-    """
-    :returns: data type, element size, rank, shape, size
-    """
-    #what is the data type of this matrix?
-    #magic_s = f.read(4)
-    #magic = numpy.fromstring(magic_s, dtype='int32')
-    magic = _read_int32(f)
-    magic_t, elsize = _magic_dtype[magic]
-    if debug: 
-        print 'header magic', magic, magic_t, elsize
-    if magic_t == 'packed matrix':
-        raise NotImplementedError('packed matrix not supported')
-
-    #what is the rank of the tensor?
-    ndim = _read_int32(f)
-    if debug: print 'header ndim', ndim
-
-    #what are the dimensions of the tensor?
-    dim = numpy.fromfile(f, dtype='int32', count=max(ndim,3))[:ndim]
-    dim_size = _prod(dim)
-    if debug: print 'header dim', dim, dim_size
-
-    return magic_t, elsize, ndim, dim, dim_size
-
-class arraylike(object):
-    """Provide an array-like interface to the filetensor in f.
-
-    The rank parameter to __init__ controls how this object interprets the underlying tensor.
-    Its behaviour should be clear from the following example.
-    Suppose the underlying tensor is MxNxK.
-
-    - If rank is 0, self[i] will be a scalar and len(self) == M*N*K.
-
-    - If rank is 1, self[i] is a vector of length K, and len(self) == M*N.
-
-    - If rank is 3, self[i] is a 3D tensor of size MxNxK, and len(self)==1.
-
-    - If rank is 5, self[i] is a 5D tensor of size 1x1xMxNxK, and len(self) == 1.
-
-
-    :note: Objects of this class generally require exclusive use of the underlying file handle, because
-    they call seek() every time you access an element.
-    """
-
-    f = None 
-    """File-like object"""
-
-    magic_t = None
-    """numpy data type of array"""
-
-    elsize = None
-    """number of bytes per scalar element"""
-
-    ndim = None
-    """Rank of underlying tensor"""
-
-    dim = None
-    """tuple of array dimensions (aka shape)"""
-
-    dim_size = None
-    """number of scalars in the tensor (prod of dim)"""
-
-    f_start = None
-    """The file position of the first element of the tensor"""
-
-    readshape = None
-    """tuple of array dimensions of the block that we read"""
-
-    readsize = None
-    """number of elements we must read for each block"""
-    
-    def __init__(self, f, rank=0, debug=False):
-        self.f = f
-        self.magic_t, self.elsize, self.ndim, self.dim, self.dim_size = _read_header(f,debug)
-        self.f_start = f.tell()
-
-        if rank <= self.ndim:
-          self.readshape = tuple(self.dim[self.ndim-rank:])
-        else:
-          self.readshape = tuple(self.dim)
-
-        #self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim)
-
-        if rank <= self.ndim:
-          padding = tuple()
-        else:
-          padding = (1,) * (rank - self.ndim)
-
-        #padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim)
-        self.returnshape = padding + self.readshape
-        self.readsize = _prod(self.readshape)
-        if debug: print 'READ PARAM', self.readshape, self.returnshape, self.readsize
-
-    def __len__(self):
-        return _prod(self.dim[:self.ndim-len(self.readshape)])
-
-    def __getitem__(self, idx):
-        if idx >= len(self):
-            raise IndexError(idx)
-        self.f.seek(self.f_start + idx * self.elsize * self.readsize)
-        return numpy.fromfile(self.f, 
-                dtype=self.magic_t, 
-                count=self.readsize).reshape(self.returnshape)
-
-
-#
-# TODO: implement item selection:
-#  e.g. load('some mat', subtensor=(:6, 2:5))
-#
-#  This function should be memory efficient by:
-#  - allocating an output matrix at the beginning
-#  - seeking through the file, reading subtensors from multiple places
-def read(f, subtensor=None, debug=False):
-    """Load all or part of file 'f' into a numpy ndarray
-
-    @param f: file from which to read
-    @type f: file-like object
-
-    If subtensor is not None, it should be like the argument to
-    numpy.ndarray.__getitem__.  The following two expressions should return
-    equivalent ndarray objects, but the one on the left may be faster and more
-    memory efficient if the underlying file f is big.
-
-        read(f, subtensor) <===> read(f)[*subtensor]
-    
-    Support for subtensors is currently spotty, so check the code to see if your
-    particular type of subtensor is supported.
-
-    """
-    magic_t, elsize, ndim, dim, dim_size = _read_header(f,debug)
-    f_start = f.tell()
-
-    rval = None
-    if subtensor is None:
-        rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
-    elif isinstance(subtensor, slice):
-        if subtensor.step not in (None, 1):
-            raise NotImplementedError('slice with step', subtensor.step)
-        if subtensor.start not in (None, 0):
-            bytes_per_row = _prod(dim[1:]) * elsize
-            f.seek(f_start + subtensor.start * bytes_per_row)
-        dim[0] = min(dim[0], subtensor.stop) - subtensor.start
-        rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
-    else:
-        raise NotImplementedError('subtensor access not written yet:', subtensor) 
-
-    return rval
-
-def write(f, mat):
-    """Write a numpy.ndarray to file.
-
-    @param f: file into which to write
-    @type f: file-like object
-
-    @param mat: array to write to file
-    @type mat: numpy ndarray or compatible
-
-    """
-    def _write_int32(f, i):
-        i_array = numpy.asarray(i, dtype='int32')
-        if 0: print 'writing int32', i, i_array
-        i_array.tofile(f)
-
-    try:
-        _write_int32(f, _dtype_magic[str(mat.dtype)])
-    except KeyError:
-        raise TypeError('Invalid ndarray dtype for filetensor format', mat.dtype)
-
-    _write_int32(f, len(mat.shape))
-    shape = mat.shape
-    if len(shape) < 3:
-        shape = list(shape) + [1] * (3 - len(shape))
-    if 0: print 'writing shape =', shape
-    for sh in shape:
-        _write_int32(f, sh)
-    mat.tofile(f)
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/gimp_script.py
--- a/transformations/gimp_script.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-'''
-Filtres GIMP sous Python
-Auteur: Nicolas Boulanger-Lewandowski
-Date: Hiver 2010
-
-run with: gimp -i --batch-interpreter python-fu-eval --batch - < gimp_script.py
-end with: pdb.gimp_quit(0)
-
-Implémente le motionblur et le pinch
-'''
-
-from gimpfu import *
-import numpy
-
-img = gimp.Image(32, 32, GRAY)
-img.disable_undo()
-layer1 = gimp.Layer(img, "layer1", 32, 32, GRAY_IMAGE, 100, NORMAL_MODE)
-img.add_layer(layer1, 0)
-dest_rgn = layer1.get_pixel_rgn(0, 0, 32, 32, True)
-
-def setpix(image):
-    dest_rgn[:,:] = (image.T*255).astype(numpy.uint8).tostring()
-    layer1.flush()
-    layer1.update(0, 0, 32, 32)
-
-def getpix():
-    return numpy.fromstring(dest_rgn[:,:], 'UInt8').astype(numpy.float32).reshape((32,32)).T / 255.0
-
-class GIMP1():
-    def get_settings_names(self):
-        return ['mblur_length', 'mblur_angle', 'pinch']
-    
-    def regenerate_parameters(self, complexity):
-        if complexity:
-            self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity))))
-        else:
-            self.mblur_length = 0
-        self.mblur_angle =  int(round(numpy.random.uniform(0,360)))
-        self.pinch = numpy.random.uniform(-complexity, 0.7*complexity)
-
-        return [self.mblur_length, self.mblur_angle, self.pinch]
-
-    def transform_image(self, image):
-        if self.mblur_length or self.pinch:
-            setpix(image)
-            if self.mblur_length:
-                pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0)
-            if self.pinch:        
-                pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0)
-            image = getpix()
-
-        return image
-
-# test
-if __name__ == '__main__':
-    import Image
-    im = numpy.asarray(Image.open("a.bmp").convert("L")) / 255.0
-
-    test = GIMP1()
-    print test.get_settings_names(), '=', test.regenerate_parameters(1)
-    #for i in range(1000):
-    im = test.transform_image(im)
-
-    import pylab
-    pylab.imshow(im, pylab.matplotlib.cm.Greys_r)
-    pylab.show()
-
-    pdb.gimp_quit(0)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/image_tiling.py
--- a/transformations/image_tiling.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-"""
-Illustrate filters (or data) in a grid of small image-shaped tiles.
-
-Note: taken from the pylearn codebase on Feb 4, 2010 (fsavard)
-"""
-
-import numpy
-from PIL import Image
-
-def scale_to_unit_interval(ndar,eps=1e-8):
-    ndar = ndar.copy()
-    ndar -= ndar.min()
-    ndar *= 1.0 / (ndar.max()+eps)
-    return ndar
-
-def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0,0),
-        scale_rows_to_unit_interval=True, 
-        output_pixel_vals=True
-        ):
-    """
-    Transform an array with one flattened image per row, into an array in which images are
-    reshaped and layed out like tiles on a floor.
-
-    This function is useful for visualizing datasets whose rows are images, and also columns of
-    matrices for transforming those rows (such as the first layer of a neural net).
-
-    :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can be 2-D ndarrays or None
-    :param X: a 2-D array in which every row is a flattened image.
-    :type img_shape: tuple; (height, width)
-    :param img_shape: the original shape of each image
-    :type tile_shape: tuple; (rows, cols)
-    :param tile_shape: the number of images to tile (rows, cols)
-
-    :returns: array suitable for viewing as an image.  (See:`PIL.Image.fromarray`.)
-    :rtype: a 2-d array with same dtype as X.
-
-    """
-    assert len(img_shape) == 2
-    assert len(tile_shape) == 2
-    assert len(tile_spacing) == 2
-
-    out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp 
-        in zip(img_shape, tile_shape, tile_spacing)]
-
-    if isinstance(X, tuple):
-        assert len(X) == 4
-        if output_pixel_vals:
-            out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype='uint8')
-        else:
-            out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype=X.dtype)
-
-        #colors default to 0, alpha defaults to 1 (opaque)
-        if output_pixel_vals:
-            channel_defaults = [0,0,0,255]
-        else:
-            channel_defaults = [0.,0.,0.,1.]
-
-        for i in xrange(4):
-            if X[i] is None:
-                out_array[:,:,i] = numpy.zeros(out_shape,
-                        dtype='uint8' if output_pixel_vals else out_array.dtype
-                        )+channel_defaults[i]
-            else:
-                out_array[:,:,i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
-        return out_array
-
-    else:
-        H, W = img_shape
-        Hs, Ws = tile_spacing
-
-        out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
-        for tile_row in xrange(tile_shape[0]):
-            for tile_col in xrange(tile_shape[1]):
-                if tile_row * tile_shape[1] + tile_col < X.shape[0]:
-                    if scale_rows_to_unit_interval:
-                        this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
-                    else:
-                        this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
-                    out_array[
-                        tile_row * (H+Hs):tile_row*(H+Hs)+H,
-                        tile_col * (W+Ws):tile_col*(W+Ws)+W
-                        ] \
-                        = this_img * (255 if output_pixel_vals else 1)
-        return out_array
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/local_elastic_distortions.py
--- a/transformations/local_elastic_distortions.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,456 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Implementation of elastic distortions as described in
-Simard, Steinkraus, Platt, "Best Practices for Convolutional
-    Neural Networks Applied to Visual Document Analysis", 2003
-
-Author: François Savard
-Date: Fall 2009, revised Winter 2010
-
-Usage: create the Distorter with proper alpha, sigma etc.
-    Then each time you want to change the distortion field applied,
-    call regenerate_field(). 
-
-    (The point behind this is that regeneration takes some time,
-    so we better reuse the fields a few times)
-'''
-
-import sys
-import math
-import numpy
-import numpy.random
-import scipy.signal # convolve2d
-
-_TEST_DIR = "/u/savardf/ift6266/debug_images/"
-
-def _raw_zeros(size):
-    return [[0 for i in range(size[1])] for j in range(size[0])]
-
-class ElasticDistortionParams():
-    def __init__(self, image_size=(32,32), alpha=0.0, sigma=0.0):
-        self.image_size = image_size
-        self.alpha = alpha
-        self.sigma = sigma
-
-        h,w = self.image_size
-
-        self.matrix_tl_corners_rows = _raw_zeros((h,w))
-        self.matrix_tl_corners_cols = _raw_zeros((h,w))
-
-        self.matrix_tr_corners_rows = _raw_zeros((h,w))
-        self.matrix_tr_corners_cols = _raw_zeros((h,w))
-
-        self.matrix_bl_corners_rows = _raw_zeros((h,w))
-        self.matrix_bl_corners_cols = _raw_zeros((h,w))
-
-        self.matrix_br_corners_rows = _raw_zeros((h,w))
-        self.matrix_br_corners_cols = _raw_zeros((h,w))
-
-        # those will hold the precomputed ratios for
-        # bilinear interpolation
-        self.matrix_tl_multiply = numpy.zeros((h,w))
-        self.matrix_tr_multiply = numpy.zeros((h,w))
-        self.matrix_bl_multiply = numpy.zeros((h,w))
-        self.matrix_br_multiply = numpy.zeros((h,w))
-
-    def alpha_sigma(self):
-        return [self.alpha, self.sigma]
-
-class LocalElasticDistorter():
-    def __init__(self, image_size=(32,32)):
-        self.image_size = image_size
-
-        self.current_complexity_10 = 0
-        self.current_complexity = 0
-
-        # number of precomputed fields
-        # (principle: as complexity doesn't change often, we can
-        # precompute a certain number of fields for a given complexity,
-        # each with its own parameters. That way, we have good
-        # randomization, but we're much faster).
-        self.to_precompute_per_complexity = 50
-
-        # Both use ElasticDistortionParams
-        self.current_params = None
-        self.precomputed_params = [[] for i in range(10)]
-
-        # 
-        self.kernel_size = None
-        self.kernel = None
-
-        # set some defaults
-        self.regenerate_parameters(0.0)
-
-    def get_settings_names(self):
-        return []
-
-    def _floor_complexity(self, complexity):
-        return self._to_complexity_10(complexity) / 10.0
-
-    def _to_complexity_10(self, complexity):
-        return min(9, max(0, int(complexity * 10)))
-
-    def regenerate_parameters(self, complexity):
-        complexity_10 = self._to_complexity_10(complexity)
-
-        if complexity_10 != self.current_complexity_10:
-            self.current_complexity_10 = complexity_10
-            self.current_complexity = self._floor_complexity(complexity)
-
-        if len(self.precomputed_params[complexity_10]) <= self.to_precompute_per_complexity:
-            # not yet enough params generated, produce one more
-            # and append to list
-            new_params = self._initialize_new_params()
-            new_params = self._generate_fields(new_params)
-            self.current_params = new_params
-            self.precomputed_params[complexity_10].append(new_params)
-        else:
-            # if we have enough precomputed fields, just select one
-            # at random and set parameters to match what they were
-            # when the field was generated
-            idx = numpy.random.randint(0, len(self.precomputed_params[complexity_10]))
-            self.current_params = self.precomputed_params[complexity_10][idx]
-
-        # don't return anything, to avoid storing deterministic parameters
-        return [] # self.current_params.alpha_sigma()
-
-    def get_parameters_determined_by_complexity(self, complexity):
-        tmp_params = self._initialize_new_params(_floor_complexity(complexity))
-        return tmp_params.alpha_sigma()
-
-    def get_settings_names_determined_by_complexity(self, complexity):
-        return ['alpha', 'sigma']
-
-    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
-    def _gen_gaussian_kernel(self, sigma):
-        # the kernel size can change DRAMATICALLY the time 
-        # for the blur operation... so even though results are better
-        # with a bigger kernel, we need to compromise here
-        # 1*s is very different from 2*s, but there's not much difference
-        # between 2*s and 4*s
-        ks = self.kernel_size
-        s = sigma
-        target_ks = (1.5*s, 1.5*s)
-        if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
-            # kernel size is good, ok, no need to regenerate
-            return
-        self.kernel_size = target_ks
-        h,w = self.kernel_size
-        a,b = h/2.0, w/2.0
-        y,x = numpy.ogrid[0:w, 0:h]
-        gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
-        # Normalize so we don't reduce image intensity
-        self.kernel = gauss/gauss.sum()
-
-    def _gen_distortion_field(self, params):
-        self._gen_gaussian_kernel(params.sigma)
-
-        # we add kernel_size on all four sides so blurring
-        # with the kernel produces a smoother result on borders
-        ks0 = self.kernel_size[0]
-        ks1 = self.kernel_size[1]
-        sz0 = self.image_size[1] + ks0
-        sz1 = self.image_size[0] + ks1
-        field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
-        field = scipy.signal.convolve2d(field, self.kernel, mode='same')
-
-        # crop only image_size in the middle
-        field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]]
-
-        return params.alpha * field
-        
-
-    def _initialize_new_params(self, complexity=None):
-        if not complexity:
-            complexity = self.current_complexity
-
-        params = ElasticDistortionParams(self.image_size)
-
-        # pour faire progresser la complexité un peu plus vite
-        # tout en gardant les extrêmes de 0.0 et 1.0
-        complexity = complexity ** (1./3.)
-
-        # the smaller the alpha, the closest the pixels are fetched
-        # a max of 10 is reasonable
-        params.alpha = complexity * 10.0
-
-        # the bigger the sigma, the smoother is the distortion
-        # max of 1 is "reasonable", but produces VERY noisy results
-        # And the bigger the sigma, the bigger the blur kernel, and the
-        # slower the field generation, btw.
-        params.sigma = 10.0 - (7.0 * complexity)
-
-        return params
-
-    def _generate_fields(self, params):
-        '''
-        Here's how the code works:
-        - We first generate "distortion fields" for x and y with these steps:
-            - Uniform noise over [-1, 1] in a matrix of size (h,w)
-            - Blur with a Gaussian kernel of spread sigma
-            - Multiply by alpha
-        - Then (conceptually) to compose the distorted image, we loop over each pixel
-            of the new image and use the corresponding x and y distortions
-            (from the matrices generated above) to identify pixels
-            of the old image from which we fetch color data. As the
-            coordinates are not integer, we interpolate between the
-            4 nearby pixels (top left, top right etc.).
-        - That's just conceptually. Here I'm using matrix operations
-            to speed up the computation. I first identify the 4 nearby
-            pixels in the old image for each pixel in the distorted image.
-            I can then use them as "fancy indices" to extract the proper
-            pixels for each new pixel.
-        - Then I multiply those extracted nearby points by precomputed
-            ratios for the bilinear interpolation.
-        '''
-
-        p = params
-
-        dist_fields = [None, None]
-        dist_fields[0] = self._gen_distortion_field(params)
-        dist_fields[1] = self._gen_distortion_field(params)
-
-        #pylab.imshow(dist_fields[0])
-        #pylab.show()
-
-        # regenerate distortion index matrices
-        # "_rows" are row indices
-        # "_cols" are column indices
-        # (separated due to the way fancy indexing works in numpy)
-        h,w = p.image_size
-
-        for y in range(h):
-            for x in range(w): 
-                distort_x = dist_fields[0][y,x]
-                distort_y = dist_fields[1][y,x]
-
-                # the "target" is the coordinate we fetch color data from
-                # (in the original image)
-                # target_left and _top are the rounded coordinate on the
-                # left/top of this target (float) coordinate
-                target_pixel = (y+distort_y, x+distort_x)
-
-                target_left = int(math.floor(x + distort_x))
-                target_top = int(math.floor(y + distort_y))
-
-                index_tl = [target_top, target_left]
-                index_tr = [target_top, target_left+1]
-                index_bl = [target_top+1, target_left]
-                index_br = [target_top+1, target_left+1]
-
-                # x_ratio is the ratio of importance of left pixels
-                # y_ratio is the """" of top pixels
-                # (in bilinear combination)
-                y_ratio = 1.0 - (target_pixel[0] - target_top)
-                x_ratio = 1.0 - (target_pixel[1] - target_left)
-
-                # We use a default background color of 0 for displacements
-                # outside of boundaries of the image.
-
-                # if top left outside bounds
-                if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w: 
-                    p.matrix_tl_corners_rows[y][x] = 0
-                    p.matrix_tl_corners_cols[y][x] = 0
-                    p.matrix_tl_multiply[y,x] = 0
-                else:
-                    p.matrix_tl_corners_rows[y][x] = index_tl[0]
-                    p.matrix_tl_corners_cols[y][x] = index_tl[1]
-                    p.matrix_tl_multiply[y,x] = x_ratio*y_ratio
-
-                # if top right outside bounds
-                if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
-                    p.matrix_tr_corners_rows[y][x] = 0
-                    p.matrix_tr_corners_cols[y][x] = 0
-                    p.matrix_tr_multiply[y,x] = 0
-                else:
-                    p.matrix_tr_corners_rows[y][x] = index_tr[0]
-                    p.matrix_tr_corners_cols[y][x] = index_tr[1]
-                    p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
-
-                # if bottom left outside bounds
-                if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
-                    p.matrix_bl_corners_rows[y][x] = 0
-                    p.matrix_bl_corners_cols[y][x] = 0
-                    p.matrix_bl_multiply[y,x] = 0
-                else:
-                    p.matrix_bl_corners_rows[y][x] = index_bl[0]
-                    p.matrix_bl_corners_cols[y][x] = index_bl[1]
-                    p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
-
-                # if bottom right outside bounds
-                if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
-                    p.matrix_br_corners_rows[y][x] = 0
-                    p.matrix_br_corners_cols[y][x] = 0
-                    p.matrix_br_multiply[y,x] = 0
-                else:
-                    p.matrix_br_corners_rows[y][x] = index_br[0]
-                    p.matrix_br_corners_cols[y][x] = index_br[1]
-                    p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
-
-        # not really necessary, but anyway
-        return p
-
-    def transform_image(self, image):
-        p = self.current_params
-
-        # index pixels to get the 4 corners for bilinear combination
-        tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
-        tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
-        bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
-        br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]
-
-        # bilinear ratios, elemwise multiply
-        tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
-        tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
-        bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
-        br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)
-
-        # sum to finish bilinear combination
-        return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0).astype(numpy.float32)
-
-# TESTS ----------------------------------------------------------------------
-
-def _load_image(filepath):
-    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
-    img = Image.open(filepath)
-    img = numpy.asarray(img)
-    if len(img.shape) > 2:
-        img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
-    return (img / 255.0).astype('float')
-
-def _specific_test():
-    imgpath = os.path.join(_TEST_DIR, "d.png")
-    img = _load_image(imgpath)
-    dist = LocalElasticDistorter((32,32))
-    print dist.regenerate_parameters(0.5)
-    img = dist.transform_image(img)
-    print dist.get_parameters_determined_by_complexity(0.4)
-    pylab.imshow(img)
-    pylab.show()
-
-def _complexity_tests():
-    imgpath = os.path.join(_TEST_DIR, "d.png")
-    dist = LocalElasticDistorter((32,32))
-    orig_img = _load_image(imgpath)
-    html_content = '''<html><body>Original:<br/><img src='d.png'>'''
-    for complexity in numpy.arange(0.0, 1.1, 0.1):
-        html_content += '<br/>Complexity: ' + str(complexity) + '<br/>'
-        for i in range(10):
-            t1 = time.time()
-            dist.regenerate_parameters(complexity)
-            t2 = time.time()
-            print "diff", t2-t1
-            img = dist.transform_image(orig_img)
-            filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
-            new_path = os.path.join(_TEST_DIR, filename)
-            _save_image(img, new_path)
-            html_content += '<img src="' + filename + '">'
-    html_content += "</body></html>"
-    html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
-    html_file.write(html_content)
-    html_file.close()
-    
-def _complexity_benchmark():
-    imgpath = os.path.join(_TEST_DIR, "d.png")
-    dist = LocalElasticDistorter((32,32))
-    orig_img = _load_image(imgpath)
-
-    for cpx in (0.21, 0.35):
-        # time the first 10
-        t1 = time.time()
-        for i in range(10):
-            dist.regenerate_parameters(cpx)
-            img = dist.transform_image(orig_img)
-        t2 = time.time()
-
-        print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10
-
-        # time the next 40
-        t1 = time.time()
-        for i in range(40):
-            dist.regenerate_parameters(cpx)
-            img = dist.transform_image(orig_img)
-        t2 = time.time()
-       
-        print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40
-
-        # time the next 50
-        t1 = time.time()
-        for i in range(50):
-            dist.regenerate_parameters(cpx)
-            img = dist.transform_image(orig_img)
-        t2 = time.time()
-       
-        print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50
-
-        # time the next 1000 
-        t1 = time.time()
-        for i in range(1000):
-            dist.regenerate_parameters(cpx)
-            img = dist.transform_image(orig_img)
-        t2 = time.time()
-       
-        print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
-
-    # time the next 1000 with old complexity
-    t1 = time.time()
-    for i in range(1000):
-        dist.regenerate_parameters(0.21)
-        img = dist.transform_image(orig_img)
-    t2 = time.time()
-   
-    print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
-
-
-
-
-def _save_image(img, path):
-    img2 = Image.fromarray((img * 255).astype('uint8'), "L")
-    img2.save(path)
-
-# TODO: reformat to follow new class... it function of complexity now
-'''
-def _distorter_tests():
-    #import pylab
-    #pylab.imshow(img)
-    #pylab.show()
-
-    for letter in ("d", "a", "n", "o"):
-        img = _load_image("tests/" + letter + ".png")
-        for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
-            for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
-                id = LocalElasticDistorter((32,32))
-                img2 = id.distort_image(img)
-                img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
-                img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
-'''
-
-def _benchmark():
-    img = _load_image("tests/d.png")
-    dist = LocalElasticDistorter((32,32))
-    dist.regenerate_parameters(0.0)
-    import time
-    t1 = time.time()
-    for i in range(10000):
-        if i % 1000 == 0:
-            print "-"
-        dist.distort_image(img)
-    t2 = time.time()
-    print "t2-t1", t2-t1
-    print "avg", 10000/(t2-t1)
-
-if __name__ == '__main__':
-    import time
-    import pylab
-    import Image
-    import os.path
-    #_distorter_tests()
-    #_benchmark()
-    #_specific_test()
-    #_complexity_tests()
-    _complexity_benchmark()
-    
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/pipeline.py
--- a/transformations/pipeline.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,391 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-from __future__ import with_statement
-
-# This is intended to be run as a GIMP script
-#from gimpfu import *
-
-import sys, os, getopt
-import numpy
-import filetensor as ft
-import random
-
-# To debug locally, also call with -s 100 (to stop after ~100)
-# (otherwise we allocate all needed memory, might be loonnng and/or crash
-# if, lucky like me, you have an age-old laptop creaking from everywhere)
-DEBUG = False
-DEBUG_X = False
-if DEBUG:
-    DEBUG_X = False # Debug under X (pylab.show())
-
-DEBUG_IMAGES_PATH = None
-if DEBUG:
-    # UNTESTED YET
-    # To avoid loading NIST if you don't have it handy
-    # (use with debug_images_iterator(), see main())
-    # To use NIST, leave as = None
-    DEBUG_IMAGES_PATH = None#'/home/francois/Desktop/debug_images'
-
-# Directory where to dump images to visualize results
-# (create it, otherwise it'll crash)
-DEBUG_OUTPUT_DIR = 'debug_out'
-
-DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft'
-DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft'
-DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
-DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft'
-ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE']
-
-# PARSE COMMAND LINE ARGUMENTS
-def get_argv():
-    with open(ARGS_FILE) as f:
-        args = [l.rstrip() for l in f.readlines()]
-    return args
-
-def usage():
-    print '''
-Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...]
-    -m, --max-complexity: max complexity to generate for an image
-    -z, --probability-zero: probability of using complexity=0 for an image
-    -o, --output-file: full path to file to use for output of images
-    -p, --params-output-file: path to file to output params to
-    -x, --labels-output-file: path to file to output labels to
-    -f, --data-file: path to filetensor (.ft) data file (NIST)
-    -l, --label-file: path to filetensor (.ft) labels file (NIST labels)
-    -c, --ocr-file: path to filetensor (.ft) data file (OCR)
-    -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels)
-    -a, --prob-font: probability of using a raw font image
-    -b, --prob-captcha: probability of using a captcha image
-    -g, --prob-ocr: probability of using an ocr image
-    -y, --seed: the job seed
-    '''
-
-try:
-    opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", 
-"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="])
-except getopt.GetoptError, err:
-        # print help information and exit:
-        print str(err) # will print something like "option -a not recognized"
-        usage()
-        pdb.gimp_quit(0)
-        sys.exit(2)
-
-for o, a in opts:
-    if o in ('-y','--seed'):
-        random.seed(int(a))
-        numpy.random.seed(int(a))
-
-if DEBUG_X:
-    import pylab
-    pylab.ion()
-
-from PoivreSel import PoivreSel
-from thick import Thick
-from BruitGauss import BruitGauss
-from DistorsionGauss import DistorsionGauss
-from PermutPixel import PermutPixel
-from gimp_script import GIMP1
-from Rature import Rature
-from contrast import Contrast
-from local_elastic_distortions import LocalElasticDistorter
-from slant import Slant
-from Occlusion import Occlusion
-from add_background_image import AddBackground
-from affine_transform import AffineTransformation
-from ttf2jpg import ttf2jpg
-from Facade import generateCaptcha
-
-if DEBUG:
-    from visualizer import Visualizer
-    # Either put the visualizer as in the MODULES_INSTANCES list
-    # after each module you want to visualize, or in the
-    # AFTER_EACH_MODULE_HOOK list (but not both, it's redundant)
-    VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR,  on_screen=False)
-
-###---------------------order of transformation module
-MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
-
-# These should have a "after_transform_callback(self, image)" method
-# (called after each call to transform_image in a module)
-AFTER_EACH_MODULE_HOOK = []
-if DEBUG:
-    AFTER_EACH_MODULE_HOOK = [VISUALIZER]
-
-# These should have a "end_transform_callback(self, final_image" method
-# (called after all modules have been called)
-END_TRANSFORM_HOOK = []
-if DEBUG:
-    END_TRANSFORM_HOOK = [VISUALIZER]
-
-class Pipeline():
-    def __init__(self, modules, num_img, image_size=(32,32)):
-        self.modules = modules
-        self.num_img = num_img
-        self.num_params_stored = 0
-        self.image_size = image_size
-
-        self.init_memory()
-
-    def init_num_params_stored(self):
-        # just a dummy call to regenerate_parameters() to get the
-        # real number of params (only those which are stored)
-        self.num_params_stored = 0
-        for m in self.modules:
-            self.num_params_stored += len(m.regenerate_parameters(0.0))
-
-    def init_memory(self):
-        self.init_num_params_stored()
-
-        total = self.num_img
-        num_px = self.image_size[0] * self.image_size[1]
-
-        self.res_data = numpy.empty((total, num_px), dtype=numpy.uint8)
-        # +1 to store complexity
-        self.params = numpy.empty((total, self.num_params_stored+len(self.modules)))
-        self.res_labels = numpy.empty(total, dtype=numpy.int32)
-
-    def run(self, img_iterator, complexity_iterator):
-        img_size = self.image_size
-
-        should_hook_after_each = len(AFTER_EACH_MODULE_HOOK) != 0
-        should_hook_at_the_end = len(END_TRANSFORM_HOOK) != 0
-
-        for img_no, (img, label) in enumerate(img_iterator):
-            sys.stdout.flush()
-            
-            global_idx = img_no
-
-            img = img.reshape(img_size)
-
-            param_idx = 0
-            mod_idx = 0
-            for mod in self.modules:
-                # This used to be done _per batch_,
-                # ie. out of the "for img" loop
-                complexity = complexity_iterator.next() 
-                #better to do a complexity sampling for each transformations in order to have more variability
-                #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time))
-                #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
-                #complexity
-                self.params[global_idx, mod_idx] = complexity
-                mod_idx += 1
-                 
-                p = mod.regenerate_parameters(complexity)
-                self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p
-                param_idx += len(p)
-
-                img = mod.transform_image(img)
-
-                if should_hook_after_each:
-                    for hook in AFTER_EACH_MODULE_HOOK:
-                        hook.after_transform_callback(img)
-
-            self.res_data[global_idx] = \
-                    img.reshape((img_size[0] * img_size[1],))*255
-            self.res_labels[global_idx] = label
-
-            if should_hook_at_the_end:
-                for hook in END_TRANSFORM_HOOK:
-                    hook.end_transform_callback(img)
-
-    def write_output(self, output_file_path, params_output_file_path, labels_output_file_path):
-        with open(output_file_path, 'wb') as f:
-            ft.write(f, self.res_data)
-
-        numpy.save(params_output_file_path, self.params)
-
-        with open(labels_output_file_path, 'wb') as f:
-            ft.write(f, self.res_labels)
-                
-
-##############################################################################
-# COMPLEXITY ITERATORS
-# They're called once every img, to get the complexity to use for that img
-# they must be infinite (should never throw StopIteration when calling next())
-
-# probability of generating 0 complexity, otherwise
-# uniform over 0.0-max_complexity
-def range_complexity_iterator(probability_zero, max_complexity):
-    assert max_complexity <= 1.0
-    n = numpy.random.uniform(0.0, 1.0)
-    while True:
-        if n < probability_zero:
-            yield 0.0
-        else:
-            yield numpy.random.uniform(0.0, max_complexity)
-
-##############################################################################
-# DATA ITERATORS
-# They can be used to interleave different data sources etc.
-
-'''
-# Following code (DebugImages and iterator) is untested
-
-def load_image(filepath):
-    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
-    img = Image.open(filepath)
-    img = numpy.asarray(img)
-    if len(img.shape) > 2:
-        img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
-    return (img / 255.0).astype('float')
-
-class DebugImages():
-    def __init__(self, images_dir_path):
-        import glob, os.path
-        self.filelist = glob.glob(os.path.join(images_dir_path, "*.png"))
-
-def debug_images_iterator(debug_images):
-    for path in debug_images.filelist:
-        yield load_image(path)
-'''
-
-class NistData():
-    def __init__(self, nist_path, label_path, ocr_path, ocrlabel_path):
-        self.train_data = open(nist_path, 'rb')
-        self.train_labels = open(label_path, 'rb')
-        self.dim = tuple(ft._read_header(self.train_data)[3])
-        # in order to seek to the beginning of the file
-        self.train_data.close()
-        self.train_data = open(nist_path, 'rb')
-        self.ocr_data = open(ocr_path, 'rb')
-        self.ocr_labels = open(ocrlabel_path, 'rb')
-
-# cet iterator load tout en ram
-def nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img):
-    img = ft.read(nist.train_data)
-    labels = ft.read(nist.train_labels)
-    if prob_ocr:
-        ocr_img = ft.read(nist.ocr_data)
-        ocr_labels = ft.read(nist.ocr_labels)
-    ttf = ttf2jpg()
-    L = [chr(ord('0')+x) for x in range(10)] + [chr(ord('A')+x) for x in range(26)] + [chr(ord('a')+x) for x in range(26)]
-
-    for i in xrange(num_img):
-        r = numpy.random.rand()
-        if r <= prob_font:
-            yield ttf.generate_image()
-        elif r <=prob_font + prob_captcha:
-            (arr, charac) = generateCaptcha(0,1)
-            yield arr.astype(numpy.float32)/255, L.index(charac[0])
-        elif r <= prob_font + prob_captcha + prob_ocr:
-            j = numpy.random.randint(len(ocr_labels))
-            yield ocr_img[j].astype(numpy.float32)/255, ocr_labels[j]
-        else:
-            j = numpy.random.randint(len(labels))
-            yield img[j].astype(numpy.float32)/255, labels[j]
-
-
-# Mostly for debugging, for the moment, just to see if we can
-# reload the images and parameters.
-def reload(output_file_path, params_output_file_path):
-    images_ft = open(output_file_path, 'rb')
-    images_ft_dim = tuple(ft._read_header(images_ft)[3])
-
-    print "Images dimensions: ", images_ft_dim
-
-    params = numpy.load(params_output_file_path)
-
-    print "Params dimensions: ", params.shape
-    print params
-    
-
-##############################################################################
-# MAIN
-
-
-# Might be called locally or through dbidispatch. In all cases it should be
-# passed to the GIMP executable to be able to use GIMP filters.
-# Ex: 
-def _main():
-    #global DEFAULT_NIST_PATH, DEFAULT_LABEL_PATH, DEFAULT_OCR_PATH, DEFAULT_OCRLABEL_PATH
-    #global getopt, get_argv
-
-    max_complexity = 0.5 # default
-    probability_zero = 0.1 # default
-    output_file_path = None
-    params_output_file_path = None
-    labels_output_file_path = None
-    nist_path = DEFAULT_NIST_PATH
-    label_path = DEFAULT_LABEL_PATH
-    ocr_path = DEFAULT_OCR_PATH
-    ocrlabel_path = DEFAULT_OCRLABEL_PATH
-    prob_font = 0.0
-    prob_captcha = 0.0
-    prob_ocr = 0.0
-    stop_after = None
-    reload_mode = False
-
-    for o, a in opts:
-        if o in ('-m', '--max-complexity'):
-            max_complexity = float(a)
-            assert max_complexity >= 0.0 and max_complexity <= 1.0
-        elif o in ('-r', '--reload'):
-            reload_mode = True
-        elif o in ("-z", "--probability-zero"):
-            probability_zero = float(a)
-            assert probability_zero >= 0.0 and probability_zero <= 1.0
-        elif o in ("-o", "--output-file"):
-            output_file_path = a
-        elif o in ('-p', "--params-output-file"):
-            params_output_file_path = a
-        elif o in ('-x', "--labels-output-file"):
-            labels_output_file_path = a
-        elif o in ('-s', "--stop-after"):
-            stop_after = int(a)
-        elif o in ('-f', "--data-file"):
-            nist_path = a
-        elif o in ('-l', "--label-file"):
-            label_path = a
-        elif o in ('-c', "--ocr-file"):
-            ocr_path = a
-        elif o in ('-d', "--ocrlabel-file"):
-            ocrlabel_path = a
-        elif o in ('-a', "--prob-font"):
-            prob_font = float(a)
-        elif o in ('-b', "--prob-captcha"):
-            prob_captcha = float(a)
-        elif o in ('-g', "--prob-ocr"):
-            prob_ocr = float(a)
-        elif o in ('-y', "--seed"):
-            pass
-        else:
-            assert False, "unhandled option"
-
-    if output_file_path == None or params_output_file_path == None or labels_output_file_path == None:
-        print "Must specify the three output files."
-        usage()
-        pdb.gimp_quit(0)
-        sys.exit(2)
-
-    if reload_mode:
-        reload(output_file_path, params_output_file_path)
-    else:
-        if DEBUG_IMAGES_PATH:
-            '''
-            # This code is yet untested
-            debug_images = DebugImages(DEBUG_IMAGES_PATH)
-            num_img = len(debug_images.filelist)
-            pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
-            img_it = debug_images_iterator(debug_images)
-            '''
-        else:
-            nist = NistData(nist_path, label_path, ocr_path, ocrlabel_path)
-            num_img = 819200 # 800 Mb file
-            if stop_after:
-                num_img = stop_after
-            pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
-            img_it = nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img)
-
-        cpx_it = range_complexity_iterator(probability_zero, max_complexity)
-        pl.run(img_it, cpx_it)
-        pl.write_output(output_file_path, params_output_file_path, labels_output_file_path)
-
-_main()
-
-if DEBUG_X:
-    pylab.ioff()
-    pylab.show()
-
-pdb.gimp_quit(0)
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/run_pipeline.sh
--- a/transformations/run_pipeline.sh	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-# This is one _ugly_ hack, but I couldn't figure out how
-# to cleanly pass command line options to the script if
-# invoking using the "gimp --batch < script.py" syntax
-
-# Basically I create a temp file, put the args into it,
-# then the script gets the filename and reads back the
-# args
-
-export PIPELINE_ARGS_TMPFILE=`mktemp`
-
-for arg in "$@"
-do
-	echo $arg >> $PIPELINE_ARGS_TMPFILE
-done
-
-gimp -i --batch-interpreter python-fu-eval --batch - < pipeline.py
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/slant.py
--- a/transformations/slant.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Author: Youssouf
-
-this module add a slant effect to the image. 
-
-To obtain the slant effect, each row of the array is shifted proportionately by a step controlled by the complexity.
-
-'''
-
-import numpy
-
-
-class Slant():
-    def __init__(self, complexity=1):
-        #---------- private attributes
-        self.direction = 1
-        self.angle = 0
-
-        #---------- generation parameters
-        self.regenerate_parameters(complexity)
-        #------------------------------------------------
-    
-    def _get_current_parameters(self):
-        return [self.angle, self.direction]
-    
-    def get_settings_names(self):
-        return ['angle', 'direction']
-    
-    def regenerate_parameters(self, complexity):
-        self.angle = numpy.random.uniform(0.0, complexity)
-        P = numpy.random.uniform()
-        self.direction = 1;
-        if P < 0.5:
-            self.direction = -1;
-        return self._get_current_parameters()
-    
-    
-    def transform_image(self,image):
-        if self.angle == 0:
-            return image
-        
-        ysize, xsize = image.shape
-        slant = self.direction*self.angle
-
-        output = image.copy()
-
-        # shift all the rows
-        for i in range(ysize):
-            line = image[i]
-            delta = round((i*slant)) % xsize
-            line1 = line[:xsize-delta]
-            line2 = line[xsize-delta:xsize]
-
-            output[i][delta:xsize] = line1
-            output[i][0:delta] = line2
-
-            
-        #correction to center the image
-        correction = (self.direction)*round(self.angle*ysize/2)
-        correction = (xsize - correction) % xsize
-
-        # center the region
-        line1 = output[0:ysize,0:xsize-correction].copy()
-        line2 = output[0:ysize,xsize-correction:xsize].copy()
-        output[0:ysize,correction:xsize] = line1
-        output[0:ysize,0:correction] = line2
-
-
-        return output
-            
-
-# Test function
-# Load an image in local and create several samples of the effect on the
-# original image with different parameter. All the samples are saved in a single image, the 1st image being the original.
-
-def test_slant():
-    import scipy
-    img_name = "test_img/mnist_0.png"
-    dest_img_name = "test_img/slanted.png"
-    nb_samples = 10
-    im = Image.open(img_name)
-    im = im.convert("L")
-    image = numpy.asarray(im)
-
-    image_final = image
-    slant = Slant()	
-    for i in range(nb_samples):
-        slant.regenerate_parameters(1)
-        image_slant = slant.transform_image(image)
-        image_final = scipy.hstack((image_final,image_slant))
-
-    im = Image.fromarray(image_final.astype('uint8'), "L")
-    im.save(dest_img_name)
-
-# Test
-if __name__ == '__main__':  
-    import sys, os, fnmatch
-    import Image
-
-    test_slant()
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/testmod.py
--- a/transformations/testmod.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-# This script is to test your modules to see if they conform to the module API
-# defined on the wiki.
-import random, numpy, gc, time, math, sys
-
-# this is an example module that does stupid image value shifting
-
-class DummyModule(object):
-    def get_settings_names(self):
-        return ['value']
-    
-    def regenerate_parameters(self, complexity):
-        self._value = random.gauss(0, 0.5*complexity)
-        return [self._value]
-
-    def transform_image(self, image):
-        return numpy.clip(image+self._value, 0, 1)
-    
-#import <your module>
-
-# instanciate your class here (rather than DummyModule)
-mod = DummyModule()
-
-def error(msg):
-    print "ERROR:", msg
-    sys.exit(1)
-
-def warn(msg):
-    print "WARNING:", msg
-
-def timeit(f, lbl):
-
-    gc.disable()
-    t = time.time()
-    f()
-    est = time.time() - t
-    gc.enable()
-
-    loops = max(1, int(10**math.floor(math.log(10/est, 10))))
-
-    gc.disable()
-    t = time.time()
-    for _ in xrange(loops):
-        f()
-
-    print lbl, "(", loops, "loops ):", (time.time() - t)/loops, "s"
-    gc.enable()
-
-########################
-# get_settings_names() #
-########################
-
-print "Testing get_settings_names()"
-
-names = mod.get_settings_names()
-
-if type(names) is not list:
-    error("Must return a list")
-
-if not all(type(e) is str for e in names):
-    warn("The elements of the list should be strings")
-
-###########################
-# regenerate_parameters() #
-###########################
-
-print "Testing regenerate_parameters()"
-
-params = mod.regenerate_parameters(0.2)
-
-if type(params) is not list:
-    error("Must return a list")
-
-if len(params) != len(names):
-    error("the returned parameter list must have the same length as the number of parameters")
-
-params2 = mod.regenerate_parameters(0.2)
-if len(names) != 0 and params == params2:
-    error("the complexity parameter determines the distribution of the parameters, not their value")
-
-mod.regenerate_parameters(0.0)
-mod.regenerate_parameters(1.0)
-    
-mod.regenerate_parameters(0.5)
-
-#####################
-# transform_image() #
-#####################
-
-print "Testing transform_image()"
-
-imgr = numpy.random.random_sample((32, 32)).astype(numpy.float32)
-img1 = numpy.ones((32, 32), dtype=numpy.float32)
-img0 = numpy.zeros((32, 32), dtype=numpy.float32)
-
-resr = mod.transform_image(imgr)
-
-if type(resr) is not numpy.ndarray:
-    error("Must return an ndarray")
-
-if resr.shape != (32, 32):
-    error("Must return 32x32 array")
-
-if resr.dtype != numpy.float32:
-    error("Must return float32 array")
-
-res1 = mod.transform_image(img1)
-res0 = mod.transform_image(img0)
-
-if res1.max() > 1.0 or res0.max() > 1.0:
-    error("Must keep array values between 0 and 1")
-
-if res1.min() < 0.0 or res0.min() < 0.0:
-    error("Must keep array values between 0 and 1")
-
-mod.regenerate_parameters(0.0)
-mod.transform_image(imgr)
-mod.regenerate_parameters(1.0)
-mod.transform_image(imgr)
-
-print "Bonus Stage: timings"
-
-timeit(lambda: None, "empty")
-timeit(lambda: mod.regenerate_parameters(0.5), "regenerate_parameters()")
-timeit(lambda: mod.transform_image(imgr), "tranform_image()")
-
-def f():
-    mod.regenerate_parameters(0.2)
-    mod.transform_image(imgr)
-
-timeit(f, "regen and transform")
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/testtransformations.py
--- a/transformations/testtransformations.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,156 +0,0 @@
-#!/usr/bin/env python
-
-
-
-from pylearn.io import filetensor as ft
-import copy
-import pygame
-import time
-import numpy as N
-
-from ttf2jpg import ttf2jpg
-
-#from gimpfu import *
-
-
-from PoivreSel import PoivreSel
-from thick import Thick
-from BruitGauss import BruitGauss
-from DistorsionGauss import DistorsionGauss
-from PermutPixel import PermutPixel
-from gimp_script import GIMP1
-from Rature import Rature
-from contrast import Contrast
-from local_elastic_distortions import LocalElasticDistorter
-from slant import Slant
-from Occlusion import Occlusion
-from add_background_image import AddBackground
-from affine_transform import AffineTransformation
-
-###---------------------order of transformation module
-MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
-
-###---------------------complexity associated to each of them
-complexity = 0.7
-#complexity = [0.5]*len(MODULE_INSTANCES)
-#complexity = [0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.]
-n=100
-
-def createimage(path,d):
-    for i in range(n):
-        screen.fill(0)
-        a=d[i,:]
-        off1=4*32
-        off2=0
-        for u in range(n):
-            b=N.asarray(N.reshape(a,(32,32)))
-            c=N.asarray([N.reshape(a*255.0,(32,32))]*3).T
-            new=pygame.surfarray.make_surface(c)
-            new=pygame.transform.scale2x(new)
-            new=pygame.transform.scale2x(new)
-            #new.set_palette(anglcolorpalette)
-            screen.blit(new,(0,0))
-            exemple.blit(new,(0,0))
-            
-            offset = 4*32
-            offset2 = 0
-            ct = 0
-            ctmp =  N.random.rand()*complexity
-            print u
-            for j in MODULE_INSTANCES:
-                #max dilation
-                #ctmp = N.random.rand()*complexity[ct]
-                ctmp = N.random.rand()*complexity 
-                #print j.get_settings_names(), j.regenerate_parameters(ctmp)
-                th=j.regenerate_parameters(ctmp)
-                
-                b=j.transform_image(b)
-                c=N.asarray([b*255]*3).T
-                new=pygame.surfarray.make_surface(c)
-                new=pygame.transform.scale2x(new)
-                new=pygame.transform.scale2x(new)
-                if u==0:
-                    #new.set_palette(anglcolorpalette)
-                    screen.blit(new,(offset,offset2))
-                    font = pygame.font.SysFont('liberationserif',18)
-                    text = font.render('%s '%(int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
-                    #if  j.__module__ == 'Rature':
-                    #     text = font.render('%s,%s'%(th[-1],int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
-                    screen.blit(text,(offset,offset2+4*32))
-                    if ct == len(MODULE_INSTANCES)/2-1:
-                        offset = 0
-                        offset2 = 4*32+20
-                    else:
-                        offset += 4*32
-                    ct+=1
-            exemple.blit(new,(off1,off2))
-            if off1 != 9*4*32:
-                off1+=4*32
-            else:
-                off1=0
-                off2+=4*32
-        pygame.image.save(exemple,path+'/perimages/%s.PNG'%i)
-        pygame.image.save(screen,path+'/exemples/%s.PNG'%i)
- 
-
-
-
-nbmodule = len(MODULE_INSTANCES)
-
-pygame.surfarray.use_arraytype('numpy')
-
-#pygame.display.init()
-screen = pygame.Surface((4*(nbmodule+1)/2*32,2*(4*32+20)),depth=32)
-exemple = pygame.Surface((N.ceil(N.sqrt(n))*4*32,N.ceil(N.sqrt(n))*4*32),depth=32)
-
-anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
-#pygame.Surface.set_palette(anglcolorpalette)
-#screen.set_palette(anglcolorpalette)
-
-pygame.font.init()
-
-d = N.zeros((n,1024))
-
-datapath = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
-f = open(datapath)
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/OCR',d)
-
-
-
-datapath = '/data/lisa/data/nist/by_class/'
-f = open(datapath+'digits_reshuffled/digits_reshuffled_train_data.ft')
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/NIST_digits',d)
-
-
-
-datapath = '/data/lisa/data/nist/by_class/'
-f = open(datapath+'upper/upper_train_data.ft')
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/NIST_upper',d)
-
-from Facade import *
-
-for i in range(n):
-    d[i,:]=N.asarray(N.reshape(generateCaptcha(0.8,0),(1,1024))/255.0,dtype='float32')
-
-createimage('/u/glorotxa/transf/capcha',d)
-
-
-for i in range(n):
-    myttf2jpg = ttf2jpg()
-    d[i,:]=N.reshape(myttf2jpg.generate_image()[0],(1,1024))
-createimage('/u/glorotxa/transf/fonts',d)
-
-datapath = '/data/lisa/data/nist/by_class/'
-f = open(datapath+'lower/lower_train_data.ft')
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/NIST_lower',d)
-
-
-#pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/thick.py
--- a/transformations/thick.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,198 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Simple implementation of random thickness deformation using morphological
-operation of scipy.
-Only one morphological operation applied (dilation or erosion), the kernel is random
-out of a list of 12 symmetric kernels. (only 5 to be chosen for erosion because it can
-hurt the recognizability of the charater and 12 for dilation).
-
-Author: Xavier Glorot
-
-'''
-
-import scipy.ndimage.morphology
-import numpy as N
-
-
-class Thick():
-    def __init__(self,complexity = 1):
-        #---------- private attributes
-        self.__nx__ = 32 #xdim of the images
-        self.__ny__ = 32 #ydim of the images
-        self.__erodemax__ = 5 #nb of index max of erode structuring elements
-        self.__dilatemax__ = 9 #nb of index max of dilation structuring elements
-        self.__structuring_elements__ = [N.asarray([[1,1]]),N.asarray([[1],[1]]),\
-                                        N.asarray([[1,1],[1,1]]),N.asarray([[0,1,0],[1,1,1],[0,1,0]]),\
-                                        N.asarray([[1,1,1],[1,1,1]]),N.asarray([[1,1],[1,1],[1,1]]),\
-                                        N.asarray([[1,1,1],[1,1,1],[1,1,1]]),\
-                                        N.asarray([[1,1,1,1],[1,1,1,1],[1,1,1,1]]),\
-                                        N.asarray([[1,1,1],[1,1,1],[1,1,1],[1,1,1]]),\
-                                        N.asarray([[0,0,1,0,0],[0,1,1,1,0],[1,1,1,1,1],[0,1,1,1,0],[0,0,1,0,0]]),\
-                                        N.asarray([[1,1,1,1],[1,1,1,1]]),N.asarray([[1,1],[1,1],[1,1],[1,1]])]
-        #------------------------------------------------
-        
-        #---------- generation parameters
-        self.regenerate_parameters(complexity)
-        #------------------------------------------------
-    
-    def _get_current_parameters(self):
-        return [self.thick_param]
-    
-    def get_settings_names(self):
-        return ['thick_param']
-    
-    def regenerate_parameters(self, complexity):
-        self.erodenb = N.ceil(complexity * self.__erodemax__)
-        self.dilatenb = N.ceil(complexity * self.__dilatemax__)
-        self.Perode = self.erodenb / (self.dilatenb + self.erodenb + 1.0)
-        self.Pdilate = self.dilatenb / (self.dilatenb   + self.erodenb + 1.0)
-        assert (self.Perode + self.Pdilate <= 1) & (self.Perode + self.Pdilate >= 0)
-        assert (complexity >= 0) & (complexity <= 1)
-        P = N.random.uniform()
-        if P>1-(self.Pdilate+self.Perode):
-            if P>1-(self.Pdilate+self.Perode)+self.Perode:
-                self.meth = 1
-                self.nb=N.random.randint(self.dilatenb)
-            else:
-                self.meth = -1
-                self.nb=N.random.randint(self.erodenb)
-        else:
-            self.meth = 0
-            self.nb = -1
-        self.thick_param = self.meth*self.nb
-        return self._get_current_parameters()
-    
-    def transform_1_image(self,image): #the real transformation method
-        if self.meth!=0:
-            maxi = float(N.max(image))
-            mini = float(N.min(image))
-            
-            imagenorm=image/maxi
-            
-            if self.meth==1:
-                trans=scipy.ndimage.morphology.grey_dilation\
-                    (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
-            else:
-                trans=scipy.ndimage.morphology.grey_erosion\
-                    (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
-            
-            #------renormalizing
-            maxit = N.max(trans)
-            minit = N.min(trans)
-            trans= N.asarray((trans - (minit+mini)) / (maxit - (minit+mini)) * maxi,dtype=image.dtype)
-            #--------
-            return trans
-        else:
-            return image
-    
-    def transform_image(self,image): #handling different format
-        if image.shape == (self.__nx__,self.__ny__):
-            return self.transform_1_image(image)
-        if image.ndim == 3:
-            newimage = copy.copy(image)
-            for i in range(image.shape[0]):
-                newimage[i,:,:] = self.transform_1_image(image[i,:,:])
-            return newimage
-        if image.ndim == 2 and image.shape != (self.__nx__,self.__ny__):
-            newimage = N.reshape(image,(image.shape[0],self.__nx__,self.__ny__))
-            for i in range(image.shape[0]):
-                newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
-            return N.reshape(newimage,image.shape)
-        if image.ndim == 1:
-            newimage = N.reshape(image,(self.__nx__,self.__ny__))
-            newimage = self.transform_1_image(newimage)
-            return N.reshape(newimage,image.shape)
-        assert False #should never go there
-
-
-
-
-#test on NIST (you need pylearn and access to NIST to do that)
-
-if __name__ == '__main__':
-    
-    from pylearn.io import filetensor as ft
-    import copy
-    import pygame
-    import time
-    datapath = '/data/lisa/data/nist/by_class/'
-    f = open(datapath+'digits/digits_train_data.ft')
-    d = ft.read(f)
-    
-    pygame.surfarray.use_arraytype('numpy')
-    
-    pygame.display.init()
-    screen = pygame.display.set_mode((8*4*32,8*32),0,8)
-    anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
-    screen.set_palette(anglcolorpalette)
-    
-    MyThick = Thick()
-    
-    #debut=time.time()
-    #MyThick.transform_image(d)
-    #fin=time.time()
-    #print '------------------------------------------------'
-    #print d.shape[0],' images transformed in :', fin-debut, ' seconds'
-    #print '------------------------------------------------'
-    #print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
-    #print '------------------------------------------------'
-    #print MyThick.get_settings_names()
-    #print MyThick._get_current_parameters()
-    #print MyThick.regenerate_parameters(0)
-    #print MyThick.regenerate_parameters(0.5)
-    #print MyThick.regenerate_parameters(1)
-    for i in range(10000):
-        a=d[i,:]
-        b=N.asarray(N.reshape(a,(32,32))).T
-        
-        new=pygame.surfarray.make_surface(b)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new.set_palette(anglcolorpalette)
-        screen.blit(new,(0,0))
-        
-        #max dilation
-        MyThick.meth=1
-        MyThick.nb=MyThick.__dilatemax__
-        c=MyThick.transform_image(a)
-        b=N.asarray(N.reshape(c,(32,32))).T
-        
-        new=pygame.surfarray.make_surface(b)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new.set_palette(anglcolorpalette)
-        screen.blit(new,(8*32,0))
-        
-        #max erosion
-        MyThick.meth=-1
-        MyThick.nb=MyThick.__erodemax__
-        c=MyThick.transform_image(a)
-        b=N.asarray(N.reshape(c,(32,32))).T
-        
-        new=pygame.surfarray.make_surface(b)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new.set_palette(anglcolorpalette)
-        screen.blit(new,(8*2*32,0))
-        
-        #random
-        print MyThick.get_settings_names(), MyThick.regenerate_parameters(1)
-        c=MyThick.transform_image(a)
-        b=N.asarray(N.reshape(c,(32,32))).T
-        
-        new=pygame.surfarray.make_surface(b)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new=pygame.transform.scale2x(new)
-        new.set_palette(anglcolorpalette)
-        screen.blit(new,(8*3*32,0))
-        
-        pygame.display.update()
-        raw_input('Press Enter')
-    
-    pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/ttf2jpg.py
--- a/transformations/ttf2jpg.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-#!/usr/bin/python                                                                                 
-# -*- coding: iso-8859-1 -*-                                                                      
-
-'''
-    Implementation of font image generator
-    download fonts from http://www.dafont.com for exemple
-
-    Author: Guillaume Sicard
-'''
-
-import sys, os, fnmatch, random
-import Image, ImageFont, ImageDraw, numpy
-
-class ttf2jpg():
-    def __init__(self, font_file = ''):
-        self.w = 32
-        self.h = 32
-        self.font_dir = '/Tmp/allfonts/'
-        self.font_file = font_file
-        self.image_dir = './images/'
-        self.pattern = '*.ttf'
-        self.char_list = []
-        for i in range(0,10):
-            self.char_list.append(chr(ord('0') + i) )
-        for i in range(0,26):
-            self.char_list.append(chr(ord('A') + i) )
-        for i in range(0,26):
-            self.char_list.append(chr(ord('a') + i) )
-        files = os.listdir(self.font_dir)
-        self.font_files = fnmatch.filter(files, '*.ttf') + fnmatch.filter(files, '*.TTF')
-
-    # get font name
-    def get_settings_names(self):
-        return [self.font_file]
-
-    # save an image
-    def save_image(self,array, filename = ''):
-        image = (array * 255.0).astype('int')
-        image = Image.fromarray(image).convert('L')
-        if (filename != ''):
-            image.save(filename)
-        else:
-            image.show()
-
-    # set a random font for character generation
-    def set_random_font(self):
-        i = random.randint(0, len(self.font_files) - 1)
-        self.font_file = self.font_dir + self.font_files[i]
-
-    # return a picture array of "text" with font "font_file"
-    def create_image(self, text):
-         # create a w x h black picture, and a drawing space
-        image = Image.new('L', (self.w, self.h), 'Black')
-        draw = ImageDraw.Draw(image)
-
-        # load the font with the right size
-        font = ImageFont.truetype(self.font_file, 28)
-        d_w,d_h =  draw.textsize(text, font=font)
-
-        # write text and aligns it
-        draw.text(((32 - d_w) / 2, ((32 - d_h) / 2)), text, font=font, fill='White')
-
-        image = numpy.asarray(image)
-        image = (image / 255.0).astype(numpy.float32)
-
-        return image
-
-    # write all the letters and numbers into pictures
-    def process_font(self):
-        for i in range(0, len(self.char_list) ):
-            image = self.create_image(self.char_list[i])
-            self.save_image(image, self.image_dir + self.char_list[i] + '-' + os.path.basename(self.font_file) + '.jpg')
-            sys.stdout.write('.')
-            sys.stdout.flush()
-        return (len(self.char_list))
-
-    # generate the character from the font_file and returns a numpy array
-    def generate_image_from_char(self, character, font_file = ''):
-        if (font_file != ''):
-            self.font_file = font_file
-
-        return self.create_image(character)
-
-    # generate random character from random font file as a numpy array
-    def generate_image(self):
-        self.set_random_font()
-        i = random.randint(0, len(self.char_list) - 1)
-        return self.generate_image_from_char(self.char_list[i]), i
-
-    # test method, create character images for all fonts in "font_dir" in dir "image_dir"
-    def test(self):
-        import time
-
-        # look for ttf files
-        files = os.listdir(self.font_dir)
-        font_files = fnmatch.filter(files, self.pattern)
-
-        # create "image_dir" if it doesn't exist
-        if not os.path.isdir(self.image_dir):
-            os.mkdir(self.image_dir)
-
-        sys.stdout.write( str(len(font_files)) + ' fonts found, generating jpg images in folder ' + self.image_dir )
-        sys.stdout.flush()
-
-        # main loop
-        t =  time.time()
-        n = 0
-
-        for font_file in font_files:
-            self.font_file = self.font_dir + font_file
-            n += self.process_font()
-        t = time.time() - t
-
-        sys.stdout.write('\nall done!\n' + str(n) + ' images generated in ' + str(t) + 's (average : ' + str(1000 * t / n) + ' ms/im)\n')
-
-if __name__ == '__main__':
-
-    myttf2jpg = ttf2jpg()
-    #myttf2jpg.test()
-    image, i = myttf2jpg.generate_image()
-    myttf2jpg.save_image(image, '')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/visualizer.py
--- a/transformations/visualizer.py	Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-#!/usr/bin/python
-
-import numpy
-import Image
-from image_tiling import tile_raster_images
-import pylab
-import time
-
-class Visualizer():
-    def __init__(self, num_columns=10, image_size=(32,32), to_dir=None, on_screen=False):
-        self.list = []
-        self.image_size = image_size
-        self.num_columns = num_columns
-
-        self.on_screen = on_screen
-        self.to_dir = to_dir
-
-        self.cur_grid_image = None
-
-        self.cur_index = 0
-
-    def visualize_stop_and_flush(self):
-        self.make_grid_image()
-
-        if self.on_screen:
-            self.visualize()
-        if self.to_dir:
-            self.dump_to_disk()
-
-        self.stop_and_wait()
-        self.flush()
-
-        self.cur_index += 1
-
-    def make_grid_image(self):
-        num_rows = len(self.list) / self.num_columns
-        if len(self.list) % self.num_columns != 0:
-            num_rows += 1
-        grid_shape = (num_rows, self.num_columns)
-        self.cur_grid_image = tile_raster_images(numpy.array(self.list), self.image_size, grid_shape, tile_spacing=(5,5), output_pixel_vals=False)
-
-    def visualize(self):
-        pylab.imshow(self.cur_grid_image)
-        pylab.draw()
-
-    def dump_to_disk(self):
-        gi = Image.fromarray((self.cur_grid_image * 255).astype('uint8'), "L")
-        gi.save(self.to_dir + "/grid_" + str(self.cur_index) + ".png")
-        
-    def stop_and_wait(self):
-        # can't raw_input under gimp, so sleep)
-        print "New image generated, sleeping 5 secs"
-        time.sleep(5)
-
-    def flush(self):
-        self.list = []
-    
-    def get_parameters_names(self):
-        return []
-
-    def regenerate_parameters(self):
-        return []
-
-    def after_transform_callback(self, image):
-        self.transform_image(image)
-
-    def end_transform_callback(self, final_image):
-        self.visualize_stop_and_flush()
-
-    def transform_image(self, image):
-        sz = self.image_size
-        self.list.append(image.copy().reshape((sz[0] * sz[1])))
-