# HG changeset patch
# User Dumitru Erhan
# Date 1267211738 18000
# Node ID 1f5937e9e5308da70563789925217b60008a513a
# Parent 17ae5a1a4dd1e6c41a2fd2e6b4fab644f9110df5
More moves - transformations into data_generation, added "deep" folder
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/BruitGauss.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/BruitGauss.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,138 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout de bruit gaussien dans les donnees. A chaque iteration, un bruit poivre
+et sel est ajoute, puis un lissage gaussien autour de ce point est ajoute.
+On fait un nombre d'iteration = 1024*complexity/25 ce qui equivaud
+a complexity/25 des points qui recoivent le centre du noyau gaussien.
+Il y en a beaucoup moins que le bruit poivre et sel, car la transformation
+est plutôt aggressive et touche beaucoup de pixels autour du centre
+
+La grandeur de la gaussienne ainsi que son ecart type sont definit par complexity
+et par une composante aleatoire normale.
+
+On a 25 % de chances d'effectuer le bruitage
+
+Ce fichier prend pour acquis que les images sont donnees une a la fois
+sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy
+#import random
+import scipy
+from scipy import ndimage
+
class BruitGauss():
    """Gaussian blob noise.

    With probability 25% (drawn in regenerate_parameters) the transform is
    active: the image is convolved with a Gaussian kernel and the blurred
    version is blended back in around `nb_chng` random centres through an
    additive weight mask.  Expects one image at a time as a numpy array of
    1024 (32 x 32) values in [0, 1].
    """

    def __init__(self,complexity=1,seed=6378):
        # Upper bounds for the randomly drawn parameters (scaled by
        # `complexity` in regenerate_parameters()).
        self.nb_chngmax =10 #max number of blob centres
        self.grandeurmax = 20 #max kernel size, in pixels
        self.sigmamax = 6.0 #max std dev of the Gaussian
        self.regenerate_parameters(complexity)
        # NOTE(review): the seed is stored after random draws already
        # happened above, and the numpy seeding is commented out.
        self.seed=seed

        #numpy.random.seed(self.seed)

    def get_seed(self):
        """Return the stored seed (not actually fed to the RNG here)."""
        return self.seed

    def get_settings_names(self):
        """Names of the values returned by _get_current_parameters()."""
        return ['nb_chng','sigma_gauss','grandeur']

    def regenerate_parameters(self, complexity):
        """Redraw the random parameters; return the new parameter list.

        When active (25% chance and complexity > 0) builds the Gaussian
        kernel `self.gauss` and the blending window `self.moy`; otherwise
        resets to a no-op configuration.
        """
        self.effectuer =numpy.random.binomial(1,0.25) ##### 25% chance of applying the noise #####


        if self.effectuer and complexity > 0:
            self.nb_chng=3+int(numpy.random.rand()*self.nb_chngmax*complexity)
            self.sigma_gauss=2.0 + numpy.random.rand()*self.sigmamax*complexity
            self.grandeur=12+int(numpy.random.rand()*self.grandeurmax*complexity)
            #Build the Gaussian kernel, centred in the middle of the window.
            self.gauss=numpy.zeros((self.grandeur,self.grandeur))
            x0 = y0 = self.grandeur/2.0
            for i in xrange(self.grandeur):
                for j in xrange(self.grandeur):
                    self.gauss[i,j]=numpy.exp(-((i-x0)**2 + (j-y0)**2) / self.sigma_gauss**2)
            #Build the blending window: weight decays with distance from the centre.
            self.moy=numpy.zeros((self.grandeur,self.grandeur))
            x0 = y0 = self.grandeur/2
            for i in xrange(0,self.grandeur):
                for j in xrange(0,self.grandeur):
                    self.moy[i,j]=((numpy.sqrt(2*(self.grandeur/2.0)**2) -\
                        numpy.sqrt(numpy.abs(i-self.grandeur/2.0)**2+numpy.abs(j-self.grandeur/2.0)**2))/numpy.sqrt((self.grandeur/2.0)**2))**5
        else:
            self.sigma_gauss = 1 # avoid division by zero
            self.grandeur=1
            self.nb_chng = 0
            self.effectuer = 0

        return self._get_current_parameters()

    def _get_current_parameters(self):
        return [self.nb_chng,self.sigma_gauss,self.grandeur]


    def transform_image(self, image):
        """Blend the blurred image in around `nb_chng` random centres.

        Returns the input unchanged when inactive, otherwise a float32
        (32, 32) array.
        """
        if self.effectuer == 0:
            return image
        image=image.reshape((32,32))
        filtered_image = ndimage.convolve(image,self.gauss,mode='constant')
        assert image.shape == filtered_image.shape
        # Rescale the blurred image back into the input's value range.
        filtered_image = (filtered_image - filtered_image.min() + image.min()) / (filtered_image.max() - filtered_image.min() + image.min()) * image.max()

        #construction of the blending mask
        Mask = numpy.zeros((32,32))

        for i in xrange(0,self.nb_chng):
            x_bruit=int(numpy.random.randint(0,32))
            y_bruit=int(numpy.random.randint(0,32))
            # The offsets clip the window so it stays inside the 32x32 image.
            # NOTE(review): relies on Python 2 integer division for
            # self.grandeur/2 — under Python 3 these become floats.
            offsetxmin = 0
            offsetxmax = 0
            offsetymin = 0
            offsetymax = 0
            if x_bruit < self.grandeur / 2:
                offsetxmin = self.grandeur / 2 - x_bruit
            if 32-x_bruit < numpy.ceil(self.grandeur / 2.0):
                offsetxmax = numpy.ceil(self.grandeur / 2.0) - (32-x_bruit)
            if y_bruit < self.grandeur / 2:
                offsetymin = self.grandeur / 2 - y_bruit
            if 32-y_bruit < numpy.ceil(self.grandeur / 2.0):
                offsetymax = numpy.ceil(self.grandeur / 2.0) - (32-y_bruit)
            Mask[x_bruit - self.grandeur/2 + offsetxmin : x_bruit + numpy.ceil(self.grandeur/2.0) - offsetxmax,\
                    y_bruit - self.grandeur/2 + offsetymin : y_bruit + numpy.ceil(self.grandeur/2.0)- offsetymax] +=\
                    self.moy[offsetxmin:self.grandeur - offsetxmax,offsetymin:self.grandeur - offsetymax]

        # Weighted average: where Mask is 0 the original pixel is kept.
        return numpy.asarray((image + filtered_image*Mask)/(Mask+1),dtype='float32')
+
#---TESTS---

def _load_image():
    """Load one 32x32 test image, scaled to [0, 1].

    Relies on `ft` (pylearn filetensor), imported in the __main__ block.
    """
    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #The dataset is stored locally.
    d = ft.read(f)
    w=numpy.asarray(d[0])
    return (w/255.0).astype('float')

def _test(complexite):
    """Display an image before and after the transform at `complexite`."""
    img=_load_image()
    transfo = BruitGauss()
    pylab.imshow(img.reshape((32,32)))
    pylab.show()
    print transfo.get_settings_names()
    print transfo.regenerate_parameters(complexite)

    img_trans=transfo.transform_image(img)

    pylab.imshow(img_trans.reshape((32,32)))
    pylab.show()


if __name__ == '__main__':
    from pylearn.io import filetensor as ft
    import pylab
    _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/DistorsionGauss.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/DistorsionGauss.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout d'une composante aleatoire dans chaque pixel de l'image.
+C'est une distorsion gaussienne de moyenne 0 et d'écart type complexity/10
+
+Il y a 30% d'effectuer le bruitage
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy
+import random
+
class DistorsionGauss():
    """Add i.i.d. Gaussian jitter (mean 0, std complexity/10) to each pixel.

    The transform is only applied 30% of the time (decided in
    regenerate_parameters); jittered values are clamped back into [0, 1].
    Expects one image at a time as a numpy array of 1024 values in [0, 1].
    """

    def __init__(self, seed=3459):
        self.ecart_type = 0.1   # std dev of the Gaussian jitter
        self.effectuer = 1      # 1 = apply the transform, 0 = no-op
        self.seed = seed

        # The two RNGs (numpy / random) are of different kinds; sharing a
        # seed would have no effect, so seeding stays disabled.
        #numpy.random.seed(self.seed)
        #random.seed(self.seed)

    def get_settings_names(self):
        return ['effectuer']

    def get_seed(self):
        return self.seed

    def get_settings_names_determined_by_complexity(self, complexity):
        return ['ecart_type']

    def regenerate_parameters(self, complexity):
        """Draw whether to apply the jitter (30% chance) and its std dev."""
        self.ecart_type = float(complexity) / 10
        self.effectuer = numpy.random.binomial(1, 0.3)
        return self._get_current_parameters()

    def _get_current_parameters(self):
        return [self.effectuer]

    def get_parameters_determined_by_complexity(self, complexity):
        return [float(complexity) / 10]

    def transform_image(self, image):
        """Return the jittered image as a (32, 32) array; no-op if disabled."""
        if self.effectuer == 0:
            return image

        pixels = image.reshape(1024, 1)
        jitter = numpy.zeros((1024, 1)).astype('float32')
        for k in range(0, 1024):
            jitter[k] = float(random.gauss(0, self.ecart_type))
        pixels = pixels + jitter

        # Clamp every pixel back into [0, 1].
        for k in range(0, 1024):
            pixels[k] = min(1, max(0, pixels[k]))

        return pixels.reshape(32, 32)
+
+
#---TESTS---

def _load_image():
    """Load one random 32x32 test image, scaled to [0, 1].

    Relies on `ft` (pylearn filetensor), imported in the __main__ block.
    """
    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #The dataset is stored locally.
    d = ft.read(f)
    w=numpy.asarray(d[random.randint(0,100)])
    return (w/255.0).astype('float')

def _test(complexite):
    """Display an image before and after the transform at `complexite`."""
    img=_load_image()
    transfo = DistorsionGauss()
    pylab.imshow(img.reshape((32,32)))
    pylab.show()
    print transfo.get_settings_names()
    print transfo.regenerate_parameters(complexite)

    img_trans=transfo.transform_image(img)

    pylab.imshow(img_trans.reshape((32,32)))
    pylab.show()


if __name__ == '__main__':
    from pylearn.io import filetensor as ft
    import pylab
    for i in xrange(0,5):
        _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/Occlusion.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/Occlusion.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,161 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout de bruit d'occlusion dans l'image originale.
+
+Le bruit provient d'un echantillon pris dans la seconde image puis rajoutee a
+gauche ou a droite de l'image originale. De plus, il se peut aussi que le
+bruit soit rajoute sur l'image originale, mais en plus pâle.
+
+Le fichier /data/lisa/data/ift6266h10/echantillon_occlusion.ft
+(sur le reseau DIRO) est necessaire.
+
+Il y a 30% de chance d'avoir une occlusion quelconque.
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+
+import numpy
+
+from pylearn.io import filetensor as ft
+
class Occlusion():
    """Occlusion noise: paste a crop of another character onto the image.

    The crop is taken from a bank of character samples
    (/data/lisa/data/ift6266h10/echantillon_occlusion.ft, DIRO network) and
    pasted against the left edge, the right edge, or — faded — in the
    centre.  Pasting uses a pixelwise max, so strokes merge rather than
    overwrite.  Applied 40% of the time (never when complexity == 0).
    """

    def __init__(self,seed=9854):
        # Crop extents (pixels) taken around position (15, 15) of the
        # occluder image.
        self.haut=2
        self.bas=2
        self.gauche=2
        self.droite=2

        # Displacement (x, y) relative to the middle of the left/right edge.
        self.x_arrivee=0
        self.y_arrivee=0

        # 1 = paste on the left, -1 = on the right, 0 = centre (faded).
        self.endroit=-1

        # Opacity used when pasting in the centre (arbitrary choice,
        # adjustable if desired).
        self.opacite=0.5

        # 0 = do nothing, 1 = apply the occlusion.
        self.appliquer=1

        self.seed=seed
        #numpy.random.seed(self.seed)

        f3 = open('/data/lisa/data/ift6266h10/echantillon_occlusion.ft') #Must be on the DIRO network.
        #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/echantillon_occlusion.ft')
        #Fix the path otherwise
        w=ft.read(f3)
        f3.close()

        self.longueur=len(w)
        self.d=(w.astype('float'))/255


    def get_settings_names(self):
        return ['haut','bas','gauche','droite','x_arrivee','y_arrivee','endroit','rajout','appliquer']

    def get_seed(self):
        return self.seed

    def regenerate_parameters(self, complexity):
        """Redraw crop size, landing position and whether to apply (40%)."""
        self.haut=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
        self.bas=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
        self.gauche=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
        self.droite=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
        if self.haut+self.bas+self.gauche+self.droite==0: #Very unlikely
            self.haut=1
            self.bas=1
            self.gauche=1
            self.droite=1

        #These two values are clipped in transform_image so the crop stays inside
        self.x_arrivee=int(numpy.abs(numpy.random.normal(0,2))) #Complexity has no effect here
        self.y_arrivee=int(numpy.random.normal(0,3))

        self.rajout=numpy.random.randint(0,self.longueur-1) #Which character the crop is taken from
        self.appliquer=numpy.random.binomial(1,0.4) ##### 40% of the time an occlusion is applied #####

        if complexity == 0: #Do nothing in this case
            # BUG FIX: was `self.applique=0` (typo) — the misspelled
            # attribute left the transform enabled at complexity 0.
            self.appliquer=0

        self.endroit=numpy.random.randint(-1,2)

        return self._get_current_parameters()

    def _get_current_parameters(self):
        return [self.haut,self.bas,self.gauche,self.droite,self.x_arrivee,self.y_arrivee,self.endroit,self.rajout,self.appliquer]


    def transform_image(self, image):
        """Paste the occluder crop onto `image` (32x32), in place.

        Returns the input unchanged when the transform is disabled.
        """
        if self.appliquer == 0: #If disabled, return the image right away
            return image

        #Grab the occlusion patch
        bruit=self.d[self.rajout].reshape((32,32))[15-self.haut:15+self.bas+1,15-self.gauche:15+self.droite+1]

        if self.x_arrivee+self.gauche+self.droite>32:
            self.endroit*=-1 #Switch sides and stick to the edge
            self.x_arrivee=0
        if self.y_arrivee-self.haut <-16:
            self.y_arrivee=self.haut-16#Stick the patch to the top
        if self.y_arrivee+self.bas > 15:
            self.y_arrivee=15-self.bas #Stick the patch to the bottom

        if self.endroit==-1: #on the left
            for i in xrange(-self.haut,self.bas+1):
                for j in xrange(0,self.gauche+self.droite+1):
                    image[16+self.y_arrivee+i,self.x_arrivee+j]=\
                        max(image[16+self.y_arrivee+i,self.x_arrivee+j],bruit[i+self.haut,j])

        elif self.endroit==1: #on the right
            for i in xrange(-self.haut,self.bas+1):
                for j in xrange(-self.gauche-self.droite,1):
                    image[16+self.y_arrivee+i,31-self.x_arrivee+j]=\
                        max(image[16+self.y_arrivee+i,31-self.x_arrivee+j],bruit[i+self.haut,j+self.gauche+self.droite])

        elif self.endroit==0: #in the centre, faded by self.opacite
            for i in xrange(-self.haut,self.bas+1):
                for j in xrange(-self.gauche,self.droite+1):
                    image[16+i,16+j]=max(image[16+i,16+j],bruit[i+self.haut,j+self.gauche]*self.opacite)


        return image
+
#---TESTS---

def _load_image():
    """Load one random 32x32 test image, scaled to [0, 1].

    Relies on `ft` (pylearn filetensor), imported at module level here.
    """
    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #The dataset is stored locally.
    d = ft.read(f)
    w=numpy.asarray(d[numpy.random.randint(0,50)])
    return (w/255.0).astype('float')

def _test(complexite):
    """Display 20 images before and after the transform at `complexite`."""

    transfo = Occlusion()
    for i in xrange(0,20):
        img = _load_image()
        pylab.imshow(img.reshape((32,32)))
        pylab.show()
        print transfo.get_settings_names()
        print transfo.regenerate_parameters(complexite)

        img_trans=transfo.transform_image(img.reshape((32,32)))

        print transfo.get_seed()
        pylab.imshow(img_trans.reshape((32,32)))
        pylab.show()


if __name__ == '__main__':
    import pylab
    import scipy
    _test(0.5)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/PermutPixel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/PermutPixel.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,114 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Un echange de pixels est effectue entre certain pixels choisit aleatoirement
+et un de ses 4 voisins, tout aussi choisi aleatoirement.
+
+Le nombre de pixels permutes est definit pas complexity*1024
+
+Il y a proba 20% d'effectuer le bruitage
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy
+import random
+
+class PermutPixel():
+
+ def __init__(self,seed=7152):
+ self.nombre=10 #Le nombre de pixels a permuter
+ self.proportion=0.3
+ self.effectuer=1 #1=on effectue, 0=rien faire
+ self.seed=seed
+
+ #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
+ #numpy.random.seed(self.seed)
+ #random.seed(self.seed)
+
+ def get_seed(self):
+ return self.seed
+
+ def get_settings_names(self):
+ return ['effectuer']
+
+ def get_settings_names_determined_by_complexity(self,complexity):
+ return ['nombre']
+
+ def regenerate_parameters(self, complexity):
+ self.proportion=float(complexity)/3
+ self.nombre=int(256*self.proportion)*4 #Par multiple de 4 (256=1024/4)
+ self.echantillon=random.sample(xrange(0,1024),self.nombre) #Les pixels qui seront permutes
+ self.effectuer =numpy.random.binomial(1,0.2) ##### On a 20% de faire un bruit #####
+ return self._get_current_parameters()
+
+ def _get_current_parameters(self):
+ return [self.effectuer]
+
+ def get_parameters_determined_by_complexity(self, complexity):
+ return [int(complexity*256)*4]
+
+ def transform_image(self, image):
+ if self.effectuer==0:
+ return image
+
+ image=image.reshape(1024,1)
+ temp=0 #variable temporaire
+
+ for i in xrange(0,self.nombre,4): #Par bonds de 4
+ #gauche
+ if self.echantillon[i] > 0:
+ temp=image[self.echantillon[i]-1]
+ image[self.echantillon[i]-1]=image[self.echantillon[i]]
+ image[self.echantillon[i]]=temp
+ #droite
+ if self.echantillon[i+1] < 1023:
+ temp=image[self.echantillon[i+1]+1]
+ image[self.echantillon[i+1]+1]=image[self.echantillon[i+1]]
+ image[self.echantillon[i+1]]=temp
+ #haut
+ if self.echantillon[i+2] > 31:
+ temp=image[self.echantillon[i+2]-32]
+ image[self.echantillon[i+2]-32]=image[self.echantillon[i+2]]
+ image[self.echantillon[i+2]]=temp
+ #bas
+ if self.echantillon[i+3] < 992:
+ temp=image[self.echantillon[i+3]+32]
+ image[self.echantillon[i+3]+32]=image[self.echantillon[i+3]]
+ image[self.echantillon[i+3]]=temp
+
+
+ return image.reshape((32,32))
+
+
#---TESTS---

def _load_image():
    """Load one random 32x32 test image, scaled to [0, 1].

    Relies on `ft` (pylearn filetensor), imported in the __main__ block.
    """
    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #The dataset is stored locally.
    d = ft.read(f)
    w=numpy.asarray(d[random.randint(0,100)])
    return (w/255.0).astype('float')

def _test(complexite):
    """Display an image before and after the transform at `complexite`."""
    img=_load_image()
    transfo = PermutPixel()
    pylab.imshow(img.reshape((32,32)))
    pylab.show()
    print transfo.get_settings_names()
    print transfo.regenerate_parameters(complexite)

    img_trans=transfo.transform_image(img)

    pylab.imshow(img_trans.reshape((32,32)))
    pylab.show()


if __name__ == '__main__':
    from pylearn.io import filetensor as ft
    import pylab
    for i in xrange(0,5):
        _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/PoivreSel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/PoivreSel.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,94 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout de bruit poivre et sel dans les donnees. Le bruit est distribue de facon
+aleatoire tire d'une uniforme tout comme la clarte des bites changees.
+
+La proportion de bites aleatoires est definit par complexity/5.
+Lorsque cette valeur est a 1 ==> Plus reconnaissable et 0 ==> Rien ne se passe
+
+On a maintenant 25% de chance d'effectuer un bruitage.
+
+Ce fichier prend pour acquis que les images sont donnees une a la fois
+sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy
+import random
+
class PoivreSel():
    """Salt-and-pepper noise: replace a random subset of pixels with
    uniform random values.

    The fraction of affected pixels is complexity / 5 and the transform is
    applied only 25% of the time.  Expects one image at a time as a numpy
    array of 1024 (32 x 32) values in [0, 1].
    """

    def __init__(self, seed=9361):
        self.proportion_bruit = 0.08  # fraction of pixels to corrupt
        self.nb_chng = 10             # number of pixels changed (derived value)
        self.effectuer = 1            # 1 = apply the transform, 0 = no-op

        self.seed = seed
        # The two RNGs (numpy / random) are of different kinds; sharing a
        # seed would have no effect, so seeding stays disabled.
        #numpy.random.seed(self.seed)
        #random.seed(self.seed)

    def get_seed(self):
        return self.seed

    def get_settings_names(self):
        return ['effectuer']

    def get_settings_names_determined_by_complexity(self, complexity):
        return ['proportion_bruit']

    def regenerate_parameters(self, complexity):
        """Pick the pixels to corrupt and whether to apply (25% chance)."""
        self.proportion_bruit = float(complexity) / 5
        self.nb_chng = int(1024 * self.proportion_bruit)
        self.changements = random.sample(range(1024), self.nb_chng)  # pixels to overwrite
        self.effectuer = numpy.random.binomial(1, 0.25)  ##### 25% chance of applying #####
        return self._get_current_parameters()

    def _get_current_parameters(self):
        return [self.effectuer]

    def get_parameters_determined_by_complexity(self, complexity):
        return [float(complexity) / 5]

    def transform_image(self, image):
        """Overwrite the chosen pixels with fresh uniform noise."""
        if self.effectuer == 0:
            return image

        image = image.reshape(1024, 1)
        for k in range(0, self.nb_chng):
            # New random value for each corrupted pixel.
            image[self.changements[k]] = numpy.random.random()
        return image.reshape(32, 32)
+
+
#---TESTS---

def _load_image():
    """Load one 32x32 test image, scaled to [0, 1].

    Relies on `ft` (pylearn filetensor), imported in the __main__ block.
    """
    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #The dataset is stored locally.
    d = ft.read(f)
    w=numpy.asarray(d[0])
    return (w/255.0).astype('float')

def _test(complexite):
    """Display an image before and after the transform at `complexite`."""
    img=_load_image()
    transfo = PoivreSel()
    pylab.imshow(img.reshape((32,32)))
    pylab.show()
    print transfo.get_settings_names()
    print transfo.regenerate_parameters(complexite)

    img_trans=transfo.transform_image(img)

    pylab.imshow(img_trans.reshape((32,32)))
    pylab.show()


if __name__ == '__main__':
    from pylearn.io import filetensor as ft
    import pylab
    _test(0.5)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/Rature.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/Rature.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,255 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Ajout d'une rature sur le caractère. La rature est en fait un 1 qui recoit une
+rotation et qui est ensuite appliqué sur le caractère. Un grossissement, puis deux
+erosions sont effectuees sur le 1 afin qu'il ne soit plus reconnaissable.
+Il y a des chances d'avoir plus d'une seule rature !
+
+Il y a 15% d'effectuer une rature.
+
+Ce fichier prend pour acquis que les images sont donnees une a la fois
+sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
+
+Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
+
+'''
+
+import numpy, Image, random
+import scipy.ndimage.morphology
+from pylearn.io import filetensor as ft
+
+
class Rature():
    """Scratch noise: overlay one or more rotated, eroded "1" characters.

    A "1" is drawn from a bank of samples, rotated, enlarged and eroded
    twice so it is no longer recognizable, then merged into the image with
    a pixelwise max.  Applied 15% of the time; up to 3 scratches may be
    layered.  Expects one image at a time as a numpy array of 1024
    (32 x 32) values in [0, 1].
    """

    def __init__(self,seed=1256):
        self.angle=0 #Rotation angle in degrees (between 0 and 180)
        self.numero=0 #Index of the chosen "1" in the sample bank
        self.gauche=-1 #Leftmost column containing the "1"
        self.droite=-1 #Rightmost column containing the "1"
        self.haut=-1 #Topmost row containing the "1"
        self.bas=-1 #Bottommost row containing the "1"
        self.faire=1 #1 = apply the scratch, 0 = do nothing

        self.crop_haut=0
        self.crop_gauche=0 #These two values are between 0 and 31 and define
        #where the crop is taken inside the enlarged "1" image

        self.largeur_bande=-1 #Width of the scratch band
        self.smooth=-1 #Side of the square matrix used for the erosion
        self.nb_ratures=-1 #Number of scratches applied
        self.fini=0 #1 = all layers have been merged, 0 = not yet
        self.complexity=0 #Remember the complexity in case several layers are needed
        self.seed=seed

        #numpy.random.seed(self.seed)

        f3 = open('/data/lisa/data/ift6266h10/un_rature.ft') #Must be on the DIRO network.
        #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/un_rature.ft')
        #Fix the path otherwise
        w=ft.read(f3)
        f3.close()
        self.d=(w.astype('float'))/255

        self.patch=self.d[0].reshape((32,32)) #The scratch patch that will be merged into the image

    def get_settings_names(self):
        return ['angle','numero','faire','crop_haut','crop_gauche','largeur_bande','smooth','nb_ratures']

    def get_seed(self):
        return self.seed

    def regenerate_parameters(self, complexity,next_rature = False):
        """Redraw all scratch parameters and (re)build the patch.

        `next_rature` forces the scratch on — used internally by
        transform_image when layering additional scratches.
        """

        self.numero=random.randint(0,4999) #These bounds are inclusive !
        self.fini=0
        self.complexity=complexity

        if float(complexity) > 0:

            self.gauche=self.droite=self.haut=self.bas=-1 #Reset everything to -1

            self.angle=int(numpy.random.normal(90,100*complexity))

            self.faire=numpy.random.binomial(1,0.15) ##### 15% chance of applying a scratch #####
            if next_rature:
                self.faire = 1
            #self.faire=1 #For testing only

            self.crop_haut=random.randint(0,17)
            self.crop_gauche=random.randint(0,17)
            # Lower complexity -> larger erosion window -> thinner scratch.
            if complexity <= 0.25 :
                self.smooth=6
            elif complexity <= 0.5:
                self.smooth=5
            elif complexity <= 0.75:
                self.smooth=4
            else:
                self.smooth=3

            # 50% one scratch, 30% two, 20% three.
            p = numpy.random.rand()
            if p < 0.5:
                self.nb_ratures= 1
            else:
                if p < 0.8:
                    self.nb_ratures = 2
                else:
                    self.nb_ratures = 3

            #Build the scratch "patch" that will be merged into the image
            if self.faire == 1:
                self.get_size()
                self.get_image_rot() #Builds the patch

        else:
            self.faire=0 #Do nothing at all when complexity == 0 !!

        return self._get_current_parameters()


    def get_image_rot(self):
        """Rotate and enlarge the bounding-boxed "1", then build the patch."""
        image2=(self.d[self.numero].reshape((32,32))[self.haut:self.bas,self.gauche:self.droite])

        im = Image.fromarray(numpy.asarray(image2*255,dtype='uint8'))

        #High-quality rotation and resize, to keep the image sharp
        im2 = im.rotate(self.angle,Image.BICUBIC,expand=False)
        im3=im2.resize((50,50),Image.ANTIALIAS)

        grosse=numpy.asarray(numpy.asarray(im3)/255.0,dtype='float32')
        crop=grosse[self.haut:self.haut+32,self.gauche:self.gauche+32]

        self.get_patch(crop)

    def get_patch(self,crop):
        """Erode the rotated crop twice and normalize it into self.patch."""
        smooting = numpy.ones((self.smooth,self.smooth))
        #Two erosions give a nice result: not too wide and not too thin
        trans=scipy.ndimage.morphology.grey_erosion\
                (crop,size=smooting.shape,structure=smooting,mode='wrap')
        trans1=scipy.ndimage.morphology.grey_erosion\
                (trans,size=smooting.shape,structure=smooting,mode='wrap')


        patch_img=Image.fromarray(numpy.asarray(trans1*255,dtype='uint8'))

        patch_img2=patch_img.crop((4,4,28,28)).resize((32,32)) #To counter edge effects !

        trans2=numpy.asarray(numpy.asarray(patch_img2)/255.0,dtype='float32')


        #Bring everything back between 0 and 1
        trans2=trans2-trans2.min() #Make everything non-negative
        trans2=trans2/trans2.max()

        #The scratch is more likely to be at the bottom, or slanted toward 10 o'clock
        if random.random() <= 0.5: #Flip the matrix vertically in that case
            for i in xrange(0,32):
                self.patch[i,:]=trans2[31-i,:]
        else:
            self.patch=trans2




    def get_size(self):
        """Find the bounding box (haut/bas/gauche/droite) of the "1"."""
        image=self.d[self.numero].reshape((32,32))

        #top
        for i in xrange(0,32):
            for j in xrange(0,32):
                if(image[i,j]) != 0:
                    if self.haut == -1:
                        self.haut=i
                    break
            if self.haut > -1:
                break

        #bottom
        for i in xrange(31,-1,-1):
            for j in xrange(0,32):
                if(image[i,j]) != 0:
                    if self.bas == -1:
                        self.bas=i
                    break
            if self.bas > -1:
                break

        #left
        for i in xrange(0,32):
            for j in xrange(0,32):
                if(image[j,i]) != 0:
                    if self.gauche == -1:
                        self.gauche=i
                    break
            if self.gauche > -1:
                break

        #right
        for i in xrange(31,-1,-1):
            for j in xrange(0,32):
                if(image[j,i]) != 0:
                    if self.droite == -1:
                        self.droite=i
                    break
            if self.droite > -1:
                break


    def _get_current_parameters(self):
        return [self.angle,self.numero,self.faire,self.crop_haut,self.crop_gauche,self.largeur_bande,self.smooth,self.nb_ratures]

    def transform_image(self, image):
        """Merge the scratch patch(es) into `image` (32x32), in place."""
        if self.faire == 0: #Do nothing !!
            return image

        if self.fini == 0: #If more layers must be added
            patch_temp=self.patch
            for w in xrange(1,self.nb_ratures):
                # Redraw parameters with next_rature=1 to force another scratch.
                self.regenerate_parameters(self.complexity,1)
                for i in xrange(0,32):
                    for j in xrange(0,32):
                        patch_temp[i,j]=max(patch_temp[i,j],self.patch[i,j])
            self.fini=1
            self.patch=patch_temp

        for i in xrange(0,32):
            for j in xrange(0,32):
                image[i,j]=max(image[i,j],self.patch[i,j])
        self.patch*=0 #Reset the patch to zero (not strictly necessary)
        return image
+
+
#---TESTS---

def _load_image():
    """Load 1000 test images, scaled to [0, 1]."""
    f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #The dataset is stored locally.
    d = ft.read(f)
    w=numpy.asarray(d[0:1000])
    return (w/255.0).astype('float')

def _test(complexite):
    """Display 10 random images before and after the transform."""
    img=_load_image()
    transfo = Rature()
    for i in xrange(0,10):
        img2=img[random.randint(0,1000)]
        pylab.imshow(img2.reshape((32,32)))
        pylab.show()
        print transfo.get_settings_names()
        print transfo.regenerate_parameters(complexite)
        img2=img2.reshape((32,32))

        img2_trans=transfo.transform_image(img2)

        pylab.imshow(img2_trans.reshape((32,32)))
        pylab.show()


if __name__ == '__main__':
    from pylearn.io import filetensor as ft
    import pylab
    _test(1)
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/add_background_image.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/add_background_image.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,112 @@
+#!/usr/bin/python
+# -*- coding: iso-8859-1 -*-
+
+'''
+ Implementation of random background adding to a specific image
+
+ Author: Guillaume Sicard
+'''
+
+import sys, os, random
+import cPickle
+import Image, numpy
+
+class AddBackground():
+ def __init__(self, threshold = 128, complexity = 1):
+ self.h = 32
+ self.w = 32
+ self.threshold = 1;
+ try: #in order to load locally if it is available
+ self.bg_image_file = '/Tmp/image_net/'
+ f=open(self.bg_image_file+'filelist.pkl')
+ except:
+ self.bg_image_file = '/data/lisa/data/ift6266h10/image_net/'
+ f=open(self.bg_image_file+'filelist.pkl')
+ self.image_files = cPickle.load(f)
+ f.close()
+ self.regenerate_parameters(complexity)
+
+ def get_current_parameters(self):
+ return [self.contrast]
+ # get threshold value
+ def get_settings_names(self):
+ return ['contrast']
+
+ # no need, except for testmod.py
+ def regenerate_parameters(self, complexity):
+ self.contrast = 1-numpy.random.rand()*complexity
+ return [self.contrast]
+
+ # load an image
+ def load_image(self,filename):
+ image = Image.open(filename).convert('L')
+ image = numpy.asarray(image)
+ image = (image / 255.0).astype(numpy.float32)
+ return image
+
+ # save an image
+ def save_image(self,array, filename):
+ image = (array * 255.0).astype('int')
+ image = Image.fromarray(image)
+ if (filename != ''):
+ image.save(filename)
+ else:
+ image.show()
+
+ # make a random 32x32 crop of an image
+ def rand_crop(self,image):
+ i_w, i_h = image.shape
+ x, y = random.randint(0, i_w - self.w), random.randint(0, i_h - self.h)
+ return image[x:x + self.w, y:y + self.h]
+
+ # select a random background image from "bg_image_file" and crops it
+ def rand_bg_image(self,maximage):
+ i = random.randint(0, len(self.image_files) - 1)
+
+ image = self.load_image(self.bg_image_file + self.image_files[i])
+ self.bg_image = self.rand_crop(image)
+ maxbg = self.bg_image.max()
+ self.bg_image = self.bg_image / maxbg * ( max(maximage - self.contrast,0.0) )
+
+ # set "bg_image" as background to "image", based on a pixels threshold
+ def set_bg(self,image):
+ tensor = numpy.asarray([self.bg_image,image],dtype='float32')
+ return tensor.max(0)
+
+ # transform an image file and return an array
+ def transform_image_from_file(self, filename):
+ self.rand_bg_image()
+ image = self.load_image(filename)
+ image = self.set_bg(image)
+ return image
+
+ # standard array to array transform
+ def transform_image(self, image):
+ self.rand_bg_image(image.max())
+ image = self.set_bg(image)
+ return image
+
+ # test method
+ def test(self,filename):
+ import time
+
+ sys.stdout.write('Starting addBackground test : loading image')
+ sys.stdout.flush()
+
+ image = self.load_image(filename)
+
+ t = 0
+ n = 500
+ for i in range(n):
+ t0 = time.time()
+ image2 = self.transform_image(image)
+ t = ( i * t + (time.time() - t0) ) / (i + 1)
+ sys.stdout.write('.')
+ sys.stdout.flush()
+
+ print "Done!\nAverage time : " + str(1000 * t) + " ms"
+
if __name__ == '__main__':

    # Timing smoke test against a sample character image.
    myAddBackground = AddBackground()
    myAddBackground.test('./images/0-LiberationSans-Italic.ttf.jpg')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/affine_transform.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/affine_transform.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,89 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Simple implementation of random affine transformations based on the Python
+Imaging Module affine transformations.
+
+
+Author: Razvan Pascanu
+'''
+
+import numpy, Image
+
+
+
class AffineTransformation():
    """Random affine warp of a 32x32 image, built on PIL's Image.transform.

    A destination pixel (x', y') is sampled from the source at
    x = a*x' + b*y' + c,  y = d*x' + e*y' + f; the six coefficients are
    drawn uniformly around the identity, with spread scaled by
    `complexity`.  Ranges were tuned by eye to look acceptable.
    """
    def __init__(self, complexity=.5):
        self.shape = (32, 32)
        self.complexity = complexity
        # Initial draw uses slightly tighter ranges (.6 / 8.) than
        # regenerate_parameters (.8 / 9.).
        rnd = numpy.random.uniform(size=6) - .5
        self.a = 1. + rnd[0] * .6 * complexity
        self.b = 0. + rnd[1] * .6 * complexity
        self.c = rnd[2] * 8. * complexity
        self.d = 0. + rnd[3] * .6 * complexity
        self.e = 1. + rnd[4] * .6 * complexity
        self.f = rnd[5] * 8. * complexity

    def _get_current_parameters(self):
        return [self.a, self.b, self.c, self.d, self.e, self.f]

    def get_settings_names(self):
        return ['a', 'b', 'c', 'd', 'e', 'f']

    def regenerate_parameters(self, complexity):
        """Draw a fresh random affine map and return its six coefficients."""
        self.complexity = complexity
        rnd = numpy.random.uniform(size=6) - .5
        self.a = 1. + rnd[0] * .8 * complexity
        self.b = 0. + rnd[1] * .8 * complexity
        self.c = rnd[2] * 9. * complexity
        self.d = 0. + rnd[3] * .8 * complexity
        self.e = 1. + rnd[4] * .8 * complexity
        self.f = rnd[5] * 9. * complexity
        return self._get_current_parameters()

    def transform_image(self, NIST_image):
        """Warp a flat [0, 1] image; returns a flat-compatible float32 array."""
        as_bytes = numpy.asarray(NIST_image.reshape(self.shape) * 255.0,
                                 dtype='uint8')
        pil_img = Image.fromarray(as_bytes)
        warped = pil_img.transform((32, 32), Image.AFFINE,
                                   [self.a, self.b, self.c, self.d, self.e, self.f])
        return numpy.asarray(numpy.asarray(warped) / 255.0, dtype='float32')
+
+
+
if __name__ =='__main__':
    # Visual smoke test: warp one random NIST digit and show before/after.
    print 'random test'

    from pylearn.io import filetensor as ft
    import pylab

    datapath = '/data/lisa/data/nist/by_class/'

    f = open(datapath+'digits/digits_train_data.ft')
    d = ft.read(f)
    f.close()


    transformer = AffineTransformation()
    id = numpy.random.randint(30)

    pylab.figure()
    pylab.imshow(d[id].reshape((32,32)))
    pylab.figure()
    pylab.imshow(transformer.transform_image(d[id]).reshape((32,32)))

    pylab.show()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/contrast.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/contrast.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,137 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Simple implementation of random contrast. Half of the time the polarity is switched;
+then a random contrast level is chosen depending on the complexity. The mean of the maximum and minimum
+pixel values stays constant (to avoid introducing a brightness bias between examples).
+
+Author: Xavier Glorot
+'''
+
+import numpy as N
+import copy
+
+
class Contrast():
    """Random contrast change, with a 50% chance of polarity inversion.

    The contrast level is drawn at random as a function of `complexity`;
    the mean of the maximum and minimum pixel values is kept constant so
    the transformation does not introduce a brightness bias between
    examples.
    """

    def __init__(self, complexity = 1):
        #---------- private attributes
        self.__nx__ = 32            # x dimension of the images
        self.__ny__ = 32            # y dimension of the images
        self.__Pinvert__ = 0.5      # probability of switching polarity
        self.__mincontrast__ = 0.15 # lowest allowed contrast fraction
        self.__resolution__ = 256   # number of discrete contrast levels
        # Range of contrast levels that may be removed at full complexity.
        # (bug fix: numpy.int was removed from NumPy; the builtin int is
        # the documented replacement and behaves identically here)
        self.__rangecontrastres__ = self.__resolution__ - int(self.__mincontrast__*self.__resolution__)
        #------------------------------------------------

        #---------- generation parameters
        self.regenerate_parameters(complexity)
        #------------------------------------------------

    def _get_current_parameters(self):
        """Return the currently active [invert, contrast] parameters."""
        return [self.invert, self.contrast]

    def get_settings_names(self):
        """Names of the parameters returned by regenerate_parameters()."""
        return ['invert', 'contrast']

    def regenerate_parameters(self, complexity):
        """Draw new random parameters; higher complexity allows lower contrast."""
        self.invert = (N.random.uniform() < self.__Pinvert__)
        self.contrast = self.__resolution__ - N.random.randint(1 + self.__rangecontrastres__ * complexity)
        return self._get_current_parameters()

    def transform_1_image(self, image):
        """Apply the contrast change (and possible inversion) to one 2D image."""
        maxi = image.max()
        mini = image.min()
        # Robustness fix: the original divided by (maxi - mini), which
        # produced NaN/inf for a constant image. A unit spread maps a
        # constant image to the contrast midpoint instead.
        spread = float(maxi - mini)
        if spread == 0.0:
            spread = 1.0
        # Offset keeps the mean of min/max output pixel values constant.
        offset = (self.__resolution__ - self.contrast) / (2 * float(self.__resolution__))
        scaled = (image - mini) / spread * self.contrast / float(self.__resolution__)
        if self.invert:
            newimage = 1 - offset - scaled
        else:
            newimage = offset + scaled
        if image.dtype == 'uint8':
            return N.asarray(newimage*255, dtype='uint8')
        else:
            return N.asarray(newimage, dtype=image.dtype)

    def transform_image(self, image):
        """Dispatch on input layout: single 32x32 image, flat vector,
        stack of images (3D), or 2D batch of flattened rows."""
        if image.shape == (self.__nx__, self.__ny__):
            return self.transform_1_image(image)
        if image.ndim == 3:
            newimage = copy.copy(image)
            for i in range(image.shape[0]):
                newimage[i,:,:] = self.transform_1_image(image[i,:,:])
            return newimage
        if image.ndim == 2 and image.shape != (self.__nx__, self.__ny__):
            newimage = N.reshape(image, (image.shape[0], self.__nx__, self.__ny__))
            for i in range(image.shape[0]):
                newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
            return N.reshape(newimage, image.shape)
        if image.ndim == 1:
            newimage = N.reshape(image, (self.__nx__, self.__ny__))
            newimage = self.transform_1_image(newimage)
            return N.reshape(newimage, image.shape)
        assert False  # unreachable: unsupported input layout
+
+
+
+
+#test on NIST (you need pylearn and access to NIST to do that)
+
if __name__ == '__main__':
    # Smoke test: load NIST digits, time one whole-batch transform, then
    # show original/transformed pairs side by side in a pygame window.
    from pylearn.io import filetensor as ft
    import copy
    import pygame
    import time
    datapath = '/data/lisa/data/nist/by_class/'
    f = open(datapath+'digits/digits_train_data.ft')
    d = ft.read(f)

    pygame.surfarray.use_arraytype('numpy')

    pygame.display.init()
    # Window holds two 8x-scaled 32x32 images side by side (8-bit palette).
    screen = pygame.display.set_mode((8*2*32,8*32),0,8)
    anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
    screen.set_palette(anglcolorpalette)

    MyContrast = Contrast()

    # Time the transformation over the whole dataset.
    debut=time.time()
    MyContrast.transform_image(d)
    fin=time.time()
    print '------------------------------------------------'
    print d.shape[0],' images transformed in :', fin-debut, ' seconds'
    print '------------------------------------------------'
    print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
    print '------------------------------------------------'
    print MyContrast.get_settings_names()
    print MyContrast._get_current_parameters()
    print MyContrast.regenerate_parameters(0)
    print MyContrast.regenerate_parameters(0.5)
    print MyContrast.regenerate_parameters(1)
    for i in range(10000):
        a=d[i,:]
        b=N.asarray(N.reshape(a,(32,32))).T

        # Original image on the left (scaled 8x via three scale2x passes).
        new=pygame.surfarray.make_surface(b)
        new=pygame.transform.scale2x(new)
        new=pygame.transform.scale2x(new)
        new=pygame.transform.scale2x(new)
        new.set_palette(anglcolorpalette)
        screen.blit(new,(0,0))

        print MyContrast.get_settings_names(), MyContrast.regenerate_parameters(1)
        c=MyContrast.transform_image(a)
        b=N.asarray(N.reshape(c,(32,32))).T

        # Transformed image on the right.
        new=pygame.surfarray.make_surface(b)
        new=pygame.transform.scale2x(new)
        new=pygame.transform.scale2x(new)
        new=pygame.transform.scale2x(new)
        new.set_palette(anglcolorpalette)
        screen.blit(new,(8*32,0))

        pygame.display.update()
        raw_input('Press Enter')

    pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/filetensor.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/filetensor.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,232 @@
+"""
+Read and write the matrix file format described at
+U{http://www.cs.nyu.edu/~ylclab/data/norb-v1.0/index.html}
+
+The format is for dense tensors:
+
+ - magic number indicating type and endianness - 4bytes
+ - rank of tensor - int32
+ - dimensions - int32, int32, int32, ...
+ -
+
+The number of dimensions and rank is slightly tricky:
+ - for scalar: rank=0, dimensions = [1, 1, 1]
+ - for vector: rank=1, dimensions = [?, 1, 1]
+ - for matrix: rank=2, dimensions = [?, ?, 1]
+
+For rank >= 3, the number of dimensions matches the rank exactly.
+
+
+@todo: add complex type support
+
+"""
+import sys
+import numpy
+
+def _prod(lst):
+ p = 1
+ for l in lst:
+ p *= l
+ return p
+
+_magic_dtype = {
+ 0x1E3D4C51 : ('float32', 4),
+ #0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix?
+ 0x1E3D4C53 : ('float64', 8),
+ 0x1E3D4C54 : ('int32', 4),
+ 0x1E3D4C55 : ('uint8', 1),
+ 0x1E3D4C56 : ('int16', 2),
+ }
+_dtype_magic = {
+ 'float32': 0x1E3D4C51,
+ #'packed matrix': 0x1E3D4C52,
+ 'float64': 0x1E3D4C53,
+ 'int32': 0x1E3D4C54,
+ 'uint8': 0x1E3D4C55,
+ 'int16': 0x1E3D4C56
+ }
+
+def _read_int32(f):
+ """unpack a 4-byte integer from the current position in file f"""
+ s = f.read(4)
+ s_array = numpy.fromstring(s, dtype='int32')
+ return s_array.item()
+
def _read_header(f, debug=False):
    """
    Parse the filetensor header at the current position in file f.

    :returns: data type, element size, rank, shape, size
    """
    #what is the data type of this matrix?
    #magic_s = f.read(4)
    #magic = numpy.fromstring(magic_s, dtype='int32')
    magic = _read_int32(f)
    magic_t, elsize = _magic_dtype[magic]
    if debug:
        print 'header magic', magic, magic_t, elsize
    if magic_t == 'packed matrix':
        raise NotImplementedError('packed matrix not supported')

    #what is the rank of the tensor?
    ndim = _read_int32(f)
    if debug: print 'header ndim', ndim

    #what are the dimensions of the tensor?
    # The header always stores at least 3 dimension fields; only the
    # first `ndim` of them are meaningful (see module docstring).
    dim = numpy.fromfile(f, dtype='int32', count=max(ndim,3))[:ndim]
    dim_size = _prod(dim)
    if debug: print 'header dim', dim, dim_size

    return magic_t, elsize, ndim, dim, dim_size
+
class arraylike(object):
    """Provide an array-like interface to the filetensor in f.

    The rank parameter to __init__ controls how this object interprets the underlying tensor.
    Its behaviour should be clear from the following example.
    Suppose the underlying tensor is MxNxK.

    - If rank is 0, self[i] will be a scalar and len(self) == M*N*K.

    - If rank is 1, self[i] is a vector of length K, and len(self) == M*N.

    - If rank is 3, self[i] is a 3D tensor of size MxNxK, and len(self)==1.

    - If rank is 5, self[i] is a 5D tensor of size 1x1xMxNxK, and len(self) == 1.


    :note: Objects of this class generally require exclusive use of the underlying file handle, because
    they call seek() every time you access an element.
    """

    f = None
    """File-like object"""

    magic_t = None
    """numpy data type of array"""

    elsize = None
    """number of bytes per scalar element"""

    ndim = None
    """Rank of underlying tensor"""

    dim = None
    """tuple of array dimensions (aka shape)"""

    dim_size = None
    """number of scalars in the tensor (prod of dim)"""

    f_start = None
    """The file position of the first element of the tensor"""

    readshape = None
    """tuple of array dimensions of the block that we read"""

    readsize = None
    """number of elements we must read for each block"""

    def __init__(self, f, rank=0, debug=False):
        self.f = f
        self.magic_t, self.elsize, self.ndim, self.dim, self.dim_size = _read_header(f,debug)
        # Data begins immediately after the header.
        self.f_start = f.tell()

        # The trailing `rank` dimensions are returned per item; the
        # leading dimensions are flattened into the index space.
        if rank <= self.ndim:
            self.readshape = tuple(self.dim[self.ndim-rank:])
        else:
            self.readshape = tuple(self.dim)

        #self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim)

        # If the requested rank exceeds the stored rank, pad the returned
        # shape with leading 1s (see class docstring, rank=5 example).
        if rank <= self.ndim:
            padding = tuple()
        else:
            padding = (1,) * (rank - self.ndim)

        #padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim)
        self.returnshape = padding + self.readshape
        self.readsize = _prod(self.readshape)
        if debug: print 'READ PARAM', self.readshape, self.returnshape, self.readsize

    def __len__(self):
        # Number of addressable items: product of the leading
        # (non-returned) dimensions.
        return _prod(self.dim[:self.ndim-len(self.readshape)])

    def __getitem__(self, idx):
        if idx >= len(self):
            raise IndexError(idx)
        # Seek straight to the item; this is why exclusive use of the
        # underlying file handle is required.
        self.f.seek(self.f_start + idx * self.elsize * self.readsize)
        return numpy.fromfile(self.f,
                dtype=self.magic_t,
                count=self.readsize).reshape(self.returnshape)
+
+
+#
+# TODO: implement item selection:
+# e.g. load('some mat', subtensor=(:6, 2:5))
+#
+# This function should be memory efficient by:
+# - allocating an output matrix at the beginning
+# - seeking through the file, reading subtensors from multiple places
def read(f, subtensor=None, debug=False):
    """Load all or part of file 'f' into a numpy ndarray

    @param f: file from which to read
    @type f: file-like object

    If subtensor is not None, it should be like the argument to
    numpy.ndarray.__getitem__. The following two expressions should return
    equivalent ndarray objects, but the one on the left may be faster and more
    memory efficient if the underlying file f is big.

        read(f, subtensor) <===> read(f)[*subtensor]

    Support for subtensors is currently spotty, so check the code to see if your
    particular type of subtensor is supported.
    """
    magic_t, elsize, ndim, dim, dim_size = _read_header(f,debug)
    f_start = f.tell()

    rval = None
    if subtensor is None:
        rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
    elif isinstance(subtensor, slice):
        if subtensor.step not in (None, 1):
            raise NotImplementedError('slice with step', subtensor.step)
        # Robustness fix: the original assumed both slice bounds were
        # integers, so open-ended slices (start or stop of None) crashed.
        start = 0 if subtensor.start is None else subtensor.start
        stop = dim[0] if subtensor.stop is None else min(dim[0], subtensor.stop)
        if start != 0:
            # Skip whole leading rows without reading them.
            bytes_per_row = _prod(dim[1:]) * elsize
            f.seek(f_start + start * bytes_per_row)
        dim[0] = stop - start
        rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
    else:
        raise NotImplementedError('subtensor access not written yet:', subtensor)

    return rval
+
def write(f, mat):
    """Write a numpy.ndarray to file in the filetensor format.

    @param f: file into which to write
    @type f: file-like object

    @param mat: array to write to file
    @type mat: numpy ndarray or compatible

    @raise TypeError: if mat's dtype has no filetensor magic number
    """
    def _write_int32(f, i):
        # Serialize one int32 in native byte order.
        numpy.asarray(i, dtype='int32').tofile(f)

    try:
        _write_int32(f, _dtype_magic[str(mat.dtype)])
    except KeyError:
        raise TypeError('Invalid ndarray dtype for filetensor format', mat.dtype)

    _write_int32(f, len(mat.shape))
    # The format always stores at least 3 dimension fields; pad with 1s.
    shape = mat.shape
    if len(shape) < 3:
        shape = list(shape) + [1] * (3 - len(shape))
    for sh in shape:
        _write_int32(f, sh)
    mat.tofile(f)
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/gimp_script.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/gimp_script.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+'''
+Filtres GIMP sous Python
+Auteur: Nicolas Boulanger-Lewandowski
+Date: Hiver 2010
+
+run with: gimp -i --batch-interpreter python-fu-eval --batch - < gimp_script.py
+end with: pdb.gimp_quit(0)
+
+Implémente le motionblur et le pinch
+'''
+
+from gimpfu import *
+import numpy
+
# One reusable 32x32 grayscale GIMP image/layer pair; all transformations
# in this module operate in place on this buffer through dest_rgn.
img = gimp.Image(32, 32, GRAY)
img.disable_undo()
layer1 = gimp.Layer(img, "layer1", 32, 32, GRAY_IMAGE, 100, NORMAL_MODE)
img.add_layer(layer1, 0)
dest_rgn = layer1.get_pixel_rgn(0, 0, 32, 32, True)
+
def setpix(image):
    # Copy a 32x32 float image (values in [0, 1]) into the shared GIMP
    # pixel region; GIMP expects raw uint8 bytes, hence the *255 rescale.
    # The transpose converts from (row, col) to GIMP's (x, y) order.
    dest_rgn[:,:] = (image.T*255).astype(numpy.uint8).tostring()
    layer1.flush()
    layer1.update(0, 0, 32, 32)
+
def getpix():
    # Read the shared GIMP pixel region back as a float array in [0, 1],
    # undoing the transpose and rescale done by setpix().
    return numpy.fromstring(dest_rgn[:,:], 'UInt8').astype(numpy.float32).reshape((32,32)).T / 255.0
+
class GIMP1():
    """Motion blur + whirl/pinch distortion, performed through GIMP
    plug-ins (this module must run inside GIMP's Python interpreter)."""

    def get_settings_names(self):
        """Names of the parameters drawn by regenerate_parameters()."""
        return ['mblur_length', 'mblur_angle', 'pinch']

    def regenerate_parameters(self, complexity):
        """Draw new random blur/pinch parameters scaled by `complexity`."""
        if complexity:
            # Half-normal blur length: normal sample, folded and rounded.
            self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity))))
        else:
            self.mblur_length = 0
        self.mblur_angle = int(round(numpy.random.uniform(0,360)))
        self.pinch = numpy.random.uniform(-complexity, 0.7*complexity)

        return [self.mblur_length, self.mblur_angle, self.pinch]

    def transform_image(self, image):
        """Apply the current blur/pinch settings to a 32x32 float image."""
        if not (self.mblur_length or self.pinch):
            # Nothing to do: skip the round-trip through GIMP entirely.
            return image
        setpix(image)
        if self.mblur_length:
            pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0)
        if self.pinch:
            pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0)
        return getpix()
+
+# test
if __name__ == '__main__':
    # Smoke test: load a bitmap, apply one random blur/pinch, display it.
    # Must be run inside GIMP's Python interpreter (see module docstring).
    import Image
    im = numpy.asarray(Image.open("a.bmp").convert("L")) / 255.0

    test = GIMP1()
    print test.get_settings_names(), '=', test.regenerate_parameters(1)
    #for i in range(1000):
    im = test.transform_image(im)

    import pylab
    pylab.imshow(im, pylab.matplotlib.cm.Greys_r)
    pylab.show()

    # Required when driven through gimp --batch (see module docstring).
    pdb.gimp_quit(0)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/image_tiling.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/image_tiling.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,86 @@
+"""
+Illustrate filters (or data) in a grid of small image-shaped tiles.
+
+Note: taken from the pylearn codebase on Feb 4, 2010 (fsavard)
+"""
+
+import numpy
+from PIL import Image
+
def scale_to_unit_interval(ndar, eps=1e-8):
    """Return a copy of `ndar` linearly rescaled to span [0, 1]."""
    shifted = ndar.copy()
    shifted -= shifted.min()
    # eps guards against division by zero for constant arrays.
    shifted *= 1.0 / (shifted.max() + eps)
    return shifted
+
def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0,0),
                       scale_rows_to_unit_interval=True,
                       output_pixel_vals=True
                       ):
    """
    Transform an array with one flattened image per row, into an array in which images are
    reshaped and layed out like tiles on a floor.

    This function is useful for visualizing datasets whose rows are images, and also columns of
    matrices for transforming those rows (such as the first layer of a neural net).

    :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can be 2-D ndarrays or None
    :param X: a 2-D array in which every row is a flattened image.
    :type img_shape: tuple; (height, width)
    :param img_shape: the original shape of each image
    :type tile_shape: tuple; (rows, cols)
    :param tile_shape: the number of images to tile (rows, cols)
    :param tile_spacing: pixels of padding between tiles (rows, cols)
    :param scale_rows_to_unit_interval: rescale each image to span [0, 1]
    :param output_pixel_vals: if True, output uint8 values in [0, 255]

    :returns: array suitable for viewing as an image. (See:`PIL.Image.fromarray`.)
    :rtype: a 2-d array with same dtype as X.
    """
    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    # Output size: tiles plus spacing, minus the trailing space.
    out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
                 in zip(img_shape, tile_shape, tile_spacing)]

    if isinstance(X, tuple):
        # X is four channels (R, G, B, A); any channel may be None.
        assert len(X) == 4
        if output_pixel_vals:
            out_dtype = 'uint8'
        else:
            # Bug fix: the original used X.dtype here, but X is a tuple;
            # use the dtype of the first channel that is an array.
            out_dtype = None
            for channel in X:
                if channel is not None:
                    out_dtype = channel.dtype
                    break
        out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype=out_dtype)

        #colors default to 0, alpha defaults to 1 (opaque)
        if output_pixel_vals:
            channel_defaults = [0,0,0,255]
        else:
            channel_defaults = [0.,0.,0.,1.]

        for i in range(4):
            if X[i] is None:
                out_array[:,:,i] = numpy.zeros(out_shape,
                        dtype='uint8' if output_pixel_vals else out_array.dtype
                        )+channel_defaults[i]
            else:
                # Recurse to tile this channel as a grayscale plane.
                out_array[:,:,i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
        return out_array

    else:
        H, W = img_shape
        Hs, Ws = tile_spacing

        out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
        for tile_row in range(tile_shape[0]):
            for tile_col in range(tile_shape[1]):
                # Leave trailing tiles blank when X has fewer rows than
                # the grid has cells.
                if tile_row * tile_shape[1] + tile_col < X.shape[0]:
                    if scale_rows_to_unit_interval:
                        this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
                    else:
                        this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
                    out_array[
                        tile_row * (H+Hs):tile_row*(H+Hs)+H,
                        tile_col * (W+Ws):tile_col*(W+Ws)+W
                        ] \
                        = this_img * (255 if output_pixel_vals else 1)
        return out_array
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/local_elastic_distortions.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/local_elastic_distortions.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,456 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Implementation of elastic distortions as described in
+Simard, Steinkraus, Platt, "Best Practices for Convolutional
+ Neural Networks Applied to Visual Document Analysis", 2003
+
+Author: François Savard
+Date: Fall 2009, revised Winter 2010
+
+Usage: create the Distorter with proper alpha, sigma etc.
+ Then each time you want to change the distortion field applied,
+ call regenerate_field().
+
+ (The point behind this is that regeneration takes some time,
+ so we better reuse the fields a few times)
+'''
+
+import sys
+import math
+import numpy
+import numpy.random
+import scipy.signal # convolve2d
+
+_TEST_DIR = "/u/savardf/ift6266/debug_images/"
+
+def _raw_zeros(size):
+ return [[0 for i in range(size[1])] for j in range(size[0])]
+
class ElasticDistortionParams():
    """One set of elastic-distortion parameters, along with the
    precomputed lookup and interpolation matrices used by
    LocalElasticDistorter.transform_image()."""

    def __init__(self, image_size=(32,32), alpha=0.0, sigma=0.0):
        self.image_size = image_size
        self.alpha = alpha  # displacement amplitude
        self.sigma = sigma  # Gaussian smoothing spread of the field

        h, w = self.image_size

        # For each bilinear-interpolation corner (top-left, top-right,
        # bottom-left, bottom-right): integer row/column indices into the
        # source image, plus the precomputed interpolation weights.
        for corner in ('tl', 'tr', 'bl', 'br'):
            setattr(self, 'matrix_%s_corners_rows' % corner,
                    [[0 for i in range(w)] for j in range(h)])
            setattr(self, 'matrix_%s_corners_cols' % corner,
                    [[0 for i in range(w)] for j in range(h)])
            setattr(self, 'matrix_%s_multiply' % corner, numpy.zeros((h,w)))

    def alpha_sigma(self):
        """Return the two scalar parameters as [alpha, sigma]."""
        return [self.alpha, self.sigma]
+
class LocalElasticDistorter():
    """Random smooth local distortions of an image, following
    Simard, Steinkraus & Platt (2003).

    A random displacement field is generated (uniform noise blurred by a
    Gaussian kernel, then scaled by alpha); each output pixel fetches its
    value from the displaced source coordinate via bilinear interpolation.
    """

    def __init__(self, image_size=(32,32)):
        self.image_size = image_size

        self.current_complexity_10 = 0
        self.current_complexity = 0

        # number of precomputed fields
        # (principle: as complexity doesn't change often, we can
        # precompute a certain number of fields for a given complexity,
        # each with its own parameters. That way, we have good
        # randomization, but we're much faster).
        self.to_precompute_per_complexity = 50

        # Both use ElasticDistortionParams
        self.current_params = None
        self.precomputed_params = [[] for i in range(10)]

        # Gaussian blur kernel; regenerated lazily when sigma changes.
        self.kernel_size = None
        self.kernel = None

        # set some defaults
        self.regenerate_parameters(0.0)

    def get_settings_names(self):
        # Parameters are deliberately not reported; see the note in
        # regenerate_parameters().
        return []

    def _floor_complexity(self, complexity):
        """Round complexity down to its 1/10th bucket value."""
        return self._to_complexity_10(complexity) / 10.0

    def _to_complexity_10(self, complexity):
        """Map a [0, 1] complexity to an integer bucket in [0, 9]."""
        return min(9, max(0, int(complexity * 10)))

    def regenerate_parameters(self, complexity):
        complexity_10 = self._to_complexity_10(complexity)

        if complexity_10 != self.current_complexity_10:
            self.current_complexity_10 = complexity_10
            self.current_complexity = self._floor_complexity(complexity)

        if len(self.precomputed_params[complexity_10]) <= self.to_precompute_per_complexity:
            # not yet enough params generated, produce one more
            # and append to list
            new_params = self._initialize_new_params()
            new_params = self._generate_fields(new_params)
            self.current_params = new_params
            self.precomputed_params[complexity_10].append(new_params)
        else:
            # if we have enough precomputed fields, just select one
            # at random and set parameters to match what they were
            # when the field was generated
            idx = numpy.random.randint(0, len(self.precomputed_params[complexity_10]))
            self.current_params = self.precomputed_params[complexity_10][idx]

        # don't return anything, to avoid storing deterministic parameters
        return [] # self.current_params.alpha_sigma()

    def get_parameters_determined_by_complexity(self, complexity):
        # Bug fix: _floor_complexity is a method; the original called it
        # without `self.`, which raised NameError.
        tmp_params = self._initialize_new_params(self._floor_complexity(complexity))
        return tmp_params.alpha_sigma()

    def get_settings_names_determined_by_complexity(self, complexity):
        return ['alpha', 'sigma']

    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
    def _gen_gaussian_kernel(self, sigma):
        # the kernel size can change DRAMATICALLY the time
        # for the blur operation... so even though results are better
        # with a bigger kernel, we need to compromise here
        # 1*s is very different from 2*s, but there's not much difference
        # between 2*s and 4*s
        ks = self.kernel_size
        s = sigma
        # Bug fix: kernel dimensions must be integers (they are used as
        # array shapes and slice bounds below); ceil matches the number
        # of samples numpy.ogrid produced with the original float bound.
        target_ks = (int(math.ceil(1.5*s)), int(math.ceil(1.5*s)))
        if ks is not None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
            # kernel size is good, ok, no need to regenerate
            return
        self.kernel_size = target_ks
        h,w = self.kernel_size
        a,b = h/2.0, w/2.0
        y,x = numpy.ogrid[0:w, 0:h]
        gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
        # Normalize so we don't reduce image intensity
        self.kernel = gauss/gauss.sum()

    def _gen_distortion_field(self, params):
        self._gen_gaussian_kernel(params.sigma)

        # we add kernel_size on all four sides so blurring
        # with the kernel produces a smoother result on borders
        ks0 = self.kernel_size[0]
        ks1 = self.kernel_size[1]
        # NOTE(review): sz0 pads with image_size[1] while sz1 pads with
        # image_size[0]; harmless for square images -- confirm intent
        # before using non-square image sizes.
        sz0 = self.image_size[1] + ks0
        sz1 = self.image_size[0] + ks1
        field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
        field = scipy.signal.convolve2d(field, self.kernel, mode='same')

        # crop only image_size in the middle
        field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]]

        return params.alpha * field


    def _initialize_new_params(self, complexity=None):
        if not complexity:
            complexity = self.current_complexity

        params = ElasticDistortionParams(self.image_size)

        # skew the complexity scale so it progresses a bit faster,
        # while keeping the extremes at 0.0 and 1.0
        complexity = complexity ** (1./3.)

        # the smaller the alpha, the closest the pixels are fetched
        # a max of 10 is reasonable
        params.alpha = complexity * 10.0

        # the bigger the sigma, the smoother is the distortion
        # max of 1 is "reasonable", but produces VERY noisy results
        # And the bigger the sigma, the bigger the blur kernel, and the
        # slower the field generation, btw.
        params.sigma = 10.0 - (7.0 * complexity)

        return params

    def _generate_fields(self, params):
        '''
        Here's how the code works:
        - We first generate "distortion fields" for x and y with these steps:
            - Uniform noise over [-1, 1] in a matrix of size (h,w)
            - Blur with a Gaussian kernel of spread sigma
            - Multiply by alpha
        - Then (conceptually) to compose the distorted image, we loop over each pixel
            of the new image and use the corresponding x and y distortions
            (from the matrices generated above) to identify pixels
            of the old image from which we fetch color data. As the
            coordinates are not integer, we interpolate between the
            4 nearby pixels (top left, top right etc.).
        - That's just conceptually. Here I'm using matrix operations
            to speed up the computation. I first identify the 4 nearby
            pixels in the old image for each pixel in the distorted image.
            I can then use them as "fancy indices" to extract the proper
            pixels for each new pixel.
        - Then I multiply those extracted nearby points by precomputed
            ratios for the bilinear interpolation.
        '''

        p = params

        dist_fields = [None, None]
        dist_fields[0] = self._gen_distortion_field(params)
        dist_fields[1] = self._gen_distortion_field(params)

        #pylab.imshow(dist_fields[0])
        #pylab.show()

        # regenerate distortion index matrices
        # "_rows" are row indices
        # "_cols" are column indices
        # (separated due to the way fancy indexing works in numpy)
        h,w = p.image_size

        for y in range(h):
            for x in range(w):
                distort_x = dist_fields[0][y,x]
                distort_y = dist_fields[1][y,x]

                # the "target" is the coordinate we fetch color data from
                # (in the original image)
                # target_left and _top are the rounded coordinate on the
                # left/top of this target (float) coordinate
                target_pixel = (y+distort_y, x+distort_x)

                target_left = int(math.floor(x + distort_x))
                target_top = int(math.floor(y + distort_y))

                index_tl = [target_top, target_left]
                index_tr = [target_top, target_left+1]
                index_bl = [target_top+1, target_left]
                index_br = [target_top+1, target_left+1]

                # x_ratio is the ratio of importance of left pixels
                # y_ratio is the """" of top pixels
                # (in bilinear combination)
                y_ratio = 1.0 - (target_pixel[0] - target_top)
                x_ratio = 1.0 - (target_pixel[1] - target_left)

                # We use a default background color of 0 for displacements
                # outside of boundaries of the image.

                # if top left outside bounds
                if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w:
                    p.matrix_tl_corners_rows[y][x] = 0
                    p.matrix_tl_corners_cols[y][x] = 0
                    p.matrix_tl_multiply[y,x] = 0
                else:
                    p.matrix_tl_corners_rows[y][x] = index_tl[0]
                    p.matrix_tl_corners_cols[y][x] = index_tl[1]
                    p.matrix_tl_multiply[y,x] = x_ratio*y_ratio

                # if top right outside bounds
                if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
                    p.matrix_tr_corners_rows[y][x] = 0
                    p.matrix_tr_corners_cols[y][x] = 0
                    p.matrix_tr_multiply[y,x] = 0
                else:
                    p.matrix_tr_corners_rows[y][x] = index_tr[0]
                    p.matrix_tr_corners_cols[y][x] = index_tr[1]
                    p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio

                # if bottom left outside bounds
                if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
                    p.matrix_bl_corners_rows[y][x] = 0
                    p.matrix_bl_corners_cols[y][x] = 0
                    p.matrix_bl_multiply[y,x] = 0
                else:
                    p.matrix_bl_corners_rows[y][x] = index_bl[0]
                    p.matrix_bl_corners_cols[y][x] = index_bl[1]
                    p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)

                # if bottom right outside bounds
                if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
                    p.matrix_br_corners_rows[y][x] = 0
                    p.matrix_br_corners_cols[y][x] = 0
                    p.matrix_br_multiply[y,x] = 0
                else:
                    p.matrix_br_corners_rows[y][x] = index_br[0]
                    p.matrix_br_corners_cols[y][x] = index_br[1]
                    p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)

        # not really necessary, but anyway
        return p

    def transform_image(self, image):
        p = self.current_params

        # index pixels to get the 4 corners for bilinear combination
        tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
        tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
        bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
        br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]

        # bilinear ratios, elemwise multiply
        tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
        tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
        bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
        br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)

        # sum to finish bilinear combination
        return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0).astype(numpy.float32)
+
+# TESTS ----------------------------------------------------------------------
+
def _load_image(filepath):
    # Load an image file as a 2D float array with values in [0, 1].
    # `Image` (PIL) is imported in the __main__ block at the bottom.
    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
    img = Image.open(filepath)
    img = numpy.asarray(img)
    # RGB(A) input: collapse to luminance with standard weights.
    if len(img.shape) > 2:
        img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
    return (img / 255.0).astype('float')
+
def _specific_test():
    # Apply one mid-complexity distortion to a single test image and show
    # it (os, pylab, Image are imported by the __main__ block below).
    imgpath = os.path.join(_TEST_DIR, "d.png")
    img = _load_image(imgpath)
    dist = LocalElasticDistorter((32,32))
    print dist.regenerate_parameters(0.5)
    img = dist.transform_image(img)
    print dist.get_parameters_determined_by_complexity(0.4)
    pylab.imshow(img)
    pylab.show()
+
def _complexity_tests():
    # Generate 10 distorted samples per complexity level (0.0 to 1.0 in
    # steps of 0.1) and dump them plus an HTML index under _TEST_DIR.
    # NOTE(review): the html_content string literals below appear garbled
    # (their markup seems to have been stripped somewhere); kept verbatim.
    imgpath = os.path.join(_TEST_DIR, "d.png")
    dist = LocalElasticDistorter((32,32))
    orig_img = _load_image(imgpath)
    html_content = '''Original:
'''
    for complexity in numpy.arange(0.0, 1.1, 0.1):
        html_content += '
Complexity: ' + str(complexity) + '
'
        for i in range(10):
            t1 = time.time()
            dist.regenerate_parameters(complexity)
            t2 = time.time()
            print "diff", t2-t1
            img = dist.transform_image(orig_img)
            filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
            new_path = os.path.join(_TEST_DIR, filename)
            _save_image(img, new_path)
            html_content += ''
    html_content += ""
    html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
    html_file.write(html_content)
    html_file.close()
+
def _complexity_benchmark():
    # Timing: regenerate_parameters() is slow only until enough fields are
    # precomputed for a given complexity (50 per bucket); afterwards it
    # just picks a cached field at random, which should show up as much
    # lower per-call averages in the later batches below.
    imgpath = os.path.join(_TEST_DIR, "d.png")
    dist = LocalElasticDistorter((32,32))
    orig_img = _load_image(imgpath)

    for cpx in (0.21, 0.35):
        # time the first 10
        t1 = time.time()
        for i in range(10):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10

        # time the next 40
        t1 = time.time()
        for i in range(40):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40

        # time the next 50
        t1 = time.time()
        for i in range(50):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50

        # time the next 1000
        t1 = time.time()
        for i in range(1000):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000

        # time the next 1000 with old complexity
        t1 = time.time()
        for i in range(1000):
            dist.regenerate_parameters(0.21)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
+
+
+
+
def _save_image(img, path):
    # Save a [0, 1] float image as an 8-bit grayscale file.
    # `Image` (PIL) is imported in the __main__ block below.
    img2 = Image.fromarray((img * 255).astype('uint8'), "L")
    img2.save(path)
+
+# TODO: reformat to follow new class... it function of complexity now
+'''
+def _distorter_tests():
+ #import pylab
+ #pylab.imshow(img)
+ #pylab.show()
+
+ for letter in ("d", "a", "n", "o"):
+ img = _load_image("tests/" + letter + ".png")
+ for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
+ for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
+ id = LocalElasticDistorter((32,32))
+ img2 = id.distort_image(img)
+ img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
+ img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
+'''
+
+def _benchmark():
+ img = _load_image("tests/d.png")
+ dist = LocalElasticDistorter((32,32))
+ dist.regenerate_parameters(0.0)
+ import time
+ t1 = time.time()
+ for i in range(10000):
+ if i % 1000 == 0:
+ print "-"
+ dist.distort_image(img)
+ t2 = time.time()
+ print "t2-t1", t2-t1
+ print "avg", 10000/(t2-t1)
+
if __name__ == '__main__':
    # Deferred imports: only the manual tests/benchmarks above need them.
    import time
    import pylab
    import Image
    import os.path
    # Uncomment the test you want to run.
    #_distorter_tests()
    #_benchmark()
    #_specific_test()
    #_complexity_tests()
    _complexity_benchmark()
+
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pipeline.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pipeline.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,391 @@
+#!/usr/bin/python
+# coding: utf-8
+
+from __future__ import with_statement
+
+# This is intended to be run as a GIMP script
+#from gimpfu import *
+
+import sys, os, getopt
+import numpy
+import filetensor as ft
+import random
+
+# To debug locally, also call with -s 100 (to stop after ~100)
+# (otherwise we allocate all needed memory, might be loonnng and/or crash
+# if, lucky like me, you have an age-old laptop creaking from everywhere)
+DEBUG = False
+DEBUG_X = False
+if DEBUG:
+ DEBUG_X = False # Debug under X (pylab.show())
+
+DEBUG_IMAGES_PATH = None
+if DEBUG:
+ # UNTESTED YET
+ # To avoid loading NIST if you don't have it handy
+ # (use with debug_images_iterator(), see main())
+ # To use NIST, leave as = None
+ DEBUG_IMAGES_PATH = None#'/home/francois/Desktop/debug_images'
+
+# Directory where to dump images to visualize results
+# (create it, otherwise it'll crash)
+DEBUG_OUTPUT_DIR = 'debug_out'
+
+DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft'
+DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft'
+DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
+DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft'
+ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE']
+
+# PARSE COMMAND LINE ARGUMENTS
+def get_argv():
+ with open(ARGS_FILE) as f:
+ args = [l.rstrip() for l in f.readlines()]
+ return args
+
+def usage():
+ print '''
+Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...]
+ -m, --max-complexity: max complexity to generate for an image
+ -z, --probability-zero: probability of using complexity=0 for an image
+ -o, --output-file: full path to file to use for output of images
+ -p, --params-output-file: path to file to output params to
+ -x, --labels-output-file: path to file to output labels to
+ -f, --data-file: path to filetensor (.ft) data file (NIST)
+ -l, --label-file: path to filetensor (.ft) labels file (NIST labels)
+ -c, --ocr-file: path to filetensor (.ft) data file (OCR)
+ -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels)
+ -a, --prob-font: probability of using a raw font image
+ -b, --prob-captcha: probability of using a captcha image
+ -g, --prob-ocr: probability of using an ocr image
+ -y, --seed: the job seed
+ '''
+
+try:
+ opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=",
+"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="])
+except getopt.GetoptError, err:
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ pdb.gimp_quit(0)
+ sys.exit(2)
+
+for o, a in opts:
+ if o in ('-y','--seed'):
+ random.seed(int(a))
+ numpy.random.seed(int(a))
+
+if DEBUG_X:
+ import pylab
+ pylab.ion()
+
+from PoivreSel import PoivreSel
+from thick import Thick
+from BruitGauss import BruitGauss
+from DistorsionGauss import DistorsionGauss
+from PermutPixel import PermutPixel
+from gimp_script import GIMP1
+from Rature import Rature
+from contrast import Contrast
+from local_elastic_distortions import LocalElasticDistorter
+from slant import Slant
+from Occlusion import Occlusion
+from add_background_image import AddBackground
+from affine_transform import AffineTransformation
+from ttf2jpg import ttf2jpg
+from Facade import generateCaptcha
+
+if DEBUG:
+ from visualizer import Visualizer
+ # Either put the visualizer as in the MODULES_INSTANCES list
+ # after each module you want to visualize, or in the
+ # AFTER_EACH_MODULE_HOOK list (but not both, it's redundant)
+ VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR, on_screen=False)
+
+###---------------------order of transformation module
+MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+
+# These should have a "after_transform_callback(self, image)" method
+# (called after each call to transform_image in a module)
+AFTER_EACH_MODULE_HOOK = []
+if DEBUG:
+ AFTER_EACH_MODULE_HOOK = [VISUALIZER]
+
+# These should have an "end_transform_callback(self, final_image)" method
+# (called after all modules have been called)
+END_TRANSFORM_HOOK = []
+if DEBUG:
+ END_TRANSFORM_HOOK = [VISUALIZER]
+
+class Pipeline():
+ def __init__(self, modules, num_img, image_size=(32,32)):
+ self.modules = modules
+ self.num_img = num_img
+ self.num_params_stored = 0
+ self.image_size = image_size
+
+ self.init_memory()
+
+ def init_num_params_stored(self):
+ # just a dummy call to regenerate_parameters() to get the
+ # real number of params (only those which are stored)
+ self.num_params_stored = 0
+ for m in self.modules:
+ self.num_params_stored += len(m.regenerate_parameters(0.0))
+
+ def init_memory(self):
+ self.init_num_params_stored()
+
+ total = self.num_img
+ num_px = self.image_size[0] * self.image_size[1]
+
+ self.res_data = numpy.empty((total, num_px), dtype=numpy.uint8)
+ # +1 to store complexity
+ self.params = numpy.empty((total, self.num_params_stored+len(self.modules)))
+ self.res_labels = numpy.empty(total, dtype=numpy.int32)
+
+ def run(self, img_iterator, complexity_iterator):
+ img_size = self.image_size
+
+ should_hook_after_each = len(AFTER_EACH_MODULE_HOOK) != 0
+ should_hook_at_the_end = len(END_TRANSFORM_HOOK) != 0
+
+ for img_no, (img, label) in enumerate(img_iterator):
+ sys.stdout.flush()
+
+ global_idx = img_no
+
+ img = img.reshape(img_size)
+
+ param_idx = 0
+ mod_idx = 0
+ for mod in self.modules:
+ # This used to be done _per batch_,
+ # ie. out of the "for img" loop
+ complexity = complexity_iterator.next()
+ #better to do a complexity sampling for each transformations in order to have more variability
+ #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time))
+ #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
+ #complexity
+ self.params[global_idx, mod_idx] = complexity
+ mod_idx += 1
+
+ p = mod.regenerate_parameters(complexity)
+ self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p
+ param_idx += len(p)
+
+ img = mod.transform_image(img)
+
+ if should_hook_after_each:
+ for hook in AFTER_EACH_MODULE_HOOK:
+ hook.after_transform_callback(img)
+
+ self.res_data[global_idx] = \
+ img.reshape((img_size[0] * img_size[1],))*255
+ self.res_labels[global_idx] = label
+
+ if should_hook_at_the_end:
+ for hook in END_TRANSFORM_HOOK:
+ hook.end_transform_callback(img)
+
+ def write_output(self, output_file_path, params_output_file_path, labels_output_file_path):
+ with open(output_file_path, 'wb') as f:
+ ft.write(f, self.res_data)
+
+ numpy.save(params_output_file_path, self.params)
+
+ with open(labels_output_file_path, 'wb') as f:
+ ft.write(f, self.res_labels)
+
+
+##############################################################################
+# COMPLEXITY ITERATORS
+# They're called once every img, to get the complexity to use for that img
+# they must be infinite (should never throw StopIteration when calling next())
+
+# probability of generating 0 complexity, otherwise
+# uniform over 0.0-max_complexity
+def range_complexity_iterator(probability_zero, max_complexity):
+ assert max_complexity <= 1.0
+ n = numpy.random.uniform(0.0, 1.0)
+ while True:
+ if n < probability_zero:
+ yield 0.0
+ else:
+ yield numpy.random.uniform(0.0, max_complexity)
+
+##############################################################################
+# DATA ITERATORS
+# They can be used to interleave different data sources etc.
+
+'''
+# Following code (DebugImages and iterator) is untested
+
+def load_image(filepath):
+ _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
+ img = Image.open(filepath)
+ img = numpy.asarray(img)
+ if len(img.shape) > 2:
+ img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
+ return (img / 255.0).astype('float')
+
+class DebugImages():
+ def __init__(self, images_dir_path):
+ import glob, os.path
+ self.filelist = glob.glob(os.path.join(images_dir_path, "*.png"))
+
+def debug_images_iterator(debug_images):
+ for path in debug_images.filelist:
+ yield load_image(path)
+'''
+
+class NistData():
+ def __init__(self, nist_path, label_path, ocr_path, ocrlabel_path):
+ self.train_data = open(nist_path, 'rb')
+ self.train_labels = open(label_path, 'rb')
+ self.dim = tuple(ft._read_header(self.train_data)[3])
+ # in order to seek to the beginning of the file
+ self.train_data.close()
+ self.train_data = open(nist_path, 'rb')
+ self.ocr_data = open(ocr_path, 'rb')
+ self.ocr_labels = open(ocrlabel_path, 'rb')
+
+# cet iterator load tout en ram
+def nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img):
+ img = ft.read(nist.train_data)
+ labels = ft.read(nist.train_labels)
+ if prob_ocr:
+ ocr_img = ft.read(nist.ocr_data)
+ ocr_labels = ft.read(nist.ocr_labels)
+ ttf = ttf2jpg()
+ L = [chr(ord('0')+x) for x in range(10)] + [chr(ord('A')+x) for x in range(26)] + [chr(ord('a')+x) for x in range(26)]
+
+ for i in xrange(num_img):
+ r = numpy.random.rand()
+ if r <= prob_font:
+ yield ttf.generate_image()
+ elif r <=prob_font + prob_captcha:
+ (arr, charac) = generateCaptcha(0,1)
+ yield arr.astype(numpy.float32)/255, L.index(charac[0])
+ elif r <= prob_font + prob_captcha + prob_ocr:
+ j = numpy.random.randint(len(ocr_labels))
+ yield ocr_img[j].astype(numpy.float32)/255, ocr_labels[j]
+ else:
+ j = numpy.random.randint(len(labels))
+ yield img[j].astype(numpy.float32)/255, labels[j]
+
+
+# Mostly for debugging, for the moment, just to see if we can
+# reload the images and parameters.
+def reload(output_file_path, params_output_file_path):
+ images_ft = open(output_file_path, 'rb')
+ images_ft_dim = tuple(ft._read_header(images_ft)[3])
+
+ print "Images dimensions: ", images_ft_dim
+
+ params = numpy.load(params_output_file_path)
+
+ print "Params dimensions: ", params.shape
+ print params
+
+
+##############################################################################
+# MAIN
+
+
+# Might be called locally or through dbidispatch. In all cases it should be
+# passed to the GIMP executable to be able to use GIMP filters.
+# Ex:
+def _main():
+ #global DEFAULT_NIST_PATH, DEFAULT_LABEL_PATH, DEFAULT_OCR_PATH, DEFAULT_OCRLABEL_PATH
+ #global getopt, get_argv
+
+ max_complexity = 0.5 # default
+ probability_zero = 0.1 # default
+ output_file_path = None
+ params_output_file_path = None
+ labels_output_file_path = None
+ nist_path = DEFAULT_NIST_PATH
+ label_path = DEFAULT_LABEL_PATH
+ ocr_path = DEFAULT_OCR_PATH
+ ocrlabel_path = DEFAULT_OCRLABEL_PATH
+ prob_font = 0.0
+ prob_captcha = 0.0
+ prob_ocr = 0.0
+ stop_after = None
+ reload_mode = False
+
+ for o, a in opts:
+ if o in ('-m', '--max-complexity'):
+ max_complexity = float(a)
+ assert max_complexity >= 0.0 and max_complexity <= 1.0
+ elif o in ('-r', '--reload'):
+ reload_mode = True
+ elif o in ("-z", "--probability-zero"):
+ probability_zero = float(a)
+ assert probability_zero >= 0.0 and probability_zero <= 1.0
+ elif o in ("-o", "--output-file"):
+ output_file_path = a
+ elif o in ('-p', "--params-output-file"):
+ params_output_file_path = a
+ elif o in ('-x', "--labels-output-file"):
+ labels_output_file_path = a
+ elif o in ('-s', "--stop-after"):
+ stop_after = int(a)
+ elif o in ('-f', "--data-file"):
+ nist_path = a
+ elif o in ('-l', "--label-file"):
+ label_path = a
+ elif o in ('-c', "--ocr-file"):
+ ocr_path = a
+ elif o in ('-d', "--ocrlabel-file"):
+ ocrlabel_path = a
+ elif o in ('-a', "--prob-font"):
+ prob_font = float(a)
+ elif o in ('-b', "--prob-captcha"):
+ prob_captcha = float(a)
+ elif o in ('-g', "--prob-ocr"):
+ prob_ocr = float(a)
+ elif o in ('-y', "--seed"):
+ pass
+ else:
+ assert False, "unhandled option"
+
+ if output_file_path == None or params_output_file_path == None or labels_output_file_path == None:
+ print "Must specify the three output files."
+ usage()
+ pdb.gimp_quit(0)
+ sys.exit(2)
+
+ if reload_mode:
+ reload(output_file_path, params_output_file_path)
+ else:
+ if DEBUG_IMAGES_PATH:
+ '''
+ # This code is yet untested
+ debug_images = DebugImages(DEBUG_IMAGES_PATH)
+ num_img = len(debug_images.filelist)
+ pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
+ img_it = debug_images_iterator(debug_images)
+ '''
+ else:
+ nist = NistData(nist_path, label_path, ocr_path, ocrlabel_path)
+ num_img = 819200 # 800 Mb file
+ if stop_after:
+ num_img = stop_after
+ pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
+ img_it = nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img)
+
+ cpx_it = range_complexity_iterator(probability_zero, max_complexity)
+ pl.run(img_it, cpx_it)
+ pl.write_output(output_file_path, params_output_file_path, labels_output_file_path)
+
+_main()
+
+if DEBUG_X:
+ pylab.ioff()
+ pylab.show()
+
+pdb.gimp_quit(0)
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/.DS_Store
Binary file data_generation/transformations/pycaptcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/BUGS
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/BUGS Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,7 @@
+Known bugs:
+
+- PersistentFactory() is almost certainly horrible at concurrent access
+- Tests are never invalidated with PersistentStorage(), as they aren't written back to the database
+- All files in Captcha/data are installed, including silly things like .svn directories and *~
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/COPYING
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/COPYING Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,19 @@
+Copyright (c) 2004 Micah Dowty
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Base.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Base.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,127 @@
+""" Captcha.Base
+
+Base class for all types of CAPTCHA tests. All tests have one or
+more solutions, determined when the test is generated. Solutions
+can be any python object.
+
+All tests can be solved by presenting at least some preset number
+of correct solutions. Some tests may only have one solution and require
+one solution, but other tests may require N correct solutions of M
+possible solutions.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+import random, string, time, shelve
+
+__all__ = ["BaseCaptcha", "Factory", "PersistentFactory"]
+
+
+def randomIdentifier(alphabet = string.ascii_letters + string.digits,
+ length = 24):
+ return "".join([random.choice(alphabet) for i in xrange(length)])
+
+
+class BaseCaptcha(object):
+ """Base class for all CAPTCHA tests"""
+ # Subclasses can override these to set the solution criteria
+ minCorrectSolutions = 1
+ maxIncorrectSolutions = 0
+
+ def __init__(self):
+ self.solutions = []
+ self.valid = True
+
+ # Each test has a unique identifier, used to refer to that test
+ # later, and a creation time so it can expire later.
+ self.id = randomIdentifier()
+ self.creationTime = time.time()
+
+ def addSolution(self, solution):
+ self.solutions.append(solution)
+
+ def testSolutions(self, solutions):
+ """Test whether the given solutions are sufficient for this CAPTCHA.
+ A given CAPTCHA can only be tested once, after that it is invalid
+ and always returns False. This makes random guessing much less effective.
+ """
+ if not self.valid:
+ return False
+ self.valid = False
+
+ numCorrect = 0
+ numIncorrect = 0
+
+ for solution in solutions:
+ if solution in self.solutions:
+ numCorrect += 1
+ else:
+ numIncorrect += 1
+
+ return numCorrect >= self.minCorrectSolutions and \
+ numIncorrect <= self.maxIncorrectSolutions
+
+
+class Factory(object):
+ """Creates BaseCaptcha instances on demand, and tests solutions.
+ CAPTCHAs expire after a given amount of time, given in seconds.
+ The default is 15 minutes.
+ """
+ def __init__(self, lifetime=60*15):
+ self.lifetime = lifetime
+ self.storedInstances = {}
+
+ def new(self, cls, *args, **kwargs):
+ """Create a new instance of our assigned BaseCaptcha subclass, passing
+ it any extra arguments we're given. This stores the result for
+ later testing.
+ """
+ self.clean()
+ inst = cls(*args, **kwargs)
+ self.storedInstances[inst.id] = inst
+ return inst
+
+ def get(self, id):
+ """Retrieve the CAPTCHA with the given ID. If it's expired already,
+ this will return None. A typical web application will need to
+ new() a CAPTCHA when generating an html page, then get() it later
+ when its images or sounds must be rendered.
+ """
+ return self.storedInstances.get(id)
+
+ def clean(self):
+ """Removed expired tests"""
+ expiredIds = []
+ now = time.time()
+ for inst in self.storedInstances.itervalues():
+ if inst.creationTime + self.lifetime < now:
+ expiredIds.append(inst.id)
+ for id in expiredIds:
+ del self.storedInstances[id]
+
+ def test(self, id, solutions):
+ """Test the given list of solutions against the BaseCaptcha instance
+ created earlier with the given id. Returns True if the test passed,
+ False on failure. In either case, the test is invalidated. Returns
+ False in the case of an invalid id.
+ """
+ self.clean()
+ inst = self.storedInstances.get(id)
+ if not inst:
+ return False
+ result = inst.testSolutions(solutions)
+ return result
+
+
+class PersistentFactory(Factory):
+ """A simple persistent factory, for use in CGI or multi-process environments
+ where the state must remain across python interpreter sessions.
+ This implementation uses the 'shelve' module.
+ """
+ def __init__(self, filename, lifetime=60*15):
+ Factory.__init__(self, lifetime)
+ self.storedInstances = shelve.open(filename)
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/File.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/File.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,53 @@
+""" Captcha.File
+
+Utilities for finding and picking random files from our 'data' directory
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+import os, random
+
+# Determine the data directory. This can be overridden after import-time if needed.
+dataDir = os.path.join(os.path.split(os.path.abspath(__file__))[0], "data")
+
+
+class RandomFileFactory(object):
+ """Given a list of files and/or directories, this picks a random file.
+ Directories are searched for files matching any of a list of extensions.
+ Files are relative to our data directory plus a subclass-specified base path.
+ """
+ extensions = []
+ basePath = "."
+
+ def __init__(self, *fileList):
+ self.fileList = fileList
+ self._fullPaths = None
+
+ def _checkExtension(self, name):
+ """Check the file against our given list of extensions"""
+ for ext in self.extensions:
+ if name.endswith(ext):
+ return True
+ return False
+
+ def _findFullPaths(self):
+ """From our given file list, find a list of full paths to files"""
+ paths = []
+ for name in self.fileList:
+ path = os.path.join(dataDir, self.basePath, name)
+ if os.path.isdir(path):
+ for content in os.listdir(path):
+ if self._checkExtension(content):
+ paths.append(os.path.join(path, content))
+ else:
+ paths.append(path)
+ return paths
+
+ def pick(self):
+ if self._fullPaths is None:
+ self._fullPaths = self._findFullPaths()
+ return random.choice(self._fullPaths)
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Backgrounds.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Backgrounds.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,95 @@
+""" Captcha.Visual.Backgrounds
+
+Background layers for visual CAPTCHAs
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+from Captcha.Visual import Layer, Pictures
+import random, os
+import ImageDraw, Image
+
+
+class SolidColor(Layer):
+ """A solid color background. Very weak on its own, but good
+ to combine with other backgrounds.
+ """
+ def __init__(self, color="white"):
+ self.color = color
+
+ def render(self, image):
+ image.paste(self.color)
+
+
+class Grid(Layer):
+ """A grid of lines, with a given foreground color.
+ The size is given in pixels. The background is transparent,
+ so another layer (like SolidColor) should be put behind it.
+ """
+ def __init__(self, size=16, foreground="black"):
+ self.size = size
+ self.foreground = foreground
+ self.offset = (random.uniform(0, self.size),
+ random.uniform(0, self.size))
+
+ def render(self, image):
+ draw = ImageDraw.Draw(image)
+
+ for i in xrange(image.size[0] / self.size + 1):
+ draw.line( (i*self.size+self.offset[0], 0,
+ i*self.size+self.offset[0], image.size[1]), fill=self.foreground)
+
+ for i in xrange(image.size[0] / self.size + 1):
+ draw.line( (0, i*self.size+self.offset[1],
+ image.size[0], i*self.size+self.offset[1]), fill=self.foreground)
+
+
+class TiledImage(Layer):
+ """Pick a random image and a random offset, and tile the rendered image with it"""
+ def __init__(self, imageFactory=Pictures.abstract):
+ self.tileName = imageFactory.pick()
+ self.offset = (random.uniform(0, 1),
+ random.uniform(0, 1))
+
+ def render(self, image):
+ tile = Image.open(self.tileName)
+ for j in xrange(-1, int(image.size[1] / tile.size[1]) + 1):
+ for i in xrange(-1, int(image.size[0] / tile.size[0]) + 1):
+ dest = (int((self.offset[0] + i) * tile.size[0]),
+ int((self.offset[1] + j) * tile.size[1]))
+ image.paste(tile, dest)
+
+
+class CroppedImage(Layer):
+ """Pick a random image, cropped randomly. Source images should be larger than the CAPTCHA."""
+ def __init__(self, imageFactory=Pictures.nature):
+ self.imageName = imageFactory.pick()
+ self.align = (random.uniform(0,1),
+ random.uniform(0,1))
+
+ def render(self, image):
+ i = Image.open(self.imageName)
+ image.paste(i, (int(self.align[0] * (image.size[0] - i.size[0])),
+ int(self.align[1] * (image.size[1] - i.size[1]))))
+
+
+class RandomDots(Layer):
+ """Draw random colored dots"""
+ def __init__(self, colors=("white", "black"), dotSize=4, numDots=400):
+ self.colors = colors
+ self.dotSize = dotSize
+ self.numDots = numDots
+ self.seed = random.random()
+
+ def render(self, image):
+ r = random.Random(self.seed)
+ for i in xrange(self.numDots):
+ bx = int(r.uniform(0, image.size[0]-self.dotSize))
+ by = int(r.uniform(0, image.size[1]-self.dotSize))
+ image.paste(r.choice(self.colors), (bx, by,
+ bx+self.dotSize-1,
+ by+self.dotSize-1))
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Base.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Base.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,70 @@
+""" Captcha.Visual.BAse
+
+Base classes for visual CAPTCHAs. We use the Python Imaging Library
+to manipulate these images.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+import Captcha
+import Image
+
+__all__ = ['ImageCaptcha', 'Layer']
+
+
+class ImageCaptcha(Captcha.BaseCaptcha):
+ """Base class for image-based CAPTCHA tests.
+ The render() function generates the CAPTCHA image at the given size by
+ combining Layer instances from self.layers, which should be created by
+ the subclass-defined getLayers().
+ """
+ defaultSize = (32,32)
+ # anciennement a defaultSize(256,96)
+ def __init__(self, *args, **kwargs):
+ Captcha.BaseCaptcha.__init__(self)
+ self._layers = self.getLayers(*args, **kwargs)
+
+ def getImage(self):
+ """Get a PIL image representing this CAPTCHA test, creating it if necessary"""
+ if not self._image:
+ self._image = self.render()
+ return self._image
+
+ def getLayers(self):
+ """Subclasses must override this to return a list of Layer instances to render.
+ Lists within the list of layers are recursively rendered.
+ """
+ return []
+
+ def render(self, size=None):
+ """Render this CAPTCHA, returning a PIL image"""
+ if size is None:
+ size = self.defaultSize
+ img = Image.new("L", size)
+ # img = Image.new("RGB", size)
+ return self._renderList(self._layers, Image.new("L", size))
+
+ def _renderList(self, l, img):
+ for i in l:
+ if type(i) == tuple or type(i) == list:
+ img = self._renderList(i, img)
+ else:
+ img = i.render(img) or img
+ return img
+
+
+class Layer(object):
+ """A renderable object representing part of a CAPTCHA.
+ The render() function should return approximately the same result, regardless
+ of the image size. This means any randomization must occur in the constructor.
+
+ If the render() function returns something non-None, it is taken as an image to
+ replace the current image with. This can be used to implement transformations
+ that result in a separate image without having to copy the results back to the first.
+ """
+ def render(self, img):
+ pass
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Distortions.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Distortions.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,117 @@
+""" Captcha.Visual.Distortions
+
+Distortion layers for visual CAPTCHAs
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+from Captcha.Visual import Layer
+import ImageDraw, Image
+import random, math
+
+
+class WigglyBlocks(Layer):
+ """Randomly select and shift blocks of the image"""
+ def __init__(self, blockSize=3, sigma=0.01, iterations=300):
+ self.blockSize = blockSize
+ self.sigma = sigma
+ self.iterations = iterations
+ self.seed = random.random()
+
+ def render(self, image):
+ r = random.Random(self.seed)
+ for i in xrange(self.iterations):
+ # Select a block
+ bx = int(r.uniform(0, image.size[0]-self.blockSize))
+ by = int(r.uniform(0, image.size[1]-self.blockSize))
+ block = image.crop((bx, by, bx+self.blockSize-1, by+self.blockSize-1))
+
+ # Figure out how much to move it.
+ # The call to floor() is important so we always round toward
+ # 0 rather than to -inf. Just int() would bias the block motion.
+ mx = int(math.floor(r.normalvariate(0, self.sigma)))
+ my = int(math.floor(r.normalvariate(0, self.sigma)))
+
+ # Now actually move the block
+ image.paste(block, (bx+mx, by+my))
+
+
+class WarpBase(Layer):
+ """Abstract base class for image warping. Subclasses define a
+ function that maps points in the output image to points in the input image.
+ This warping engine runs a grid of points through this transform and uses
+ PIL's mesh transform to warp the image.
+ """
+ filtering = Image.BILINEAR
+ resolution = 10
+
+ def getTransform(self, image):
+ """Return a transformation function, subclasses should override this"""
+ return lambda x, y: (x, y)
+
+ def render(self, image):
+ r = self.resolution
+ xPoints = image.size[0] / r + 2
+ yPoints = image.size[1] / r + 2
+ f = self.getTransform(image)
+
+ # Create a list of arrays with transformed points
+ xRows = []
+ yRows = []
+ for j in xrange(yPoints):
+ xRow = []
+ yRow = []
+ for i in xrange(xPoints):
+ x, y = f(i*r, j*r)
+
+ # Clamp the edges so we don't get black undefined areas
+ x = max(0, min(image.size[0]-1, x))
+ y = max(0, min(image.size[1]-1, y))
+
+ xRow.append(x)
+ yRow.append(y)
+ xRows.append(xRow)
+ yRows.append(yRow)
+
+ # Create the mesh list, with a transformation for
+ # each square between points on the grid
+ mesh = []
+ for j in xrange(yPoints-1):
+ for i in xrange(xPoints-1):
+ mesh.append((
+ # Destination rectangle
+ (i*r, j*r,
+ (i+1)*r, (j+1)*r),
+ # Source quadrilateral
+ (xRows[j ][i ], yRows[j ][i ],
+ xRows[j+1][i ], yRows[j+1][i ],
+ xRows[j+1][i+1], yRows[j+1][i+1],
+ xRows[j ][i+1], yRows[j ][i+1]),
+ ))
+
+ return image.transform(image.size, Image.MESH, mesh, self.filtering)
+
+
+class SineWarp(WarpBase):
+ """Warp the image using a random composition of sine waves"""
+
+ def __init__(self,
+ amplitudeRange = (3, 6.5),
+ periodRange = (0.04, 0.1),
+ ):
+ self.amplitude = random.uniform(*amplitudeRange)
+ self.period = random.uniform(*periodRange)
+ self.offset = (random.uniform(0, math.pi * 2 / self.period),
+ random.uniform(0, math.pi * 2 / self.period))
+
+ def getTransform(self, image):
+ return (lambda x, y,
+ a = self.amplitude,
+ p = self.period,
+ o = self.offset:
+ (math.sin( (y+o[0])*p )*a + x,
+ math.sin( (x+o[1])*p )*a + y))
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Pictures.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Pictures.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,23 @@
+""" Captcha.Visual.Pictures
+
+Random collections of images
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+from Captcha import File
+import Image
+
+
+class ImageFactory(File.RandomFileFactory):
+ """A factory that generates random images from a list"""
+ extensions = [".png", ".jpeg"]
+ basePath = "pictures"
+
+
+abstract = ImageFactory("abstract")
+nature = ImageFactory("nature")
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Tests.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Tests.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,65 @@
+""" Captcha.Visual.Tests
+
+Visual CAPTCHA tests
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+from Captcha.Visual import Text, Backgrounds, Distortions, ImageCaptcha
+from Captcha import Words
+import random
+
+__all__ = ["PseudoGimpy", "AngryGimpy", "AntiSpam"]
+
+
+class PseudoGimpy(ImageCaptcha):
+ """A relatively easy CAPTCHA that's somewhat easy on the eyes"""
+ def getLayers(self):
+ word = Words.defaultWordList.pick()
+ self.addSolution(word)
+ return [
+ # random.choice([
+ # Backgrounds.CroppedImage(),
+ # Backgrounds.TiledImage(),
+ # ]),
+ Text.TextLayer(word, borderSize=1),
+ Distortions.SineWarp(),
+ ]
+
+
+class AngryGimpy(ImageCaptcha):
+ """A harder but less visually pleasing CAPTCHA"""
+ def getLayers(self):
+ word = Words.defaultWordList.pick()
+ self.addSolution(word)
+ return [
+ # suppression du background
+ # Backgrounds.TiledImage(),
+ # Backgrounds.RandomDots(),
+ Text.TextLayer(word, borderSize=1),
+ # Distortions.SineWarp(periodRange = (0.04, 0.07))
+ Distortions.WigglyBlocks(),
+ ]
+
+
+class AntiSpam(ImageCaptcha):
+ """A fixed-solution CAPTCHA that can be used to hide email addresses or URLs from bots"""
+ fontFactory = Text.FontFactory(20, "vera/VeraBd.ttf")
+ defaultSize = (512,50)
+
+ def getLayers(self, solution="murray@example.com"):
+ self.addSolution(solution)
+
+ textLayer = Text.TextLayer(solution,
+ borderSize = 2,
+ fontFactory = self.fontFactory)
+
+ return [
+ Backgrounds.CroppedImage(),
+ textLayer,
+ Distortions.SineWarp(amplitudeRange = (3, 5)),
+ ]
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/Text.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/Text.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,101 @@
+""" Captcha.Visual.Text
+
+Text generation for visual CAPTCHAs.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+import random, os
+from Captcha import Visual, File
+import ImageFont, ImageDraw
+
+
+class FontFactory(File.RandomFileFactory):
+ """Picks random fonts and/or sizes from a given list.
+ 'sizes' can be a single size or a (min,max) tuple.
+ If any of the given files are directories, all *.ttf found
+ in that directory will be added.
+ """
+ extensions = [".ttf", ".TTF"]
+ basePath = "fonts"
+
+# arguments variables a modifier pour mettre le chemin vers les fontes.
+ def __init__(self, sizes, *fileNames):
+ File.RandomFileFactory.__init__(self, *fileNames)
+
+ if type(sizes) is tuple:
+ self.minSize = sizes[0]
+ self.maxSize = sizes[1]
+ else:
+ self.minSize = sizes
+ self.maxSize = sizes
+
+ def pick(self):
+ """Returns a (fileName, size) tuple that can be passed to ImageFont.truetype()"""
+ fileName = File.RandomFileFactory.pick(self)
+ size = int(random.uniform(self.minSize, self.maxSize) + 0.5)
+ return (fileName, size)
+
+# Predefined font factories
+defaultFontFactory = FontFactory(25, "allfonts")
+#defaultFontFactory = FontFactory((30, 40), "vera")
+
+class TextLayer(Visual.Layer):
+ """Represents a piece of text rendered within the image.
+ Alignment is given such that (0,0) places the text in the
+    top-left corner and (1,1) places it in the bottom-right.
+
+ The font and alignment are optional, if not specified one is
+ chosen randomly. If no font factory is specified, the default is used.
+ """
+ def __init__(self, text,
+ alignment = None,
+ font = None,
+ fontFactory = None,
+ textColor = "white",
+ borderSize = 0,
+ borderColor = None,
+ ):
+ if fontFactory is None:
+ global defaultFontFactory
+ fontFactory = defaultFontFactory
+
+ if font is None:
+ font = fontFactory.pick()
+
+ if alignment is None:
+ alignment = (random.uniform(0,1),
+ random.uniform(0,1))
+
+ self.text = text
+ self.alignment = alignment
+ self.font = font
+ self.textColor = textColor
+ self.borderSize = borderSize
+ self.borderColor = borderColor
+
+ def render(self, img):
+ font = ImageFont.truetype(*self.font)
+ textSize = font.getsize(self.text)
+ draw = ImageDraw.Draw(img)
+
+ # Find the text's origin given our alignment and current image size
+ x = int((img.size[0] - textSize[0] - self.borderSize*2) * self.alignment[0] + 0.5)
+ y = int((img.size[1] - textSize[1] - self.borderSize*2) * self.alignment[1] + 0.5)
+
+ # Draw the border if we need one. This is slow and ugly, but there doesn't
+ # seem to be a better way with PIL.
+ if self.borderSize > 0:
+ for bx in (-1,0,1):
+ for by in (-1,0,1):
+ if bx and by:
+ draw.text((x + bx * self.borderSize,
+ y + by * self.borderSize),
+ self.text, font=font, fill=self.borderColor)
+
+ # And the text itself...
+ draw.text((x,y), self.text, font=font, fill=self.textColor)
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Visual/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Visual/__init__.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,14 @@
+""" Captcha.Visual
+
+This package contains functionality specific to visual CAPTCHA tests.
+
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+# Convenience imports
+from Base import *
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/Words.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/Words.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,58 @@
+""" Captcha.Words
+
+Utilities for managing word lists and finding random words
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+import random, os
+import File
+
+
+class WordList(object):
+ """A class representing a word list read from disk lazily.
+ Blank lines and comment lines starting with '#' are ignored.
+ Any number of words per line may be used. The list can
+    optionally ignore words not within a given length range.
+ """
+ def __init__(self, fileName, minLength=None, maxLength=None):
+ self.words = None
+ self.fileName = fileName
+ self.minLength = minLength
+ self.maxLength = maxLength
+
+ def read(self):
+ """Read words from disk"""
+ f = open(os.path.join(File.dataDir, "words", self.fileName))
+
+ self.words = []
+ for line in f.xreadlines():
+ line = line.strip()
+ if not line:
+ continue
+ if line[0] == '#':
+ continue
+ for word in line.split():
+ if self.minLength is not None and len(word) < self.minLength:
+ continue
+ if self.maxLength is not None and len(word) > self.maxLength:
+ continue
+ self.words.append(word)
+
+ def pick(self):
+ """Pick a random word from the list, reading it in if necessary"""
+ if self.words is None:
+ self.read()
+ return random.choice(self.words)
+
+
+# Define several shared word lists that are read from disk on demand
+basic_english = WordList("basic-english")
+basic_english_restricted = WordList("basic-english", minLength=5, maxLength=8)
+characters = WordList("characters")
+defaultWordList = characters
+
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/__init__.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,41 @@
+""" Captcha
+
+This is the PyCAPTCHA package, a collection of Python modules
+implementing CAPTCHAs: automated tests that humans should pass,
+but current computer programs can't. These tests are often
+used for security.
+
+See http://www.captcha.net for more information and examples.
+
+This project was started because the CIA project, written in
+Python, needed a CAPTCHA to automate its user creation process
+safely. All existing implementations the author could find were
+written in Java or for the .NET framework, so a simple Python
+alternative was needed.
+"""
+#
+# PyCAPTCHA Package
+# Copyright (C) 2004 Micah Dowty
+#
+
+__version__ = "0.3-pre"
+
+
+# Check the python version here before we proceed further
+requiredPythonVersion = (2,2,1)
+def checkVersion():
+ import sys, string
+ if sys.version_info < requiredPythonVersion:
+ raise Exception("%s requires at least Python %s, found %s instead." % (
+ name,
+ string.join(map(str, requiredPythonVersion), "."),
+ string.join(map(str, sys.version_info), ".")))
+checkVersion()
+
+
+# Convenience imports
+from Base import *
+import File
+import Words
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/data/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/allfonts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/allfonts Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,1 @@
+/Tmp/allfonts
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._atari-small.bdf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._atari-small.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._cursive.bdf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._cursive.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/CIDFnmap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/CIDFnmap Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,10 @@
+/Dotum-Bold (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /Adobe-Korea1-Unicode ;
+/ZenHei (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-GB1-Unicode ;
+/Batang-Regular (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /Adobe-Korea1-Unicode ;
+/VL-PGothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan1-Unicode ;
+/Dotum-Regular (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /Adobe-Korea1-Unicode ;
+/VL-Gothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan2-Unicode ;
+/VL-Gothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan1-Unicode ;
+/VL-PGothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan2-Unicode ;
+/ZenHei-CNS (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-CNS1-Unicode ;
+/Batang-Bold (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /Adobe-Korea1-Unicode ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/FAPIfontmap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/FAPIfontmap Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,155 @@
+/Garuda-Oblique << /Path (/usr/share/fonts/truetype/thai/Garuda-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstOne << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOne.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Vemana2000 << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Vemana.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-BoldItalic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-Oblique << /Path (/usr/share/fonts/truetype/thai/Umpush-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Malige << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Malige-b.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma-Oblique << /Path (/usr/share/fonts/truetype/thai/Loma-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstBook << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstBook.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Serif << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstOffice << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOffice.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree-Oblique << /Path (/usr/share/fonts/truetype/thai/Waree-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstFarsi << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstFarsi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Garuda-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Garuda-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/utkal << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/utkal.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-Italic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee-BoldOblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmex10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmex10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-Bold << /Path (/usr/share/fonts/truetype/thai/Norasi-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma << /Path (/usr/share/fonts/truetype/thai/Loma.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/wasy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/wasy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstNaskh << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstNaskh.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree << /Path (/usr/share/fonts/truetype/thai/Waree.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Garuda << /Path (/usr/share/fonts/truetype/thai/Garuda.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmsy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmsy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee-Bold << /Path (/usr/share/fonts/truetype/thai/SawasdeeBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Purisa << /Path (/usr/share/fonts/truetype/thai/Purisa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstPoster << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstPoster.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Punjabi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_pa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Waree-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Garuda-Bold << /Path (/usr/share/fonts/truetype/thai/Garuda-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/RachanaMedium << /Path (/usr/share/fonts/truetype/ttf-malayalam-fonts/Rachana_04.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstArt << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstArt.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstDecorative << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDecorative.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Hindi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_hi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-LightOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-LightOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSerif-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/mry_KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/mry_KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstDigital << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDigital.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans-Mono-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Gujarati << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_gu.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationMono-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstLetter << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstLetter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypo << /Path (/usr/share/fonts/truetype/thai/TlwgTypo.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/msbm10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msbm10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Sans-Mono << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-Italic << /Path (/usr/share/fonts/truetype/thai/Norasi-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstTitleL << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitleL.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi-Oblique << /Path (/usr/share/fonts/truetype/thai/Norasi-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Phetsarath << /Path (/usr/share/fonts/truetype/ttf-lao/Phetsarath_OT.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/mukti << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrow.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee-Oblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmr10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmr10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-Light << /Path (/usr/share/fonts/truetype/thai/Umpush-Light.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush-Bold << /Path (/usr/share/fonts/truetype/thai/Umpush-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/DejaVu-Serif-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstTitle << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitle.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Norasi << /Path (/usr/share/fonts/truetype/thai/Norasi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-Oblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/muktinarrow << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrowBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-Italic << /Path (/usr/share/fonts/truetype/thai/Kinnari-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/kacstPen << /Path (/usr/share/fonts/truetype/ttf-kacst/kacstPen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypewriter-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeMono-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSerif-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Umpush << /Path (/usr/share/fonts/truetype/thai/Umpush.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Sawasdee << /Path (/usr/share/fonts/truetype/thai/Sawasdee.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono << /Path (/usr/share/fonts/truetype/thai/TlwgMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari << /Path (/usr/share/fonts/truetype/thai/Kinnari.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgMono-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/KacstScreen << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstScreen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/FreeSans-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSansBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/msam10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msam10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/cmmi10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmmi10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Lohit-Tamil << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_ta.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/TlwgTypist << /Path (/usr/share/fonts/truetype/thai/TlwgTypist.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Waree-Bold << /Path (/usr/share/fonts/truetype/thai/Waree-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Kinnari-Bold << /Path (/usr/share/fonts/truetype/thai/Kinnari-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma-Bold << /Path (/usr/share/fonts/truetype/thai/Loma-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/LiberationSans-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Loma-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Loma-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
+/Palatino-Italic /URWPalladioL-Ital ;
+/Palatino-Bold /URWPalladioL-Bold ;
+/AvantGarde-BookOblique /URWGothicL-BookObli ;
+/Times-Bold /NimbusRomNo9L-Medi ;
+/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ;
+/Times-Roman /NimbusRomNo9L-Regu ;
+/NewCenturySchlbk-Italic /CenturySchL-Ital ;
+/HelveticaNarrow /NimbusSanL-ReguCond ;
+/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ;
+/Bookman-Light /URWBookmanL-Ligh ;
+/Palatino-BoldItalic /URWPalladioL-BoldItal ;
+/Traditional /KacstBook ;
+/Times-BoldItalic /NimbusRomNo9L-MediItal ;
+/AvantGarde-Book /URWGothicL-Book ;
+/AvantGarde-DemiOblique /URWGothicL-DemiObli ;
+/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ;
+/Helvetica-Bold /NimbusSanL-Bold ;
+/Courier-Oblique /NimbusMonL-ReguObli ;
+/Times-Italic /NimbusRomNo9L-ReguItal ;
+/Courier /NimbusMonL-Regu ;
+/Bookman-Demi /URWBookmanL-DemiBold ;
+/Helvetica-BoldOblique /NimbusSanL-BoldItal ;
+/Helvetica-Oblique /NimbusSanL-ReguItal ;
+/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ;
+/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ;
+/Courier-BoldOblique /NimbusMonL-BoldObli ;
+/HelveticaNarrow-Bold /NimbusSanL-BoldCond ;
+/AvantGarde-Demi /URWGothicL-Demi ;
+/Bookman-LightItalic /URWBookmanL-LighItal ;
+/ZapfDingbats /Dingbats ;
+/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ;
+/ZapfChancery-MediumItalic /URWChanceryL-MediItal ;
+/Helvetica /NimbusSanL-Regu ;
+/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ;
+/Palatino-Roman /URWPalladioL-Roma ;
+/NewCenturySchlbk-Bold /CenturySchL-Bold ;
+/NewCenturySchlbk-Roman /CenturySchL-Roma ;
+/Courier-Bold /NimbusMonL-Bold ;
+/Arabic /KacstBook ;
+/Helvetica-Narrow /NimbusSanL-ReguCond ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/Fontmap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/Fontmap Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,116 @@
+/LMTypewriter10-CapsOblique (lmtcso10.pfb) ;
+/Dingbats (d050000l.pfb) ;
+/URWBookmanL-DemiBoldItal (b018035l.pfb) ;
+/LMSansQuotation8-Bold (lmssqbx8.pfb) ;
+/Symbol (Symbol.pfb) ;
+/LMTypewriterVarWd10-DarkOblique (lmvtko10.pfb) ;
+/LMRoman10-Demi (lmb10.pfb) ;
+/URWPalladioL-Ital (p052023l.pfb) ;
+/LMTypewriter10-DarkOblique (lmtko10.pfb) ;
+/NimbusSanL-Regu (n019003l.pfb) ;
+/LMTypewriter10-Italic (lmtti10.pfb) ;
+/LMSansQuotation8-BoldOblique (lmssqbo8.pfb) ;
+/URWPalladioL-Roma (p052003l.pfb) ;
+/LMTypewriterVarWd10-Light (lmvtl10.pfb) ;
+/NimbusRomNo9L-Medi (n021004l.pfb) ;
+/NimbusSanL-ReguItal (n019023l.pfb) ;
+/NimbusMonL-Regu (n022003l.pfb) ;
+/LMSans10-Bold (lmssbx10.pfb) ;
+/LMRoman10-CapsOblique (lmcsco10.pfb) ;
+/CenturySchL-Roma (c059013l.pfb) ;
+/URWGothicL-BookObli (a010033l.pfb) ;
+/LMTypewriter10-LightCondensedOblique (lmtlco10.pfb) ;
+/LMSans10-DemiCondensedOblique (lmssdo10.pfb) ;
+/LMRoman10-CapsRegular (lmcsc10.pfb) ;
+/CenturySchL-BoldItal (c059036l.pfb) ;
+/LMRoman10-DemiOblique (lmbo10.pfb) ;
+/LMRoman10-Unslanted (lmu10.pfb) ;
+/LMRoman10-Bold (lmbx10.pfb) ;
+/LMSans10-DemiCondensed (lmssdc10.pfb) ;
+/URWChanceryL-MediItal (z003034l.pfb) ;
+/URWGothicL-DemiObli (a010035l.pfb) ;
+/LMTypewriterVarWd10-Oblique (lmvtto10.pfb) ;
+/NimbusMonL-Bold (n022004l.pfb) ;
+/LMTypewriter10-Oblique (lmtto10.pfb) ;
+/LMRoman10-BoldItalic (lmbxi10.pfb) ;
+/NimbusSanL-ReguCond (n019043l.pfb) ;
+/CenturySchL-Bold (c059016l.pfb) ;
+/LMTypewriterVarWd10-Regular (lmvtt10.pfb) ;
+/URWBookmanL-Ligh (b018012l.pfb) ;
+/LMSansQuotation8-Regular (lmssq8.pfb) ;
+/LMSans10-Regular (lmss10.pfb) ;
+/LMSans10-Oblique (lmsso10.pfb) ;
+/NimbusSanL-BoldCond (n019044l.pfb) ;
+/LMRoman10-Regular (lmr10.pfb) ;
+/LMTypewriter10-LightCondensed (lmtlc10.pfb) ;
+/LMTypewriterVarWd10-Dark (lmvtk10.pfb) ;
+/LMTypewriter10-CapsRegular (lmtcsc10.pfb) ;
+/LMSansQuotation8-Oblique (lmssqo8.pfb) ;
+/StandardSymL (s050000l.pfb) ;
+/NimbusRomNo9L-Regu (n021003l.pfb) ;
+/LMTypewriterVarWd10-LightOblique (lmvtlo10.pfb) ;
+/URWPalladioL-BoldItal (p052024l.pfb) ;
+/CenturySchL-Ital (c059033l.pfb) ;
+/LMRoman10-Dunhill (lmdunh10.pfb) ;
+/URWPalladioL-Bold (p052004l.pfb) ;
+/URWGothicL-Book (a010013l.pfb) ;
+/LMTypewriter10-Dark (lmtk10.pfb) ;
+/NimbusSanL-BoldItal (n019024l.pfb) ;
+/URWGothicL-Demi (a010015l.pfb) ;
+/LMTypewriter10-LightOblique (lmtlo10.pfb) ;
+/LMTypewriter10-Light (lmtl10.pfb) ;
+/NimbusSanL-BoldCondItal (n019064l.pfb) ;
+/LMRoman10-Italic (lmri10.pfb) ;
+/LMRoman10-DunhillOblique (lmduno10.pfb) ;
+/NimbusMonL-ReguObli (n022023l.pfb) ;
+/LMRoman10-Oblique (lmro10.pfb) ;
+/NimbusSanL-ReguCondItal (n019063l.pfb) ;
+/NimbusRomNo9L-MediItal (n021024l.pfb) ;
+/LMRoman10-BoldOblique (lmbxo10.pfb) ;
+/URWBookmanL-DemiBold (b018015l.pfb) ;
+/NimbusSanL-Bold (n019004l.pfb) ;
+/LMSans10-BoldOblique (lmssbo10.pfb) ;
+/URWBookmanL-LighItal (b018032l.pfb) ;
+/NimbusMonL-BoldObli (n022024l.pfb) ;
+/NimbusRomNo9L-ReguItal (n021023l.pfb) ;
+/LMTypewriter10-Regular (lmtt10.pfb) ;
+/Palatino-Italic /URWPalladioL-Ital ;
+/Palatino-Bold /URWPalladioL-Bold ;
+/AvantGarde-BookOblique /URWGothicL-BookObli ;
+/Times-Bold /NimbusRomNo9L-Medi ;
+/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ;
+/Times-Roman /NimbusRomNo9L-Regu ;
+/NewCenturySchlbk-Italic /CenturySchL-Ital ;
+/HelveticaNarrow /NimbusSanL-ReguCond ;
+/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ;
+/Bookman-Light /URWBookmanL-Ligh ;
+/Palatino-BoldItalic /URWPalladioL-BoldItal ;
+/Traditional /KacstBook ;
+/Times-BoldItalic /NimbusRomNo9L-MediItal ;
+/AvantGarde-Book /URWGothicL-Book ;
+/AvantGarde-DemiOblique /URWGothicL-DemiObli ;
+/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ;
+/Helvetica-Bold /NimbusSanL-Bold ;
+/Courier-Oblique /NimbusMonL-ReguObli ;
+/Times-Italic /NimbusRomNo9L-ReguItal ;
+/Courier /NimbusMonL-Regu ;
+/Bookman-Demi /URWBookmanL-DemiBold ;
+/Helvetica-BoldOblique /NimbusSanL-BoldItal ;
+/Helvetica-Oblique /NimbusSanL-ReguItal ;
+/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ;
+/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ;
+/Courier-BoldOblique /NimbusMonL-BoldObli ;
+/HelveticaNarrow-Bold /NimbusSanL-BoldCond ;
+/AvantGarde-Demi /URWGothicL-Demi ;
+/Bookman-LightItalic /URWBookmanL-LighItal ;
+/ZapfDingbats /Dingbats ;
+/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ;
+/ZapfChancery-MediumItalic /URWChanceryL-MediItal ;
+/Helvetica /NimbusSanL-Regu ;
+/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ;
+/Palatino-Roman /URWPalladioL-Roma ;
+/NewCenturySchlbk-Bold /CenturySchL-Bold ;
+/NewCenturySchlbk-Roman /CenturySchL-Roma ;
+/Courier-Bold /NimbusMonL-Bold ;
+/Arabic /KacstBook ;
+/Helvetica-Narrow /NimbusSanL-ReguCond ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/others/cidfmap
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/others/cidfmap Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,10 @@
+/Dotum-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
+/ZenHei << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(GB1) 0] >> ;
+/Batang-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
+/VL-PGothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
+/Dotum-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
+/VL-Gothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
+/VL-Gothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
+/VL-PGothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
+/ZenHei-CNS << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(CNS1) 0] >> ;
+/Batang-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,124 @@
+Bitstream Vera Fonts Copyright
+
+The fonts have a generous copyright, allowing derivative works (as
+long as "Bitstream" or "Vera" are not in the names), and full
+redistribution (so long as they are not *sold* by themselves). They
+can be be bundled, redistributed and sold with any software.
+
+The fonts are distributed under the following copyright:
+
+Copyright
+=========
+
+Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream
+Vera is a trademark of Bitstream, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the fonts accompanying this license ("Fonts") and associated
+documentation files (the "Font Software"), to reproduce and distribute
+the Font Software, including without limitation the rights to use,
+copy, merge, publish, distribute, and/or sell copies of the Font
+Software, and to permit persons to whom the Font Software is furnished
+to do so, subject to the following conditions:
+
+The above copyright and trademark notices and this permission notice
+shall be included in all copies of one or more of the Font Software
+typefaces.
+
+The Font Software may be modified, altered, or added to, and in
+particular the designs of glyphs or characters in the Fonts may be
+modified and additional glyphs or characters may be added to the
+Fonts, only if the fonts are renamed to names not containing either
+the words "Bitstream" or the word "Vera".
+
+This License becomes null and void to the extent applicable to Fonts
+or Font Software that has been modified and is distributed under the
+"Bitstream Vera" names.
+
+The Font Software may be sold as part of a larger software package but
+no copy of one or more of the Font Software typefaces may be sold by
+itself.
+
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
+BITSTREAM OR THE GNOME FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL,
+OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT
+SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.
+
+Except as contained in this notice, the names of Gnome, the Gnome
+Foundation, and Bitstream Inc., shall not be used in advertising or
+otherwise to promote the sale, use or other dealings in this Font
+Software without prior written authorization from the Gnome Foundation
+or Bitstream Inc., respectively. For further information, contact:
+fonts at gnome dot org.
+
+Copyright FAQ
+=============
+
+ 1. I don't understand the resale restriction... What gives?
+
+ Bitstream is giving away these fonts, but wishes to ensure its
+ competitors can't just drop the fonts as is into a font sale system
+ and sell them as is. It seems fair that if Bitstream can't make money
+ from the Bitstream Vera fonts, their competitors should not be able to
+ do so either. You can sell the fonts as part of any software package,
+ however.
+
+ 2. I want to package these fonts separately for distribution and
+ sale as part of a larger software package or system. Can I do so?
+
+ Yes. A RPM or Debian package is a "larger software package" to begin
+ with, and you aren't selling them independently by themselves.
+ See 1. above.
+
+ 3. Are derivative works allowed?
+ Yes!
+
+ 4. Can I change or add to the font(s)?
+ Yes, but you must change the name(s) of the font(s).
+
+ 5. Under what terms are derivative works allowed?
+
+ You must change the name(s) of the fonts. This is to ensure the
+ quality of the fonts, both to protect Bitstream and Gnome. We want to
+ ensure that if an application has opened a font specifically of these
+ names, it gets what it expects (though of course, using fontconfig,
+ substitutions could still could have occurred during font
+ opening). You must include the Bitstream copyright. Additional
+ copyrights can be added, as per copyright law. Happy Font Hacking!
+
+ 6. If I have improvements for Bitstream Vera, is it possible they might get
+ adopted in future versions?
+
+ Yes. The contract between the Gnome Foundation and Bitstream has
+ provisions for working with Bitstream to ensure quality additions to
+ the Bitstream Vera font family. Please contact us if you have such
+ additions. Note, that in general, we will want such additions for the
+ entire family, not just a single font, and that you'll have to keep
+ both Gnome and Jim Lyles, Vera's designer, happy! To make sense to add
+ glyphs to the font, they must be stylistically in keeping with Vera's
+ design. Vera cannot become a "ransom note" font. Jim Lyles will be
+ providing a document describing the design elements used in Vera, as a
+ guide and aid for people interested in contributing to Vera.
+
+ 7. I want to sell a software package that uses these fonts: Can I do so?
+
+ Sure. Bundle the fonts with your software and sell your software
+ with the fonts. That is the intent of the copyright.
+
+ 8. If applications have built the names "Bitstream Vera" into them,
+ can I override this somehow to use fonts of my choosing?
+
+ This depends on exact details of the software. Most open source
+ systems and software (e.g., Gnome, KDE, etc.) are now converting to
+ use fontconfig (see www.fontconfig.org) to handle font configuration,
+ selection and substitution; it has provisions for overriding font
+ names and subsituting alternatives. An example is provided by the
+ supplied local.conf file, which chooses the family Bitstream Vera for
+ "sans", "serif" and "monospace". Other software (e.g., the XFree86
+ core server) has other mechanisms for font substitution.
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/README.TXT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/README.TXT Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,11 @@
+Contained herin is the Bitstream Vera font family.
+
+The Copyright information is found in the COPYRIGHT.TXT file (along
+with being incoporated into the fonts themselves).
+
+The releases notes are found in the file "RELEASENOTES.TXT".
+
+We hope you enjoy Vera!
+
+ Bitstream, Inc.
+ The Gnome Project
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,162 @@
+Bitstream Vera Fonts - April 16, 2003
+=====================================
+
+The version number of these fonts is 1.10 to distinguish them from the
+beta test fonts.
+
+Note that the Vera copyright is incorporated in the fonts themselves.
+The License field in the fonts contains the copyright license as it
+appears below. The TrueType copyright field is not large enough to
+contain the full license, so the license is incorporated (as you might
+think if you thought about it) into the license field, which
+unfortunately can be obscure to find. (In pfaedit, see: Element->Font
+Info->TTFNames->License).
+
+Our apologies for it taking longer to complete the fonts than planned.
+Beta testers requested a tighter line spacing (less leading) and Jim
+Lyles redesigned Vera's accents to bring its line spacing to more
+typical of other fonts. This took additional time and effort. Our
+thanks to Jim for this effort above and beyond the call of duty.
+
+There are four monospace and sans faces (normal, oblique, bold, bold
+oblique) and two serif faces (normal and bold). Fontconfig/Xft2 (see
+www.fontconfig.org) can artificially oblique the serif faces for you:
+this loses hinting and distorts the faces slightly, but is visibly
+different than normal and bold, and reasonably pleasing.
+
+On systems with fontconfig 2.0 or 2.1 installed, making your sans,
+serif and monospace fonts default to these fonts is very easy. Just
+drop the file local.conf into your /etc/fonts directory. This will
+make the Bitstream fonts your default fonts for all applications using
+fontconfig (if sans, serif, or monospace names are used, as they often
+are as default values in many desktops). The XML in local.conf may
+need modification to enable subpixel decimation, if appropriate,
+however, the commented out phrase does so for XFree86 4.3, in the case
+that the server does not have sufficient information to identify the
+use of a flat panel. Fontconfig 2.2 adds Vera to the list of font
+families and will, by default use it as the default sans, serif and
+monospace fonts.
+
+During the testing of the final Vera fonts, we learned that screen
+fonts in general are only typically hinted to work correctly at
+integer pixel sizes. Vera is coded internally for integer sizes only.
+We need to investigate further to see if there are commonly used fonts
+that are hinted to be rounded but are not rounded to integer sizes due
+to oversights in their coding.
+
+Most fonts work best at 8 pixels and below if anti-aliased only, as
+the amount of work required to hint well at smaller and smaller sizes
+becomes astronomical. GASP tables are typically used to control
+whether hinting is used or not, but Freetype/Xft does not currently
+support GASP tables (which are present in Vera).
+
+To mitigate this problem, both for Vera and other fonts, there will be
+(very shortly) a new fontconfig 2.2 release that will, by default not
+apply hints if the size is below 8 pixels. if you should have a font
+that in fact has been hinted more agressively, you can use fontconfig
+to note this exception. We believe this should improve many hinted
+fonts in addition to Vera, though implemeting GASP support is likely
+the right long term solution.
+
+Font rendering in Gnome or KDE is the combination of algorithms in
+Xft2 and Freetype, along with hinting in the fonts themselves. It is
+vital to have sufficient information to disentangle problems that you
+may observe.
+
+Note that having your font rendering system set up correctly is vital
+to proper judgement of problems of the fonts:
+
+ * Freetype may or may not be configured to in ways that may
+ implement execution of possibly patented (in some parts of the world)
+ TrueType hinting algorithms, particularly at small sizes. Best
+ results are obtained while using these algorithms.
+
+ * The freetype autohinter (used when the possibly patented
+ algorithms are not used) continues to improve with each release. If
+ you are using the autohinter, please ensure you are using an up to
+ date version of freetype before reporting problems.
+
+ * Please identify what version of freetype you are using in any
+ bug reports, and how your freetype is configured.
+
+ * Make sure you are not using the freetype version included in
+ XFree86 4.3, as it has bugs that significantly degrade most fonts,
+ including Vera. if you build XFree86 4.3 from source yourself, you may
+ have installed this broken version without intending it (as I
+ did). Vera was verified with the recently released Freetype 2.1.4. On
+ many systems, 'ldd" can be used to see which freetype shared library
+ is actually being used.
+
+ * Xft/X Render does not (yet) implement gamma correction. This
+ causes significant problems rendering white text on a black background
+ (causing partial pixels to be insufficiently shaded) if the gamma of
+ your monitor has not been compensated for, and minor problems with
+ black text on a while background. The program "xgamma" can be used to
+ set a gamma correction value in the X server's color pallette. Most
+ monitors have a gamma near 2.
+
+ * Note that the Vera family uses minimal delta hinting. Your
+ results on other systems when not used anti-aliased may not be
+ entirely satisfying. We are primarily interested in reports of
+ problems on open source systems implementing Xft2/fontconfig/freetype
+ (which implements antialiasing and hinting adjustements, and
+ sophisticated subpixel decimation on flatpanels). Also, the
+ algorithms used by Xft2 adjust the hints to integer widths and the
+ results are crisper on open source systems than on Windows or
+ MacIntosh.
+
+ * Your fontconfig may (probably does) predate the release of
+ fontconfig 2.2, and you may see artifacts not present when the font is
+ used at very small sizes with hinting enabled. "vc-list -V" can be
+ used to see what version you have installed.
+
+We believe and hope that these fonts will resolve the problems
+reported during beta test. The largest change is the reduction of
+leading (interline spacing), which had annoyed a number of people, and
+reduced Vera's utility for some applcations. The Vera monospace font
+should also now make '0' and 'O' and '1' and 'l' more clearly
+distinguishable.
+
+The version of these fonts is version 1.10. Fontconfig should be
+choosing the new version of the fonts if both the released fonts and
+beta test fonts are installed (though please discard them: they have
+names of form tt20[1-12]gn.ttf). Note that older versions of
+fontconfig sometimes did not rebuild their cache correctly when new
+fonts are installed: please upgrade to fontconfig 2.2. "fc-cache -f"
+can be used to force rebuilding fontconfig's cache files.
+
+If you note problems, please send them to fonts at gnome dot org, with
+exactly which face and size and unicode point you observe the problem
+at. The xfd utility from XFree86 CVS may be useful for this (e.g. "xfd
+-fa sans"). A possibly more useful program to examine fonts at a
+variety of sizes is the "waterfall" program found in Keith Packard's
+CVS.
+
+ $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS login
+ Logging in to :pserver:anoncvs@keithp.com:2401/local/src/CVS
+ CVS password:
+ $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS co waterfall
+ $ cd waterfall
+ $ xmkmf -a
+ $ make
+ # make install
+ # make install.man
+
+Again, please make sure you are running an up-to-date freetype, and
+that you are only examining integer sizes.
+
+Reporting Problems
+==================
+
+Please send problem reports to fonts at gnome org, with the following
+information:
+
+ 1. Version of Freetype, Xft2 and fontconfig
+ 2. Whether TT hinting is being used, or the autohinter
+ 3. Application being used
+ 4. Character/Unicode code point that has problems (if applicable)
+ 5. Version of which operating system
+ 6. Please include a screenshot, when possible.
+
+Please check the fonts list archives before reporting problems to cut
+down on duplication.
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/Vera.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/Vera.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf
Binary file data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/local.conf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/fonts/vera/local.conf Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+ serif
+
+ Bitstream Vera Serif
+
+
+
+ sans-serif
+
+ Bitstream Vera Sans
+
+
+
+ monospace
+
+ Bitstream Vera Sans Mono
+
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/.DS_Store
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/1.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/1.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/10.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/10.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/11.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/11.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/12.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/12.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/2.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/2.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/3.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/3.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/4.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/4.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/5.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/5.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/6.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/6.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/7.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/7.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/8.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/8.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/9.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/9.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/pictures/abstract/README Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,3 @@
+These images were created by the author with Fyre, expressly for PyCAPTCHA.
+
+Copyright (c) 2004 Micah Dowty
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg
Binary file data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/pictures/nature/README Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,2 @@
+These are uncopyrighted images gathered from various sources,
+including the author's family and national park service web sites.
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/words/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/words/README Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,4 @@
+These word lists are from various sources:
+
+basic-english:
+ http://simple.wikipedia.org/wiki/Basic_English_Alphabetical_Wordlist
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/words/basic-english
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/words/basic-english Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,852 @@
+a
+able
+about
+account
+acid
+across
+act
+addition
+adjustment
+advertisement
+agreement
+after
+again
+against
+air
+all
+almost
+among
+amount
+amusement
+and
+angle
+angry
+animal
+answer
+ant
+any
+apparatus
+apple
+approval
+arch
+argument
+arm
+army
+art
+as
+at
+attack
+attempt
+attention
+attraction
+authority
+automatic
+awake
+baby
+back
+bad
+bag
+balance
+ball
+band
+base
+basin
+basket
+bath
+be
+beautiful
+because
+bed
+bee
+before
+behavior
+belief
+bell
+bent
+berry
+between
+bird
+birth
+bit
+bite
+bitter
+black
+blade
+blood
+blow
+blue
+board
+boat
+body
+boiling
+bone
+book
+boot
+bottle
+box
+boy
+brain
+brake
+branch
+brass
+bread
+breath
+brick
+bridge
+bright
+broken
+brother
+brown
+brush
+bucket
+building
+bulb
+burn
+burst
+business
+but
+butter
+button
+by
+cake
+camera
+canvas
+card
+care
+carriage
+cart
+cat
+cause
+certain
+chain
+chalk
+chance
+change
+cheap
+cheese
+chemical
+chest
+chief
+chin
+church
+circle
+clean
+clear
+clock
+cloth
+cloud
+coal
+coat
+cold
+collar
+color
+comb
+come
+comfort
+committee
+common
+company
+comparison
+competition
+complete
+complex
+condition
+connection
+conscious
+control
+cook
+copper
+copy
+cord
+cork
+cotton
+cough
+country
+cover
+cow
+crack
+credit
+crime
+cruel
+crush
+cry
+cup
+current
+curtain
+curve
+cushion
+cut
+damage
+danger
+dark
+daughter
+day
+dead
+dear
+death
+debt
+decision
+deep
+degree
+delicate
+dependent
+design
+desire
+destruction
+detail
+development
+different
+digestion
+direction
+dirty
+discovery
+discussion
+disease
+disgust
+distance
+distribution
+division
+do
+dog
+door
+down
+doubt
+drain
+drawer
+dress
+drink
+driving
+drop
+dry
+dust
+ear
+early
+earth
+east
+edge
+education
+effect
+egg
+elastic
+electric
+end
+engine
+enough
+equal
+error
+even
+event
+ever
+every
+example
+exchange
+existence
+expansion
+experience
+expert
+eye
+face
+fact
+fall
+false
+family
+far
+farm
+fat
+father
+fear
+feather
+feeble
+feeling
+female
+fertile
+fiction
+field
+fight
+finger
+fire
+first
+fish
+fixed
+flag
+flame
+flat
+flight
+floor
+flower
+fly
+fold
+food
+foolish
+foot
+for
+force
+fork
+form
+forward
+fowl
+frame
+free
+frequent
+friend
+from
+front
+fruit
+full
+future
+garden
+general
+get
+girl
+give
+glass
+glove
+go
+goat
+gold
+good
+government
+grain
+grass
+great
+green
+grey/gray
+grip
+group
+growth
+guide
+gun
+hair
+hammer
+hand
+hanging
+happy
+harbor
+hard
+harmony
+hat
+hate
+have
+he
+head
+healthy
+hearing
+heart
+heat
+help
+here
+high
+history
+hole
+hollow
+hook
+hope
+horn
+horse
+hospital
+hour
+house
+how
+humor
+ice
+idea
+if
+ill
+important
+impulse
+in
+increase
+industry
+ink
+insect
+instrument
+insurance
+interest
+invention
+iron
+island
+jelly
+jewel
+join
+journey
+judge
+jump
+keep
+kettle
+key
+kick
+kind
+kiss
+knee
+knife
+knot
+knowledge
+land
+language
+last
+late
+laugh
+law
+lead
+leaf
+learning
+leather
+left
+leg
+let
+letter
+level
+library
+lift
+light
+like
+limit
+line
+linen
+lip
+liquid
+list
+little
+less
+least
+living
+lock
+long
+loose
+loss
+loud
+love
+low
+machine
+make
+male
+man
+manager
+map
+mark
+market
+married
+match
+material
+mass
+may
+meal
+measure
+meat
+medical
+meeting
+memory
+metal
+middle
+military
+milk
+mind
+mine
+minute
+mist
+mixed
+money
+monkey
+month
+moon
+morning
+mother
+motion
+mountain
+mouth
+move
+much
+more
+most
+muscle
+music
+nail
+name
+narrow
+nation
+natural
+near
+necessary
+neck
+need
+needle
+nerve
+net
+new
+news
+night
+no
+noise
+normal
+north
+nose
+not
+note
+now
+number
+nut
+observation
+of
+off
+offer
+office
+oil
+old
+on
+only
+open
+operation
+opposite
+opinion
+other
+or
+orange
+order
+organization
+ornament
+out
+oven
+over
+owner
+page
+pain
+paint
+paper
+parallel
+parcel
+part
+past
+paste
+payment
+peace
+pen
+pencil
+person
+physical
+picture
+pig
+pin
+pipe
+place
+plane
+plant
+plate
+play
+please
+pleasure
+plough/plow
+pocket
+point
+poison
+polish
+political
+poor
+porter
+position
+possible
+pot
+potato
+powder
+power
+present
+price
+print
+prison
+private
+probable
+process
+produce
+profit
+property
+prose
+protest
+public
+pull
+pump
+punishment
+purpose
+push
+put
+quality
+question
+quick
+quiet
+quite
+rail
+rain
+range
+rat
+rate
+ray
+reaction
+red
+reading
+ready
+reason
+receipt
+record
+regret
+regular
+relation
+religion
+representative
+request
+respect
+responsible
+rest
+reward
+rhythm
+rice
+right
+ring
+river
+road
+rod
+roll
+roof
+room
+root
+rough
+round
+rub
+rule
+run
+sad
+safe
+sail
+salt
+same
+sand
+say
+scale
+school
+science
+scissors
+screw
+sea
+seat
+second
+secret
+secretary
+see
+seed
+selection
+self
+send
+seem
+sense
+separate
+serious
+servant
+sex
+shade
+shake
+shame
+sharp
+sheep
+shelf
+ship
+shirt
+shock
+shoe
+short
+shut
+side
+sign
+silk
+silver
+simple
+sister
+size
+skin
+skirt
+sky
+sleep
+slip
+slope
+slow
+small
+smash
+smell
+smile
+smoke
+smooth
+snake
+sneeze
+snow
+so
+soap
+society
+sock
+soft
+solid
+some
+son
+song
+sort
+sound
+south
+soup
+space
+spade
+special
+sponge
+spoon
+spring
+square
+stamp
+stage
+star
+start
+statement
+station
+steam
+stem
+steel
+step
+stick
+still
+stitch
+stocking
+stomach
+stone
+stop
+store
+story
+strange
+street
+stretch
+sticky
+stiff
+straight
+strong
+structure
+substance
+sugar
+suggestion
+summer
+support
+surprise
+such
+sudden
+sun
+sweet
+swim
+system
+table
+tail
+take
+talk
+tall
+taste
+tax
+teaching
+tendency
+test
+than
+that
+the
+then
+theory
+there
+thick
+thin
+thing
+this
+thought
+thread
+throat
+though
+through
+thumb
+thunder
+ticket
+tight
+tired
+till
+time
+tin
+to
+toe
+together
+tomorrow
+tongue
+tooth
+top
+touch
+town
+trade
+train
+transport
+tray
+tree
+trick
+trousers
+true
+trouble
+turn
+twist
+umbrella
+under
+unit
+use
+up
+value
+verse
+very
+vessel
+view
+violent
+voice
+walk
+wall
+waiting
+war
+warm
+wash
+waste
+watch
+water
+wave
+wax
+way
+weather
+week
+weight
+well
+west
+wet
+wheel
+when
+where
+while
+whip
+whistle
+white
+who
+why
+wide
+will
+wind
+window
+wine
+wing
+winter
+wire
+wise
+with
+woman
+wood
+wool
+word
+work
+worm
+wound
+writing
+wrong
+year
+yellow
+yes
+yesterday
+you
+young
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Captcha/data/words/characters
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Captcha/data/words/characters Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,62 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/Facade.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/Facade.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+
+
+from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
+import numpy
+
+# Une fonction simple pour generer un captcha
+# ease : represente la difficulte du captcha a generer
+# 0 = facile et 1 (ou autre chose) = difficile
+#solution : specifie si on veut en retour un array numpy representant
+#l image ou un tuple contenant l'array et la solution du captcha.
+
+# Des fontes additionnelles peuvent etre ajoutees au dossier pyCaptcha/Captcha/data/fonts/others
+# Le programme choisit une fonte aleatoirement dans ce dossier ainsi que le dossir vera.
+
+
+def generateCaptcha (ease=0, solution=0):
+
+ if ease == 1:
+ g = AngryGimpy()
+
+ else:
+ g = PseudoGimpy()
+
+ i = g.render()
+ a = numpy.asarray(i)
+
+ if solution == 0:
+ return a
+
+ else :
+ return (a, g.solutions)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/README Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,51 @@
+======================
+Python CAPTCHA package
+======================
+
+About
+-----
+
+This is the PyCAPTCHA package, a collection of Python modules
+implementing CAPTCHAs: automated tests that humans should pass,
+but current computer programs can't. These tests are often
+used for security.
+
+See http://www.captcha.net for more information and examples.
+
+This project was started because the CIA project, written in
+Python, needed a CAPTCHA to automate its user creation process
+safely. All existing implementations the author could find were
+written in Java or for the .NET framework, so a simple Python
+alternative was needed.
+
+Examples
+--------
+
+Included are several example programs:
+
+ - simple_example.py is a bare-bones example that just generates
+ and displays an image.
+
+ - http_example.py is a longer example that uses BaseHTTPServer
+ to simulate a CAPTCHA's use in a web environment. Running this
+ example and connecting to it from your web browser is a quick
+ and easy way to see PyCAPTCHA in action
+
+ - modpython_example.py is a version of http_example that runs
+ from an Apache server equipped with a properly configured
+ mod_python.
+
+
+Dependencies
+------------
+
+- Python 2.2.1 or later
+- the Python Imaging Library, required for visual CAPTCHAs
+
+
+Contacts
+--------
+
+Micah Dowty
+
+'scanline' on irc.freenode.net
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/http_example.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/http_example.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+#
+# An example that presents CAPTCHA tests in a web environment
+# and gives the user a chance to solve them. Run it, optionally
+# specifying a port number on the command line, then point your web
+# browser at the given URL.
+#
+
+from Captcha.Visual import Tests
+from Captcha import Factory
+import BaseHTTPServer, urlparse, sys
+
+
+class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+ def do_GET(self):
+ scheme, host, path, parameters, query, fragment = urlparse.urlparse(self.path)
+
+ # Split the path into segments
+ pathSegments = path.split('/')[1:]
+
+ # Split the query into key-value pairs
+ args = {}
+ for pair in query.split("&"):
+ if pair.find("=") >= 0:
+ key, value = pair.split("=", 1)
+ args.setdefault(key, []).append(value)
+ else:
+ args[pair] = []
+
+ # A hack so it works with a proxy configured for VHostMonster :)
+ if pathSegments[0] == "vhost":
+ pathSegments = pathSegments[3:]
+
+ if pathSegments[0] == "":
+ self.handleRootPage(args.get('test', Tests.__all__)[0])
+
+ elif pathSegments[0] == "images":
+ self.handleImagePage(pathSegments[1])
+
+ elif pathSegments[0] == "solutions":
+ self.handleSolutionPage(pathSegments[1], args['word'][0])
+
+ else:
+ self.handle404()
+
+ def handle404(self):
+ self.send_response(404)
+ self.send_header("Content-Type", "text/html")
+ self.end_headers()
+        self.wfile.write("No such resource\n")
+
+ def handleRootPage(self, testName):
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html")
+ self.end_headers()
+
+ test = self.captchaFactory.new(getattr(Tests, testName))
+
+ # Make a list of tests other than the one we're using
+ others = []
+ for t in Tests.__all__:
+ if t != testName:
+ others.append('%s' % (t,t))
+ others = "\n".join(others)
+
+ self.wfile.write("""
+
+PyCAPTCHA Example
+
+
+PyCAPTCHA Example
+
+ %s:
+ %s
+
+
+
+
+
+
+
+
+Or try...
+
+
+
+
+
+""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others))
+
+ def handleImagePage(self, id):
+ test = self.captchaFactory.get(id)
+ if not test:
+ return self.handle404()
+
+ self.send_response(200)
+ self.send_header("Content-Type", "image/jpeg")
+ self.end_headers()
+ test.render().save(self.wfile, "JPEG")
+
+ def handleSolutionPage(self, id, word):
+ test = self.captchaFactory.get(id)
+ if not test:
+ return self.handle404()
+
+ if not test.valid:
+ # Invalid tests will always return False, to prevent
+ # random trial-and-error attacks. This could be confusing to a user...
+ result = "Test invalidated, try another test"
+ elif test.testSolutions([word]):
+ result = "Correct"
+ else:
+ result = "Incorrect"
+
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html")
+ self.end_headers()
+ self.wfile.write("""
+
+PyCAPTCHA Example
+
+
+PyCAPTCHA Example
+%s
+
+%s
+You guessed: %s
+Possible solutions: %s
+Try again
+
+
+""" % (test.__class__.__name__, test.id, result, word, ", ".join(test.solutions)))
+
+
+def main(port):
+ print "Starting server at http://localhost:%d/" % port
+ handler = RequestHandler
+ handler.captchaFactory = Factory()
+ BaseHTTPServer.HTTPServer(('', port), RequestHandler).serve_forever()
+
+if __name__ == "__main__":
+ # The port number can be specified on the command line, default is 8080
+ if len(sys.argv) >= 2:
+ port = int(sys.argv[1])
+ else:
+ port = 8080
+ main(port)
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/modpython_example.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/modpython_example.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,113 @@
+#
+# An example that presents CAPTCHA tests in a web environment
+# and gives the user a chance to solve them.
+#
+# This example is for use with Apache using mod_python and its
+# Publisher handler. For example, if your apache configuration
+# included something like:
+#
+# AddHandler python-program .py
+# PythonHandler mod_python.publisher
+#
+# You could place this script anywhere in your web space to see
+# the demo.
+#
+# --Micah
+#
+
+from Captcha.Visual import Tests
+import Captcha
+from mod_python import apache
+
+
+def _getFactory(req):
+ return Captcha.PersistentFactory("/tmp/pycaptcha_%s" % req.interpreter)
+
+
+def test(req, name=Tests.__all__[0]):
+ """Show a newly generated CAPTCHA of the given class.
+ Default is the first class name given in Tests.__all__
+ """
+ test = _getFactory(req).new(getattr(Tests, name))
+
+ # Make a list of tests other than the one we're using
+ others = []
+ for t in Tests.__all__:
+ if t != name:
+ others.append('%s' % (t,t))
+ others = "\n".join(others)
+
+ return """
+
+PyCAPTCHA Example
+
+
+PyCAPTCHA Example (for mod_python)
+
+ %s:
+ %s
+
+
+
+
+
+
+
+
+Or try...
+
+
+
+
+
+""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others)
+
+
+def image(req, id):
+ """Generate an image for the CAPTCHA with the given ID string"""
+ test = _getFactory(req).get(id)
+ if not test:
+ raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
+ req.content_type = "image/jpeg"
+ test.render().save(req, "JPEG")
+ return apache.OK
+
+
+def solution(req, id, word):
+ """Grade a CAPTCHA given a solution word"""
+ test = _getFactory(req).get(id)
+ if not test:
+ raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
+
+ if not test.valid:
+ # Invalid tests will always return False, to prevent
+ # random trial-and-error attacks. This could be confusing to a user...
+ result = "Test invalidated, try another test"
+ elif test.testSolutions([word]):
+ result = "Correct"
+ else:
+ result = "Incorrect"
+
+ return """
+
+PyCAPTCHA Example
+
+
+PyCAPTCHA Example
+%s
+
+%s
+You guessed: %s
+Possible solutions: %s
+Try again
+
+
+""" % (test.__class__.__name__, test.id, result, word, ", ".join(test.solutions))
+
+### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/output.png
Binary file data_generation/transformations/pycaptcha/output.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/setup.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/setup.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+from distutils.core import setup
+from setup.my_install_data import *
+
+setup (name = "PyCAPTCHA",
+ version = "0.4",
+ description = "A Python framework for CAPTCHA tests",
+ maintainer = "Micah Dowty",
+ maintainer_email = "micah@navi.cx",
+ license = "MIT",
+ packages = [
+ 'Captcha',
+ 'Captcha.Visual',
+ ],
+ cmdclass = {
+ 'install_data': my_install_data,
+ },
+ data_files = [Data_Files(
+ preserve_path = 1,
+ base_dir = 'install_lib',
+ copy_to = 'Captcha/data',
+ strip_dirs = 2,
+ template = [
+ 'graft Captcha/data',
+ ],
+ )],
+ )
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/setup/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/setup/__init__.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,1 @@
+# Extra modules for use with distutils
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/setup/my_install_data.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/setup/my_install_data.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,191 @@
+"""my_install_data.py
+
+Provides a more sophisticated facility to install data files
+than distutils' install_data does.
+You can specify your files as a template like in MANIFEST.in
+and you have more control over the copy process.
+
+Copyright 2000 by Rene Liebscher, Germany.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Note:
+This licence is only for this file.
+PyOpenGL has its own licence. (it is almost identical.)
+"""
+
+# created 2000/08/01, Rene Liebscher
+
+###########################################################################
+# import some modules we need
+
+import os,sys,string
+from types import StringType,TupleType,ListType
+from distutils.util import change_root
+from distutils.filelist import FileList
+from distutils.command.install_data import install_data
+
+###########################################################################
+# a container class for our more sophisticated install mechanism
+
+class Data_Files:
+ """ container for list of data files.
+ supports alternate base_dirs e.g. 'install_lib','install_header',...
+ supports a directory where to copy files
+ supports templates as in MANIFEST.in
+ supports preserving of paths in filenames
+ eg. foo/xyz is copied to base_dir/foo/xyz
+ supports stripping of leading dirs of source paths
+ eg. foo/bar1/xyz, foo/bar2/abc can be copied to bar1/xyz, bar2/abc
+ """
+
+ def __init__(self,base_dir=None,files=None,copy_to=None,template=None,preserve_path=0,strip_dirs=0):
+ self.base_dir = base_dir
+ self.files = files
+ self.copy_to = copy_to
+ self.template = template
+ self.preserve_path = preserve_path
+ self.strip_dirs = strip_dirs
+ self.finalized = 0
+
+ def warn (self, msg):
+ sys.stderr.write ("warning: %s: %s\n" %
+ ("install_data", msg))
+
+ def debug_print (self, msg):
+ """Print 'msg' to stdout if the global DEBUG (taken from the
+ DISTUTILS_DEBUG environment variable) flag is true.
+ """
+ from distutils.core import DEBUG
+ if DEBUG:
+ print msg
+
+
+ def finalize(self):
+ """ complete the files list by processing the given template """
+ if self.finalized:
+ return
+ if self.files == None:
+ self.files = []
+ if self.template != None:
+ if type(self.template) == StringType:
+ self.template = string.split(self.template,";")
+ filelist = FileList(self.warn,self.debug_print)
+ for line in self.template:
+ filelist.process_template_line(string.strip(line))
+ filelist.sort()
+ filelist.remove_duplicates()
+ self.files.extend(filelist.files)
+ self.finalized = 1
+
+# end class Data_Files
+
+###########################################################################
+# a more sophisticated install routine than distutils install_data
+
+class my_install_data (install_data):
+
+ def check_data(self,d):
+ """ check if data are in new format, if not create a suitable object.
+ returns finalized data object
+ """
+ if not isinstance(d, Data_Files):
+ self.warn(("old-style data files list found "
+ "-- please convert to Data_Files instance"))
+ if type(d) is TupleType:
+ if len(d) != 2 or not (type(d[1]) is ListType):
+ raise DistutilsSetupError, \
+ ("each element of 'data_files' option must be an "
+ "Data File instance, a string or 2-tuple (string,[strings])")
+ d = Data_Files(copy_to=d[0],files=d[1])
+ else:
+ if not (type(d) is StringType):
+ raise DistutilsSetupError, \
+ ("each element of 'data_files' option must be an "
+ "Data File instance, a string or 2-tuple (string,[strings])")
+ d = Data_Files(files=[d])
+ d.finalize()
+ return d
+
+ def run(self):
+ self.outfiles = []
+ install_cmd = self.get_finalized_command('install')
+
+ for d in self.data_files:
+ d = self.check_data(d)
+
+ install_dir = self.install_dir
+ # alternative base dir given => overwrite install_dir
+ if d.base_dir != None:
+ install_dir = getattr(install_cmd,d.base_dir)
+
+ # copy to an other directory
+ if d.copy_to != None:
+ if not os.path.isabs(d.copy_to):
+                    # relative path to install_dir
+ dir = os.path.join(install_dir, d.copy_to)
+ elif install_cmd.root:
+ # absolute path and alternative root set
+ dir = change_root(self.root,d.copy_to)
+ else:
+ # absolute path
+ dir = d.copy_to
+ else:
+ # simply copy to install_dir
+ dir = install_dir
+            # warn if necessary
+ self.warn("setup script did not provide a directory to copy files to "
+ " -- installing right in '%s'" % install_dir)
+
+ dir=os.path.normpath(dir)
+ # create path
+ self.mkpath(dir)
+
+ # copy all files
+ for src in d.files:
+ if d.strip_dirs > 0:
+ dst = string.join(string.split(src,os.sep)[d.strip_dirs:],os.sep)
+ else:
+ dst = src
+ if d.preserve_path:
+ # preserve path in filename
+ self.mkpath(os.path.dirname(os.path.join(dir,dst)))
+ out = self.copy_file(src, os.path.join(dir,dst))
+ else:
+ out = self.copy_file(src, dir)
+ if type(out) is TupleType:
+ out = out[0]
+ self.outfiles.append(out)
+
+ return self.outfiles
+
+ def get_inputs (self):
+ inputs = []
+ for d in self.data_files:
+ d = self.check_data(d)
+ inputs.append(d.files)
+ return inputs
+
+ def get_outputs (self):
+ return self.outfiles
+
+
+###########################################################################
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/simple_example.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/simple_example.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+#
+# A very simple example that creates a random image from the
+# PseudoGimpy CAPTCHA, saves and shows it, and prints the list
+# of solutions. Normally you would call testSolutions rather
+# than reading this list yourself.
+#
+from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
+import numpy
+#from numpy import *
+
+#g = AngryGimpy()
+#i = g.render()
+#a = numpy.asarray(i)
+#b = numpy.zeros((2, 2), numpy.int8)
+#c = a == b
+#print c
+#i.save("output.png")
+#i.show()
+#print a
+#print g.solutions
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/test.png
Binary file data_generation/transformations/pycaptcha/test.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/pycaptcha/transformations.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/pycaptcha/transformations.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,25 @@
+
+import Numeric, Image
+    #""" Converts a PIL image to a numpy.array object and vice versa"""
+
+
+def image2array(im):
+    #""" PIL image to numpy array"""
+ if im.mode not in ("L", "F"):
+ raise ValueError, "can only convert single-layer images"
+ if im.mode == "L":
+ a = Numeric.fromstring(im.tostring(), Numeric.UnsignedInt8)
+ else:
+ a = Numeric.fromstring(im.tostring(), Numeric.Float32)
+ a.shape = im.size[1], im.size[0]
+ return a
+
+def array2image(a):
+    #""" numpy array to PIL image"""
+ if a.typecode() == Numeric.UnsignedInt8:
+ mode = "L"
+ elif a.typecode() == Numeric.Float32:
+ mode = "F"
+ else:
+ raise ValueError, "unsupported image mode"
+ return Image.fromstring(mode, (a.shape[1], a.shape[0]), a.tostring())
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/slant.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/slant.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,104 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Author: Youssouf
+
+This module adds a slant effect to the image.
+
+To obtain the slant effect, each row of the array is shifted proportionately by a step controlled by the complexity.
+
+'''
+
+import numpy
+
+
+class Slant():
+ def __init__(self, complexity=1):
+ #---------- private attributes
+ self.direction = 1
+ self.angle = 0
+
+ #---------- generation parameters
+ self.regenerate_parameters(complexity)
+ #------------------------------------------------
+
+ def _get_current_parameters(self):
+ return [self.angle, self.direction]
+
+ def get_settings_names(self):
+ return ['angle', 'direction']
+
+ def regenerate_parameters(self, complexity):
+ self.angle = numpy.random.uniform(0.0, complexity)
+ P = numpy.random.uniform()
+ self.direction = 1;
+ if P < 0.5:
+ self.direction = -1;
+ return self._get_current_parameters()
+
+
+ def transform_image(self,image):
+ if self.angle == 0:
+ return image
+
+ ysize, xsize = image.shape
+ slant = self.direction*self.angle
+
+ output = image.copy()
+
+ # shift all the rows
+ for i in range(ysize):
+ line = image[i]
+ delta = round((i*slant)) % xsize
+ line1 = line[:xsize-delta]
+ line2 = line[xsize-delta:xsize]
+
+ output[i][delta:xsize] = line1
+ output[i][0:delta] = line2
+
+
+ #correction to center the image
+ correction = (self.direction)*round(self.angle*ysize/2)
+ correction = (xsize - correction) % xsize
+
+ # center the region
+ line1 = output[0:ysize,0:xsize-correction].copy()
+ line2 = output[0:ysize,xsize-correction:xsize].copy()
+ output[0:ysize,correction:xsize] = line1
+ output[0:ysize,0:correction] = line2
+
+
+ return output
+
+
+# Test function
+# Load a local image and create several samples of the effect on the
+# original image with different parameters. All the samples are saved in a single image, the 1st image being the original.
+
+def test_slant():
+ import scipy
+ img_name = "test_img/mnist_0.png"
+ dest_img_name = "test_img/slanted.png"
+ nb_samples = 10
+ im = Image.open(img_name)
+ im = im.convert("L")
+ image = numpy.asarray(im)
+
+ image_final = image
+ slant = Slant()
+ for i in range(nb_samples):
+ slant.regenerate_parameters(1)
+ image_slant = slant.transform_image(image)
+ image_final = scipy.hstack((image_final,image_slant))
+
+ im = Image.fromarray(image_final.astype('uint8'), "L")
+ im.save(dest_img_name)
+
+# Test
+if __name__ == '__main__':
+ import sys, os, fnmatch
+ import Image
+
+ test_slant()
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/testmod.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/testmod.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,130 @@
+# This script is to test your modules to see if they conform to the module API
+# defined on the wiki.
+import random, numpy, gc, time, math, sys
+
+# this is an example module that does stupid image value shifting
+
+class DummyModule(object):
+ def get_settings_names(self):
+ return ['value']
+
+ def regenerate_parameters(self, complexity):
+ self._value = random.gauss(0, 0.5*complexity)
+ return [self._value]
+
+ def transform_image(self, image):
+ return numpy.clip(image+self._value, 0, 1)
+
+#import
+
+# instanciate your class here (rather than DummyModule)
+mod = DummyModule()
+
+def error(msg):
+ print "ERROR:", msg
+ sys.exit(1)
+
+def warn(msg):
+ print "WARNING:", msg
+
+def timeit(f, lbl):
+
+ gc.disable()
+ t = time.time()
+ f()
+ est = time.time() - t
+ gc.enable()
+
+ loops = max(1, int(10**math.floor(math.log(10/est, 10))))
+
+ gc.disable()
+ t = time.time()
+ for _ in xrange(loops):
+ f()
+
+ print lbl, "(", loops, "loops ):", (time.time() - t)/loops, "s"
+ gc.enable()
+
+########################
+# get_settings_names() #
+########################
+
+print "Testing get_settings_names()"
+
+names = mod.get_settings_names()
+
+if type(names) is not list:
+ error("Must return a list")
+
+if not all(type(e) is str for e in names):
+ warn("The elements of the list should be strings")
+
+###########################
+# regenerate_parameters() #
+###########################
+
+print "Testing regenerate_parameters()"
+
+params = mod.regenerate_parameters(0.2)
+
+if type(params) is not list:
+ error("Must return a list")
+
+if len(params) != len(names):
+ error("the returned parameter list must have the same length as the number of parameters")
+
+params2 = mod.regenerate_parameters(0.2)
+if len(names) != 0 and params == params2:
+ error("the complexity parameter determines the distribution of the parameters, not their value")
+
+mod.regenerate_parameters(0.0)
+mod.regenerate_parameters(1.0)
+
+mod.regenerate_parameters(0.5)
+
+#####################
+# transform_image() #
+#####################
+
+print "Testing transform_image()"
+
+imgr = numpy.random.random_sample((32, 32)).astype(numpy.float32)
+img1 = numpy.ones((32, 32), dtype=numpy.float32)
+img0 = numpy.zeros((32, 32), dtype=numpy.float32)
+
+resr = mod.transform_image(imgr)
+
+if type(resr) is not numpy.ndarray:
+ error("Must return an ndarray")
+
+if resr.shape != (32, 32):
+ error("Must return 32x32 array")
+
+if resr.dtype != numpy.float32:
+ error("Must return float32 array")
+
+res1 = mod.transform_image(img1)
+res0 = mod.transform_image(img0)
+
+if res1.max() > 1.0 or res0.max() > 1.0:
+ error("Must keep array values between 0 and 1")
+
+if res1.min() < 0.0 or res0.min() < 0.0:
+ error("Must keep array values between 0 and 1")
+
+mod.regenerate_parameters(0.0)
+mod.transform_image(imgr)
+mod.regenerate_parameters(1.0)
+mod.transform_image(imgr)
+
+print "Bonus Stage: timings"
+
+timeit(lambda: None, "empty")
+timeit(lambda: mod.regenerate_parameters(0.5), "regenerate_parameters()")
+timeit(lambda: mod.transform_image(imgr), "tranform_image()")
+
+def f():
+ mod.regenerate_parameters(0.2)
+ mod.transform_image(imgr)
+
+timeit(f, "regen and transform")
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/testtransformations.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/testtransformations.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+
+
+from pylearn.io import filetensor as ft
+import copy
+import pygame
+import time
+import numpy as N
+
+from ttf2jpg import ttf2jpg
+
+#from gimpfu import *
+
+
+from PoivreSel import PoivreSel
+from thick import Thick
+from BruitGauss import BruitGauss
+from DistorsionGauss import DistorsionGauss
+from PermutPixel import PermutPixel
+from gimp_script import GIMP1
+from Rature import Rature
+from contrast import Contrast
+from local_elastic_distortions import LocalElasticDistorter
+from slant import Slant
+from Occlusion import Occlusion
+from add_background_image import AddBackground
+from affine_transform import AffineTransformation
+
+###---------------------order of transformation module
+MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+
+###---------------------complexity associated to each of them
+complexity = 0.7
+#complexity = [0.5]*len(MODULE_INSTANCES)
+#complexity = [0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.]
+n=100
+
+def createimage(path,d):
+ for i in range(n):
+ screen.fill(0)
+ a=d[i,:]
+ off1=4*32
+ off2=0
+ for u in range(n):
+ b=N.asarray(N.reshape(a,(32,32)))
+ c=N.asarray([N.reshape(a*255.0,(32,32))]*3).T
+ new=pygame.surfarray.make_surface(c)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ #new.set_palette(anglcolorpalette)
+ screen.blit(new,(0,0))
+ exemple.blit(new,(0,0))
+
+ offset = 4*32
+ offset2 = 0
+ ct = 0
+ ctmp = N.random.rand()*complexity
+ print u
+ for j in MODULE_INSTANCES:
+ #max dilation
+ #ctmp = N.random.rand()*complexity[ct]
+ ctmp = N.random.rand()*complexity
+ #print j.get_settings_names(), j.regenerate_parameters(ctmp)
+ th=j.regenerate_parameters(ctmp)
+
+ b=j.transform_image(b)
+ c=N.asarray([b*255]*3).T
+ new=pygame.surfarray.make_surface(c)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ if u==0:
+ #new.set_palette(anglcolorpalette)
+ screen.blit(new,(offset,offset2))
+ font = pygame.font.SysFont('liberationserif',18)
+ text = font.render('%s '%(int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
+ #if j.__module__ == 'Rature':
+ # text = font.render('%s,%s'%(th[-1],int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
+ screen.blit(text,(offset,offset2+4*32))
+ if ct == len(MODULE_INSTANCES)/2-1:
+ offset = 0
+ offset2 = 4*32+20
+ else:
+ offset += 4*32
+ ct+=1
+ exemple.blit(new,(off1,off2))
+ if off1 != 9*4*32:
+ off1+=4*32
+ else:
+ off1=0
+ off2+=4*32
+ pygame.image.save(exemple,path+'/perimages/%s.PNG'%i)
+ pygame.image.save(screen,path+'/exemples/%s.PNG'%i)
+
+
+
+
+nbmodule = len(MODULE_INSTANCES)
+
+pygame.surfarray.use_arraytype('numpy')
+
+#pygame.display.init()
+screen = pygame.Surface((4*(nbmodule+1)/2*32,2*(4*32+20)),depth=32)
+exemple = pygame.Surface((N.ceil(N.sqrt(n))*4*32,N.ceil(N.sqrt(n))*4*32),depth=32)
+
+anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
+#pygame.Surface.set_palette(anglcolorpalette)
+#screen.set_palette(anglcolorpalette)
+
+pygame.font.init()
+
+d = N.zeros((n,1024))
+
+datapath = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
+f = open(datapath)
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/OCR',d)
+
+
+
+datapath = '/data/lisa/data/nist/by_class/'
+f = open(datapath+'digits_reshuffled/digits_reshuffled_train_data.ft')
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/NIST_digits',d)
+
+
+
+datapath = '/data/lisa/data/nist/by_class/'
+f = open(datapath+'upper/upper_train_data.ft')
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/NIST_upper',d)
+
+from Facade import *
+
+for i in range(n):
+ d[i,:]=N.asarray(N.reshape(generateCaptcha(0.8,0),(1,1024))/255.0,dtype='float32')
+
+createimage('/u/glorotxa/transf/capcha',d)
+
+
+for i in range(n):
+ myttf2jpg = ttf2jpg()
+ d[i,:]=N.reshape(myttf2jpg.generate_image()[0],(1,1024))
+createimage('/u/glorotxa/transf/fonts',d)
+
+datapath = '/data/lisa/data/nist/by_class/'
+f = open(datapath+'lower/lower_train_data.ft')
+d = ft.read(f)
+d = d[0:n,:]/255.0
+createimage('/u/glorotxa/transf/NIST_lower',d)
+
+
+#pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/thick.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/thick.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,198 @@
+#!/usr/bin/python
+# coding: utf-8
+
+'''
+Simple implementation of random thickness deformation using morphological
+operation of scipy.
+Only one morphological operation applied (dilation or erosion), the kernel is random
+out of a list of 12 symmetric kernels. (only 5 to be chosen for erosion because it can
+hurt the recognizability of the character and 12 for dilation).
+
+Author: Xavier Glorot
+
+'''
+
+import scipy.ndimage.morphology
+import numpy as N
+
+
+class Thick():
+ def __init__(self,complexity = 1):
+ #---------- private attributes
+ self.__nx__ = 32 #xdim of the images
+ self.__ny__ = 32 #ydim of the images
+ self.__erodemax__ = 5 #nb of index max of erode structuring elements
+ self.__dilatemax__ = 9 #nb of index max of dilation structuring elements
+ self.__structuring_elements__ = [N.asarray([[1,1]]),N.asarray([[1],[1]]),\
+ N.asarray([[1,1],[1,1]]),N.asarray([[0,1,0],[1,1,1],[0,1,0]]),\
+ N.asarray([[1,1,1],[1,1,1]]),N.asarray([[1,1],[1,1],[1,1]]),\
+ N.asarray([[1,1,1],[1,1,1],[1,1,1]]),\
+ N.asarray([[1,1,1,1],[1,1,1,1],[1,1,1,1]]),\
+ N.asarray([[1,1,1],[1,1,1],[1,1,1],[1,1,1]]),\
+ N.asarray([[0,0,1,0,0],[0,1,1,1,0],[1,1,1,1,1],[0,1,1,1,0],[0,0,1,0,0]]),\
+ N.asarray([[1,1,1,1],[1,1,1,1]]),N.asarray([[1,1],[1,1],[1,1],[1,1]])]
+ #------------------------------------------------
+
+ #---------- generation parameters
+ self.regenerate_parameters(complexity)
+ #------------------------------------------------
+
+ def _get_current_parameters(self):
+ return [self.thick_param]
+
+ def get_settings_names(self):
+ return ['thick_param']
+
+ def regenerate_parameters(self, complexity):
+ self.erodenb = N.ceil(complexity * self.__erodemax__)
+ self.dilatenb = N.ceil(complexity * self.__dilatemax__)
+ self.Perode = self.erodenb / (self.dilatenb + self.erodenb + 1.0)
+ self.Pdilate = self.dilatenb / (self.dilatenb + self.erodenb + 1.0)
+ assert (self.Perode + self.Pdilate <= 1) & (self.Perode + self.Pdilate >= 0)
+ assert (complexity >= 0) & (complexity <= 1)
+ P = N.random.uniform()
+ if P>1-(self.Pdilate+self.Perode):
+ if P>1-(self.Pdilate+self.Perode)+self.Perode:
+ self.meth = 1
+ self.nb=N.random.randint(self.dilatenb)
+ else:
+ self.meth = -1
+ self.nb=N.random.randint(self.erodenb)
+ else:
+ self.meth = 0
+ self.nb = -1
+ self.thick_param = self.meth*self.nb
+ return self._get_current_parameters()
+
+ def transform_1_image(self,image): #the real transformation method
+ if self.meth!=0:
+ maxi = float(N.max(image))
+ mini = float(N.min(image))
+
+ imagenorm=image/maxi
+
+ if self.meth==1:
+ trans=scipy.ndimage.morphology.grey_dilation\
+ (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
+ else:
+ trans=scipy.ndimage.morphology.grey_erosion\
+ (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
+
+ #------renormalizing
+ maxit = N.max(trans)
+ minit = N.min(trans)
+ trans= N.asarray((trans - (minit+mini)) / (maxit - (minit+mini)) * maxi,dtype=image.dtype)
+ #--------
+ return trans
+ else:
+ return image
+
+ def transform_image(self,image): #handling different format
+ if image.shape == (self.__nx__,self.__ny__):
+ return self.transform_1_image(image)
+ if image.ndim == 3:
+ newimage = copy.copy(image)
+ for i in range(image.shape[0]):
+ newimage[i,:,:] = self.transform_1_image(image[i,:,:])
+ return newimage
+ if image.ndim == 2 and image.shape != (self.__nx__,self.__ny__):
+ newimage = N.reshape(image,(image.shape[0],self.__nx__,self.__ny__))
+ for i in range(image.shape[0]):
+ newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
+ return N.reshape(newimage,image.shape)
+ if image.ndim == 1:
+ newimage = N.reshape(image,(self.__nx__,self.__ny__))
+ newimage = self.transform_1_image(newimage)
+ return N.reshape(newimage,image.shape)
+ assert False #should never go there
+
+
+
+
+#test on NIST (you need pylearn and access to NIST to do that)
+
+if __name__ == '__main__':
+
+ from pylearn.io import filetensor as ft
+ import copy
+ import pygame
+ import time
+ datapath = '/data/lisa/data/nist/by_class/'
+ f = open(datapath+'digits/digits_train_data.ft')
+ d = ft.read(f)
+
+ pygame.surfarray.use_arraytype('numpy')
+
+ pygame.display.init()
+ screen = pygame.display.set_mode((8*4*32,8*32),0,8)
+ anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
+ screen.set_palette(anglcolorpalette)
+
+ MyThick = Thick()
+
+ #debut=time.time()
+ #MyThick.transform_image(d)
+ #fin=time.time()
+ #print '------------------------------------------------'
+ #print d.shape[0],' images transformed in :', fin-debut, ' seconds'
+ #print '------------------------------------------------'
+ #print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
+ #print '------------------------------------------------'
+ #print MyThick.get_settings_names()
+ #print MyThick._get_current_parameters()
+ #print MyThick.regenerate_parameters(0)
+ #print MyThick.regenerate_parameters(0.5)
+ #print MyThick.regenerate_parameters(1)
+ for i in range(10000):
+ a=d[i,:]
+ b=N.asarray(N.reshape(a,(32,32))).T
+
+ new=pygame.surfarray.make_surface(b)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new.set_palette(anglcolorpalette)
+ screen.blit(new,(0,0))
+
+ #max dilation
+ MyThick.meth=1
+ MyThick.nb=MyThick.__dilatemax__
+ c=MyThick.transform_image(a)
+ b=N.asarray(N.reshape(c,(32,32))).T
+
+ new=pygame.surfarray.make_surface(b)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new.set_palette(anglcolorpalette)
+ screen.blit(new,(8*32,0))
+
+ #max erosion
+ MyThick.meth=-1
+ MyThick.nb=MyThick.__erodemax__
+ c=MyThick.transform_image(a)
+ b=N.asarray(N.reshape(c,(32,32))).T
+
+ new=pygame.surfarray.make_surface(b)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new.set_palette(anglcolorpalette)
+ screen.blit(new,(8*2*32,0))
+
+ #random
+ print MyThick.get_settings_names(), MyThick.regenerate_parameters(1)
+ c=MyThick.transform_image(a)
+ b=N.asarray(N.reshape(c,(32,32))).T
+
+ new=pygame.surfarray.make_surface(b)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new=pygame.transform.scale2x(new)
+ new.set_palette(anglcolorpalette)
+ screen.blit(new,(8*3*32,0))
+
+ pygame.display.update()
+ raw_input('Press Enter')
+
+ pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/ttf2jpg.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/ttf2jpg.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,121 @@
+#!/usr/bin/python
+# -*- coding: iso-8859-1 -*-
+
+'''
+ Implementation of font image generator
+ download fonts from http://www.dafont.com for example
+
+ Author: Guillaume Sicard
+'''
+
+import sys, os, fnmatch, random
+import Image, ImageFont, ImageDraw, numpy
+
+class ttf2jpg():
+ def __init__(self, font_file = ''):
+ self.w = 32
+ self.h = 32
+ self.font_dir = '/Tmp/allfonts/'
+ self.font_file = font_file
+ self.image_dir = './images/'
+ self.pattern = '*.ttf'
+ self.char_list = []
+ for i in range(0,10):
+ self.char_list.append(chr(ord('0') + i) )
+ for i in range(0,26):
+ self.char_list.append(chr(ord('A') + i) )
+ for i in range(0,26):
+ self.char_list.append(chr(ord('a') + i) )
+ files = os.listdir(self.font_dir)
+ self.font_files = fnmatch.filter(files, '*.ttf') + fnmatch.filter(files, '*.TTF')
+
+ # get font name
+ def get_settings_names(self):
+ return [self.font_file]
+
+ # save an image
+ def save_image(self,array, filename = ''):
+ image = (array * 255.0).astype('int')
+ image = Image.fromarray(image).convert('L')
+ if (filename != ''):
+ image.save(filename)
+ else:
+ image.show()
+
+ # set a random font for character generation
+ def set_random_font(self):
+ i = random.randint(0, len(self.font_files) - 1)
+ self.font_file = self.font_dir + self.font_files[i]
+
+ # return a picture array of "text" with font "font_file"
+ def create_image(self, text):
+ # create a w x h black picture, and a drawing space
+ image = Image.new('L', (self.w, self.h), 'Black')
+ draw = ImageDraw.Draw(image)
+
+ # load the font with the right size
+ font = ImageFont.truetype(self.font_file, 28)
+ d_w,d_h = draw.textsize(text, font=font)
+
+ # write text and aligns it
+ draw.text(((32 - d_w) / 2, ((32 - d_h) / 2)), text, font=font, fill='White')
+
+ image = numpy.asarray(image)
+ image = (image / 255.0).astype(numpy.float32)
+
+ return image
+
+ # write all the letters and numbers into pictures
+ def process_font(self):
+ for i in range(0, len(self.char_list) ):
+ image = self.create_image(self.char_list[i])
+ self.save_image(image, self.image_dir + self.char_list[i] + '-' + os.path.basename(self.font_file) + '.jpg')
+ sys.stdout.write('.')
+ sys.stdout.flush()
+ return (len(self.char_list))
+
+ # generate the character from the font_file and returns a numpy array
+ def generate_image_from_char(self, character, font_file = ''):
+ if (font_file != ''):
+ self.font_file = font_file
+
+ return self.create_image(character)
+
+ # generate random character from random font file as a numpy array
+ def generate_image(self):
+ self.set_random_font()
+ i = random.randint(0, len(self.char_list) - 1)
+ return self.generate_image_from_char(self.char_list[i]), i
+
+ # test method, create character images for all fonts in "font_dir" in dir "image_dir"
+ def test(self):
+ import time
+
+ # look for ttf files
+ files = os.listdir(self.font_dir)
+ font_files = fnmatch.filter(files, self.pattern)
+
+ # create "image_dir" if it doesn't exist
+ if not os.path.isdir(self.image_dir):
+ os.mkdir(self.image_dir)
+
+ sys.stdout.write( str(len(font_files)) + ' fonts found, generating jpg images in folder ' + self.image_dir )
+ sys.stdout.flush()
+
+ # main loop
+ t = time.time()
+ n = 0
+
+ for font_file in font_files:
+ self.font_file = self.font_dir + font_file
+ n += self.process_font()
+ t = time.time() - t
+
+ sys.stdout.write('\nall done!\n' + str(n) + ' images generated in ' + str(t) + 's (average : ' + str(1000 * t / n) + ' ms/im)\n')
+
+if __name__ == '__main__':
+
+ myttf2jpg = ttf2jpg()
+ #myttf2jpg.test()
+ image, i = myttf2jpg.generate_image()
+ myttf2jpg.save_image(image, '')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 data_generation/transformations/visualizer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_generation/transformations/visualizer.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+
+import numpy
+import Image
+from image_tiling import tile_raster_images
+import pylab
+import time
+
+class Visualizer():
+ def __init__(self, num_columns=10, image_size=(32,32), to_dir=None, on_screen=False):
+ self.list = []
+ self.image_size = image_size
+ self.num_columns = num_columns
+
+ self.on_screen = on_screen
+ self.to_dir = to_dir
+
+ self.cur_grid_image = None
+
+ self.cur_index = 0
+
+ def visualize_stop_and_flush(self):
+ self.make_grid_image()
+
+ if self.on_screen:
+ self.visualize()
+ if self.to_dir:
+ self.dump_to_disk()
+
+ self.stop_and_wait()
+ self.flush()
+
+ self.cur_index += 1
+
+ def make_grid_image(self):
+ num_rows = len(self.list) / self.num_columns
+ if len(self.list) % self.num_columns != 0:
+ num_rows += 1
+ grid_shape = (num_rows, self.num_columns)
+ self.cur_grid_image = tile_raster_images(numpy.array(self.list), self.image_size, grid_shape, tile_spacing=(5,5), output_pixel_vals=False)
+
+ def visualize(self):
+ pylab.imshow(self.cur_grid_image)
+ pylab.draw()
+
+ def dump_to_disk(self):
+ gi = Image.fromarray((self.cur_grid_image * 255).astype('uint8'), "L")
+ gi.save(self.to_dir + "/grid_" + str(self.cur_index) + ".png")
+
+ def stop_and_wait(self):
+ # can't raw_input under gimp, so sleep
+ print "New image generated, sleeping 5 secs"
+ time.sleep(5)
+
+ def flush(self):
+ self.list = []
+
+ def get_parameters_names(self):
+ return []
+
+ def regenerate_parameters(self):
+ return []
+
+ def after_transform_callback(self, image):
+ self.transform_image(image)
+
+ def end_transform_callback(self, final_image):
+ self.visualize_stop_and_flush()
+
+ def transform_image(self, image):
+ sz = self.image_size
+ self.list.append(image.copy().reshape((sz[0] * sz[1])))
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/convolutional_dae/stacked_convolutional_dae.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/convolutional_dae/stacked_convolutional_dae.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,415 @@
+import numpy
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import theano.sandbox.softsign
+
+from theano.tensor.signal import downsample
+from theano.tensor.nnet import conv
+import gzip
+import cPickle
+
+
+class LogisticRegression(object):
+
+ def __init__(self, input, n_in, n_out):
+
+ self.W = theano.shared( value=numpy.zeros((n_in,n_out),
+ dtype = theano.config.floatX) )
+
+ self.b = theano.shared( value=numpy.zeros((n_out,),
+ dtype = theano.config.floatX) )
+
+ self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+
+
+ self.y_pred=T.argmax(self.p_y_given_x, axis=1)
+
+ self.params = [self.W, self.b]
+
+ def negative_log_likelihood(self, y):
+ return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+
+ def MSE(self, y):
+ return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2) # NOTE(review): leading minus yields the *negated* MSE — confirm the sign is intended
+
+ def errors(self, y):
+ if y.ndim != self.y_pred.ndim:
+ raise TypeError('y should have the same shape as self.y_pred',
+ ('y', target.type, 'y_pred', self.y_pred.type)) # NOTE(review): 'target' is undefined in this scope — likely meant y.type
+
+
+ if y.dtype.startswith('int'):
+ return T.mean(T.neq(self.y_pred, y))
+ else:
+ raise NotImplementedError()
+
+
+class SigmoidalLayer(object):
+ def __init__(self, rng, input, n_in, n_out):
+
+ self.input = input
+
+ W_values = numpy.asarray( rng.uniform( \
+ low = -numpy.sqrt(6./(n_in+n_out)), \
+ high = numpy.sqrt(6./(n_in+n_out)), \
+ size = (n_in, n_out)), dtype = theano.config.floatX)
+ self.W = theano.shared(value = W_values)
+
+ b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+ self.b = theano.shared(value= b_values)
+
+ self.output = T.tanh(T.dot(input, self.W) + self.b)
+ self.params = [self.W, self.b]
+
+class dA_conv(object):
+
+ def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\
+ shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)):
+
+ theano_rng = RandomStreams()
+
+ fan_in = numpy.prod(filter_shape[1:])
+ fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
+
+ center = theano.shared(value = 1, name="center")
+ scale = theano.shared(value = 2, name="scale")
+
+ if shared_W != None and shared_b != None :
+ self.W = shared_W
+ self.b = shared_b
+ else:
+ initial_W = numpy.asarray( numpy.random.uniform( \
+ low = -numpy.sqrt(6./(fan_in+fan_out)), \
+ high = numpy.sqrt(6./(fan_in+fan_out)), \
+ size = filter_shape), dtype = theano.config.floatX)
+ initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
+
+
+ self.W = theano.shared(value = initial_W, name = "W")
+ self.b = theano.shared(value = initial_b, name = "b")
+
+
+ initial_b_prime= numpy.zeros((filter_shape[1],))
+
+ self.W_prime=T.dtensor4('W_prime')
+
+ self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
+
+ self.x = input
+
+ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
+
+ conv1_out = conv.conv2d(self.tilde_x, self.W, \
+ filter_shape=filter_shape, \
+ image_shape=image_shape, border_mode='valid')
+
+
+ self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+
+
+ da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
+ filter_shape[3] ]
+ da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \
+ image_shape[3]-filter_shape[3]+1 ]
+ initial_W_prime = numpy.asarray( numpy.random.uniform( \
+ low = -numpy.sqrt(6./(fan_in+fan_out)), \
+ high = numpy.sqrt(6./(fan_in+fan_out)), \
+ size = da_filter_shape), dtype = theano.config.floatX)
+ self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
+
+ #import pdb;pdb.set_trace()
+
+ conv2_out = conv.conv2d(self.y, self.W_prime, \
+ filter_shape = da_filter_shape, image_shape = da_image_shape ,\
+ border_mode='full')
+
+ self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
+
+ scaled_x = (self.x + center) / scale
+
+ self.L = - T.sum( scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1 )
+
+ self.cost = T.mean(self.L)
+
+ self.params = [ self.W, self.b, self.b_prime ]
+
+
+
+class LeNetConvPoolLayer(object):
+ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
+ assert image_shape[1]==filter_shape[1]
+ self.input = input
+
+ W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
+ self.W = theano.shared(value = W_values)
+
+ b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
+ self.b = theano.shared(value= b_values)
+
+ conv_out = conv.conv2d(input, self.W,
+ filter_shape=filter_shape, image_shape=image_shape)
+
+
+ fan_in = numpy.prod(filter_shape[1:])
+ fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
+
+ W_bound = numpy.sqrt(6./(fan_in + fan_out))
+ self.W.value = numpy.asarray(
+ rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
+ dtype = theano.config.floatX)
+
+
+ pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True)
+
+ self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+ self.params = [self.W, self.b]
+
+
+class SdA():
+ def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \
+ conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \
+ rng, n_out, pretrain_lr, finetune_lr):
+
+ self.layers = []
+ self.pretrain_functions = []
+ self.params = []
+ self.conv_n_layers = len(conv_hidden_layers_sizes)
+ self.mlp_n_layers = len(mlp_hidden_layers_sizes)
+
+ index = T.lscalar() # index to a [mini]batch
+ self.x = T.dmatrix('x') # the data is presented as rasterized images
+ self.y = T.ivector('y') # the labels are presented as 1D vector of [int] labels
+
+
+
+ for i in xrange( self.conv_n_layers ):
+
+ filter_shape=conv_hidden_layers_sizes[i][0]
+ image_shape=conv_hidden_layers_sizes[i][1]
+ max_poolsize=conv_hidden_layers_sizes[i][2]
+
+ if i == 0 :
+ layer_input=self.x.reshape((batch_size,1,28,28))
+ else:
+ layer_input=self.layers[-1].output
+
+ layer = LeNetConvPoolLayer(rng, input=layer_input, \
+ image_shape=image_shape, \
+ filter_shape=filter_shape,poolsize=max_poolsize)
+ print 'Convolutional layer '+str(i+1)+' created'
+
+ self.layers += [layer]
+ self.params += layer.params
+
+ da_layer = dA_conv(corruption_level = corruption_levels[0],\
+ input = layer_input, \
+ shared_W = layer.W, shared_b = layer.b,\
+ filter_shape = filter_shape , image_shape = image_shape )
+
+
+ gparams = T.grad(da_layer.cost, da_layer.params)
+
+ updates = {}
+ for param, gparam in zip(da_layer.params, gparams):
+ updates[param] = param - gparam * pretrain_lr
+
+
+ update_fn = theano.function([index], da_layer.cost, \
+ updates = updates,
+ givens = {
+ self.x : train_set_x[index*batch_size:(index+1)*batch_size]} )
+
+ self.pretrain_functions += [update_fn]
+
+ for i in xrange( self.mlp_n_layers ):
+ if i == 0 :
+ input_size = n_ins_mlp
+ else:
+ input_size = mlp_hidden_layers_sizes[i-1]
+
+ if i == 0 :
+ if len( self.layers ) == 0 :
+ layer_input=self.x
+ else :
+ layer_input = self.layers[-1].output.flatten(2)
+ else:
+ layer_input = self.layers[-1].output
+
+ layer = SigmoidalLayer(rng, layer_input, input_size,
+ mlp_hidden_layers_sizes[i] )
+
+ self.layers += [layer]
+ self.params += layer.params
+
+
+ print 'MLP layer '+str(i+1)+' created'
+
+ self.logLayer = LogisticRegression(input=self.layers[-1].output, \
+ n_in=mlp_hidden_layers_sizes[-1], n_out=n_out)
+ self.params += self.logLayer.params
+
+ cost = self.logLayer.negative_log_likelihood(self.y)
+
+ gparams = T.grad(cost, self.params)
+ updates = {}
+
+ for param,gparam in zip(self.params, gparams):
+ updates[param] = param - gparam*finetune_lr
+
+ self.finetune = theano.function([index], cost,
+ updates = updates,
+ givens = {
+ self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+ self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+
+
+ self.errors = self.logLayer.errors(self.y)
+
+
+
+def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
+ pretrain_lr = 0.01, training_epochs = 1000, \
+ dataset='mnist.pkl.gz'):
+
+ f = gzip.open(dataset,'rb')
+ train_set, valid_set, test_set = cPickle.load(f)
+ f.close()
+
+
+ def shared_dataset(data_xy):
+ data_x, data_y = data_xy
+ shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+ shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
+ return shared_x, T.cast(shared_y, 'int32')
+
+
+ test_set_x, test_set_y = shared_dataset(test_set)
+ valid_set_x, valid_set_y = shared_dataset(valid_set)
+ train_set_x, train_set_y = shared_dataset(train_set)
+
+ batch_size = 500 # size of the minibatch
+
+
+ n_train_batches = train_set_x.value.shape[0] / batch_size
+ n_valid_batches = valid_set_x.value.shape[0] / batch_size
+ n_test_batches = test_set_x.value.shape[0] / batch_size
+
+ # allocate symbolic variables for the data
+ index = T.lscalar() # index to a [mini]batch
+ x = T.matrix('x') # the data is presented as rasterized images
+ y = T.ivector('y') # the labels are presented as 1d vector of
+ # [int] labels
+ layer0_input = x.reshape((batch_size,1,28,28))
+
+
+ # Setup the convolutional layers with their DAs(add as many as you want)
+ corruption_levels = [ 0.2, 0.2, 0.2]
+ rng = numpy.random.RandomState(1234)
+ ker1=2
+ ker2=2
+ conv_layers=[]
+ conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ])
+ conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ])
+
+ # Setup the MLP layers of the network
+ mlp_layers=[500]
+
+ network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \
+ train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size,
+ conv_hidden_layers_sizes = conv_layers, \
+ mlp_hidden_layers_sizes = mlp_layers, \
+ corruption_levels = corruption_levels , n_out = 10, \
+ rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate )
+
+ test_model = theano.function([index], network.errors,
+ givens = {
+ network.x: test_set_x[index*batch_size:(index+1)*batch_size],
+ network.y: test_set_y[index*batch_size:(index+1)*batch_size]})
+
+ validate_model = theano.function([index], network.errors,
+ givens = {
+ network.x: valid_set_x[index*batch_size:(index+1)*batch_size],
+ network.y: valid_set_y[index*batch_size:(index+1)*batch_size]})
+
+
+
+ start_time = time.clock()
+ for i in xrange(len(network.layers)-len(mlp_layers)):
+ for epoch in xrange(pretraining_epochs):
+ for batch_index in xrange(n_train_batches):
+ c = network.pretrain_functions[i](batch_index)
+ print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c
+
+ patience = 10000 # look as this many examples regardless
+ patience_increase = 2. # wait this much longer when a new best is
+ # found
+ improvement_threshold = 0.995 # a relative improvement of this much is considered significant
+
+ validation_frequency = min(n_train_batches, patience/2)
+
+
+ best_params = None
+ best_validation_loss = float('inf')
+ test_score = 0.
+ start_time = time.clock()
+
+ done_looping = False
+ epoch = 0
+
+ while (epoch < training_epochs) and (not done_looping):
+ epoch = epoch + 1
+ for minibatch_index in xrange(n_train_batches):
+
+ cost_ij = network.finetune(minibatch_index)
+ iter = epoch * n_train_batches + minibatch_index
+
+ if (iter+1) % validation_frequency == 0:
+
+ validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
+ this_validation_loss = numpy.mean(validation_losses)
+ print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+ (epoch, minibatch_index+1, n_train_batches, \
+ this_validation_loss*100.))
+
+
+ # if we got the best validation score until now
+ if this_validation_loss < best_validation_loss:
+
+ #improve patience if loss improvement is good enough
+ if this_validation_loss < best_validation_loss * \
+ improvement_threshold :
+ patience = max(patience, iter * patience_increase)
+
+ # save best validation score and iteration number
+ best_validation_loss = this_validation_loss
+ best_iter = iter
+
+ # test it on the test set
+ test_losses = [test_model(i) for i in xrange(n_test_batches)]
+ test_score = numpy.mean(test_losses)
+ print((' epoch %i, minibatch %i/%i, test error of best '
+ 'model %f %%') %
+ (epoch, minibatch_index+1, n_train_batches,
+ test_score*100.))
+
+
+ if patience <= iter :
+ done_looping = True
+ break
+
+ end_time = time.clock()
+ print(('Optimization complete with best validation score of %f %%,'
+ 'with test performance %f %%') %
+ (best_validation_loss * 100., test_score*100.))
+ print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+
+
+
+
+
+
+if __name__ == '__main__':
+ sgd_optimization_mnist()
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/__init__.py
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/mnist_sda.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/mnist_sda.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+# coding: utf-8
+
+# Parameterize call to sgd_optimization for MNIST
+
+import numpy
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+from sgd_optimization import SdaSgdOptimizer
+import cPickle, gzip
+from jobman import DD
+
+MNIST_LOCATION = '/u/savardf/datasets/mnist.pkl.gz'
+
+def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 2, \
+ pretrain_lr = 0.1, training_epochs = 5, \
+ dataset='mnist.pkl.gz'):
+ # Load the dataset
+ f = gzip.open(dataset,'rb')
+ # this gives us train, valid, test (each with .x, .y)
+ dataset = cPickle.load(f)
+ f.close()
+
+ n_ins = 28*28
+ n_outs = 10
+
+ hyperparameters = DD({'finetuning_lr':learning_rate,
+ 'pretraining_lr':pretrain_lr,
+ 'pretraining_epochs_per_layer':pretraining_epochs,
+ 'max_finetuning_epochs':training_epochs,
+ 'hidden_layers_sizes':[100],
+ 'corruption_levels':[0.2],
+ 'minibatch_size':20})
+
+ optimizer = SdaSgdOptimizer(dataset, hyperparameters, n_ins, n_outs)
+ optimizer.pretrain()
+ optimizer.finetune()
+
+if __name__ == '__main__':
+ sgd_optimization_mnist(dataset=MNIST_LOCATION)
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/nist_sda.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/nist_sda.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,264 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import numpy
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+import sys
+import os.path
+
+from sgd_optimization import SdaSgdOptimizer
+
+from jobman import DD
+import jobman, jobman.sql
+from pylearn.io import filetensor
+
+from utils import produit_croise_jobs
+
+TEST_CONFIG = False
+
+NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
+
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
+REDUCE_TRAIN_TO = None
+MAX_FINETUNING_EPOCHS = 1000
+if TEST_CONFIG:
+ JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
+ REDUCE_TRAIN_TO = 1000
+ MAX_FINETUNING_EPOCHS = 2
+
+JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
+JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
+EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"
+
+# There used to be
+# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
+# and
+# 'num_hidden_layers':[1,2,3]
+# but this is now handled by a special mechanism in SgdOptimizer
+# to reuse intermediate results (for the same training of lower layers,
+# we can test many finetuning_lr)
+JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
+ 'pretraining_epochs_per_layer': [10,20],
+ 'hidden_layers_sizes': [300,800],
+ 'corruption_levels': [0.1,0.2],
+ 'minibatch_size': [20],
+ 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
+FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
+NUM_HIDDEN_LAYERS_VALS = [1,2,3]
+
+# Just useful for tests... minimal number of epochs
+DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
+ 'pretraining_lr':0.01,
+ 'pretraining_epochs_per_layer':1,
+ 'max_finetuning_epochs':1,
+ 'hidden_layers_sizes':[1000],
+ 'corruption_levels':[0.2],
+ 'minibatch_size':20})
+
+def jobman_entrypoint(state, channel):
+ state = copy.copy(state)
+
+ print "Will load NIST"
+ nist = NIST(20)
+ print "NIST loaded"
+
+ rtt = None
+ if state.has_key('reduce_train_to'):
+ rtt = state['reduce_train_to']
+ elif REDUCE_TRAIN_TO:
+ rtt = REDUCE_TRAIN_TO
+
+ if rtt:
+ print "Reducing training set to ", rtt, " examples"
+ nist.reduce_train_set(rtt)
+
+ train,valid,test = nist.get_tvt()
+ dataset = (train,valid,test)
+
+ n_ins = 32*32
+ n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+
+ db = jobman.sql.db(JOBDB_RESULTS)
+ optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\
+ input_divider=255.0, job_tree=True, results_db=db, \
+ experiment=EXPERIMENT_PATH, \
+ finetuning_lr_to_try=FINETUNING_LR_VALS, \
+ num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
+ optimizer.train()
+
+ return channel.COMPLETE
+
+def estimate_pretraining_time(job):
+ job = DD(job)
+ # time spent on pretraining estimated as O(n^2) where n = number of hidden units
+ # no need to multiply by num_hidden_layers, as results from num=1
+ # is reused for num=2, or 3, so in the end we get the same time
+ # as if we were training 3 times a single layer
+ # constants:
+ # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
+ # - 12 mins to finetune (per 1 epoch)
+ # basically the job_tree trick gives us a 5 times speedup on the
+ # pretraining time due to reusing for finetuning_lr
+ # and gives us a second x2 speedup for reusing previous layers
+ # to explore num_hidden_layers
+ return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \
+ * job.hidden_layer_sizes * job.hidden_layer_sizes) # NOTE(review): JOB_VALS defines key 'hidden_layers_sizes' — 'hidden_layer_sizes' looks misspelled; confirm
+
+def estimate_total_time():
+ jobs = produit_croise_jobs(JOB_VALS)
+ sumtime = 0.0
+ sum_without = 0.0
+ for job in jobs:
+ sumtime += estimate_pretraining_time(job)
+ # 12 mins per epoch * 30 epochs
+ # 5 finetuning_lr per pretraining combination
+ sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
+ sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
+ print "num jobs=", len(jobs)
+ print "estimate", sumtime/60, " hours"
+ print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without
+
+def jobman_insert_nist():
+ jobs = produit_croise_jobs(JOB_VALS)
+
+ db = jobman.sql.db(JOBDB_JOBS)
+ for job in jobs:
+ job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
+ jobman.sql.insert_dict(job, db)
+
+ print "inserted"
+
+class NIST:
+ def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
+ global NIST_ALL_LOCATION
+
+ self.minibatch_size = minibatch_size
+ self.basepath = basepath and basepath or NIST_ALL_LOCATION
+
+ self.set_filenames()
+
+ # arrays of 2 elements: .x, .y
+ self.train = [None, None]
+ self.test = [None, None]
+
+ self.load_train_test()
+
+ self.valid = [[], []]
+ self.split_train_valid()
+ if reduce_train_to:
+ self.reduce_train_set(reduce_train_to)
+
+ def get_tvt(self):
+ return self.train, self.valid, self.test
+
+ def set_filenames(self):
+ self.train_files = ['all_train_data.ft',
+ 'all_train_labels.ft']
+
+ self.test_files = ['all_test_data.ft',
+ 'all_test_labels.ft']
+
+ def load_train_test(self):
+ self.load_data_labels(self.train_files, self.train)
+ self.load_data_labels(self.test_files, self.test)
+
+ def load_data_labels(self, filenames, pair):
+ for i, fn in enumerate(filenames):
+ f = open(os.path.join(self.basepath, fn))
+ pair[i] = filetensor.read(f)
+ f.close()
+
+ def reduce_train_set(self, max):
+ self.train[0] = self.train[0][:max]
+ self.train[1] = self.train[1][:max]
+
+ if max < len(self.test[0]):
+ for ar in (self.test, self.valid):
+ ar[0] = ar[0][:max]
+ ar[1] = ar[1][:max]
+
+ def split_train_valid(self):
+ test_len = len(self.test[0])
+
+ new_train_x = self.train[0][:-test_len]
+ new_train_y = self.train[1][:-test_len]
+
+ self.valid[0] = self.train[0][-test_len:]
+ self.valid[1] = self.train[1][-test_len:]
+
+ self.train[0] = new_train_x
+ self.train[1] = new_train_y
+
+def test_load_nist():
+ print "Will load NIST"
+
+ import time
+ t1 = time.time()
+ nist = NIST(20)
+ t2 = time.time()
+
+ print "NIST loaded. time delta = ", t2-t1
+
+ tr,v,te = nist.get_tvt()
+
+ print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
+
+ raw_input("Press any key")
+
+# hp for hyperparameters
+def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
+ global DEFAULT_HP_NIST
+ hp = hp and hp or DEFAULT_HP_NIST
+
+ print "Will load NIST"
+
+ import time
+ t1 = time.time()
+ nist = NIST(20, reduce_train_to=100)
+ t2 = time.time()
+
+ print "NIST loaded. time delta = ", t2-t1
+
+ train,valid,test = nist.get_tvt()
+ dataset = (train,valid,test)
+
+ print train[0][15]
+ print type(train[0][1])
+
+
+ print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
+
+ n_ins = 32*32
+ n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+
+ optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
+ optimizer.train()
+
+if __name__ == '__main__':
+
+ import sys
+
+ args = sys.argv[1:]
+
+ if len(args) > 0 and args[0] == 'load_nist':
+ test_load_nist()
+
+ elif len(args) > 0 and args[0] == 'jobman_insert':
+ jobman_insert_nist()
+ elif len(args) > 0 and args[0] == 'test_job_tree':
+ # don't forget to comment out sql.inserts and make reduce_train_to=100
+ print "TESTING JOB TREE"
+ chanmock = {'COMPLETE':0} # NOTE(review): jobman_entrypoint returns channel.COMPLETE (attribute access) — a plain dict will raise AttributeError; use DD({'COMPLETE':0})
+ hp = copy.copy(DEFAULT_HP_NIST)
+ hp.update({'reduce_train_to':100})
+ jobman_entrypoint(hp, chanmock)
+ elif len(args) > 0 and args[0] == 'estimate':
+ estimate_total_time()
+ else:
+ sgd_optimization_nist()
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/sgd_optimization.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/sgd_optimization.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,270 @@
+#!/usr/bin/python
+# coding: utf-8
+
+# Generic SdA optimization loop, adapted from the deeplearning.net tutorial
+
+import numpy
+import theano
+import time
+import theano.tensor as T
+import copy
+import sys
+
+from jobman import DD
+import jobman, jobman.sql
+
+from stacked_dae import SdA
+
+def shared_dataset(data_xy):
+ data_x, data_y = data_xy
+ #shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+ #shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
+ #shared_y = T.cast(shared_y, 'int32')
+ shared_x = theano.shared(data_x)
+ shared_y = theano.shared(data_y)
+ return shared_x, shared_y
+
+class SdaSgdOptimizer:
+ def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\
+ job_tree=False, results_db=None,\
+ experiment="",\
+ num_hidden_layers_to_try=[1,2,3], \
+ finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]):
+
+ self.dataset = dataset
+ self.hp = copy.copy(hyperparameters)
+ self.n_ins = n_ins
+ self.n_outs = n_outs
+ self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
+
+ self.job_tree = job_tree
+ self.results_db = results_db
+ self.experiment = experiment
+ if self.job_tree:
+ assert(not results_db is None)
+ # these hp should not be there, so we insert default values
+ # we use 3 hidden layers as we'll iterate through 1,2,3
+ self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway
+ cl = self.hp.corruption_levels
+ nh = self.hp.hidden_layers_sizes
+ self.hp.corruption_levels = [cl,cl,cl]
+ self.hp.hidden_layers_sizes = [nh,nh,nh]
+
+ self.num_hidden_layers_to_try = num_hidden_layers_to_try
+ self.finetuning_lr_to_try = finetuning_lr_to_try
+
+ self.printout_frequency = 1000
+
+ self.rng = numpy.random.RandomState(1234)
+
+ self.init_datasets()
+ self.init_classifier()
+
+ def init_datasets(self):
+ print "init_datasets"
+ train_set, valid_set, test_set = self.dataset
+ self.test_set_x, self.test_set_y = shared_dataset(test_set)
+ self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
+ self.train_set_x, self.train_set_y = shared_dataset(train_set)
+
+ # compute number of minibatches for training, validation and testing
+ self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
+ self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
+ self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size
+
+ def init_classifier(self):
+ print "Constructing classifier"
+ # construct the stacked denoising autoencoder class
+ self.classifier = SdA( \
+ train_set_x= self.train_set_x, \
+ train_set_y = self.train_set_y,\
+ batch_size = self.hp.minibatch_size, \
+ n_ins= self.n_ins, \
+ hidden_layers_sizes = self.hp.hidden_layers_sizes, \
+ n_outs = self.n_outs, \
+ corruption_levels = self.hp.corruption_levels,\
+ rng = self.rng,\
+ pretrain_lr = self.hp.pretraining_lr, \
+ finetune_lr = self.hp.finetuning_lr,\
+ input_divider = self.input_divider )
+
+ def train(self):
+ self.pretrain()
+ if not self.job_tree:
+ # if job_tree is True, finetuning was already performed
+ self.finetune()
+
+ def pretrain(self):
+ print "STARTING PRETRAINING"
+
+ printout_acc = 0.0
+ last_error = 0.0
+
+ start_time = time.clock()
+ ## Pre-train layer-wise
+ for i in xrange(self.classifier.n_layers):
+ # go through pretraining epochs
+ for epoch in xrange(self.hp.pretraining_epochs_per_layer):
+ # go through the training set
+ for batch_index in xrange(self.n_train_batches):
+ c = self.classifier.pretrain_functions[i](batch_index)
+
+ printout_acc += c / self.printout_frequency
+ if (batch_index+1) % self.printout_frequency == 0:
+ print batch_index, "reconstruction cost avg=", printout_acc
+ last_error = printout_acc
+ printout_acc = 0.0
+
+ print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
+
+ self.job_splitter(i+1, time.clock()-start_time, last_error)
+
+ end_time = time.clock()
+
+ print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
+
+ # Save time by reusing intermediate results
+ def job_splitter(self, current_pretraining_layer, pretraining_time, last_error):
+
+ state_copy = None
+ original_classifier = None
+
+ if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try:
+ for lr in self.finetuning_lr_to_try:
+ sys.stdout.flush()
+ sys.stderr.flush()
+
+ state_copy = copy.copy(self.hp)
+
+ self.hp.update({'num_hidden_layers':current_pretraining_layer, \
+ 'finetuning_lr':lr,\
+ 'pretraining_time':pretraining_time,\
+ 'last_reconstruction_error':last_error})
+
+ original_classifier = self.classifier
+ print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means()
+ self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr)
+
+ self.finetune()
+
+ self.insert_finished_job()
+
+ print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means()
+ print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means()
+ self.classifier = original_classifier
+ self.hp = state_copy
+
+ def insert_finished_job(self):
+ job = copy.copy(self.hp)
+ job[jobman.sql.STATUS] = jobman.sql.DONE
+ job[jobman.sql.EXPERIMENT] = self.experiment
+
+        # don't try to store arrays in db
+ job['hidden_layers_sizes'] = job.hidden_layers_sizes[0]
+ job['corruption_levels'] = job.corruption_levels[0]
+
+ print "Will insert finished job", job
+ jobman.sql.insert_dict(jobman.flatten(job), self.results_db)
+
+ def finetune(self):
+ print "STARTING FINETUNING"
+
+ index = T.lscalar() # index to a [mini]batch
+ minibatch_size = self.hp.minibatch_size
+
+ # create a function to compute the mistakes that are made by the model
+ # on the validation set, or testing set
+ test_model = theano.function([index], self.classifier.errors,
+ givens = {
+ self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
+ self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+
+ validate_model = theano.function([index], self.classifier.errors,
+ givens = {
+ self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
+ self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
+
+
+ # early-stopping parameters
+ patience = 10000 # look as this many examples regardless
+ patience_increase = 2. # wait this much longer when a new best is
+ # found
+ improvement_threshold = 0.995 # a relative improvement of this much is
+ # considered significant
+ validation_frequency = min(self.n_train_batches, patience/2)
+ # go through this many
+                                      # minibatches before checking the network
+ # on the validation set; in this case we
+ # check every epoch
+
+ best_params = None
+ best_validation_loss = float('inf')
+ test_score = 0.
+ start_time = time.clock()
+
+ done_looping = False
+ epoch = 0
+
+ printout_acc = 0.0
+
+ if not self.hp.has_key('max_finetuning_epochs'):
+ self.hp.max_finetuning_epochs = 1000
+
+ while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
+ epoch = epoch + 1
+ for minibatch_index in xrange(self.n_train_batches):
+
+ cost_ij = self.classifier.finetune(minibatch_index)
+ iter = epoch * self.n_train_batches + minibatch_index
+
+ printout_acc += cost_ij / float(self.printout_frequency * minibatch_size)
+ if (iter+1) % self.printout_frequency == 0:
+ print iter, "cost avg=", printout_acc
+ printout_acc = 0.0
+
+ if (iter+1) % validation_frequency == 0:
+
+ validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
+ this_validation_loss = numpy.mean(validation_losses)
+ print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+ (epoch, minibatch_index+1, self.n_train_batches, \
+ this_validation_loss*100.))
+
+
+ # if we got the best validation score until now
+ if this_validation_loss < best_validation_loss:
+
+ #improve patience if loss improvement is good enough
+ if this_validation_loss < best_validation_loss * \
+ improvement_threshold :
+ patience = max(patience, iter * patience_increase)
+
+ # save best validation score and iteration number
+ best_validation_loss = this_validation_loss
+ best_iter = iter
+
+ # test it on the test set
+ test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
+ test_score = numpy.mean(test_losses)
+ print((' epoch %i, minibatch %i/%i, test error of best '
+ 'model %f %%') %
+ (epoch, minibatch_index+1, self.n_train_batches,
+ test_score*100.))
+
+
+ if patience <= iter :
+ done_looping = True
+ break
+
+ end_time = time.clock()
+ self.hp.update({'finetuning_time':end_time-start_time,\
+ 'best_validation_error':best_validation_loss,\
+ 'test_score':test_score,
+ 'num_finetuning_epochs':epoch})
+ print(('Optimization complete with best validation score of %f %%,'
+ 'with test performance %f %%') %
+ (best_validation_loss * 100., test_score*100.))
+ print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
+
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/stacked_dae.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/stacked_dae.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,287 @@
+#!/usr/bin/python
+# coding: utf-8
+
+import numpy
+import theano
+import time
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+import copy
+
+from utils import update_locals
+
+class LogisticRegression(object):
+ def __init__(self, input, n_in, n_out):
+ # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
+ self.W = theano.shared( value=numpy.zeros((n_in,n_out),
+ dtype = theano.config.floatX) )
+ # initialize the baises b as a vector of n_out 0s
+ self.b = theano.shared( value=numpy.zeros((n_out,),
+ dtype = theano.config.floatX) )
+ # compute vector of class-membership probabilities in symbolic form
+ self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
+
+ # compute prediction as class whose probability is maximal in
+ # symbolic form
+ self.y_pred=T.argmax(self.p_y_given_x, axis=1)
+
+ # list of parameters for this layer
+ self.params = [self.W, self.b]
+
+ def negative_log_likelihood(self, y):
+ return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
+
+ def errors(self, y):
+ # check if y has same dimension of y_pred
+ if y.ndim != self.y_pred.ndim:
+ raise TypeError('y should have the same shape as self.y_pred',
+ ('y', target.type, 'y_pred', self.y_pred.type))
+
+ # check if y is of the correct datatype
+ if y.dtype.startswith('int'):
+ # the T.neq operator returns a vector of 0s and 1s, where 1
+ # represents a mistake in prediction
+ return T.mean(T.neq(self.y_pred, y))
+ else:
+ raise NotImplementedError()
+
+
+class SigmoidalLayer(object):
+ def __init__(self, rng, input, n_in, n_out):
+ self.input = input
+
+ W_values = numpy.asarray( rng.uniform( \
+ low = -numpy.sqrt(6./(n_in+n_out)), \
+ high = numpy.sqrt(6./(n_in+n_out)), \
+ size = (n_in, n_out)), dtype = theano.config.floatX)
+ self.W = theano.shared(value = W_values)
+
+ b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
+ self.b = theano.shared(value= b_values)
+
+ self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
+ self.params = [self.W, self.b]
+
+
+
+class dA(object):
+ def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\
+ input = None, shared_W = None, shared_b = None):
+ self.n_visible = n_visible
+ self.n_hidden = n_hidden
+
+ # create a Theano random generator that gives symbolic random values
+ theano_rng = RandomStreams()
+
+ if shared_W != None and shared_b != None :
+ self.W = shared_W
+ self.b = shared_b
+ else:
+ # initial values for weights and biases
+ # note : W' was written as `W_prime` and b' as `b_prime`
+
+ # W is initialized with `initial_W` which is uniformely sampled
+ # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible)
+ # the output of uniform if converted using asarray to dtype
+ # theano.config.floatX so that the code is runable on GPU
+ initial_W = numpy.asarray( numpy.random.uniform( \
+ low = -numpy.sqrt(6./(n_hidden+n_visible)), \
+ high = numpy.sqrt(6./(n_hidden+n_visible)), \
+ size = (n_visible, n_hidden)), dtype = theano.config.floatX)
+ initial_b = numpy.zeros(n_hidden, dtype = theano.config.floatX)
+
+
+ # theano shared variables for weights and biases
+ self.W = theano.shared(value = initial_W, name = "W")
+ self.b = theano.shared(value = initial_b, name = "b")
+
+
+ initial_b_prime= numpy.zeros(n_visible)
+ # tied weights, therefore W_prime is W transpose
+ self.W_prime = self.W.T
+ self.b_prime = theano.shared(value = initial_b_prime, name = "b'")
+
+ # if no input is given, generate a variable representing the input
+ if input == None :
+ # we use a matrix because we expect a minibatch of several examples,
+ # each example being a row
+ self.x = T.dmatrix(name = 'input')
+ else:
+ self.x = input
+ # Equation (1)
+        # keep a (1 - corruption_level) fraction of the inputs the same and zero-out a randomly selected subset of the rest
+ # note : first argument of theano.rng.binomial is the shape(size) of
+ # random numbers that it should produce
+ # second argument is the number of trials
+ # third argument is the probability of success of any trial
+ #
+ # this will produce an array of 0s and 1s where 1 has a
+ # probability of 1 - ``corruption_level`` and 0 with
+ # ``corruption_level``
+ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
+ # Equation (2)
+ # note : y is stored as an attribute of the class so that it can be
+ # used later when stacking dAs.
+ self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
+ # Equation (3)
+ self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
+ # Equation (4)
+ # note : we sum over the size of a datapoint; if we are using minibatches,
+ # L will be a vector, with one entry per example in minibatch
+ self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
+ # note : L is now a vector, where each element is the cross-entropy cost
+ # of the reconstruction of the corresponding example of the
+ # minibatch. We need to compute the average of all these to get
+ # the cost of the minibatch
+ self.cost = T.mean(self.L)
+
+ self.params = [ self.W, self.b, self.b_prime ]
+
+
+
+
+class SdA(object):
+ def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
+ hidden_layers_sizes, n_outs,
+ corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
+ update_locals(self, locals())
+
+ self.layers = []
+ self.pretrain_functions = []
+ self.params = []
+ self.n_layers = len(hidden_layers_sizes)
+
+ self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
+
+ if len(hidden_layers_sizes) < 1 :
+ raiseException (' You must have at least one hidden layer ')
+
+
+ # allocate symbolic variables for the data
+ index = T.lscalar() # index to a [mini]batch
+ self.x = T.matrix('x') # the data is presented as rasterized images
+ self.y = T.ivector('y') # the labels are presented as 1D vector of
+ # [int] labels
+
+ for i in xrange( self.n_layers ):
+ # construct the sigmoidal layer
+
+ # the size of the input is either the number of hidden units of
+ # the layer below or the input size if we are on the first layer
+ if i == 0 :
+ input_size = n_ins
+ else:
+ input_size = hidden_layers_sizes[i-1]
+
+ # the input to this layer is either the activation of the hidden
+ # layer below or the input of the SdA if you are on the first
+ # layer
+ if i == 0 :
+ layer_input = self.x
+ else:
+ layer_input = self.layers[-1].output
+
+ layer = SigmoidalLayer(rng, layer_input, input_size,
+ hidden_layers_sizes[i] )
+            # add the layer to the list of layers of this SdA
+ self.layers += [layer]
+ self.params += layer.params
+
+ # Construct a denoising autoencoder that shared weights with this
+ # layer
+ dA_layer = dA(input_size, hidden_layers_sizes[i], \
+ corruption_level = corruption_levels[0],\
+ input = layer_input, \
+ shared_W = layer.W, shared_b = layer.b)
+
+ # Construct a function that trains this dA
+ # compute gradients of layer parameters
+ gparams = T.grad(dA_layer.cost, dA_layer.params)
+ # compute the list of updates
+ updates = {}
+ for param, gparam in zip(dA_layer.params, gparams):
+ updates[param] = param - gparam * pretrain_lr
+
+ # create a function that trains the dA
+ update_fn = theano.function([index], dA_layer.cost, \
+ updates = updates,
+ givens = {
+ self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
+ # collect this function into a list
+ self.pretrain_functions += [update_fn]
+
+
+ # We now need to add a logistic layer on top of the MLP
+ self.logLayer = LogisticRegression(\
+ input = self.layers[-1].output,\
+ n_in = hidden_layers_sizes[-1], n_out = n_outs)
+
+ self.params += self.logLayer.params
+ # construct a function that implements one step of finetunining
+
+ # compute the cost, defined as the negative log likelihood
+ cost = self.logLayer.negative_log_likelihood(self.y)
+ # compute the gradients with respect to the model parameters
+ gparams = T.grad(cost, self.params)
+ # compute list of updates
+ updates = {}
+ for param,gparam in zip(self.params, gparams):
+ updates[param] = param - gparam*finetune_lr
+
+ self.finetune = theano.function([index], cost,
+ updates = updates,
+ givens = {
+ self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
+ self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
+
+ # symbolic variable that points to the number of errors made on the
+ # minibatch given by self.x and self.y
+
+ self.errors = self.logLayer.errors(self.y)
+
+ @classmethod
+ def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None):
+ assert(num_hidden_layers <= obj.n_layers)
+
+ if not new_finetuning_lr:
+ new_finetuning_lr = obj.finetune_lr
+
+ new_sda = cls(train_set_x= obj.train_set_x, \
+ train_set_y = obj.train_set_y,\
+ batch_size = obj.batch_size, \
+ n_ins= obj.n_ins, \
+ hidden_layers_sizes = obj.hidden_layers_sizes[:num_hidden_layers], \
+ n_outs = obj.n_outs, \
+ corruption_levels = obj.corruption_levels[:num_hidden_layers],\
+ rng = obj.rng,\
+ pretrain_lr = obj.pretrain_lr, \
+ finetune_lr = new_finetuning_lr, \
+ input_divider = obj.input_divider )
+
+        # new_sda.layers contains only the first num_hidden_layers hidden layers
+ for i, layer in enumerate(new_sda.layers):
+ original_layer = obj.layers[i]
+ for p1,p2 in zip(layer.params, original_layer.params):
+ p1.value = p2.value.copy()
+
+ return new_sda
+
+ def get_params_copy(self):
+ return copy.deepcopy(self.params)
+
+ def set_params_from_copy(self, copy):
+ # We don't want to replace the var, as the functions have pointers in there
+ # We only want to replace values.
+ for i, p in enumerate(self.params):
+ p.value = copy[i].value
+
+ def get_params_means(self):
+ s = []
+ for p in self.params:
+ s.append(numpy.mean(p.value))
+ return s
+
+if __name__ == '__main__':
+ import sys
+ args = sys.argv[1:]
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 deep/stacked_dae/utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/utils.py Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+
+from jobman import DD
+
+# from pylearn codebase
+def update_locals(obj, dct):
+ if 'self' in dct:
+ del dct['self']
+ obj.__dict__.update(dct)
+
+def produit_croise_jobs(val_dict):
+ job_list = [DD()]
+ all_keys = val_dict.keys()
+
+ for key in all_keys:
+ possible_values = val_dict[key]
+ new_job_list = []
+ for val in possible_values:
+ for job in job_list:
+ to_insert = job.copy()
+ to_insert.update({key: val})
+ new_job_list.append(to_insert)
+ job_list = new_job_list
+
+ return job_list
+
+def test_produit_croise_jobs():
+ vals = {'a': [1,2], 'b': [3,4,5]}
+ print produit_croise_jobs(vals)
+
+
+# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
+"""Simple module for getting amount of memory used by a specified user's
+processes on a UNIX system.
+It uses UNIX ps utility to get the memory usage for a specified username and
+pipe it to awk for summing up per application memory usage and return the total.
+Python's Popen() from subprocess module is used for spawning ps and awk.
+
+"""
+
+import subprocess
+
+class MemoryMonitor(object):
+
+ def __init__(self, username):
+ """Create new MemoryMonitor instance."""
+ self.username = username
+
+ def usage(self):
+ """Return int containing memory used by user's processes."""
+ self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
+ shell=True,
+ stdout=subprocess.PIPE,
+ )
+ self.stdout_list = self.process.communicate()[0].split('\n')
+ return int(self.stdout_list[0])
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/.DS_Store
Binary file pycaptcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/BUGS
--- a/pycaptcha/BUGS Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-Known bugs:
-
-- PersistentFactory() is almost certainly horrible at concurrent access
-- Tests are never invalidated with PersistentStorage(), as they aren't written back to the database
-- All files in Captcha/data are installed, including silly things like .svn directories and *~
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/COPYING
--- a/pycaptcha/COPYING Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-Copyright (c) 2004 Micah Dowty
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/.DS_Store
Binary file pycaptcha/Captcha/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Base.py
--- a/pycaptcha/Captcha/Base.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-""" Captcha.Base
-
-Base class for all types of CAPTCHA tests. All tests have one or
-more solution, determined when the test is generated. Solutions
-can be any python object,
-
-All tests can be solved by presenting at least some preset number
-of correct solutions. Some tests may only have one solution and require
-one solution, but other tests may require N correct solutions of M
-possible solutions.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-import random, string, time, shelve
-
-__all__ = ["BaseCaptcha", "Factory", "PersistentFactory"]
-
-
-def randomIdentifier(alphabet = string.ascii_letters + string.digits,
- length = 24):
- return "".join([random.choice(alphabet) for i in xrange(length)])
-
-
-class BaseCaptcha(object):
- """Base class for all CAPTCHA tests"""
- # Subclasses can override these to set the solution criteria
- minCorrectSolutions = 1
- maxIncorrectSolutions = 0
-
- def __init__(self):
- self.solutions = []
- self.valid = True
-
- # Each test has a unique identifier, used to refer to that test
- # later, and a creation time so it can expire later.
- self.id = randomIdentifier()
- self.creationTime = time.time()
-
- def addSolution(self, solution):
- self.solutions.append(solution)
-
- def testSolutions(self, solutions):
- """Test whether the given solutions are sufficient for this CAPTCHA.
- A given CAPTCHA can only be tested once, after that it is invalid
- and always returns False. This makes random guessing much less effective.
- """
- if not self.valid:
- return False
- self.valid = False
-
- numCorrect = 0
- numIncorrect = 0
-
- for solution in solutions:
- if solution in self.solutions:
- numCorrect += 1
- else:
- numIncorrect += 1
-
- return numCorrect >= self.minCorrectSolutions and \
- numIncorrect <= self.maxIncorrectSolutions
-
-
-class Factory(object):
- """Creates BaseCaptcha instances on demand, and tests solutions.
- CAPTCHAs expire after a given amount of time, given in seconds.
- The default is 15 minutes.
- """
- def __init__(self, lifetime=60*15):
- self.lifetime = lifetime
- self.storedInstances = {}
-
- def new(self, cls, *args, **kwargs):
- """Create a new instance of our assigned BaseCaptcha subclass, passing
- it any extra arguments we're given. This stores the result for
- later testing.
- """
- self.clean()
- inst = cls(*args, **kwargs)
- self.storedInstances[inst.id] = inst
- return inst
-
- def get(self, id):
- """Retrieve the CAPTCHA with the given ID. If it's expired already,
- this will return None. A typical web application will need to
- new() a CAPTCHA when generating an html page, then get() it later
- when its images or sounds must be rendered.
- """
- return self.storedInstances.get(id)
-
- def clean(self):
- """Removed expired tests"""
- expiredIds = []
- now = time.time()
- for inst in self.storedInstances.itervalues():
- if inst.creationTime + self.lifetime < now:
- expiredIds.append(inst.id)
- for id in expiredIds:
- del self.storedInstances[id]
-
- def test(self, id, solutions):
- """Test the given list of solutions against the BaseCaptcha instance
- created earlier with the given id. Returns True if the test passed,
- False on failure. In either case, the test is invalidated. Returns
- False in the case of an invalid id.
- """
- self.clean()
- inst = self.storedInstances.get(id)
- if not inst:
- return False
- result = inst.testSolutions(solutions)
- return result
-
-
-class PersistentFactory(Factory):
- """A simple persistent factory, for use in CGI or multi-process environments
- where the state must remain across python interpreter sessions.
- This implementation uses the 'shelve' module.
- """
- def __init__(self, filename, lifetime=60*15):
- Factory.__init__(self, lifetime)
- self.storedInstances = shelve.open(filename)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/File.py
--- a/pycaptcha/Captcha/File.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-""" Captcha.File
-
-Utilities for finding and picking random files from our 'data' directory
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-import os, random
-
-# Determine the data directory. This can be overridden after import-time if needed.
-dataDir = os.path.join(os.path.split(os.path.abspath(__file__))[0], "data")
-
-
-class RandomFileFactory(object):
- """Given a list of files and/or directories, this picks a random file.
- Directories are searched for files matching any of a list of extensions.
- Files are relative to our data directory plus a subclass-specified base path.
- """
- extensions = []
- basePath = "."
-
- def __init__(self, *fileList):
- self.fileList = fileList
- self._fullPaths = None
-
- def _checkExtension(self, name):
- """Check the file against our given list of extensions"""
- for ext in self.extensions:
- if name.endswith(ext):
- return True
- return False
-
- def _findFullPaths(self):
- """From our given file list, find a list of full paths to files"""
- paths = []
- for name in self.fileList:
- path = os.path.join(dataDir, self.basePath, name)
- if os.path.isdir(path):
- for content in os.listdir(path):
- if self._checkExtension(content):
- paths.append(os.path.join(path, content))
- else:
- paths.append(path)
- return paths
-
- def pick(self):
- if self._fullPaths is None:
- self._fullPaths = self._findFullPaths()
- return random.choice(self._fullPaths)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Backgrounds.py
--- a/pycaptcha/Captcha/Visual/Backgrounds.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-""" Captcha.Visual.Backgrounds
-
-Background layers for visual CAPTCHAs
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-from Captcha.Visual import Layer, Pictures
-import random, os
-import ImageDraw, Image
-
-
-class SolidColor(Layer):
- """A solid color background. Very weak on its own, but good
- to combine with other backgrounds.
- """
- def __init__(self, color="white"):
- self.color = color
-
- def render(self, image):
- image.paste(self.color)
-
-
-class Grid(Layer):
- """A grid of lines, with a given foreground color.
- The size is given in pixels. The background is transparent,
- so another layer (like SolidColor) should be put behind it.
- """
- def __init__(self, size=16, foreground="black"):
- self.size = size
- self.foreground = foreground
- self.offset = (random.uniform(0, self.size),
- random.uniform(0, self.size))
-
- def render(self, image):
- draw = ImageDraw.Draw(image)
-
- for i in xrange(image.size[0] / self.size + 1):
- draw.line( (i*self.size+self.offset[0], 0,
- i*self.size+self.offset[0], image.size[1]), fill=self.foreground)
-
- for i in xrange(image.size[0] / self.size + 1):
- draw.line( (0, i*self.size+self.offset[1],
- image.size[0], i*self.size+self.offset[1]), fill=self.foreground)
-
-
-class TiledImage(Layer):
- """Pick a random image and a random offset, and tile the rendered image with it"""
- def __init__(self, imageFactory=Pictures.abstract):
- self.tileName = imageFactory.pick()
- self.offset = (random.uniform(0, 1),
- random.uniform(0, 1))
-
- def render(self, image):
- tile = Image.open(self.tileName)
- for j in xrange(-1, int(image.size[1] / tile.size[1]) + 1):
- for i in xrange(-1, int(image.size[0] / tile.size[0]) + 1):
- dest = (int((self.offset[0] + i) * tile.size[0]),
- int((self.offset[1] + j) * tile.size[1]))
- image.paste(tile, dest)
-
-
-class CroppedImage(Layer):
- """Pick a random image, cropped randomly. Source images should be larger than the CAPTCHA."""
- def __init__(self, imageFactory=Pictures.nature):
- self.imageName = imageFactory.pick()
- self.align = (random.uniform(0,1),
- random.uniform(0,1))
-
- def render(self, image):
- i = Image.open(self.imageName)
- image.paste(i, (int(self.align[0] * (image.size[0] - i.size[0])),
- int(self.align[1] * (image.size[1] - i.size[1]))))
-
-
-class RandomDots(Layer):
- """Draw random colored dots"""
- def __init__(self, colors=("white", "black"), dotSize=4, numDots=400):
- self.colors = colors
- self.dotSize = dotSize
- self.numDots = numDots
- self.seed = random.random()
-
- def render(self, image):
- r = random.Random(self.seed)
- for i in xrange(self.numDots):
- bx = int(r.uniform(0, image.size[0]-self.dotSize))
- by = int(r.uniform(0, image.size[1]-self.dotSize))
- image.paste(r.choice(self.colors), (bx, by,
- bx+self.dotSize-1,
- by+self.dotSize-1))
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Base.py
--- a/pycaptcha/Captcha/Visual/Base.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-""" Captcha.Visual.BAse
-
-Base classes for visual CAPTCHAs. We use the Python Imaging Library
-to manipulate these images.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-import Captcha
-import Image
-
-__all__ = ['ImageCaptcha', 'Layer']
-
-
-class ImageCaptcha(Captcha.BaseCaptcha):
- """Base class for image-based CAPTCHA tests.
- The render() function generates the CAPTCHA image at the given size by
- combining Layer instances from self.layers, which should be created by
- the subclass-defined getLayers().
- """
- defaultSize = (32,32)
- # anciennement a defaultSize(256,96)
- def __init__(self, *args, **kwargs):
- Captcha.BaseCaptcha.__init__(self)
- self._layers = self.getLayers(*args, **kwargs)
-
- def getImage(self):
- """Get a PIL image representing this CAPTCHA test, creating it if necessary"""
- if not self._image:
- self._image = self.render()
- return self._image
-
- def getLayers(self):
- """Subclasses must override this to return a list of Layer instances to render.
- Lists within the list of layers are recursively rendered.
- """
- return []
-
- def render(self, size=None):
- """Render this CAPTCHA, returning a PIL image"""
- if size is None:
- size = self.defaultSize
- img = Image.new("L", size)
- # img = Image.new("RGB", size)
- return self._renderList(self._layers, Image.new("L", size))
-
- def _renderList(self, l, img):
- for i in l:
- if type(i) == tuple or type(i) == list:
- img = self._renderList(i, img)
- else:
- img = i.render(img) or img
- return img
-
-
-class Layer(object):
- """A renderable object representing part of a CAPTCHA.
- The render() function should return approximately the same result, regardless
- of the image size. This means any randomization must occur in the constructor.
-
- If the render() function returns something non-None, it is taken as an image to
- replace the current image with. This can be used to implement transformations
- that result in a separate image without having to copy the results back to the first.
- """
- def render(self, img):
- pass
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Distortions.py
--- a/pycaptcha/Captcha/Visual/Distortions.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-""" Captcha.Visual.Distortions
-
-Distortion layers for visual CAPTCHAs
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-from Captcha.Visual import Layer
-import ImageDraw, Image
-import random, math
-
-
-class WigglyBlocks(Layer):
- """Randomly select and shift blocks of the image"""
- def __init__(self, blockSize=3, sigma=0.01, iterations=300):
- self.blockSize = blockSize
- self.sigma = sigma
- self.iterations = iterations
- self.seed = random.random()
-
- def render(self, image):
- r = random.Random(self.seed)
- for i in xrange(self.iterations):
- # Select a block
- bx = int(r.uniform(0, image.size[0]-self.blockSize))
- by = int(r.uniform(0, image.size[1]-self.blockSize))
- block = image.crop((bx, by, bx+self.blockSize-1, by+self.blockSize-1))
-
- # Figure out how much to move it.
- # The call to floor() is important so we always round toward
- # 0 rather than to -inf. Just int() would bias the block motion.
- mx = int(math.floor(r.normalvariate(0, self.sigma)))
- my = int(math.floor(r.normalvariate(0, self.sigma)))
-
- # Now actually move the block
- image.paste(block, (bx+mx, by+my))
-
-
-class WarpBase(Layer):
- """Abstract base class for image warping. Subclasses define a
- function that maps points in the output image to points in the input image.
- This warping engine runs a grid of points through this transform and uses
- PIL's mesh transform to warp the image.
- """
- filtering = Image.BILINEAR
- resolution = 10
-
- def getTransform(self, image):
- """Return a transformation function, subclasses should override this"""
- return lambda x, y: (x, y)
-
- def render(self, image):
- r = self.resolution
- xPoints = image.size[0] / r + 2
- yPoints = image.size[1] / r + 2
- f = self.getTransform(image)
-
- # Create a list of arrays with transformed points
- xRows = []
- yRows = []
- for j in xrange(yPoints):
- xRow = []
- yRow = []
- for i in xrange(xPoints):
- x, y = f(i*r, j*r)
-
- # Clamp the edges so we don't get black undefined areas
- x = max(0, min(image.size[0]-1, x))
- y = max(0, min(image.size[1]-1, y))
-
- xRow.append(x)
- yRow.append(y)
- xRows.append(xRow)
- yRows.append(yRow)
-
- # Create the mesh list, with a transformation for
- # each square between points on the grid
- mesh = []
- for j in xrange(yPoints-1):
- for i in xrange(xPoints-1):
- mesh.append((
- # Destination rectangle
- (i*r, j*r,
- (i+1)*r, (j+1)*r),
- # Source quadrilateral
- (xRows[j ][i ], yRows[j ][i ],
- xRows[j+1][i ], yRows[j+1][i ],
- xRows[j+1][i+1], yRows[j+1][i+1],
- xRows[j ][i+1], yRows[j ][i+1]),
- ))
-
- return image.transform(image.size, Image.MESH, mesh, self.filtering)
-
-
-class SineWarp(WarpBase):
- """Warp the image using a random composition of sine waves"""
-
- def __init__(self,
- amplitudeRange = (3, 6.5),
- periodRange = (0.04, 0.1),
- ):
- self.amplitude = random.uniform(*amplitudeRange)
- self.period = random.uniform(*periodRange)
- self.offset = (random.uniform(0, math.pi * 2 / self.period),
- random.uniform(0, math.pi * 2 / self.period))
-
- def getTransform(self, image):
- return (lambda x, y,
- a = self.amplitude,
- p = self.period,
- o = self.offset:
- (math.sin( (y+o[0])*p )*a + x,
- math.sin( (x+o[1])*p )*a + y))
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Pictures.py
--- a/pycaptcha/Captcha/Visual/Pictures.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-""" Captcha.Visual.Pictures
-
-Random collections of images
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-from Captcha import File
-import Image
-
-
-class ImageFactory(File.RandomFileFactory):
- """A factory that generates random images from a list"""
- extensions = [".png", ".jpeg"]
- basePath = "pictures"
-
-
-abstract = ImageFactory("abstract")
-nature = ImageFactory("nature")
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Tests.py
--- a/pycaptcha/Captcha/Visual/Tests.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-""" Captcha.Visual.Tests
-
-Visual CAPTCHA tests
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-from Captcha.Visual import Text, Backgrounds, Distortions, ImageCaptcha
-from Captcha import Words
-import random
-
-__all__ = ["PseudoGimpy", "AngryGimpy", "AntiSpam"]
-
-
-class PseudoGimpy(ImageCaptcha):
- """A relatively easy CAPTCHA that's somewhat easy on the eyes"""
- def getLayers(self):
- word = Words.defaultWordList.pick()
- self.addSolution(word)
- return [
- # random.choice([
- # Backgrounds.CroppedImage(),
- # Backgrounds.TiledImage(),
- # ]),
- Text.TextLayer(word, borderSize=1),
- Distortions.SineWarp(),
- ]
-
-
-class AngryGimpy(ImageCaptcha):
- """A harder but less visually pleasing CAPTCHA"""
- def getLayers(self):
- word = Words.defaultWordList.pick()
- self.addSolution(word)
- return [
- # suppression du background
- # Backgrounds.TiledImage(),
- # Backgrounds.RandomDots(),
- Text.TextLayer(word, borderSize=1),
- # Distortions.SineWarp(periodRange = (0.04, 0.07))
- Distortions.WigglyBlocks(),
- ]
-
-
-class AntiSpam(ImageCaptcha):
- """A fixed-solution CAPTCHA that can be used to hide email addresses or URLs from bots"""
- fontFactory = Text.FontFactory(20, "vera/VeraBd.ttf")
- defaultSize = (512,50)
-
- def getLayers(self, solution="murray@example.com"):
- self.addSolution(solution)
-
- textLayer = Text.TextLayer(solution,
- borderSize = 2,
- fontFactory = self.fontFactory)
-
- return [
- Backgrounds.CroppedImage(),
- textLayer,
- Distortions.SineWarp(amplitudeRange = (3, 5)),
- ]
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/Text.py
--- a/pycaptcha/Captcha/Visual/Text.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-""" Captcha.Visual.Text
-
-Text generation for visual CAPTCHAs.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-import random, os
-from Captcha import Visual, File
-import ImageFont, ImageDraw
-
-
-class FontFactory(File.RandomFileFactory):
- """Picks random fonts and/or sizes from a given list.
- 'sizes' can be a single size or a (min,max) tuple.
- If any of the given files are directories, all *.ttf found
- in that directory will be added.
- """
- extensions = [".ttf", ".TTF"]
- basePath = "fonts"
-
-# arguments variables a modifier pour mettre le chemin vers les fontes.
- def __init__(self, sizes, *fileNames):
- File.RandomFileFactory.__init__(self, *fileNames)
-
- if type(sizes) is tuple:
- self.minSize = sizes[0]
- self.maxSize = sizes[1]
- else:
- self.minSize = sizes
- self.maxSize = sizes
-
- def pick(self):
- """Returns a (fileName, size) tuple that can be passed to ImageFont.truetype()"""
- fileName = File.RandomFileFactory.pick(self)
- size = int(random.uniform(self.minSize, self.maxSize) + 0.5)
- return (fileName, size)
-
-# Predefined font factories
-defaultFontFactory = FontFactory(25, "allfonts")
-#defaultFontFactory = FontFactory((30, 40), "vera")
-
-class TextLayer(Visual.Layer):
- """Represents a piece of text rendered within the image.
- Alignment is given such that (0,0) places the text in the
- top-left corner and (1,1) places it in the bottom-left.
-
- The font and alignment are optional, if not specified one is
- chosen randomly. If no font factory is specified, the default is used.
- """
- def __init__(self, text,
- alignment = None,
- font = None,
- fontFactory = None,
- textColor = "white",
- borderSize = 0,
- borderColor = None,
- ):
- if fontFactory is None:
- global defaultFontFactory
- fontFactory = defaultFontFactory
-
- if font is None:
- font = fontFactory.pick()
-
- if alignment is None:
- alignment = (random.uniform(0,1),
- random.uniform(0,1))
-
- self.text = text
- self.alignment = alignment
- self.font = font
- self.textColor = textColor
- self.borderSize = borderSize
- self.borderColor = borderColor
-
- def render(self, img):
- font = ImageFont.truetype(*self.font)
- textSize = font.getsize(self.text)
- draw = ImageDraw.Draw(img)
-
- # Find the text's origin given our alignment and current image size
- x = int((img.size[0] - textSize[0] - self.borderSize*2) * self.alignment[0] + 0.5)
- y = int((img.size[1] - textSize[1] - self.borderSize*2) * self.alignment[1] + 0.5)
-
- # Draw the border if we need one. This is slow and ugly, but there doesn't
- # seem to be a better way with PIL.
- if self.borderSize > 0:
- for bx in (-1,0,1):
- for by in (-1,0,1):
- if bx and by:
- draw.text((x + bx * self.borderSize,
- y + by * self.borderSize),
- self.text, font=font, fill=self.borderColor)
-
- # And the text itself...
- draw.text((x,y), self.text, font=font, fill=self.textColor)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Visual/__init__.py
--- a/pycaptcha/Captcha/Visual/__init__.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-""" Captcha.Visual
-
-This package contains functionality specific to visual CAPTCHA tests.
-
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-# Convenience imports
-from Base import *
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/Words.py
--- a/pycaptcha/Captcha/Words.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-""" Captcha.Words
-
-Utilities for managing word lists and finding random words
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-import random, os
-import File
-
-
-class WordList(object):
- """A class representing a word list read from disk lazily.
- Blank lines and comment lines starting with '#' are ignored.
- Any number of words per line may be used. The list can
- optionally ingore words not within a given length range.
- """
- def __init__(self, fileName, minLength=None, maxLength=None):
- self.words = None
- self.fileName = fileName
- self.minLength = minLength
- self.maxLength = maxLength
-
- def read(self):
- """Read words from disk"""
- f = open(os.path.join(File.dataDir, "words", self.fileName))
-
- self.words = []
- for line in f.xreadlines():
- line = line.strip()
- if not line:
- continue
- if line[0] == '#':
- continue
- for word in line.split():
- if self.minLength is not None and len(word) < self.minLength:
- continue
- if self.maxLength is not None and len(word) > self.maxLength:
- continue
- self.words.append(word)
-
- def pick(self):
- """Pick a random word from the list, reading it in if necessary"""
- if self.words is None:
- self.read()
- return random.choice(self.words)
-
-
-# Define several shared word lists that are read from disk on demand
-basic_english = WordList("basic-english")
-basic_english_restricted = WordList("basic-english", minLength=5, maxLength=8)
-characters = WordList("characters")
-defaultWordList = characters
-
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/__init__.py
--- a/pycaptcha/Captcha/__init__.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-""" Captcha
-
-This is the PyCAPTCHA package, a collection of Python modules
-implementing CAPTCHAs: automated tests that humans should pass,
-but current computer programs can't. These tests are often
-used for security.
-
-See http://www.captcha.net for more information and examples.
-
-This project was started because the CIA project, written in
-Python, needed a CAPTCHA to automate its user creation process
-safely. All existing implementations the author could find were
-written in Java or for the .NET framework, so a simple Python
-alternative was needed.
-"""
-#
-# PyCAPTCHA Package
-# Copyright (C) 2004 Micah Dowty
-#
-
-__version__ = "0.3-pre"
-
-
-# Check the python version here before we proceed further
-requiredPythonVersion = (2,2,1)
-def checkVersion():
- import sys, string
- if sys.version_info < requiredPythonVersion:
- raise Exception("%s requires at least Python %s, found %s instead." % (
- name,
- string.join(map(str, requiredPythonVersion), "."),
- string.join(map(str, sys.version_info), ".")))
-checkVersion()
-
-
-# Convenience imports
-from Base import *
-import File
-import Words
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/.DS_Store
Binary file pycaptcha/Captcha/data/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/.DS_Store
Binary file pycaptcha/Captcha/data/fonts/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/allfonts
--- a/pycaptcha/Captcha/data/fonts/allfonts Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-/Tmp/allfonts
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/._atari-small.bdf
Binary file pycaptcha/Captcha/data/fonts/others/._atari-small.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/._cursive.bdf
Binary file pycaptcha/Captcha/data/fonts/others/._cursive.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf
Binary file pycaptcha/Captcha/data/fonts/others/._radon-wide.bdf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/CIDFnmap
--- a/pycaptcha/Captcha/data/fonts/others/CIDFnmap Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-/Dotum-Bold (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /Adobe-Korea1-Unicode ;
-/ZenHei (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-GB1-Unicode ;
-/Batang-Regular (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /Adobe-Korea1-Unicode ;
-/VL-PGothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan1-Unicode ;
-/Dotum-Regular (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /Adobe-Korea1-Unicode ;
-/VL-Gothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan2-Unicode ;
-/VL-Gothic-Regular (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /Adobe-Japan1-Unicode ;
-/VL-PGothic-Regular-JaH (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /Adobe-Japan2-Unicode ;
-/ZenHei-CNS (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /Adobe-CNS1-Unicode ;
-/Batang-Bold (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /Adobe-Korea1-Unicode ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/FAPIfontmap
--- a/pycaptcha/Captcha/data/fonts/others/FAPIfontmap Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-/Garuda-Oblique << /Path (/usr/share/fonts/truetype/thai/Garuda-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstOne << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOne.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Vemana2000 << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Vemana.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-BoldItalic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-Oblique << /Path (/usr/share/fonts/truetype/thai/Umpush-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Malige << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/Malige-b.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma-Oblique << /Path (/usr/share/fonts/truetype/thai/Loma-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstBook << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstBook.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Norasi-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Serif << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstOffice << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstOffice.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree-Oblique << /Path (/usr/share/fonts/truetype/thai/Waree-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstFarsi << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstFarsi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Garuda-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Garuda-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-BoldOblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/utkal << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/utkal.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-Italic << /Path (/usr/share/fonts/truetype/freefont/FreeSerifItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-Italic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee-BoldOblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeBoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmex10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmex10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-Bold << /Path (/usr/share/fonts/truetype/thai/Norasi-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma << /Path (/usr/share/fonts/truetype/thai/Loma.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/wasy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/wasy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-BoldItalic << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstNaskh << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstNaskh.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree << /Path (/usr/share/fonts/truetype/thai/Waree.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Garuda << /Path (/usr/share/fonts/truetype/thai/Garuda.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmsy10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmsy10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypist-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee-Bold << /Path (/usr/share/fonts/truetype/thai/SawasdeeBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Purisa << /Path (/usr/share/fonts/truetype/thai/Purisa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstPoster << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstPoster.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeSansOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Punjabi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_pa.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Waree-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Garuda-Bold << /Path (/usr/share/fonts/truetype/thai/Garuda-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/RachanaMedium << /Path (/usr/share/fonts/truetype/ttf-malayalam-fonts/Rachana_04.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstArt << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstArt.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-Bold << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypo-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSerifBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstDecorative << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDecorative.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Hindi << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_hi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-LightOblique << /Path (/usr/share/fonts/truetype/thai/Umpush-LightOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSerif-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSerif.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/mry_KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/mry_KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstDigital << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstDigital.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans-Mono-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Gujarati << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_gu.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationMono-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationMono-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstLetter << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstLetter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypo << /Path (/usr/share/fonts/truetype/thai/TlwgTypo.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/msbm10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msbm10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgMono-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Sans-Mono << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSansMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-Italic << /Path (/usr/share/fonts/truetype/thai/Norasi-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstTitleL << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitleL.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi-Oblique << /Path (/usr/share/fonts/truetype/thai/Norasi-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter-Oblique << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Phetsarath << /Path (/usr/share/fonts/truetype/ttf-lao/Phetsarath_OT.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/mukti << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrow.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee-Oblique << /Path (/usr/share/fonts/truetype/thai/SawasdeeOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmr10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmr10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-Light << /Path (/usr/share/fonts/truetype/thai/Umpush-Light.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush-Bold << /Path (/usr/share/fonts/truetype/thai/Umpush-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/DejaVu-Serif-Bold << /Path (/usr/share/fonts/truetype/ttf-dejavu/DejaVuSerif-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstTitle << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstTitle.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Norasi << /Path (/usr/share/fonts/truetype/thai/Norasi.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-Oblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-Oblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/muktinarrow << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/MuktiNarrowBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-Italic << /Path (/usr/share/fonts/truetype/thai/Kinnari-Italic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/kacstPen << /Path (/usr/share/fonts/truetype/ttf-kacst/kacstPen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Kinnari-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypewriter-Bold << /Path (/usr/share/fonts/truetype/thai/TlwgTypewriter-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeMono-Oblique << /Path (/usr/share/fonts/truetype/freefont/FreeMonoOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-Medium << /Path (/usr/share/fonts/truetype/freefont/FreeSans.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSerif-Regular << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSerif-Regular.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Umpush << /Path (/usr/share/fonts/truetype/thai/Umpush.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Sawasdee << /Path (/usr/share/fonts/truetype/thai/Sawasdee.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono << /Path (/usr/share/fonts/truetype/thai/TlwgMono.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstQurn << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstQurn.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari << /Path (/usr/share/fonts/truetype/thai/Kinnari.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgMono-BoldOblique << /Path (/usr/share/fonts/truetype/thai/TlwgMono-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/KacstScreen << /Path (/usr/share/fonts/truetype/ttf-kacst/KacstScreen.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/FreeSans-Bold << /Path (/usr/share/fonts/truetype/freefont/FreeSansBold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/msam10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/msam10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/cmmi10 << /Path (/usr/share/fonts/truetype/latex-xft-fonts/cmmi10.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Lohit-Tamil << /Path (/usr/share/fonts/truetype/ttf-indic-fonts-core/lohit_ta.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/TlwgTypist << /Path (/usr/share/fonts/truetype/thai/TlwgTypist.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Waree-Bold << /Path (/usr/share/fonts/truetype/thai/Waree-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Kinnari-Bold << /Path (/usr/share/fonts/truetype/thai/Kinnari-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma-Bold << /Path (/usr/share/fonts/truetype/thai/Loma-Bold.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/LiberationSans-BoldItalic << /Path (/usr/share/fonts/truetype/ttf-liberation/LiberationSans-BoldItalic.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Loma-BoldOblique << /Path (/usr/share/fonts/truetype/thai/Loma-BoldOblique.ttf) /FontType 1 /FAPI /FreeType /SubfontId 0 >> ;
-/Palatino-Italic /URWPalladioL-Ital ;
-/Palatino-Bold /URWPalladioL-Bold ;
-/AvantGarde-BookOblique /URWGothicL-BookObli ;
-/Times-Bold /NimbusRomNo9L-Medi ;
-/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ;
-/Times-Roman /NimbusRomNo9L-Regu ;
-/NewCenturySchlbk-Italic /CenturySchL-Ital ;
-/HelveticaNarrow /NimbusSanL-ReguCond ;
-/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ;
-/Bookman-Light /URWBookmanL-Ligh ;
-/Palatino-BoldItalic /URWPalladioL-BoldItal ;
-/Traditional /KacstBook ;
-/Times-BoldItalic /NimbusRomNo9L-MediItal ;
-/AvantGarde-Book /URWGothicL-Book ;
-/AvantGarde-DemiOblique /URWGothicL-DemiObli ;
-/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ;
-/Helvetica-Bold /NimbusSanL-Bold ;
-/Courier-Oblique /NimbusMonL-ReguObli ;
-/Times-Italic /NimbusRomNo9L-ReguItal ;
-/Courier /NimbusMonL-Regu ;
-/Bookman-Demi /URWBookmanL-DemiBold ;
-/Helvetica-BoldOblique /NimbusSanL-BoldItal ;
-/Helvetica-Oblique /NimbusSanL-ReguItal ;
-/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ;
-/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ;
-/Courier-BoldOblique /NimbusMonL-BoldObli ;
-/HelveticaNarrow-Bold /NimbusSanL-BoldCond ;
-/AvantGarde-Demi /URWGothicL-Demi ;
-/Bookman-LightItalic /URWBookmanL-LighItal ;
-/ZapfDingbats /Dingbats ;
-/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ;
-/ZapfChancery-MediumItalic /URWChanceryL-MediItal ;
-/Helvetica /NimbusSanL-Regu ;
-/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ;
-/Palatino-Roman /URWPalladioL-Roma ;
-/NewCenturySchlbk-Bold /CenturySchL-Bold ;
-/NewCenturySchlbk-Roman /CenturySchL-Roma ;
-/Courier-Bold /NimbusMonL-Bold ;
-/Arabic /KacstBook ;
-/Helvetica-Narrow /NimbusSanL-ReguCond ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/Fontmap
--- a/pycaptcha/Captcha/data/fonts/others/Fontmap Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-/LMTypewriter10-CapsOblique (lmtcso10.pfb) ;
-/Dingbats (d050000l.pfb) ;
-/URWBookmanL-DemiBoldItal (b018035l.pfb) ;
-/LMSansQuotation8-Bold (lmssqbx8.pfb) ;
-/Symbol (Symbol.pfb) ;
-/LMTypewriterVarWd10-DarkOblique (lmvtko10.pfb) ;
-/LMRoman10-Demi (lmb10.pfb) ;
-/URWPalladioL-Ital (p052023l.pfb) ;
-/LMTypewriter10-DarkOblique (lmtko10.pfb) ;
-/NimbusSanL-Regu (n019003l.pfb) ;
-/LMTypewriter10-Italic (lmtti10.pfb) ;
-/LMSansQuotation8-BoldOblique (lmssqbo8.pfb) ;
-/URWPalladioL-Roma (p052003l.pfb) ;
-/LMTypewriterVarWd10-Light (lmvtl10.pfb) ;
-/NimbusRomNo9L-Medi (n021004l.pfb) ;
-/NimbusSanL-ReguItal (n019023l.pfb) ;
-/NimbusMonL-Regu (n022003l.pfb) ;
-/LMSans10-Bold (lmssbx10.pfb) ;
-/LMRoman10-CapsOblique (lmcsco10.pfb) ;
-/CenturySchL-Roma (c059013l.pfb) ;
-/URWGothicL-BookObli (a010033l.pfb) ;
-/LMTypewriter10-LightCondensedOblique (lmtlco10.pfb) ;
-/LMSans10-DemiCondensedOblique (lmssdo10.pfb) ;
-/LMRoman10-CapsRegular (lmcsc10.pfb) ;
-/CenturySchL-BoldItal (c059036l.pfb) ;
-/LMRoman10-DemiOblique (lmbo10.pfb) ;
-/LMRoman10-Unslanted (lmu10.pfb) ;
-/LMRoman10-Bold (lmbx10.pfb) ;
-/LMSans10-DemiCondensed (lmssdc10.pfb) ;
-/URWChanceryL-MediItal (z003034l.pfb) ;
-/URWGothicL-DemiObli (a010035l.pfb) ;
-/LMTypewriterVarWd10-Oblique (lmvtto10.pfb) ;
-/NimbusMonL-Bold (n022004l.pfb) ;
-/LMTypewriter10-Oblique (lmtto10.pfb) ;
-/LMRoman10-BoldItalic (lmbxi10.pfb) ;
-/NimbusSanL-ReguCond (n019043l.pfb) ;
-/CenturySchL-Bold (c059016l.pfb) ;
-/LMTypewriterVarWd10-Regular (lmvtt10.pfb) ;
-/URWBookmanL-Ligh (b018012l.pfb) ;
-/LMSansQuotation8-Regular (lmssq8.pfb) ;
-/LMSans10-Regular (lmss10.pfb) ;
-/LMSans10-Oblique (lmsso10.pfb) ;
-/NimbusSanL-BoldCond (n019044l.pfb) ;
-/LMRoman10-Regular (lmr10.pfb) ;
-/LMTypewriter10-LightCondensed (lmtlc10.pfb) ;
-/LMTypewriterVarWd10-Dark (lmvtk10.pfb) ;
-/LMTypewriter10-CapsRegular (lmtcsc10.pfb) ;
-/LMSansQuotation8-Oblique (lmssqo8.pfb) ;
-/StandardSymL (s050000l.pfb) ;
-/NimbusRomNo9L-Regu (n021003l.pfb) ;
-/LMTypewriterVarWd10-LightOblique (lmvtlo10.pfb) ;
-/URWPalladioL-BoldItal (p052024l.pfb) ;
-/CenturySchL-Ital (c059033l.pfb) ;
-/LMRoman10-Dunhill (lmdunh10.pfb) ;
-/URWPalladioL-Bold (p052004l.pfb) ;
-/URWGothicL-Book (a010013l.pfb) ;
-/LMTypewriter10-Dark (lmtk10.pfb) ;
-/NimbusSanL-BoldItal (n019024l.pfb) ;
-/URWGothicL-Demi (a010015l.pfb) ;
-/LMTypewriter10-LightOblique (lmtlo10.pfb) ;
-/LMTypewriter10-Light (lmtl10.pfb) ;
-/NimbusSanL-BoldCondItal (n019064l.pfb) ;
-/LMRoman10-Italic (lmri10.pfb) ;
-/LMRoman10-DunhillOblique (lmduno10.pfb) ;
-/NimbusMonL-ReguObli (n022023l.pfb) ;
-/LMRoman10-Oblique (lmro10.pfb) ;
-/NimbusSanL-ReguCondItal (n019063l.pfb) ;
-/NimbusRomNo9L-MediItal (n021024l.pfb) ;
-/LMRoman10-BoldOblique (lmbxo10.pfb) ;
-/URWBookmanL-DemiBold (b018015l.pfb) ;
-/NimbusSanL-Bold (n019004l.pfb) ;
-/LMSans10-BoldOblique (lmssbo10.pfb) ;
-/URWBookmanL-LighItal (b018032l.pfb) ;
-/NimbusMonL-BoldObli (n022024l.pfb) ;
-/NimbusRomNo9L-ReguItal (n021023l.pfb) ;
-/LMTypewriter10-Regular (lmtt10.pfb) ;
-/Palatino-Italic /URWPalladioL-Ital ;
-/Palatino-Bold /URWPalladioL-Bold ;
-/AvantGarde-BookOblique /URWGothicL-BookObli ;
-/Times-Bold /NimbusRomNo9L-Medi ;
-/HelveticaNarrow-BoldOblique /NimbusSanL-BoldCondItal ;
-/Times-Roman /NimbusRomNo9L-Regu ;
-/NewCenturySchlbk-Italic /CenturySchL-Ital ;
-/HelveticaNarrow /NimbusSanL-ReguCond ;
-/Helvetica-Narrow-Bold /NimbusSanL-BoldCond ;
-/Bookman-Light /URWBookmanL-Ligh ;
-/Palatino-BoldItalic /URWPalladioL-BoldItal ;
-/Traditional /KacstBook ;
-/Times-BoldItalic /NimbusRomNo9L-MediItal ;
-/AvantGarde-Book /URWGothicL-Book ;
-/AvantGarde-DemiOblique /URWGothicL-DemiObli ;
-/Helvetica-Narrow-Oblique /NimbusSanL-ReguCondItal ;
-/Helvetica-Bold /NimbusSanL-Bold ;
-/Courier-Oblique /NimbusMonL-ReguObli ;
-/Times-Italic /NimbusRomNo9L-ReguItal ;
-/Courier /NimbusMonL-Regu ;
-/Bookman-Demi /URWBookmanL-DemiBold ;
-/Helvetica-BoldOblique /NimbusSanL-BoldItal ;
-/Helvetica-Oblique /NimbusSanL-ReguItal ;
-/HelveticaNarrow-Oblique /NimbusSanL-ReguCondItal ;
-/NewCenturySchlbk-BoldItalic /CenturySchL-BoldItal ;
-/Courier-BoldOblique /NimbusMonL-BoldObli ;
-/HelveticaNarrow-Bold /NimbusSanL-BoldCond ;
-/AvantGarde-Demi /URWGothicL-Demi ;
-/Bookman-LightItalic /URWBookmanL-LighItal ;
-/ZapfDingbats /Dingbats ;
-/Helvetica-Narrow-BoldOblique /NimbusSanL-BoldCondItal ;
-/ZapfChancery-MediumItalic /URWChanceryL-MediItal ;
-/Helvetica /NimbusSanL-Regu ;
-/Bookman-DemiItalic /URWBookmanL-DemiBoldItal ;
-/Palatino-Roman /URWPalladioL-Roma ;
-/NewCenturySchlbk-Bold /CenturySchL-Bold ;
-/NewCenturySchlbk-Roman /CenturySchL-Roma ;
-/Courier-Bold /NimbusMonL-Bold ;
-/Arabic /KacstBook ;
-/Helvetica-Narrow /NimbusSanL-ReguCond ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/others/cidfmap
--- a/pycaptcha/Captcha/data/fonts/others/cidfmap Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-/Dotum-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotumBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
-/ZenHei << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(GB1) 0] >> ;
-/Batang-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatang.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
-/VL-PGothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
-/Dotum-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnDotum.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
-/VL-Gothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
-/VL-Gothic-Regular << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-Gothic-Regular.ttf) /SubfontID 0 /CSI [(Japan1) 0] >> ;
-/VL-PGothic-Regular-JaH << /FileType /TrueType /Path (/usr/share/fonts/truetype/vlgothic/VL-PGothic-Regular.ttf) /SubfontID 0 /CSI [(Japan2) 0] >> ;
-/ZenHei-CNS << /FileType /TrueType /Path (/usr/share/fonts/truetype/wqy/wqy-zenhei.ttf) /SubfontID 0 /CSI [(CNS1) 0] >> ;
-/Batang-Bold << /FileType /TrueType /Path (/usr/share/fonts/truetype/unfonts/UnBatangBold.ttf) /SubfontID 0 /CSI [(Korea1) 0] >> ;
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT
--- a/pycaptcha/Captcha/data/fonts/vera/COPYRIGHT.TXT Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-Bitstream Vera Fonts Copyright
-
-The fonts have a generous copyright, allowing derivative works (as
-long as "Bitstream" or "Vera" are not in the names), and full
-redistribution (so long as they are not *sold* by themselves). They
-can be be bundled, redistributed and sold with any software.
-
-The fonts are distributed under the following copyright:
-
-Copyright
-=========
-
-Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream
-Vera is a trademark of Bitstream, Inc.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of the fonts accompanying this license ("Fonts") and associated
-documentation files (the "Font Software"), to reproduce and distribute
-the Font Software, including without limitation the rights to use,
-copy, merge, publish, distribute, and/or sell copies of the Font
-Software, and to permit persons to whom the Font Software is furnished
-to do so, subject to the following conditions:
-
-The above copyright and trademark notices and this permission notice
-shall be included in all copies of one or more of the Font Software
-typefaces.
-
-The Font Software may be modified, altered, or added to, and in
-particular the designs of glyphs or characters in the Fonts may be
-modified and additional glyphs or characters may be added to the
-Fonts, only if the fonts are renamed to names not containing either
-the words "Bitstream" or the word "Vera".
-
-This License becomes null and void to the extent applicable to Fonts
-or Font Software that has been modified and is distributed under the
-"Bitstream Vera" names.
-
-The Font Software may be sold as part of a larger software package but
-no copy of one or more of the Font Software typefaces may be sold by
-itself.
-
-THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
-OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
-BITSTREAM OR THE GNOME FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL,
-OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT
-SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.
-
-Except as contained in this notice, the names of Gnome, the Gnome
-Foundation, and Bitstream Inc., shall not be used in advertising or
-otherwise to promote the sale, use or other dealings in this Font
-Software without prior written authorization from the Gnome Foundation
-or Bitstream Inc., respectively. For further information, contact:
-fonts at gnome dot org.
-
-Copyright FAQ
-=============
-
- 1. I don't understand the resale restriction... What gives?
-
- Bitstream is giving away these fonts, but wishes to ensure its
- competitors can't just drop the fonts as is into a font sale system
- and sell them as is. It seems fair that if Bitstream can't make money
- from the Bitstream Vera fonts, their competitors should not be able to
- do so either. You can sell the fonts as part of any software package,
- however.
-
- 2. I want to package these fonts separately for distribution and
- sale as part of a larger software package or system. Can I do so?
-
- Yes. A RPM or Debian package is a "larger software package" to begin
- with, and you aren't selling them independently by themselves.
- See 1. above.
-
- 3. Are derivative works allowed?
- Yes!
-
- 4. Can I change or add to the font(s)?
- Yes, but you must change the name(s) of the font(s).
-
- 5. Under what terms are derivative works allowed?
-
- You must change the name(s) of the fonts. This is to ensure the
- quality of the fonts, both to protect Bitstream and Gnome. We want to
- ensure that if an application has opened a font specifically of these
- names, it gets what it expects (though of course, using fontconfig,
- substitutions could still could have occurred during font
- opening). You must include the Bitstream copyright. Additional
- copyrights can be added, as per copyright law. Happy Font Hacking!
-
- 6. If I have improvements for Bitstream Vera, is it possible they might get
- adopted in future versions?
-
- Yes. The contract between the Gnome Foundation and Bitstream has
- provisions for working with Bitstream to ensure quality additions to
- the Bitstream Vera font family. Please contact us if you have such
- additions. Note, that in general, we will want such additions for the
- entire family, not just a single font, and that you'll have to keep
- both Gnome and Jim Lyles, Vera's designer, happy! To make sense to add
- glyphs to the font, they must be stylistically in keeping with Vera's
- design. Vera cannot become a "ransom note" font. Jim Lyles will be
- providing a document describing the design elements used in Vera, as a
- guide and aid for people interested in contributing to Vera.
-
- 7. I want to sell a software package that uses these fonts: Can I do so?
-
- Sure. Bundle the fonts with your software and sell your software
- with the fonts. That is the intent of the copyright.
-
- 8. If applications have built the names "Bitstream Vera" into them,
- can I override this somehow to use fonts of my choosing?
-
- This depends on exact details of the software. Most open source
- systems and software (e.g., Gnome, KDE, etc.) are now converting to
- use fontconfig (see www.fontconfig.org) to handle font configuration,
- selection and substitution; it has provisions for overriding font
- names and subsituting alternatives. An example is provided by the
- supplied local.conf file, which chooses the family Bitstream Vera for
- "sans", "serif" and "monospace". Other software (e.g., the XFree86
- core server) has other mechanisms for font substitution.
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/README.TXT
--- a/pycaptcha/Captcha/data/fonts/vera/README.TXT Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-Contained herin is the Bitstream Vera font family.
-
-The Copyright information is found in the COPYRIGHT.TXT file (along
-with being incoporated into the fonts themselves).
-
-The releases notes are found in the file "RELEASENOTES.TXT".
-
-We hope you enjoy Vera!
-
- Bitstream, Inc.
- The Gnome Project
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT
--- a/pycaptcha/Captcha/data/fonts/vera/RELEASENOTES.TXT Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-Bitstream Vera Fonts - April 16, 2003
-=====================================
-
-The version number of these fonts is 1.10 to distinguish them from the
-beta test fonts.
-
-Note that the Vera copyright is incorporated in the fonts themselves.
-The License field in the fonts contains the copyright license as it
-appears below. The TrueType copyright field is not large enough to
-contain the full license, so the license is incorporated (as you might
-think if you thought about it) into the license field, which
-unfortunately can be obscure to find. (In pfaedit, see: Element->Font
-Info->TTFNames->License).
-
-Our apologies for it taking longer to complete the fonts than planned.
-Beta testers requested a tighter line spacing (less leading) and Jim
-Lyles redesigned Vera's accents to bring its line spacing to more
-typical of other fonts. This took additional time and effort. Our
-thanks to Jim for this effort above and beyond the call of duty.
-
-There are four monospace and sans faces (normal, oblique, bold, bold
-oblique) and two serif faces (normal and bold). Fontconfig/Xft2 (see
-www.fontconfig.org) can artificially oblique the serif faces for you:
-this loses hinting and distorts the faces slightly, but is visibly
-different than normal and bold, and reasonably pleasing.
-
-On systems with fontconfig 2.0 or 2.1 installed, making your sans,
-serif and monospace fonts default to these fonts is very easy. Just
-drop the file local.conf into your /etc/fonts directory. This will
-make the Bitstream fonts your default fonts for all applications using
-fontconfig (if sans, serif, or monospace names are used, as they often
-are as default values in many desktops). The XML in local.conf may
-need modification to enable subpixel decimation, if appropriate,
-however, the commented out phrase does so for XFree86 4.3, in the case
-that the server does not have sufficient information to identify the
-use of a flat panel. Fontconfig 2.2 adds Vera to the list of font
-families and will, by default use it as the default sans, serif and
-monospace fonts.
-
-During the testing of the final Vera fonts, we learned that screen
-fonts in general are only typically hinted to work correctly at
-integer pixel sizes. Vera is coded internally for integer sizes only.
-We need to investigate further to see if there are commonly used fonts
-that are hinted to be rounded but are not rounded to integer sizes due
-to oversights in their coding.
-
-Most fonts work best at 8 pixels and below if anti-aliased only, as
-the amount of work required to hint well at smaller and smaller sizes
-becomes astronomical. GASP tables are typically used to control
-whether hinting is used or not, but Freetype/Xft does not currently
-support GASP tables (which are present in Vera).
-
-To mitigate this problem, both for Vera and other fonts, there will be
-(very shortly) a new fontconfig 2.2 release that will, by default not
-apply hints if the size is below 8 pixels. if you should have a font
-that in fact has been hinted more agressively, you can use fontconfig
-to note this exception. We believe this should improve many hinted
-fonts in addition to Vera, though implemeting GASP support is likely
-the right long term solution.
-
-Font rendering in Gnome or KDE is the combination of algorithms in
-Xft2 and Freetype, along with hinting in the fonts themselves. It is
-vital to have sufficient information to disentangle problems that you
-may observe.
-
-Note that having your font rendering system set up correctly is vital
-to proper judgement of problems of the fonts:
-
- * Freetype may or may not be configured to in ways that may
- implement execution of possibly patented (in some parts of the world)
- TrueType hinting algorithms, particularly at small sizes. Best
- results are obtained while using these algorithms.
-
- * The freetype autohinter (used when the possibly patented
- algorithms are not used) continues to improve with each release. If
- you are using the autohinter, please ensure you are using an up to
- date version of freetype before reporting problems.
-
- * Please identify what version of freetype you are using in any
- bug reports, and how your freetype is configured.
-
- * Make sure you are not using the freetype version included in
- XFree86 4.3, as it has bugs that significantly degrade most fonts,
- including Vera. if you build XFree86 4.3 from source yourself, you may
- have installed this broken version without intending it (as I
- did). Vera was verified with the recently released Freetype 2.1.4. On
- many systems, 'ldd" can be used to see which freetype shared library
- is actually being used.
-
- * Xft/X Render does not (yet) implement gamma correction. This
- causes significant problems rendering white text on a black background
- (causing partial pixels to be insufficiently shaded) if the gamma of
- your monitor has not been compensated for, and minor problems with
- black text on a while background. The program "xgamma" can be used to
- set a gamma correction value in the X server's color pallette. Most
- monitors have a gamma near 2.
-
- * Note that the Vera family uses minimal delta hinting. Your
- results on other systems when not used anti-aliased may not be
- entirely satisfying. We are primarily interested in reports of
- problems on open source systems implementing Xft2/fontconfig/freetype
- (which implements antialiasing and hinting adjustements, and
- sophisticated subpixel decimation on flatpanels). Also, the
- algorithms used by Xft2 adjust the hints to integer widths and the
- results are crisper on open source systems than on Windows or
- MacIntosh.
-
- * Your fontconfig may (probably does) predate the release of
- fontconfig 2.2, and you may see artifacts not present when the font is
- used at very small sizes with hinting enabled. "vc-list -V" can be
- used to see what version you have installed.
-
-We believe and hope that these fonts will resolve the problems
-reported during beta test. The largest change is the reduction of
-leading (interline spacing), which had annoyed a number of people, and
-reduced Vera's utility for some applcations. The Vera monospace font
-should also now make '0' and 'O' and '1' and 'l' more clearly
-distinguishable.
-
-The version of these fonts is version 1.10. Fontconfig should be
-choosing the new version of the fonts if both the released fonts and
-beta test fonts are installed (though please discard them: they have
-names of form tt20[1-12]gn.ttf). Note that older versions of
-fontconfig sometimes did not rebuild their cache correctly when new
-fonts are installed: please upgrade to fontconfig 2.2. "fc-cache -f"
-can be used to force rebuilding fontconfig's cache files.
-
-If you note problems, please send them to fonts at gnome dot org, with
-exactly which face and size and unicode point you observe the problem
-at. The xfd utility from XFree86 CVS may be useful for this (e.g. "xfd
--fa sans"). A possibly more useful program to examine fonts at a
-variety of sizes is the "waterfall" program found in Keith Packard's
-CVS.
-
- $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS login
- Logging in to :pserver:anoncvs@keithp.com:2401/local/src/CVS
- CVS password:
- $ cvs -d :pserver:anoncvs@keithp.com:/local/src/CVS co waterfall
- $ cd waterfall
- $ xmkmf -a
- $ make
- # make install
- # make install.man
-
-Again, please make sure you are running an up-to-date freetype, and
-that you are only examining integer sizes.
-
-Reporting Problems
-==================
-
-Please send problem reports to fonts at gnome org, with the following
-information:
-
- 1. Version of Freetype, Xft2 and fontconfig
- 2. Whether TT hinting is being used, or the autohinter
- 3. Application being used
- 4. Character/Unicode code point that has problems (if applicable)
- 5. Version of which operating system
- 6. Please include a screenshot, when possible.
-
-Please check the fonts list archives before reporting problems to cut
-down on duplication.
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/Vera.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/Vera.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMoBI.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMoBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMoIt.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraMono.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraSe.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf
Binary file pycaptcha/Captcha/data/fonts/vera/VeraSeBd.ttf has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/fonts/vera/local.conf
--- a/pycaptcha/Captcha/data/fonts/vera/local.conf Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-
-
-
-
-
-
-
- serif
-
- Bitstream Vera Serif
-
-
-
- sans-serif
-
- Bitstream Vera Sans
-
-
-
- monospace
-
- Bitstream Vera Sans Mono
-
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/.DS_Store
Binary file pycaptcha/Captcha/data/pictures/.DS_Store has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/1.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/1.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/10.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/10.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/11.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/11.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/12.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/12.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/2.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/2.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/3.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/3.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/4.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/4.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/5.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/5.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/6.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/6.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/7.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/7.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/8.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/8.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/9.jpeg
Binary file pycaptcha/Captcha/data/pictures/abstract/9.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/abstract/README
--- a/pycaptcha/Captcha/data/pictures/abstract/README Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-These images were created by the author with Fyre, expressly for PyCAPTCHA.
-
-Copyright (c) 2004 Micah Dowty
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg
Binary file pycaptcha/Captcha/data/pictures/nature/Craig_Barrington_ocotillo_and_mountains.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg
Binary file pycaptcha/Captcha/data/pictures/nature/Kerry_Carloy_Chisos_Sunset.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg
Binary file pycaptcha/Captcha/data/pictures/nature/Paul_Dowty_Mt_Bross.jpeg has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/pictures/nature/README
--- a/pycaptcha/Captcha/data/pictures/nature/README Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-These are uncopyrighted images gathered from various sources,
-including the author's family and national park service web sites.
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/words/README
--- a/pycaptcha/Captcha/data/words/README Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-These word lists are from various sources:
-
-basic-english:
- http://simple.wikipedia.org/wiki/Basic_English_Alphabetical_Wordlist
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/words/basic-english
--- a/pycaptcha/Captcha/data/words/basic-english Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,852 +0,0 @@
-a
-able
-about
-account
-acid
-across
-act
-addition
-adjustment
-advertisement
-agreement
-after
-again
-against
-air
-all
-almost
-among
-amount
-amusement
-and
-angle
-angry
-animal
-answer
-ant
-any
-apparatus
-apple
-approval
-arch
-argument
-arm
-army
-art
-as
-at
-attack
-attempt
-attention
-attraction
-authority
-automatic
-awake
-baby
-back
-bad
-bag
-balance
-ball
-band
-base
-basin
-basket
-bath
-be
-beautiful
-because
-bed
-bee
-before
-behavior
-belief
-bell
-bent
-berry
-between
-bird
-birth
-bit
-bite
-bitter
-black
-blade
-blood
-blow
-blue
-board
-boat
-body
-boiling
-bone
-book
-boot
-bottle
-box
-boy
-brain
-brake
-branch
-brass
-bread
-breath
-brick
-bridge
-bright
-broken
-brother
-brown
-brush
-bucket
-building
-bulb
-burn
-burst
-business
-but
-butter
-button
-by
-cake
-camera
-canvas
-card
-care
-carriage
-cart
-cat
-cause
-certain
-chain
-chalk
-chance
-change
-cheap
-cheese
-chemical
-chest
-chief
-chin
-church
-circle
-clean
-clear
-clock
-cloth
-cloud
-coal
-coat
-cold
-collar
-color
-comb
-come
-comfort
-committee
-common
-company
-comparison
-competition
-complete
-complex
-condition
-connection
-conscious
-control
-cook
-copper
-copy
-cord
-cork
-cotton
-cough
-country
-cover
-cow
-crack
-credit
-crime
-cruel
-crush
-cry
-cup
-current
-curtain
-curve
-cushion
-cut
-damage
-danger
-dark
-daughter
-day
-dead
-dear
-death
-debt
-decision
-deep
-degree
-delicate
-dependent
-design
-desire
-destruction
-detail
-development
-different
-digestion
-direction
-dirty
-discovery
-discussion
-disease
-disgust
-distance
-distribution
-division
-do
-dog
-door
-down
-doubt
-drain
-drawer
-dress
-drink
-driving
-drop
-dry
-dust
-ear
-early
-earth
-east
-edge
-education
-effect
-egg
-elastic
-electric
-end
-engine
-enough
-equal
-error
-even
-event
-ever
-every
-example
-exchange
-existence
-expansion
-experience
-expert
-eye
-face
-fact
-fall
-false
-family
-far
-farm
-fat
-father
-fear
-feather
-feeble
-feeling
-female
-fertile
-fiction
-field
-fight
-finger
-fire
-first
-fish
-fixed
-flag
-flame
-flat
-flight
-floor
-flower
-fly
-fold
-food
-foolish
-foot
-for
-force
-fork
-form
-forward
-fowl
-frame
-free
-frequent
-friend
-from
-front
-fruit
-full
-future
-garden
-general
-get
-girl
-give
-glass
-glove
-go
-goat
-gold
-good
-government
-grain
-grass
-great
-green
-grey/gray
-grip
-group
-growth
-guide
-gun
-hair
-hammer
-hand
-hanging
-happy
-harbor
-hard
-harmony
-hat
-hate
-have
-he
-head
-healthy
-hearing
-heart
-heat
-help
-here
-high
-history
-hole
-hollow
-hook
-hope
-horn
-horse
-hospital
-hour
-house
-how
-humor
-ice
-idea
-if
-ill
-important
-impulse
-in
-increase
-industry
-ink
-insect
-instrument
-insurance
-interest
-invention
-iron
-island
-jelly
-jewel
-join
-journey
-judge
-jump
-keep
-kettle
-key
-kick
-kind
-kiss
-knee
-knife
-knot
-knowledge
-land
-language
-last
-late
-laugh
-law
-lead
-leaf
-learning
-leather
-left
-leg
-let
-letter
-level
-library
-lift
-light
-like
-limit
-line
-linen
-lip
-liquid
-list
-little
-less
-least
-living
-lock
-long
-loose
-loss
-loud
-love
-low
-machine
-make
-male
-man
-manager
-map
-mark
-market
-married
-match
-material
-mass
-may
-meal
-measure
-meat
-medical
-meeting
-memory
-metal
-middle
-military
-milk
-mind
-mine
-minute
-mist
-mixed
-money
-monkey
-month
-moon
-morning
-mother
-motion
-mountain
-mouth
-move
-much
-more
-most
-muscle
-music
-nail
-name
-narrow
-nation
-natural
-near
-necessary
-neck
-need
-needle
-nerve
-net
-new
-news
-night
-no
-noise
-normal
-north
-nose
-not
-note
-now
-number
-nut
-observation
-of
-off
-offer
-office
-oil
-old
-on
-only
-open
-operation
-opposite
-opinion
-other
-or
-orange
-order
-organization
-ornament
-out
-oven
-over
-owner
-page
-pain
-paint
-paper
-parallel
-parcel
-part
-past
-paste
-payment
-peace
-pen
-pencil
-person
-physical
-picture
-pig
-pin
-pipe
-place
-plane
-plant
-plate
-play
-please
-pleasure
-plough/plow
-pocket
-point
-poison
-polish
-political
-poor
-porter
-position
-possible
-pot
-potato
-powder
-power
-present
-price
-print
-prison
-private
-probable
-process
-produce
-profit
-property
-prose
-protest
-public
-pull
-pump
-punishment
-purpose
-push
-put
-quality
-question
-quick
-quiet
-quite
-rail
-rain
-range
-rat
-rate
-ray
-reaction
-red
-reading
-ready
-reason
-receipt
-record
-regret
-regular
-relation
-religion
-representative
-request
-respect
-responsible
-rest
-reward
-rhythm
-rice
-right
-ring
-river
-road
-rod
-roll
-roof
-room
-root
-rough
-round
-rub
-rule
-run
-sad
-safe
-sail
-salt
-same
-sand
-say
-scale
-school
-science
-scissors
-screw
-sea
-seat
-second
-secret
-secretary
-see
-seed
-selection
-self
-send
-seem
-sense
-separate
-serious
-servant
-sex
-shade
-shake
-shame
-sharp
-sheep
-shelf
-ship
-shirt
-shock
-shoe
-short
-shut
-side
-sign
-silk
-silver
-simple
-sister
-size
-skin
-skirt
-sky
-sleep
-slip
-slope
-slow
-small
-smash
-smell
-smile
-smoke
-smooth
-snake
-sneeze
-snow
-so
-soap
-society
-sock
-soft
-solid
-some
-son
-song
-sort
-sound
-south
-soup
-space
-spade
-special
-sponge
-spoon
-spring
-square
-stamp
-stage
-star
-start
-statement
-station
-steam
-stem
-steel
-step
-stick
-still
-stitch
-stocking
-stomach
-stone
-stop
-store
-story
-strange
-street
-stretch
-sticky
-stiff
-straight
-strong
-structure
-substance
-sugar
-suggestion
-summer
-support
-surprise
-such
-sudden
-sun
-sweet
-swim
-system
-table
-tail
-take
-talk
-tall
-taste
-tax
-teaching
-tendency
-test
-than
-that
-the
-then
-theory
-there
-thick
-thin
-thing
-this
-thought
-thread
-throat
-though
-through
-thumb
-thunder
-ticket
-tight
-tired
-till
-time
-tin
-to
-toe
-together
-tomorrow
-tongue
-tooth
-top
-touch
-town
-trade
-train
-transport
-tray
-tree
-trick
-trousers
-true
-trouble
-turn
-twist
-umbrella
-under
-unit
-use
-up
-value
-verse
-very
-vessel
-view
-violent
-voice
-walk
-wall
-waiting
-war
-warm
-wash
-waste
-watch
-water
-wave
-wax
-way
-weather
-week
-weight
-well
-west
-wet
-wheel
-when
-where
-while
-whip
-whistle
-white
-who
-why
-wide
-will
-wind
-window
-wine
-wing
-winter
-wire
-wise
-with
-woman
-wood
-wool
-word
-work
-worm
-wound
-writing
-wrong
-year
-yellow
-yes
-yesterday
-you
-young
\ No newline at end of file
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Captcha/data/words/characters
--- a/pycaptcha/Captcha/data/words/characters Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/Facade.py
--- a/pycaptcha/Facade.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#!/usr/bin/env python
-
-
-
-from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
-import numpy
-
-# Une fonction simple pour generer un captcha
-# ease : represente la difficulte du captcha a generer
-# 0 = facile et 1 (ou autre chose) = difficile
-#solution : specifie si on veut en retour un array numpy representant
-#l image ou un tuple contenant l'array et la solution du captcha.
-
-# Des fontes additionnelles peuvent etre ajoutees au dossier pyCaptcha/Captcha/data/fonts/others
-# Le programme choisit une fonte aleatoirement dans ce dossier ainsi que le dossir vera.
-
-
-def generateCaptcha (ease=0, solution=0):
-
- if ease == 1:
- g = AngryGimpy()
-
- else:
- g = PseudoGimpy()
-
- i = g.render()
- a = numpy.asarray(i)
-
- if solution == 0:
- return a
-
- else :
- return (a, g.solutions)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/README
--- a/pycaptcha/README Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-======================
-Python CAPTCHA package
-======================
-
-About
------
-
-This is the PyCAPTCHA package, a collection of Python modules
-implementing CAPTCHAs: automated tests that humans should pass,
-but current computer programs can't. These tests are often
-used for security.
-
-See http://www.captcha.net for more information and examples.
-
-This project was started because the CIA project, written in
-Python, needed a CAPTCHA to automate its user creation process
-safely. All existing implementations the author could find were
-written in Java or for the .NET framework, so a simple Python
-alternative was needed.
-
-Examples
---------
-
-Included are several example programs:
-
- - simple_example.py is a bare-bones example that just generates
- and displays an image.
-
- - http_example.py is a longer example that uses BaseHTTPServer
- to simulate a CAPTCHA's use in a web environment. Running this
- example and connecting to it from your web browser is a quick
- and easy way to see PyCAPTCHA in action
-
- - modpython_example.py is a version of http_example that runs
- from an Apache server equipped with a properly configured
- mod_python.
-
-
-Dependencies
-------------
-
-- Python 2.2.1 or later
-- the Python Imaging Library, required for visual CAPTCHAs
-
-
-Contacts
---------
-
-Micah Dowty
-
-'scanline' on irc.freenode.net
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/http_example.py
--- a/pycaptcha/http_example.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-#!/usr/bin/env python
-#
-# An example that presents CAPTCHA tests in a web environment
-# and gives the user a chance to solve them. Run it, optionally
-# specifying a port number on the command line, then point your web
-# browser at the given URL.
-#
-
-from Captcha.Visual import Tests
-from Captcha import Factory
-import BaseHTTPServer, urlparse, sys
-
-
-class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
- def do_GET(self):
- scheme, host, path, parameters, query, fragment = urlparse.urlparse(self.path)
-
- # Split the path into segments
- pathSegments = path.split('/')[1:]
-
- # Split the query into key-value pairs
- args = {}
- for pair in query.split("&"):
- if pair.find("=") >= 0:
- key, value = pair.split("=", 1)
- args.setdefault(key, []).append(value)
- else:
- args[pair] = []
-
- # A hack so it works with a proxy configured for VHostMonster :)
- if pathSegments[0] == "vhost":
- pathSegments = pathSegments[3:]
-
- if pathSegments[0] == "":
- self.handleRootPage(args.get('test', Tests.__all__)[0])
-
- elif pathSegments[0] == "images":
- self.handleImagePage(pathSegments[1])
-
- elif pathSegments[0] == "solutions":
- self.handleSolutionPage(pathSegments[1], args['word'][0])
-
- else:
- self.handle404()
-
- def handle404(self):
- self.send_response(404)
- self.send_header("Content-Type", "text/html")
- self.end_headers()
- self.wfile.write("No such resource
")
-
- def handleRootPage(self, testName):
- self.send_response(200)
- self.send_header("Content-Type", "text/html")
- self.end_headers()
-
- test = self.captchaFactory.new(getattr(Tests, testName))
-
- # Make a list of tests other than the one we're using
- others = []
- for t in Tests.__all__:
- if t != testName:
- others.append('%s' % (t,t))
- others = "\n".join(others)
-
- self.wfile.write("""
-
-PyCAPTCHA Example
-
-
-PyCAPTCHA Example
-
- %s:
- %s
-
-
-
-
-
-
-
-
-Or try...
-
-
-
-
-
-""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others))
-
- def handleImagePage(self, id):
- test = self.captchaFactory.get(id)
- if not test:
- return self.handle404()
-
- self.send_response(200)
- self.send_header("Content-Type", "image/jpeg")
- self.end_headers()
- test.render().save(self.wfile, "JPEG")
-
- def handleSolutionPage(self, id, word):
- test = self.captchaFactory.get(id)
- if not test:
- return self.handle404()
-
- if not test.valid:
- # Invalid tests will always return False, to prevent
- # random trial-and-error attacks. This could be confusing to a user...
- result = "Test invalidated, try another test"
- elif test.testSolutions([word]):
- result = "Correct"
- else:
- result = "Incorrect"
-
- self.send_response(200)
- self.send_header("Content-Type", "text/html")
- self.end_headers()
- self.wfile.write("""
-
-PyCAPTCHA Example
-
-
-PyCAPTCHA Example
-%s
-
-%s
-You guessed: %s
-Possible solutions: %s
-Try again
-
-
-""" % (test.__class__.__name__, test.id, result, word, ", ".join(test.solutions)))
-
-
-def main(port):
- print "Starting server at http://localhost:%d/" % port
- handler = RequestHandler
- handler.captchaFactory = Factory()
- BaseHTTPServer.HTTPServer(('', port), RequestHandler).serve_forever()
-
-if __name__ == "__main__":
- # The port number can be specified on the command line, default is 8080
- if len(sys.argv) >= 2:
- port = int(sys.argv[1])
- else:
- port = 8080
- main(port)
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/modpython_example.py
--- a/pycaptcha/modpython_example.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-#
-# An example that presents CAPTCHA tests in a web environment
-# and gives the user a chance to solve them.
-#
-# This example is for use with Apache using mod_python and its
-# Publisher handler. For example, if your apache configuration
-# included something like:
-#
-# AddHandler python-program .py
-# PythonHandler mod_python.publisher
-#
-# You could place this script anywhere in your web space to see
-# the demo.
-#
-# --Micah
-#
-
-from Captcha.Visual import Tests
-import Captcha
-from mod_python import apache
-
-
-def _getFactory(req):
- return Captcha.PersistentFactory("/tmp/pycaptcha_%s" % req.interpreter)
-
-
-def test(req, name=Tests.__all__[0]):
- """Show a newly generated CAPTCHA of the given class.
- Default is the first class name given in Tests.__all__
- """
- test = _getFactory(req).new(getattr(Tests, name))
-
- # Make a list of tests other than the one we're using
- others = []
- for t in Tests.__all__:
- if t != name:
- others.append('%s' % (t,t))
- others = "\n".join(others)
-
- return """
-
-PyCAPTCHA Example
-
-
-PyCAPTCHA Example (for mod_python)
-
- %s:
- %s
-
-
-
-
-
-
-
-
-Or try...
-
-
-
-
-
-""" % (test.__class__.__name__, test.__doc__, test.id, test.id, others)
-
-
-def image(req, id):
- """Generate an image for the CAPTCHA with the given ID string"""
- test = _getFactory(req).get(id)
- if not test:
- raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
- req.content_type = "image/jpeg"
- test.render().save(req, "JPEG")
- return apache.OK
-
-
-def solution(req, id, word):
- """Grade a CAPTCHA given a solution word"""
- test = _getFactory(req).get(id)
- if not test:
- raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
-
- if not test.valid:
- # Invalid tests will always return False, to prevent
- # random trial-and-error attacks. This could be confusing to a user...
- result = "Test invalidated, try another test"
- elif test.testSolutions([word]):
- result = "Correct"
- else:
- result = "Incorrect"
-
- return """
-
-PyCAPTCHA Example
-
-
-PyCAPTCHA Example
-%s
-
-%s
-You guessed: %s
-Possible solutions: %s
-Try again
-
-
-""" % (test.__class__.__name__, test.id, result, word, ", ".join(test.solutions))
-
-### The End ###
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/output.png
Binary file pycaptcha/output.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/setup.py
--- a/pycaptcha/setup.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-from distutils.core import setup
-from setup.my_install_data import *
-
-setup (name = "PyCAPTCHA",
- version = "0.4",
- description = "A Python framework for CAPTCHA tests",
- maintainer = "Micah Dowty",
- maintainer_email = "micah@navi.cx",
- license = "MIT",
- packages = [
- 'Captcha',
- 'Captcha.Visual',
- ],
- cmdclass = {
- 'install_data': my_install_data,
- },
- data_files = [Data_Files(
- preserve_path = 1,
- base_dir = 'install_lib',
- copy_to = 'Captcha/data',
- strip_dirs = 2,
- template = [
- 'graft Captcha/data',
- ],
- )],
- )
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/setup/__init__.py
--- a/pycaptcha/setup/__init__.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-# Extra modules for use with distutils
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/setup/my_install_data.py
--- a/pycaptcha/setup/my_install_data.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-"""my_install_data.py
-
-Provides a more sophisticated facility to install data files
-than distutils' install_data does.
-You can specify your files as a template like in MANIFEST.in
-and you have more control over the copy process.
-
-Copyright 2000 by Rene Liebscher, Germany.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-Note:
-This licence is only for this file.
-PyOpenGL has its own licence. (it is almost identical.)
-"""
-
-# created 2000/08/01, Rene Liebscher
-
-###########################################################################
-# import some modules we need
-
-import os,sys,string
-from types import StringType,TupleType,ListType
-from distutils.util import change_root
-from distutils.filelist import FileList
-from distutils.command.install_data import install_data
-
-###########################################################################
-# a container class for our more sophisticated install mechanism
-
-class Data_Files:
- """ container for list of data files.
- supports alternate base_dirs e.g. 'install_lib','install_header',...
- supports a directory where to copy files
- supports templates as in MANIFEST.in
- supports preserving of paths in filenames
- eg. foo/xyz is copied to base_dir/foo/xyz
- supports stripping of leading dirs of source paths
- eg. foo/bar1/xyz, foo/bar2/abc can be copied to bar1/xyz, bar2/abc
- """
-
- def __init__(self,base_dir=None,files=None,copy_to=None,template=None,preserve_path=0,strip_dirs=0):
- self.base_dir = base_dir
- self.files = files
- self.copy_to = copy_to
- self.template = template
- self.preserve_path = preserve_path
- self.strip_dirs = strip_dirs
- self.finalized = 0
-
- def warn (self, msg):
- sys.stderr.write ("warning: %s: %s\n" %
- ("install_data", msg))
-
- def debug_print (self, msg):
- """Print 'msg' to stdout if the global DEBUG (taken from the
- DISTUTILS_DEBUG environment variable) flag is true.
- """
- from distutils.core import DEBUG
- if DEBUG:
- print msg
-
-
- def finalize(self):
- """ complete the files list by processing the given template """
- if self.finalized:
- return
- if self.files == None:
- self.files = []
- if self.template != None:
- if type(self.template) == StringType:
- self.template = string.split(self.template,";")
- filelist = FileList(self.warn,self.debug_print)
- for line in self.template:
- filelist.process_template_line(string.strip(line))
- filelist.sort()
- filelist.remove_duplicates()
- self.files.extend(filelist.files)
- self.finalized = 1
-
-# end class Data_Files
-
-###########################################################################
-# a more sophisticated install routine than distutils install_data
-
-class my_install_data (install_data):
-
- def check_data(self,d):
- """ check if data are in new format, if not create a suitable object.
- returns finalized data object
- """
- if not isinstance(d, Data_Files):
- self.warn(("old-style data files list found "
- "-- please convert to Data_Files instance"))
- if type(d) is TupleType:
- if len(d) != 2 or not (type(d[1]) is ListType):
- raise DistutilsSetupError, \
- ("each element of 'data_files' option must be an "
- "Data File instance, a string or 2-tuple (string,[strings])")
- d = Data_Files(copy_to=d[0],files=d[1])
- else:
- if not (type(d) is StringType):
- raise DistutilsSetupError, \
- ("each element of 'data_files' option must be an "
- "Data File instance, a string or 2-tuple (string,[strings])")
- d = Data_Files(files=[d])
- d.finalize()
- return d
-
- def run(self):
- self.outfiles = []
- install_cmd = self.get_finalized_command('install')
-
- for d in self.data_files:
- d = self.check_data(d)
-
- install_dir = self.install_dir
- # alternative base dir given => overwrite install_dir
- if d.base_dir != None:
- install_dir = getattr(install_cmd,d.base_dir)
-
- # copy to an other directory
- if d.copy_to != None:
- if not os.path.isabs(d.copy_to):
- # relatiev path to install_dir
- dir = os.path.join(install_dir, d.copy_to)
- elif install_cmd.root:
- # absolute path and alternative root set
- dir = change_root(self.root,d.copy_to)
- else:
- # absolute path
- dir = d.copy_to
- else:
- # simply copy to install_dir
- dir = install_dir
- # warn if necceassary
- self.warn("setup script did not provide a directory to copy files to "
- " -- installing right in '%s'" % install_dir)
-
- dir=os.path.normpath(dir)
- # create path
- self.mkpath(dir)
-
- # copy all files
- for src in d.files:
- if d.strip_dirs > 0:
- dst = string.join(string.split(src,os.sep)[d.strip_dirs:],os.sep)
- else:
- dst = src
- if d.preserve_path:
- # preserve path in filename
- self.mkpath(os.path.dirname(os.path.join(dir,dst)))
- out = self.copy_file(src, os.path.join(dir,dst))
- else:
- out = self.copy_file(src, dir)
- if type(out) is TupleType:
- out = out[0]
- self.outfiles.append(out)
-
- return self.outfiles
-
- def get_inputs (self):
- inputs = []
- for d in self.data_files:
- d = self.check_data(d)
- inputs.append(d.files)
- return inputs
-
- def get_outputs (self):
- return self.outfiles
-
-
-###########################################################################
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/simple_example.py
--- a/pycaptcha/simple_example.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-#
-# A very simple example that creates a random image from the
-# PseudoGimpy CAPTCHA, saves and shows it, and prints the list
-# of solutions. Normally you would call testSolutions rather
-# than reading this list yourself.
-#
-from Captcha.Visual.Tests import PseudoGimpy, AngryGimpy
-import numpy
-#from numpy import *
-
-#g = AngryGimpy()
-#i = g.render()
-#a = numpy.asarray(i)
-#b = numpy.zeros((2, 2), numpy.int8)
-#c = a == b
-#print c
-#i.save("output.png")
-#i.show()
-#print a
-#print g.solutions
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/test.png
Binary file pycaptcha/test.png has changed
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 pycaptcha/transformations.py
--- a/pycaptcha/transformations.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-
-import Numeric, Image
- #""" Transforme une image PIL en objet numpy.array et vice versa"""
-
-
-def image2array(im):
- #""" image vers array numpy"""
- if im.mode not in ("L", "F"):
- raise ValueError, "can only convert single-layer images"
- if im.mode == "L":
- a = Numeric.fromstring(im.tostring(), Numeric.UnsignedInt8)
- else:
- a = Numeric.fromstring(im.tostring(), Numeric.Float32)
- a.shape = im.size[1], im.size[0]
- return a
-
-def array2image(a):
- #""" array numpy vers image"""
- if a.typecode() == Numeric.UnsignedInt8:
- mode = "L"
- elif a.typecode() == Numeric.Float32:
- mode = "F"
- else:
- raise ValueError, "unsupported image mode"
- return Image.fromstring(mode, (a.shape[1], a.shape[0]), a.tostring())
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/run_pipeline.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/run_pipeline.sh Fri Feb 26 14:15:38 2010 -0500
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# This is one _ugly_ hack, but I couldn't figure out how
+# to cleanly pass command line options to the script if
+# invoking using the "gimp --batch < script.py" syntax
+
+# Basically I create a temp file, put the args into it,
+# then the script gets the filename and reads back the
+# args
+
+export PIPELINE_ARGS_TMPFILE=`mktemp`
+
+for arg in "$@"
+do
+ echo $arg >> $PIPELINE_ARGS_TMPFILE
+done
+
+gimp -i --batch-interpreter python-fu-eval --batch - < pipeline.py
+
+
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/__init__.py
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/mnist_sda.py
--- a/scripts/stacked_dae/mnist_sda.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-# Parameterize call to sgd_optimization for MNIST
-
-import numpy
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-
-from sgd_optimization import SdaSgdOptimizer
-import cPickle, gzip
-from jobman import DD
-
-MNIST_LOCATION = '/u/savardf/datasets/mnist.pkl.gz'
-
-def sgd_optimization_mnist(learning_rate=0.1, pretraining_epochs = 2, \
- pretrain_lr = 0.1, training_epochs = 5, \
- dataset='mnist.pkl.gz'):
- # Load the dataset
- f = gzip.open(dataset,'rb')
- # this gives us train, valid, test (each with .x, .y)
- dataset = cPickle.load(f)
- f.close()
-
- n_ins = 28*28
- n_outs = 10
-
- hyperparameters = DD({'finetuning_lr':learning_rate,
- 'pretraining_lr':pretrain_lr,
- 'pretraining_epochs_per_layer':pretraining_epochs,
- 'max_finetuning_epochs':training_epochs,
- 'hidden_layers_sizes':[100],
- 'corruption_levels':[0.2],
- 'minibatch_size':20})
-
- optimizer = SdaSgdOptimizer(dataset, hyperparameters, n_ins, n_outs)
- optimizer.pretrain()
- optimizer.finetune()
-
-if __name__ == '__main__':
- sgd_optimization_mnist(dataset=MNIST_LOCATION)
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/nist_sda.py
--- a/scripts/stacked_dae/nist_sda.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,264 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-import numpy
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-import copy
-
-import sys
-import os.path
-
-from sgd_optimization import SdaSgdOptimizer
-
-from jobman import DD
-import jobman, jobman.sql
-from pylearn.io import filetensor
-
-from utils import produit_croise_jobs
-
-TEST_CONFIG = False
-
-NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
-
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
-REDUCE_TRAIN_TO = None
-MAX_FINETUNING_EPOCHS = 1000
-if TEST_CONFIG:
- JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
- REDUCE_TRAIN_TO = 1000
- MAX_FINETUNING_EPOCHS = 2
-
-JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
-JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
-EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"
-
-# There used to be
-# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
-# and
-# 'num_hidden_layers':[1,2,3]
-# but this is now handled by a special mechanism in SgdOptimizer
-# to reuse intermediate results (for the same training of lower layers,
-# we can test many finetuning_lr)
-JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
- 'pretraining_epochs_per_layer': [10,20],
- 'hidden_layers_sizes': [300,800],
- 'corruption_levels': [0.1,0.2],
- 'minibatch_size': [20],
- 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
-FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
-NUM_HIDDEN_LAYERS_VALS = [1,2,3]
-
-# Just useful for tests... minimal number of epochs
-DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
- 'pretraining_lr':0.01,
- 'pretraining_epochs_per_layer':1,
- 'max_finetuning_epochs':1,
- 'hidden_layers_sizes':[1000],
- 'corruption_levels':[0.2],
- 'minibatch_size':20})
-
-def jobman_entrypoint(state, channel):
- state = copy.copy(state)
-
- print "Will load NIST"
- nist = NIST(20)
- print "NIST loaded"
-
- rtt = None
- if state.has_key('reduce_train_to'):
- rtt = state['reduce_train_to']
- elif REDUCE_TRAIN_TO:
- rtt = REDUCE_TRAIN_TO
-
- if rtt:
- print "Reducing training set to ", rtt, " examples"
- nist.reduce_train_set(rtt)
-
- train,valid,test = nist.get_tvt()
- dataset = (train,valid,test)
-
- n_ins = 32*32
- n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
- db = jobman.sql.db(JOBDB_RESULTS)
- optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\
- input_divider=255.0, job_tree=True, results_db=db, \
- experiment=EXPERIMENT_PATH, \
- finetuning_lr_to_try=FINETUNING_LR_VALS, \
- num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
- optimizer.train()
-
- return channel.COMPLETE
-
-def estimate_pretraining_time(job):
- job = DD(job)
- # time spent on pretraining estimated as O(n^2) where n=num hidens
- # no need to multiply by num_hidden_layers, as results from num=1
- # is reused for num=2, or 3, so in the end we get the same time
- # as if we were training 3 times a single layer
- # constants:
- # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
- # - 12 mins to finetune (per 1 epoch)
- # basically the job_tree trick gives us a 5 times speedup on the
- # pretraining time due to reusing for finetuning_lr
- # and gives us a second x2 speedup for reusing previous layers
- # to explore num_hidden_layers
- return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \
- * job.hidden_layer_sizes * job.hidden_layer_sizes)
-
-def estimate_total_time():
- jobs = produit_croise_jobs(JOB_VALS)
- sumtime = 0.0
- sum_without = 0.0
- for job in jobs:
- sumtime += estimate_pretraining_time(job)
- # 12 mins per epoch * 30 epochs
- # 5 finetuning_lr per pretraining combination
- sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
- sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
- print "num jobs=", len(jobs)
- print "estimate", sumtime/60, " hours"
- print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without
-
-def jobman_insert_nist():
- jobs = produit_croise_jobs(JOB_VALS)
-
- db = jobman.sql.db(JOBDB_JOBS)
- for job in jobs:
- job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
- jobman.sql.insert_dict(job, db)
-
- print "inserted"
-
-class NIST:
- def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
- global NIST_ALL_LOCATION
-
- self.minibatch_size = minibatch_size
- self.basepath = basepath and basepath or NIST_ALL_LOCATION
-
- self.set_filenames()
-
- # arrays of 2 elements: .x, .y
- self.train = [None, None]
- self.test = [None, None]
-
- self.load_train_test()
-
- self.valid = [[], []]
- self.split_train_valid()
- if reduce_train_to:
- self.reduce_train_set(reduce_train_to)
-
- def get_tvt(self):
- return self.train, self.valid, self.test
-
- def set_filenames(self):
- self.train_files = ['all_train_data.ft',
- 'all_train_labels.ft']
-
- self.test_files = ['all_test_data.ft',
- 'all_test_labels.ft']
-
- def load_train_test(self):
- self.load_data_labels(self.train_files, self.train)
- self.load_data_labels(self.test_files, self.test)
-
- def load_data_labels(self, filenames, pair):
- for i, fn in enumerate(filenames):
- f = open(os.path.join(self.basepath, fn))
- pair[i] = filetensor.read(f)
- f.close()
-
- def reduce_train_set(self, max):
- self.train[0] = self.train[0][:max]
- self.train[1] = self.train[1][:max]
-
- if max < len(self.test[0]):
- for ar in (self.test, self.valid):
- ar[0] = ar[0][:max]
- ar[1] = ar[1][:max]
-
- def split_train_valid(self):
- test_len = len(self.test[0])
-
- new_train_x = self.train[0][:-test_len]
- new_train_y = self.train[1][:-test_len]
-
- self.valid[0] = self.train[0][-test_len:]
- self.valid[1] = self.train[1][-test_len:]
-
- self.train[0] = new_train_x
- self.train[1] = new_train_y
-
-def test_load_nist():
- print "Will load NIST"
-
- import time
- t1 = time.time()
- nist = NIST(20)
- t2 = time.time()
-
- print "NIST loaded. time delta = ", t2-t1
-
- tr,v,te = nist.get_tvt()
-
- print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
-
- raw_input("Press any key")
-
-# hp for hyperparameters
-def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
- global DEFAULT_HP_NIST
- hp = hp and hp or DEFAULT_HP_NIST
-
- print "Will load NIST"
-
- import time
- t1 = time.time()
- nist = NIST(20, reduce_train_to=100)
- t2 = time.time()
-
- print "NIST loaded. time delta = ", t2-t1
-
- train,valid,test = nist.get_tvt()
- dataset = (train,valid,test)
-
- print train[0][15]
- print type(train[0][1])
-
-
- print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
-
- n_ins = 32*32
- n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
- optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
- optimizer.train()
-
-if __name__ == '__main__':
-
- import sys
-
- args = sys.argv[1:]
-
- if len(args) > 0 and args[0] == 'load_nist':
- test_load_nist()
-
- elif len(args) > 0 and args[0] == 'jobman_insert':
- jobman_insert_nist()
- elif len(args) > 0 and args[0] == 'test_job_tree':
- # dont forget to comment out sql.inserts and make reduce_train_to=100
- print "TESTING JOB TREE"
- chanmock = {'COMPLETE':0}
- hp = copy.copy(DEFAULT_HP_NIST)
- hp.update({'reduce_train_to':100})
- jobman_entrypoint(hp, chanmock)
- elif len(args) > 0 and args[0] == 'estimate':
- estimate_total_time()
- else:
- sgd_optimization_nist()
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/sgd_optimization.py
--- a/scripts/stacked_dae/sgd_optimization.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,270 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-# Generic SdA optimization loop, adapted from the deeplearning.net tutorial
-
-import numpy
-import theano
-import time
-import theano.tensor as T
-import copy
-import sys
-
-from jobman import DD
-import jobman, jobman.sql
-
-from stacked_dae import SdA
-
-def shared_dataset(data_xy):
- data_x, data_y = data_xy
- #shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
- #shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
- #shared_y = T.cast(shared_y, 'int32')
- shared_x = theano.shared(data_x)
- shared_y = theano.shared(data_y)
- return shared_x, shared_y
-
-class SdaSgdOptimizer:
- def __init__(self, dataset, hyperparameters, n_ins, n_outs, input_divider=1.0,\
- job_tree=False, results_db=None,\
- experiment="",\
- num_hidden_layers_to_try=[1,2,3], \
- finetuning_lr_to_try=[0.1, 0.01, 0.001, 0.0001, 0.00001]):
-
- self.dataset = dataset
- self.hp = copy.copy(hyperparameters)
- self.n_ins = n_ins
- self.n_outs = n_outs
- self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
-
- self.job_tree = job_tree
- self.results_db = results_db
- self.experiment = experiment
- if self.job_tree:
- assert(not results_db is None)
- # these hp should not be there, so we insert default values
- # we use 3 hidden layers as we'll iterate through 1,2,3
- self.hp.finetuning_lr = 0.1 # dummy value, will be replaced anyway
- cl = self.hp.corruption_levels
- nh = self.hp.hidden_layers_sizes
- self.hp.corruption_levels = [cl,cl,cl]
- self.hp.hidden_layers_sizes = [nh,nh,nh]
-
- self.num_hidden_layers_to_try = num_hidden_layers_to_try
- self.finetuning_lr_to_try = finetuning_lr_to_try
-
- self.printout_frequency = 1000
-
- self.rng = numpy.random.RandomState(1234)
-
- self.init_datasets()
- self.init_classifier()
-
- def init_datasets(self):
- print "init_datasets"
- train_set, valid_set, test_set = self.dataset
- self.test_set_x, self.test_set_y = shared_dataset(test_set)
- self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
- self.train_set_x, self.train_set_y = shared_dataset(train_set)
-
- # compute number of minibatches for training, validation and testing
- self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
- self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
- self.n_test_batches = self.test_set_x.value.shape[0] / self.hp.minibatch_size
-
- def init_classifier(self):
- print "Constructing classifier"
- # construct the stacked denoising autoencoder class
- self.classifier = SdA( \
- train_set_x= self.train_set_x, \
- train_set_y = self.train_set_y,\
- batch_size = self.hp.minibatch_size, \
- n_ins= self.n_ins, \
- hidden_layers_sizes = self.hp.hidden_layers_sizes, \
- n_outs = self.n_outs, \
- corruption_levels = self.hp.corruption_levels,\
- rng = self.rng,\
- pretrain_lr = self.hp.pretraining_lr, \
- finetune_lr = self.hp.finetuning_lr,\
- input_divider = self.input_divider )
-
- def train(self):
- self.pretrain()
- if not self.job_tree:
- # if job_tree is True, finetuning was already performed
- self.finetune()
-
- def pretrain(self):
- print "STARTING PRETRAINING"
-
- printout_acc = 0.0
- last_error = 0.0
-
- start_time = time.clock()
- ## Pre-train layer-wise
- for i in xrange(self.classifier.n_layers):
- # go through pretraining epochs
- for epoch in xrange(self.hp.pretraining_epochs_per_layer):
- # go through the training set
- for batch_index in xrange(self.n_train_batches):
- c = self.classifier.pretrain_functions[i](batch_index)
-
- printout_acc += c / self.printout_frequency
- if (batch_index+1) % self.printout_frequency == 0:
- print batch_index, "reconstruction cost avg=", printout_acc
- last_error = printout_acc
- printout_acc = 0.0
-
- print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),c
-
- self.job_splitter(i+1, time.clock()-start_time, last_error)
-
- end_time = time.clock()
-
- print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
-
- # Save time by reusing intermediate results
- def job_splitter(self, current_pretraining_layer, pretraining_time, last_error):
-
- state_copy = None
- original_classifier = None
-
- if self.job_tree and current_pretraining_layer in self.num_hidden_layers_to_try:
- for lr in self.finetuning_lr_to_try:
- sys.stdout.flush()
- sys.stderr.flush()
-
- state_copy = copy.copy(self.hp)
-
- self.hp.update({'num_hidden_layers':current_pretraining_layer, \
- 'finetuning_lr':lr,\
- 'pretraining_time':pretraining_time,\
- 'last_reconstruction_error':last_error})
-
- original_classifier = self.classifier
- print "ORIGINAL CLASSIFIER MEANS",original_classifier.get_params_means()
- self.classifier = SdA.copy_reusing_lower_layers(original_classifier, current_pretraining_layer, new_finetuning_lr=lr)
-
- self.finetune()
-
- self.insert_finished_job()
-
- print "NEW CLASSIFIER MEANS AFTERWARDS",self.classifier.get_params_means()
- print "ORIGINAL CLASSIFIER MEANS AFTERWARDS",original_classifier.get_params_means()
- self.classifier = original_classifier
- self.hp = state_copy
-
- def insert_finished_job(self):
- job = copy.copy(self.hp)
- job[jobman.sql.STATUS] = jobman.sql.DONE
- job[jobman.sql.EXPERIMENT] = self.experiment
-
- # don,t try to store arrays in db
- job['hidden_layers_sizes'] = job.hidden_layers_sizes[0]
- job['corruption_levels'] = job.corruption_levels[0]
-
- print "Will insert finished job", job
- jobman.sql.insert_dict(jobman.flatten(job), self.results_db)
-
- def finetune(self):
- print "STARTING FINETUNING"
-
- index = T.lscalar() # index to a [mini]batch
- minibatch_size = self.hp.minibatch_size
-
- # create a function to compute the mistakes that are made by the model
- # on the validation set, or testing set
- test_model = theano.function([index], self.classifier.errors,
- givens = {
- self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
- self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-
- validate_model = theano.function([index], self.classifier.errors,
- givens = {
- self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / self.input_divider,
- self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})
-
-
- # early-stopping parameters
- patience = 10000 # look as this many examples regardless
- patience_increase = 2. # wait this much longer when a new best is
- # found
- improvement_threshold = 0.995 # a relative improvement of this much is
- # considered significant
- validation_frequency = min(self.n_train_batches, patience/2)
- # go through this many
- # minibatche before checking the network
- # on the validation set; in this case we
- # check every epoch
-
- best_params = None
- best_validation_loss = float('inf')
- test_score = 0.
- start_time = time.clock()
-
- done_looping = False
- epoch = 0
-
- printout_acc = 0.0
-
- if not self.hp.has_key('max_finetuning_epochs'):
- self.hp.max_finetuning_epochs = 1000
-
- while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
- epoch = epoch + 1
- for minibatch_index in xrange(self.n_train_batches):
-
- cost_ij = self.classifier.finetune(minibatch_index)
- iter = epoch * self.n_train_batches + minibatch_index
-
- printout_acc += cost_ij / float(self.printout_frequency * minibatch_size)
- if (iter+1) % self.printout_frequency == 0:
- print iter, "cost avg=", printout_acc
- printout_acc = 0.0
-
- if (iter+1) % validation_frequency == 0:
-
- validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
- this_validation_loss = numpy.mean(validation_losses)
- print('epoch %i, minibatch %i/%i, validation error %f %%' % \
- (epoch, minibatch_index+1, self.n_train_batches, \
- this_validation_loss*100.))
-
-
- # if we got the best validation score until now
- if this_validation_loss < best_validation_loss:
-
- #improve patience if loss improvement is good enough
- if this_validation_loss < best_validation_loss * \
- improvement_threshold :
- patience = max(patience, iter * patience_increase)
-
- # save best validation score and iteration number
- best_validation_loss = this_validation_loss
- best_iter = iter
-
- # test it on the test set
- test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
- test_score = numpy.mean(test_losses)
- print((' epoch %i, minibatch %i/%i, test error of best '
- 'model %f %%') %
- (epoch, minibatch_index+1, self.n_train_batches,
- test_score*100.))
-
-
- if patience <= iter :
- done_looping = True
- break
-
- end_time = time.clock()
- self.hp.update({'finetuning_time':end_time-start_time,\
- 'best_validation_error':best_validation_loss,\
- 'test_score':test_score,
- 'num_finetuning_epochs':epoch})
- print(('Optimization complete with best validation score of %f %%,'
- 'with test performance %f %%') %
- (best_validation_loss * 100., test_score*100.))
- print ('The finetuning ran for %f minutes' % ((end_time-start_time)/60.))
-
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/stacked_convolutional_dae.py
--- a/scripts/stacked_dae/stacked_convolutional_dae.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,415 +0,0 @@
-import numpy
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-import theano.sandbox.softsign
-
-from theano.tensor.signal import downsample
-from theano.tensor.nnet import conv
-import gzip
-import cPickle
-
-
-class LogisticRegression(object):
-
- def __init__(self, input, n_in, n_out):
-
- self.W = theano.shared( value=numpy.zeros((n_in,n_out),
- dtype = theano.config.floatX) )
-
- self.b = theano.shared( value=numpy.zeros((n_out,),
- dtype = theano.config.floatX) )
-
- self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
-
-
- self.y_pred=T.argmax(self.p_y_given_x, axis=1)
-
- self.params = [self.W, self.b]
-
- def negative_log_likelihood(self, y):
- return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
-
- def MSE(self, y):
- return -T.mean(abs((self.p_y_given_x)[T.arange(y.shape[0]),y]-y)**2)
-
- def errors(self, y):
- if y.ndim != self.y_pred.ndim:
- raise TypeError('y should have the same shape as self.y_pred',
- ('y', target.type, 'y_pred', self.y_pred.type))
-
-
- if y.dtype.startswith('int'):
- return T.mean(T.neq(self.y_pred, y))
- else:
- raise NotImplementedError()
-
-
-class SigmoidalLayer(object):
- def __init__(self, rng, input, n_in, n_out):
-
- self.input = input
-
- W_values = numpy.asarray( rng.uniform( \
- low = -numpy.sqrt(6./(n_in+n_out)), \
- high = numpy.sqrt(6./(n_in+n_out)), \
- size = (n_in, n_out)), dtype = theano.config.floatX)
- self.W = theano.shared(value = W_values)
-
- b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
- self.b = theano.shared(value= b_values)
-
- self.output = T.tanh(T.dot(input, self.W) + self.b)
- self.params = [self.W, self.b]
-
-class dA_conv(object):
-
- def __init__(self, corruption_level = 0.1, input = None, shared_W = None,\
- shared_b = None, filter_shape = None, image_shape = None, poolsize = (2,2)):
-
- theano_rng = RandomStreams()
-
- fan_in = numpy.prod(filter_shape[1:])
- fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
-
- center = theano.shared(value = 1, name="center")
- scale = theano.shared(value = 2, name="scale")
-
- if shared_W != None and shared_b != None :
- self.W = shared_W
- self.b = shared_b
- else:
- initial_W = numpy.asarray( numpy.random.uniform( \
- low = -numpy.sqrt(6./(fan_in+fan_out)), \
- high = numpy.sqrt(6./(fan_in+fan_out)), \
- size = filter_shape), dtype = theano.config.floatX)
- initial_b = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
-
-
- self.W = theano.shared(value = initial_W, name = "W")
- self.b = theano.shared(value = initial_b, name = "b")
-
-
- initial_b_prime= numpy.zeros((filter_shape[1],))
-
- self.W_prime=T.dtensor4('W_prime')
-
- self.b_prime = theano.shared(value = initial_b_prime, name = "b_prime")
-
- self.x = input
-
- self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
-
- conv1_out = conv.conv2d(self.tilde_x, self.W, \
- filter_shape=filter_shape, \
- image_shape=image_shape, border_mode='valid')
-
-
- self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x'))
-
-
- da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\
- filter_shape[3] ]
- da_image_shape = [ image_shape[0],filter_shape[0],image_shape[2]-filter_shape[2]+1, \
- image_shape[3]-filter_shape[3]+1 ]
- initial_W_prime = numpy.asarray( numpy.random.uniform( \
- low = -numpy.sqrt(6./(fan_in+fan_out)), \
- high = numpy.sqrt(6./(fan_in+fan_out)), \
- size = da_filter_shape), dtype = theano.config.floatX)
- self.W_prime = theano.shared(value = initial_W_prime, name = "W_prime")
-
- #import pdb;pdb.set_trace()
-
- conv2_out = conv.conv2d(self.y, self.W_prime, \
- filter_shape = da_filter_shape, image_shape = da_image_shape ,\
- border_mode='full')
-
- self.z = (T.tanh(conv2_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))+center) / scale
-
- scaled_x = (self.x + center) / scale
-
- self.L = - T.sum( scaled_x*T.log(self.z) + (1-scaled_x)*T.log(1-self.z), axis=1 )
-
- self.cost = T.mean(self.L)
-
- self.params = [ self.W, self.b, self.b_prime ]
-
-
-
-class LeNetConvPoolLayer(object):
- def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,2)):
- assert image_shape[1]==filter_shape[1]
- self.input = input
-
- W_values = numpy.zeros(filter_shape, dtype=theano.config.floatX)
- self.W = theano.shared(value = W_values)
-
- b_values = numpy.zeros((filter_shape[0],), dtype= theano.config.floatX)
- self.b = theano.shared(value= b_values)
-
- conv_out = conv.conv2d(input, self.W,
- filter_shape=filter_shape, image_shape=image_shape)
-
-
- fan_in = numpy.prod(filter_shape[1:])
- fan_out = filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)
-
- W_bound = numpy.sqrt(6./(fan_in + fan_out))
- self.W.value = numpy.asarray(
- rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
- dtype = theano.config.floatX)
-
-
- pooled_out = downsample.max_pool2D(conv_out, poolsize, ignore_border=True)
-
- self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
- self.params = [self.W, self.b]
-
-
-class SdA():
- def __init__(self, input, n_ins_conv, n_ins_mlp, train_set_x, train_set_y, batch_size, \
- conv_hidden_layers_sizes, mlp_hidden_layers_sizes, corruption_levels, \
- rng, n_out, pretrain_lr, finetune_lr):
-
- self.layers = []
- self.pretrain_functions = []
- self.params = []
- self.conv_n_layers = len(conv_hidden_layers_sizes)
- self.mlp_n_layers = len(mlp_hidden_layers_sizes)
-
- index = T.lscalar() # index to a [mini]batch
- self.x = T.dmatrix('x') # the data is presented as rasterized images
- self.y = T.ivector('y') # the labels are presented as 1D vector of
-
-
-
- for i in xrange( self.conv_n_layers ):
-
- filter_shape=conv_hidden_layers_sizes[i][0]
- image_shape=conv_hidden_layers_sizes[i][1]
- max_poolsize=conv_hidden_layers_sizes[i][2]
-
- if i == 0 :
- layer_input=self.x.reshape((batch_size,1,28,28))
- else:
- layer_input=self.layers[-1].output
-
- layer = LeNetConvPoolLayer(rng, input=layer_input, \
- image_shape=image_shape, \
- filter_shape=filter_shape,poolsize=max_poolsize)
- print 'Convolutional layer '+str(i+1)+' created'
-
- self.layers += [layer]
- self.params += layer.params
-
- da_layer = dA_conv(corruption_level = corruption_levels[0],\
- input = layer_input, \
- shared_W = layer.W, shared_b = layer.b,\
- filter_shape = filter_shape , image_shape = image_shape )
-
-
- gparams = T.grad(da_layer.cost, da_layer.params)
-
- updates = {}
- for param, gparam in zip(da_layer.params, gparams):
- updates[param] = param - gparam * pretrain_lr
-
-
- update_fn = theano.function([index], da_layer.cost, \
- updates = updates,
- givens = {
- self.x : train_set_x[index*batch_size:(index+1)*batch_size]} )
-
- self.pretrain_functions += [update_fn]
-
- for i in xrange( self.mlp_n_layers ):
- if i == 0 :
- input_size = n_ins_mlp
- else:
- input_size = mlp_hidden_layers_sizes[i-1]
-
- if i == 0 :
- if len( self.layers ) == 0 :
- layer_input=self.x
- else :
- layer_input = self.layers[-1].output.flatten(2)
- else:
- layer_input = self.layers[-1].output
-
- layer = SigmoidalLayer(rng, layer_input, input_size,
- mlp_hidden_layers_sizes[i] )
-
- self.layers += [layer]
- self.params += layer.params
-
-
- print 'MLP layer '+str(i+1)+' created'
-
- self.logLayer = LogisticRegression(input=self.layers[-1].output, \
- n_in=mlp_hidden_layers_sizes[-1], n_out=n_out)
- self.params += self.logLayer.params
-
- cost = self.logLayer.negative_log_likelihood(self.y)
-
- gparams = T.grad(cost, self.params)
- updates = {}
-
- for param,gparam in zip(self.params, gparams):
- updates[param] = param - gparam*finetune_lr
-
- self.finetune = theano.function([index], cost,
- updates = updates,
- givens = {
- self.x : train_set_x[index*batch_size:(index+1)*batch_size],
- self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
-
-
- self.errors = self.logLayer.errors(self.y)
-
-
-
-def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 2, \
- pretrain_lr = 0.01, training_epochs = 1000, \
- dataset='mnist.pkl.gz'):
-
- f = gzip.open(dataset,'rb')
- train_set, valid_set, test_set = cPickle.load(f)
- f.close()
-
-
- def shared_dataset(data_xy):
- data_x, data_y = data_xy
- shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
- shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
- return shared_x, T.cast(shared_y, 'int32')
-
-
- test_set_x, test_set_y = shared_dataset(test_set)
- valid_set_x, valid_set_y = shared_dataset(valid_set)
- train_set_x, train_set_y = shared_dataset(train_set)
-
- batch_size = 500 # size of the minibatch
-
-
- n_train_batches = train_set_x.value.shape[0] / batch_size
- n_valid_batches = valid_set_x.value.shape[0] / batch_size
- n_test_batches = test_set_x.value.shape[0] / batch_size
-
- # allocate symbolic variables for the data
- index = T.lscalar() # index to a [mini]batch
- x = T.matrix('x') # the data is presented as rasterized images
- y = T.ivector('y') # the labels are presented as 1d vector of
- # [int] labels
- layer0_input = x.reshape((batch_size,1,28,28))
-
-
- # Setup the convolutional layers with their DAs(add as many as you want)
- corruption_levels = [ 0.2, 0.2, 0.2]
- rng = numpy.random.RandomState(1234)
- ker1=2
- ker2=2
- conv_layers=[]
- conv_layers.append([[ker1,1,5,5], [batch_size,1,28,28], [2,2] ])
- conv_layers.append([[ker2,ker1,5,5], [batch_size,ker1,12,12], [2,2] ])
-
- # Setup the MLP layers of the network
- mlp_layers=[500]
-
- network = SdA(input = layer0_input, n_ins_conv = 28*28, n_ins_mlp = ker2*4*4, \
- train_set_x = train_set_x, train_set_y = train_set_y, batch_size = batch_size,
- conv_hidden_layers_sizes = conv_layers, \
- mlp_hidden_layers_sizes = mlp_layers, \
- corruption_levels = corruption_levels , n_out = 10, \
- rng = rng , pretrain_lr = pretrain_lr , finetune_lr = learning_rate )
-
- test_model = theano.function([index], network.errors,
- givens = {
- network.x: test_set_x[index*batch_size:(index+1)*batch_size],
- network.y: test_set_y[index*batch_size:(index+1)*batch_size]})
-
- validate_model = theano.function([index], network.errors,
- givens = {
- network.x: valid_set_x[index*batch_size:(index+1)*batch_size],
- network.y: valid_set_y[index*batch_size:(index+1)*batch_size]})
-
-
-
- start_time = time.clock()
- for i in xrange(len(network.layers)-len(mlp_layers)):
- for epoch in xrange(pretraining_epochs):
- for batch_index in xrange(n_train_batches):
- c = network.pretrain_functions[i](batch_index)
- print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch),c
-
- patience = 10000 # look as this many examples regardless
- patience_increase = 2. # WAIT THIS MUCH LONGER WHEN A NEW BEST IS
- # FOUND
- improvement_threshold = 0.995 # a relative improvement of this much is
-
- validation_frequency = min(n_train_batches, patience/2)
-
-
- best_params = None
- best_validation_loss = float('inf')
- test_score = 0.
- start_time = time.clock()
-
- done_looping = False
- epoch = 0
-
- while (epoch < training_epochs) and (not done_looping):
- epoch = epoch + 1
- for minibatch_index in xrange(n_train_batches):
-
- cost_ij = network.finetune(minibatch_index)
- iter = epoch * n_train_batches + minibatch_index
-
- if (iter+1) % validation_frequency == 0:
-
- validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
- this_validation_loss = numpy.mean(validation_losses)
- print('epoch %i, minibatch %i/%i, validation error %f %%' % \
- (epoch, minibatch_index+1, n_train_batches, \
- this_validation_loss*100.))
-
-
- # if we got the best validation score until now
- if this_validation_loss < best_validation_loss:
-
- #improve patience if loss improvement is good enough
- if this_validation_loss < best_validation_loss * \
- improvement_threshold :
- patience = max(patience, iter * patience_increase)
-
- # save best validation score and iteration number
- best_validation_loss = this_validation_loss
- best_iter = iter
-
- # test it on the test set
- test_losses = [test_model(i) for i in xrange(n_test_batches)]
- test_score = numpy.mean(test_losses)
- print((' epoch %i, minibatch %i/%i, test error of best '
- 'model %f %%') %
- (epoch, minibatch_index+1, n_train_batches,
- test_score*100.))
-
-
- if patience <= iter :
- done_looping = True
- break
-
- end_time = time.clock()
- print(('Optimization complete with best validation score of %f %%,'
- 'with test performance %f %%') %
- (best_validation_loss * 100., test_score*100.))
- print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
-
-
-
-
-
-
-if __name__ == '__main__':
- sgd_optimization_mnist()
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/stacked_dae.py
--- a/scripts/stacked_dae/stacked_dae.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,287 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-import numpy
-import theano
-import time
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-import copy
-
-from utils import update_locals
-
-class LogisticRegression(object):
- def __init__(self, input, n_in, n_out):
- # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
- self.W = theano.shared( value=numpy.zeros((n_in,n_out),
- dtype = theano.config.floatX) )
- # initialize the baises b as a vector of n_out 0s
- self.b = theano.shared( value=numpy.zeros((n_out,),
- dtype = theano.config.floatX) )
- # compute vector of class-membership probabilities in symbolic form
- self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b)
-
- # compute prediction as class whose probability is maximal in
- # symbolic form
- self.y_pred=T.argmax(self.p_y_given_x, axis=1)
-
- # list of parameters for this layer
- self.params = [self.W, self.b]
-
- def negative_log_likelihood(self, y):
- return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y])
-
- def errors(self, y):
- # check if y has same dimension of y_pred
- if y.ndim != self.y_pred.ndim:
- raise TypeError('y should have the same shape as self.y_pred',
- ('y', target.type, 'y_pred', self.y_pred.type))
-
- # check if y is of the correct datatype
- if y.dtype.startswith('int'):
- # the T.neq operator returns a vector of 0s and 1s, where 1
- # represents a mistake in prediction
- return T.mean(T.neq(self.y_pred, y))
- else:
- raise NotImplementedError()
-
-
-class SigmoidalLayer(object):
- def __init__(self, rng, input, n_in, n_out):
- self.input = input
-
- W_values = numpy.asarray( rng.uniform( \
- low = -numpy.sqrt(6./(n_in+n_out)), \
- high = numpy.sqrt(6./(n_in+n_out)), \
- size = (n_in, n_out)), dtype = theano.config.floatX)
- self.W = theano.shared(value = W_values)
-
- b_values = numpy.zeros((n_out,), dtype= theano.config.floatX)
- self.b = theano.shared(value= b_values)
-
- self.output = T.nnet.sigmoid(T.dot(input, self.W) + self.b)
- self.params = [self.W, self.b]
-
-
-
-class dA(object):
- def __init__(self, n_visible= 784, n_hidden= 500, corruption_level = 0.1,\
- input = None, shared_W = None, shared_b = None):
- self.n_visible = n_visible
- self.n_hidden = n_hidden
-
- # create a Theano random generator that gives symbolic random values
- theano_rng = RandomStreams()
-
- if shared_W != None and shared_b != None :
- self.W = shared_W
- self.b = shared_b
- else:
- # initial values for weights and biases
- # note : W' was written as `W_prime` and b' as `b_prime`
-
- # W is initialized with `initial_W` which is uniformely sampled
- # from -6./sqrt(n_visible+n_hidden) and 6./sqrt(n_hidden+n_visible)
- # the output of uniform if converted using asarray to dtype
- # theano.config.floatX so that the code is runable on GPU
- initial_W = numpy.asarray( numpy.random.uniform( \
- low = -numpy.sqrt(6./(n_hidden+n_visible)), \
- high = numpy.sqrt(6./(n_hidden+n_visible)), \
- size = (n_visible, n_hidden)), dtype = theano.config.floatX)
- initial_b = numpy.zeros(n_hidden, dtype = theano.config.floatX)
-
-
- # theano shared variables for weights and biases
- self.W = theano.shared(value = initial_W, name = "W")
- self.b = theano.shared(value = initial_b, name = "b")
-
-
- initial_b_prime= numpy.zeros(n_visible)
- # tied weights, therefore W_prime is W transpose
- self.W_prime = self.W.T
- self.b_prime = theano.shared(value = initial_b_prime, name = "b'")
-
- # if no input is given, generate a variable representing the input
- if input == None :
- # we use a matrix because we expect a minibatch of several examples,
- # each example being a row
- self.x = T.dmatrix(name = 'input')
- else:
- self.x = input
- # Equation (1)
- # keep 90% of the inputs the same and zero-out randomly selected subset of 10% of the inputs
- # note : first argument of theano.rng.binomial is the shape(size) of
- # random numbers that it should produce
- # second argument is the number of trials
- # third argument is the probability of success of any trial
- #
- # this will produce an array of 0s and 1s where 1 has a
- # probability of 1 - ``corruption_level`` and 0 with
- # ``corruption_level``
- self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level) * self.x
- # Equation (2)
- # note : y is stored as an attribute of the class so that it can be
- # used later when stacking dAs.
- self.y = T.nnet.sigmoid(T.dot(self.tilde_x, self.W ) + self.b)
- # Equation (3)
- self.z = T.nnet.sigmoid(T.dot(self.y, self.W_prime) + self.b_prime)
- # Equation (4)
- # note : we sum over the size of a datapoint; if we are using minibatches,
- # L will be a vector, with one entry per example in minibatch
- self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
- # note : L is now a vector, where each element is the cross-entropy cost
- # of the reconstruction of the corresponding example of the
- # minibatch. We need to compute the average of all these to get
- # the cost of the minibatch
- self.cost = T.mean(self.L)
-
- self.params = [ self.W, self.b, self.b_prime ]
-
-
-
-
-class SdA(object):
- def __init__(self, train_set_x, train_set_y, batch_size, n_ins,
- hidden_layers_sizes, n_outs,
- corruption_levels, rng, pretrain_lr, finetune_lr, input_divider=1.0):
- update_locals(self, locals())
-
- self.layers = []
- self.pretrain_functions = []
- self.params = []
- self.n_layers = len(hidden_layers_sizes)
-
- self.input_divider = numpy.asarray(input_divider, dtype=theano.config.floatX)
-
- if len(hidden_layers_sizes) < 1 :
- raiseException (' You must have at least one hidden layer ')
-
-
- # allocate symbolic variables for the data
- index = T.lscalar() # index to a [mini]batch
- self.x = T.matrix('x') # the data is presented as rasterized images
- self.y = T.ivector('y') # the labels are presented as 1D vector of
- # [int] labels
-
- for i in xrange( self.n_layers ):
- # construct the sigmoidal layer
-
- # the size of the input is either the number of hidden units of
- # the layer below or the input size if we are on the first layer
- if i == 0 :
- input_size = n_ins
- else:
- input_size = hidden_layers_sizes[i-1]
-
- # the input to this layer is either the activation of the hidden
- # layer below or the input of the SdA if you are on the first
- # layer
- if i == 0 :
- layer_input = self.x
- else:
- layer_input = self.layers[-1].output
-
- layer = SigmoidalLayer(rng, layer_input, input_size,
- hidden_layers_sizes[i] )
- # add the layer to the
- self.layers += [layer]
- self.params += layer.params
-
- # Construct a denoising autoencoder that shared weights with this
- # layer
- dA_layer = dA(input_size, hidden_layers_sizes[i], \
- corruption_level = corruption_levels[0],\
- input = layer_input, \
- shared_W = layer.W, shared_b = layer.b)
-
- # Construct a function that trains this dA
- # compute gradients of layer parameters
- gparams = T.grad(dA_layer.cost, dA_layer.params)
- # compute the list of updates
- updates = {}
- for param, gparam in zip(dA_layer.params, gparams):
- updates[param] = param - gparam * pretrain_lr
-
- # create a function that trains the dA
- update_fn = theano.function([index], dA_layer.cost, \
- updates = updates,
- givens = {
- self.x : train_set_x[index*batch_size:(index+1)*batch_size] / self.input_divider})
- # collect this function into a list
- self.pretrain_functions += [update_fn]
-
-
- # We now need to add a logistic layer on top of the MLP
- self.logLayer = LogisticRegression(\
- input = self.layers[-1].output,\
- n_in = hidden_layers_sizes[-1], n_out = n_outs)
-
- self.params += self.logLayer.params
- # construct a function that implements one step of finetunining
-
- # compute the cost, defined as the negative log likelihood
- cost = self.logLayer.negative_log_likelihood(self.y)
- # compute the gradients with respect to the model parameters
- gparams = T.grad(cost, self.params)
- # compute list of updates
- updates = {}
- for param,gparam in zip(self.params, gparams):
- updates[param] = param - gparam*finetune_lr
-
- self.finetune = theano.function([index], cost,
- updates = updates,
- givens = {
- self.x : train_set_x[index*batch_size:(index+1)*batch_size]/self.input_divider,
- self.y : train_set_y[index*batch_size:(index+1)*batch_size]} )
-
- # symbolic variable that points to the number of errors made on the
- # minibatch given by self.x and self.y
-
- self.errors = self.logLayer.errors(self.y)
-
- @classmethod
- def copy_reusing_lower_layers(cls, obj, num_hidden_layers, new_finetuning_lr=None):
- assert(num_hidden_layers <= obj.n_layers)
-
- if not new_finetuning_lr:
- new_finetuning_lr = obj.finetune_lr
-
- new_sda = cls(train_set_x= obj.train_set_x, \
- train_set_y = obj.train_set_y,\
- batch_size = obj.batch_size, \
- n_ins= obj.n_ins, \
- hidden_layers_sizes = obj.hidden_layers_sizes[:num_hidden_layers], \
- n_outs = obj.n_outs, \
- corruption_levels = obj.corruption_levels[:num_hidden_layers],\
- rng = obj.rng,\
- pretrain_lr = obj.pretrain_lr, \
- finetune_lr = new_finetuning_lr, \
- input_divider = obj.input_divider )
-
- # new_sda.layers contains only the hidden layers actually
- for i, layer in enumerate(new_sda.layers):
- original_layer = obj.layers[i]
- for p1,p2 in zip(layer.params, original_layer.params):
- p1.value = p2.value.copy()
-
- return new_sda
-
- def get_params_copy(self):
- return copy.deepcopy(self.params)
-
- def set_params_from_copy(self, copy):
- # We don't want to replace the var, as the functions have pointers in there
- # We only want to replace values.
- for i, p in enumerate(self.params):
- p.value = copy[i].value
-
- def get_params_means(self):
- s = []
- for p in self.params:
- s.append(numpy.mean(p.value))
- return s
-
-if __name__ == '__main__':
- import sys
- args = sys.argv[1:]
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 scripts/stacked_dae/utils.py
--- a/scripts/stacked_dae/utils.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-#!/usr/bin/python
-
-from jobman import DD
-
-# from pylearn codebase
-def update_locals(obj, dct):
- if 'self' in dct:
- del dct['self']
- obj.__dict__.update(dct)
-
-def produit_croise_jobs(val_dict):
- job_list = [DD()]
- all_keys = val_dict.keys()
-
- for key in all_keys:
- possible_values = val_dict[key]
- new_job_list = []
- for val in possible_values:
- for job in job_list:
- to_insert = job.copy()
- to_insert.update({key: val})
- new_job_list.append(to_insert)
- job_list = new_job_list
-
- return job_list
-
-def test_produit_croise_jobs():
- vals = {'a': [1,2], 'b': [3,4,5]}
- print produit_croise_jobs(vals)
-
-
-# taken from http://stackoverflow.com/questions/276052/how-to-get-current-cpu-and-ram-usage-in-python
-"""Simple module for getting amount of memory used by a specified user's
-processes on a UNIX system.
-It uses UNIX ps utility to get the memory usage for a specified username and
-pipe it to awk for summing up per application memory usage and return the total.
-Python's Popen() from subprocess module is used for spawning ps and awk.
-
-"""
-
-import subprocess
-
-class MemoryMonitor(object):
-
- def __init__(self, username):
- """Create new MemoryMonitor instance."""
- self.username = username
-
- def usage(self):
- """Return int containing memory used by user's processes."""
- self.process = subprocess.Popen("ps -u %s -o rss | awk '{sum+=$1} END {print sum}'" % self.username,
- shell=True,
- stdout=subprocess.PIPE,
- )
- self.stdout_list = self.process.communicate()[0].split('\n')
- return int(self.stdout_list[0])
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/BruitGauss.py
--- a/transformations/BruitGauss.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,138 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout de bruit gaussien dans les donnees. A chaque iteration, un bruit poivre
-et sel est ajoute, puis un lissage gaussien autour de ce point est ajoute.
-On fait un nombre d'iteration = 1024*complexity/25 ce qui equivaud
-a complexity/25 des points qui recoivent le centre du noyau gaussien.
-Il y en a beaucoup moins que le bruit poivre et sel, car la transformation
-est plutôt aggressive et touche beaucoup de pixels autour du centre
-
-La grandeur de la gaussienne ainsi que son ecart type sont definit par complexity
-et par une composante aleatoire normale.
-
-On a 25 % de chances d'effectuer le bruitage
-
-Ce fichier prend pour acquis que les images sont donnees une a la fois
-sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-#import random
-import scipy
-from scipy import ndimage
-
-class BruitGauss():
-
- def __init__(self,complexity=1,seed=6378):
- self.nb_chngmax =10 #Le nombre de pixels changes. Seulement pour fin de calcul
- self.grandeurmax = 20
- self.sigmamax = 6.0
- self.regenerate_parameters(complexity)
- self.seed=seed
-
- #numpy.random.seed(self.seed)
-
- def get_seed(self):
- return self.seed
-
- def get_settings_names(self):
- return ['nb_chng','sigma_gauss','grandeur']
-
- def regenerate_parameters(self, complexity):
- self.effectuer =numpy.random.binomial(1,0.25) ##### On a 25% de faire un bruit #####
-
-
- if self.effectuer and complexity > 0:
- self.nb_chng=3+int(numpy.random.rand()*self.nb_chngmax*complexity)
- self.sigma_gauss=2.0 + numpy.random.rand()*self.sigmamax*complexity
- self.grandeur=12+int(numpy.random.rand()*self.grandeurmax*complexity)
- #creation du noyau gaussien
- self.gauss=numpy.zeros((self.grandeur,self.grandeur))
- x0 = y0 = self.grandeur/2.0
- for i in xrange(self.grandeur):
- for j in xrange(self.grandeur):
- self.gauss[i,j]=numpy.exp(-((i-x0)**2 + (j-y0)**2) / self.sigma_gauss**2)
- #creation de la fenetre de moyennage
- self.moy=numpy.zeros((self.grandeur,self.grandeur))
- x0 = y0 = self.grandeur/2
- for i in xrange(0,self.grandeur):
- for j in xrange(0,self.grandeur):
- self.moy[i,j]=((numpy.sqrt(2*(self.grandeur/2.0)**2) -\
- numpy.sqrt(numpy.abs(i-self.grandeur/2.0)**2+numpy.abs(j-self.grandeur/2.0)**2))/numpy.sqrt((self.grandeur/2.0)**2))**5
- else:
- self.sigma_gauss = 1 # eviter division par 0
- self.grandeur=1
- self.nb_chng = 0
- self.effectuer = 0
-
- return self._get_current_parameters()
-
- def _get_current_parameters(self):
- return [self.nb_chng,self.sigma_gauss,self.grandeur]
-
-
- def transform_image(self, image):
- if self.effectuer == 0:
- return image
- image=image.reshape((32,32))
- filtered_image = ndimage.convolve(image,self.gauss,mode='constant')
- assert image.shape == filtered_image.shape
- filtered_image = (filtered_image - filtered_image.min() + image.min()) / (filtered_image.max() - filtered_image.min() + image.min()) * image.max()
-
- #construction of the moyennage Mask
- Mask = numpy.zeros((32,32))
-
- for i in xrange(0,self.nb_chng):
- x_bruit=int(numpy.random.randint(0,32))
- y_bruit=int(numpy.random.randint(0,32))
- offsetxmin = 0
- offsetxmax = 0
- offsetymin = 0
- offsetymax = 0
- if x_bruit < self.grandeur / 2:
- offsetxmin = self.grandeur / 2 - x_bruit
- if 32-x_bruit < numpy.ceil(self.grandeur / 2.0):
- offsetxmax = numpy.ceil(self.grandeur / 2.0) - (32-x_bruit)
- if y_bruit < self.grandeur / 2:
- offsetymin = self.grandeur / 2 - y_bruit
- if 32-y_bruit < numpy.ceil(self.grandeur / 2.0):
- offsetymax = numpy.ceil(self.grandeur / 2.0) - (32-y_bruit)
- Mask[x_bruit - self.grandeur/2 + offsetxmin : x_bruit + numpy.ceil(self.grandeur/2.0) - offsetxmax,\
- y_bruit - self.grandeur/2 + offsetymin : y_bruit + numpy.ceil(self.grandeur/2.0)- offsetymax] +=\
- self.moy[offsetxmin:self.grandeur - offsetxmax,offsetymin:self.grandeur - offsetymax]
-
- return numpy.asarray((image + filtered_image*Mask)/(Mask+1),dtype='float32')
-
-#---TESTS---
-
-def _load_image():
- f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #Le jeu de donnees est en local.
- d = ft.read(f)
- w=numpy.asarray(d[0])
- return (w/255.0).astype('float')
-
-def _test(complexite):
- img=_load_image()
- transfo = BruitGauss()
- pylab.imshow(img.reshape((32,32)))
- pylab.show()
- print transfo.get_settings_names()
- print transfo.regenerate_parameters(complexite)
-
- img_trans=transfo.transform_image(img)
-
- pylab.imshow(img_trans.reshape((32,32)))
- pylab.show()
-
-
-if __name__ == '__main__':
- from pylearn.io import filetensor as ft
- import pylab
- _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/DistorsionGauss.py
--- a/transformations/DistorsionGauss.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout d'une composante aleatoire dans chaque pixel de l'image.
-C'est une distorsion gaussienne de moyenne 0 et d'écart type complexity/10
-
-Il y a 30% d'effectuer le bruitage
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-import random
-
-class DistorsionGauss():
-
- def __init__(self,seed=3459):
- self.ecart_type=0.1 #L'ecart type de la gaussienne
- self.effectuer=1 #1=on effectue et 0=rien faire
- self.seed=seed
-
- #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
- #numpy.random.seed(self.seed)
- #random.seed(self.seed)
-
- def get_settings_names(self):
- return ['effectuer']
-
- def get_seed(self):
- return self.seed
-
- def get_settings_names_determined_by_complexity(self,complexity):
- return ['ecart_type']
-
- def regenerate_parameters(self, complexity):
- self.ecart_type=float(complexity)/10
- self.effectuer =numpy.random.binomial(1,0.3) ##### On a 30% de faire un bruit #####
- return self._get_current_parameters()
-
- def _get_current_parameters(self):
- return [self.effectuer]
-
- def get_parameters_determined_by_complexity(self,complexity):
- return [float(complexity)/10]
-
- def transform_image(self, image):
- if self.effectuer == 0:
- return image
-
- image=image.reshape(1024,1)
- aleatoire=numpy.zeros((1024,1)).astype('float32')
- for i in xrange(0,1024):
- aleatoire[i]=float(random.gauss(0,self.ecart_type))
- image=image+aleatoire
-
-
- #Ramener tout entre 0 et 1. Ancienne facon de normaliser.
- #Resultats moins interessant je trouve.
-## if numpy.min(image) < 0:
-## image-=numpy.min(image)
-## if numpy.max(image) > 1:
-## image/=numpy.max(image)
-
- for i in xrange(0,1024):
- image[i]=min(1,max(0,image[i]))
-
- return image.reshape(32,32)
-
-
-#---TESTS---
-
-def _load_image():
- f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #Le jeu de donnees est en local.
- d = ft.read(f)
- w=numpy.asarray(d[random.randint(0,100)])
- return (w/255.0).astype('float')
-
-def _test(complexite):
- img=_load_image()
- transfo = DistorsionGauss()
- pylab.imshow(img.reshape((32,32)))
- pylab.show()
- print transfo.get_settings_names()
- print transfo.regenerate_parameters(complexite)
-
- img_trans=transfo.transform_image(img)
-
- pylab.imshow(img_trans.reshape((32,32)))
- pylab.show()
-
-
-if __name__ == '__main__':
- from pylearn.io import filetensor as ft
- import pylab
- for i in xrange(0,5):
- _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/Occlusion.py
--- a/transformations/Occlusion.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout de bruit d'occlusion dans l'image originale.
-
-Le bruit provient d'un echantillon pris dans la seconde image puis rajoutee a
-gauche ou a droite de l'image originale. De plus, il se peut aussi que le
-bruit soit rajoute sur l'image originale, mais en plus pâle.
-
-Le fichier /data/lisa/data/ift6266h10/echantillon_occlusion.ft
-(sur le reseau DIRO) est necessaire.
-
-Il y a 30% de chance d'avoir une occlusion quelconque.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-
-import numpy
-
-from pylearn.io import filetensor as ft
-
-class Occlusion():
-
- def __init__(self,seed=9854):
- #Ces 4 variables representent la taille du "crop" sur l'image2
- #Ce "crop" est pris a partie de image1[15,15], le milieu de l'image1
- self.haut=2
- self.bas=2
- self.gauche=2
- self.droite=2
-
- #Ces deux variables representent le deplacement en x et y par rapport
- #au milieu du bord gauche ou droit
- self.x_arrivee=0
- self.y_arrivee=0
-
- #Cette variable =1 si l'image est mise a gauche et -1 si a droite
- #et =0 si au centre, mais plus pale
- self.endroit=-1
-
- #Cette variable determine l'opacite de l'ajout dans le cas ou on est au milieu
- self.opacite=0.5 #C'est completement arbitraire. Possible de le changer si voulu
-
- #Sert a dire si on fait quelque chose. 0=faire rien, 1 on fait quelque chose
- self.appliquer=1
-
- self.seed=seed
- #numpy.random.seed(self.seed)
-
- f3 = open('/data/lisa/data/ift6266h10/echantillon_occlusion.ft') #Doit etre sur le reseau DIRO.
- #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/echantillon_occlusion.ft')
- #Il faut arranger le path sinon
- w=ft.read(f3)
- f3.close()
-
- self.longueur=len(w)
- self.d=(w.astype('float'))/255
-
-
- def get_settings_names(self):
- return ['haut','bas','gauche','droite','x_arrivee','y_arrivee','endroit','rajout','appliquer']
-
- def get_seed(self):
- return self.seed
-
- def regenerate_parameters(self, complexity):
- self.haut=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
- self.bas=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
- self.gauche=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
- self.droite=min(15,int(numpy.abs(numpy.random.normal(int(8*complexity),2))))
- if self.haut+self.bas+self.gauche+self.droite==0: #Tres improbable
- self.haut=1
- self.bas=1
- self.gauche=1
- self.droite=1
-
- #Ces deux valeurs seront controlees afin d'etre certain de ne pas depasser
- self.x_arrivee=int(numpy.abs(numpy.random.normal(0,2))) #Complexity n'entre pas en jeu, pas besoin
- self.y_arrivee=int(numpy.random.normal(0,3))
-
- self.rajout=numpy.random.randint(0,self.longueur-1) #les bouts de quelle lettre
- self.appliquer=numpy.random.binomial(1,0.4) ##### 40 % du temps, on met une occlusion #####
-
- if complexity == 0: #On ne fait rien dans ce cas
- self.applique=0
-
- self.endroit=numpy.random.randint(-1,2)
-
- return self._get_current_parameters()
-
- def _get_current_parameters(self):
- return [self.haut,self.bas,self.gauche,self.droite,self.x_arrivee,self.y_arrivee,self.endroit,self.rajout,self.appliquer]
-
-
- def transform_image(self, image):
- if self.appliquer == 0: #Si on fait rien, on retourne tout de suite l'image
- return image
-
- #Attrapper le bruit d'occlusion
- bruit=self.d[self.rajout].reshape((32,32))[15-self.haut:15+self.bas+1,15-self.gauche:15+self.droite+1]
-
- if self.x_arrivee+self.gauche+self.droite>32:
- self.endroit*=-1 #On change de bord et on colle sur le cote
- self.x_arrivee=0
- if self.y_arrivee-self.haut <-16:
- self.y_arrivee=self.haut-16#On colle le morceau en haut
- if self.y_arrivee+self.bas > 15:
- self.y_arrivee=15-self.bas #On colle le morceau en bas
-
- if self.endroit==-1: #a gauche
- for i in xrange(-self.haut,self.bas+1):
- for j in xrange(0,self.gauche+self.droite+1):
- image[16+self.y_arrivee+i,self.x_arrivee+j]=\
- max(image[16+self.y_arrivee+i,self.x_arrivee+j],bruit[i+self.haut,j])
-
- elif self.endroit==1: #a droite
- for i in xrange(-self.haut,self.bas+1):
- for j in xrange(-self.gauche-self.droite,1):
- image[16+self.y_arrivee+i,31-self.x_arrivee+j]=\
- max(image[16+self.y_arrivee+i,31-self.x_arrivee+j],bruit[i+self.haut,j+self.gauche+self.droite])
-
- elif self.endroit==0: #au milieu
- for i in xrange(-self.haut,self.bas+1):
- for j in xrange(-self.gauche,self.droite+1):
- image[16+i,16+j]=max(image[16+i,16+j],bruit[i+self.haut,j+self.gauche]*self.opacite)
-
-
- return image
-
-#---TESTS---
-
-def _load_image():
- f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #Le jeu de donnees est en local.
- d = ft.read(f)
- w=numpy.asarray(d[numpy.random.randint(0,50)])
- return (w/255.0).astype('float')
-
-def _test(complexite):
-
- transfo = Occlusion()
- for i in xrange(0,20):
- img = _load_image()
- pylab.imshow(img.reshape((32,32)))
- pylab.show()
- print transfo.get_settings_names()
- print transfo.regenerate_parameters(complexite)
-
- img_trans=transfo.transform_image(img.reshape((32,32)))
-
- print transfo.get_seed()
- pylab.imshow(img_trans.reshape((32,32)))
- pylab.show()
-
-
-if __name__ == '__main__':
- import pylab
- import scipy
- _test(0.5)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/PermutPixel.py
--- a/transformations/PermutPixel.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,114 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Un echange de pixels est effectue entre certain pixels choisit aleatoirement
-et un de ses 4 voisins, tout aussi choisi aleatoirement.
-
-Le nombre de pixels permutes est definit pas complexity*1024
-
-Il y a proba 20% d'effectuer le bruitage
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-import random
-
-class PermutPixel():
-
- def __init__(self,seed=7152):
- self.nombre=10 #Le nombre de pixels a permuter
- self.proportion=0.3
- self.effectuer=1 #1=on effectue, 0=rien faire
- self.seed=seed
-
- #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
- #numpy.random.seed(self.seed)
- #random.seed(self.seed)
-
- def get_seed(self):
- return self.seed
-
- def get_settings_names(self):
- return ['effectuer']
-
- def get_settings_names_determined_by_complexity(self,complexity):
- return ['nombre']
-
- def regenerate_parameters(self, complexity):
- self.proportion=float(complexity)/3
- self.nombre=int(256*self.proportion)*4 #Par multiple de 4 (256=1024/4)
- self.echantillon=random.sample(xrange(0,1024),self.nombre) #Les pixels qui seront permutes
- self.effectuer =numpy.random.binomial(1,0.2) ##### On a 20% de faire un bruit #####
- return self._get_current_parameters()
-
- def _get_current_parameters(self):
- return [self.effectuer]
-
- def get_parameters_determined_by_complexity(self, complexity):
- return [int(complexity*256)*4]
-
- def transform_image(self, image):
- if self.effectuer==0:
- return image
-
- image=image.reshape(1024,1)
- temp=0 #variable temporaire
-
- for i in xrange(0,self.nombre,4): #Par bonds de 4
- #gauche
- if self.echantillon[i] > 0:
- temp=image[self.echantillon[i]-1]
- image[self.echantillon[i]-1]=image[self.echantillon[i]]
- image[self.echantillon[i]]=temp
- #droite
- if self.echantillon[i+1] < 1023:
- temp=image[self.echantillon[i+1]+1]
- image[self.echantillon[i+1]+1]=image[self.echantillon[i+1]]
- image[self.echantillon[i+1]]=temp
- #haut
- if self.echantillon[i+2] > 31:
- temp=image[self.echantillon[i+2]-32]
- image[self.echantillon[i+2]-32]=image[self.echantillon[i+2]]
- image[self.echantillon[i+2]]=temp
- #bas
- if self.echantillon[i+3] < 992:
- temp=image[self.echantillon[i+3]+32]
- image[self.echantillon[i+3]+32]=image[self.echantillon[i+3]]
- image[self.echantillon[i+3]]=temp
-
-
- return image.reshape((32,32))
-
-
-#---TESTS---
-
-def _load_image():
- f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #Le jeu de donnees est en local.
- d = ft.read(f)
- w=numpy.asarray(d[random.randint(0,100)])
- return (w/255.0).astype('float')
-
-def _test(complexite):
- img=_load_image()
- transfo = PermutPixel()
- pylab.imshow(img.reshape((32,32)))
- pylab.show()
- print transfo.get_settings_names()
- print transfo.regenerate_parameters(complexite)
-
- img_trans=transfo.transform_image(img)
-
- pylab.imshow(img_trans.reshape((32,32)))
- pylab.show()
-
-
-if __name__ == '__main__':
- from pylearn.io import filetensor as ft
- import pylab
- for i in xrange(0,5):
- _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/PoivreSel.py
--- a/transformations/PoivreSel.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout de bruit poivre et sel dans les donnees. Le bruit est distribue de facon
-aleatoire tire d'une uniforme tout comme la clarte des bites changees.
-
-La proportion de bites aleatoires est definit par complexity/5.
-Lorsque cette valeur est a 1 ==> Plus reconnaissable et 0 ==> Rien ne se passe
-
-On a maintenant 25% de chance d'effectuer un bruitage.
-
-Ce fichier prend pour acquis que les images sont donnees une a la fois
-sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy
-import random
-
-class PoivreSel():
-
- def __init__(self,seed=9361):
- self.proportion_bruit=0.08 #Le pourcentage des pixels qui seront bruites
- self.nb_chng=10 #Le nombre de pixels changes. Seulement pour fin de calcul
- self.effectuer=1 #Vaut 1 si on effectue et 0 sinon.
-
- self.seed=seed
- #Les deux generateurs sont de types differents, avoir la meme seed n'a pas d'influence
- #numpy.random.seed(self.seed)
- #random.seed(self.seed)
-
- def get_seed(self):
- return self.seed
-
- def get_settings_names(self):
- return ['effectuer']
-
- def get_settings_names_determined_by_complexity(self,complexity):
- return ['proportion_bruit']
-
- def regenerate_parameters(self, complexity):
- self.proportion_bruit = float(complexity)/5
- self.nb_chng=int(1024*self.proportion_bruit)
- self.changements=random.sample(xrange(1024),self.nb_chng) #Les pixels qui seront changes
- self.effectuer =numpy.random.binomial(1,0.25) ##### On a 25% de faire un bruit #####
- return self._get_current_parameters()
-
- def _get_current_parameters(self):
- return [self.effectuer]
-
- def get_parameters_determined_by_complexity(self, complexity):
- return [float(complexity)/5]
-
- def transform_image(self, image):
- if self.effectuer == 0:
- return image
-
- image=image.reshape(1024,1)
- for j in xrange(0,self.nb_chng):
- image[self.changements[j]]=numpy.random.random() #On determine les nouvelles valeurs des pixels changes
- return image.reshape(32,32)
-
-
-#---TESTS---
-
-def _load_image():
- f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #Le jeu de donnees est en local.
- d = ft.read(f)
- w=numpy.asarray(d[0])
- return (w/255.0).astype('float')
-
-def _test(complexite):
- img=_load_image()
- transfo = PoivreSel()
- pylab.imshow(img.reshape((32,32)))
- pylab.show()
- print transfo.get_settings_names()
- print transfo.regenerate_parameters(complexite)
-
- img_trans=transfo.transform_image(img)
-
- pylab.imshow(img_trans.reshape((32,32)))
- pylab.show()
-
-
-if __name__ == '__main__':
- from pylearn.io import filetensor as ft
- import pylab
- _test(0.5)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/Rature.py
--- a/transformations/Rature.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,255 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Ajout d'une rature sur le caractère. La rature est en fait un 1 qui recoit une
-rotation et qui est ensuite appliqué sur le caractère. Un grossissement, puis deux
-erosions sont effectuees sur le 1 afin qu'il ne soit plus reconnaissable.
-Il y a des chances d'avoir plus d'une seule rature !
-
-Il y a 15% d'effectuer une rature.
-
-Ce fichier prend pour acquis que les images sont donnees une a la fois
-sous forme de numpy.array de 1024 (32 x 32) valeurs entre 0 et 1.
-
-Sylvain Pannetier Lebeuf dans le cadre de IFT6266, hiver 2010
-
-'''
-
-import numpy, Image, random
-import scipy.ndimage.morphology
-from pylearn.io import filetensor as ft
-
-
-class Rature():
-
- def __init__(self,seed=1256):
- self.angle=0 #Angle en degre de la rotation (entre 0 et 180)
- self.numero=0 #Le numero du 1 choisi dans la banque de 1
- self.gauche=-1 #Le numero de la colonne la plus a gauche contenant le 1
- self.droite=-1
- self.haut=-1
- self.bas=-1
- self.faire=1 #1=on effectue et 0=fait rien
-
- self.crop_haut=0
- self.crop_gauche=0 #Ces deux valeurs sont entre 0 et 31 afin de definir
- #l'endroit ou sera pris le crop dans l'image du 1
-
- self.largeur_bande=-1 #La largeur de la bande
- self.smooth=-1 #La largeur de la matrice carree servant a l'erosion
- self.nb_ratures=-1 #Le nombre de ratures appliques
- self.fini=0 #1=fini de mettre toutes les couches 0=pas fini
- self.complexity=0 #Pour garder en memoire la complexite si plusieurs couches sont necessaires
- self.seed=seed
-
- #numpy.random.seed(self.seed)
-
- f3 = open('/data/lisa/data/ift6266h10/un_rature.ft') #Doit etre sur le reseau DIRO.
- #f3 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/un_rature.ft')
- #Il faut arranger le path sinon
- w=ft.read(f3)
- f3.close()
- self.d=(w.astype('float'))/255
-
- self.patch=self.d[0].reshape((32,32)) #La patch de rature qui sera appliquee sur l'image
-
- def get_settings_names(self):
- return ['angle','numero','faire','crop_haut','crop_gauche','largeur_bande','smooth','nb_ratures']
-
- def get_seed(self):
- return self.seed
-
- def regenerate_parameters(self, complexity,next_rature = False):
-
-
- self.numero=random.randint(0,4999) #Ces bornes sont inclusives !
- self.fini=0
- self.complexity=complexity
-
- if float(complexity) > 0:
-
- self.gauche=self.droite=self.haut=self.bas=-1 #Remet tout a -1
-
- self.angle=int(numpy.random.normal(90,100*complexity))
-
- self.faire=numpy.random.binomial(1,0.15) ##### 15% d'effectuer une rature #####
- if next_rature:
- self.faire = 1
- #self.faire=1 #Pour tester seulement
-
- self.crop_haut=random.randint(0,17)
- self.crop_gauche=random.randint(0,17)
- if complexity <= 0.25 :
- self.smooth=6
- elif complexity <= 0.5:
- self.smooth=5
- elif complexity <= 0.75:
- self.smooth=4
- else:
- self.smooth=3
-
- p = numpy.random.rand()
- if p < 0.5:
- self.nb_ratures= 1
- else:
- if p < 0.8:
- self.nb_ratures = 2
- else:
- self.nb_ratures = 3
-
- #Creation de la "patch" de rature qui sera appliquee sur l'image
- if self.faire == 1:
- self.get_size()
- self.get_image_rot() #On fait la "patch"
-
- else:
- self.faire=0 #On ne fait rien si complexity=0 !!
-
- return self._get_current_parameters()
-
-
- def get_image_rot(self):
- image2=(self.d[self.numero].reshape((32,32))[self.haut:self.bas,self.gauche:self.droite])
-
- im = Image.fromarray(numpy.asarray(image2*255,dtype='uint8'))
-
- #La rotation et le resize sont de belle qualite afin d'avoir une image nette
- im2 = im.rotate(self.angle,Image.BICUBIC,expand=False)
- im3=im2.resize((50,50),Image.ANTIALIAS)
-
- grosse=numpy.asarray(numpy.asarray(im3)/255.0,dtype='float32')
- crop=grosse[self.haut:self.haut+32,self.gauche:self.gauche+32]
-
- self.get_patch(crop)
-
- def get_patch(self,crop):
- smooting = numpy.ones((self.smooth,self.smooth))
- #Il y a deux erosions afin d'avoir un beau resultat. Pas trop large et
- #pas trop mince
- trans=scipy.ndimage.morphology.grey_erosion\
- (crop,size=smooting.shape,structure=smooting,mode='wrap')
- trans1=scipy.ndimage.morphology.grey_erosion\
- (trans,size=smooting.shape,structure=smooting,mode='wrap')
-
-
- patch_img=Image.fromarray(numpy.asarray(trans1*255,dtype='uint8'))
-
- patch_img2=patch_img.crop((4,4,28,28)).resize((32,32)) #Pour contrer les effets de bords !
-
- trans2=numpy.asarray(numpy.asarray(patch_img2)/255.0,dtype='float32')
-
-
- #Tout ramener entre 0 et 1
- trans2=trans2-trans2.min() #On remet tout positif
- trans2=trans2/trans2.max()
-
- #La rayure a plus de chance d'etre en bas ou oblique le haut a 10h
- if random.random() <= 0.5: #On renverse la matrice dans ce cas
- for i in xrange(0,32):
- self.patch[i,:]=trans2[31-i,:]
- else:
- self.patch=trans2
-
-
-
-
- def get_size(self):
- image=self.d[self.numero].reshape((32,32))
-
- #haut
- for i in xrange(0,32):
- for j in xrange(0,32):
- if(image[i,j]) != 0:
- if self.haut == -1:
- self.haut=i
- break
- if self.haut > -1:
- break
-
- #bas
- for i in xrange(31,-1,-1):
- for j in xrange(0,32):
- if(image[i,j]) != 0:
- if self.bas == -1:
- self.bas=i
- break
- if self.bas > -1:
- break
-
- #gauche
- for i in xrange(0,32):
- for j in xrange(0,32):
- if(image[j,i]) != 0:
- if self.gauche == -1:
- self.gauche=i
- break
- if self.gauche > -1:
- break
-
- #droite
- for i in xrange(31,-1,-1):
- for j in xrange(0,32):
- if(image[j,i]) != 0:
- if self.droite == -1:
- self.droite=i
- break
- if self.droite > -1:
- break
-
-
- def _get_current_parameters(self):
- return [self.angle,self.numero,self.faire,self.crop_haut,self.crop_gauche,self.largeur_bande,self.smooth,self.nb_ratures]
-
- def transform_image(self, image):
- if self.faire == 0: #Rien faire !!
- return image
-
- if self.fini == 0: #S'il faut rajouter des couches
- patch_temp=self.patch
- for w in xrange(1,self.nb_ratures):
- self.regenerate_parameters(self.complexity,1)
- for i in xrange(0,32):
- for j in xrange(0,32):
- patch_temp[i,j]=max(patch_temp[i,j],self.patch[i,j])
- self.fini=1
- self.patch=patch_temp
-
- for i in xrange(0,32):
- for j in xrange(0,32):
- image[i,j]=max(image[i,j],self.patch[i,j])
- self.patch*=0 #Remise a zero de la patch (pas necessaire)
- return image
-
-
-#---TESTS---
-
-def _load_image():
- f = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/lower_test_data.ft') #Le jeu de donnees est en local.
- d = ft.read(f)
- w=numpy.asarray(d[0:1000])
- return (w/255.0).astype('float')
-
-def _test(complexite):
- img=_load_image()
- transfo = Rature()
- for i in xrange(0,10):
- img2=img[random.randint(0,1000)]
- pylab.imshow(img2.reshape((32,32)))
- pylab.show()
- print transfo.get_settings_names()
- print transfo.regenerate_parameters(complexite)
- img2=img2.reshape((32,32))
-
- img2_trans=transfo.transform_image(img2)
-
- pylab.imshow(img2_trans.reshape((32,32)))
- pylab.show()
-
-
-if __name__ == '__main__':
- from pylearn.io import filetensor as ft
- import pylab
- _test(1)
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/add_background_image.py
--- a/transformations/add_background_image.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-#!/usr/bin/python
-# -*- coding: iso-8859-1 -*-
-
-'''
- Implementation of random background adding to a specific image
-
- Author: Guillaume Sicard
-'''
-
-import sys, os, random
-import cPickle
-import Image, numpy
-
-class AddBackground():
- def __init__(self, threshold = 128, complexity = 1):
- self.h = 32
- self.w = 32
- self.threshold = 1;
- try: #in order to load locally if it is available
- self.bg_image_file = '/Tmp/image_net/'
- f=open(self.bg_image_file+'filelist.pkl')
- except:
- self.bg_image_file = '/data/lisa/data/ift6266h10/image_net/'
- f=open(self.bg_image_file+'filelist.pkl')
- self.image_files = cPickle.load(f)
- f.close()
- self.regenerate_parameters(complexity)
-
- def get_current_parameters(self):
- return [self.contrast]
- # get threshold value
- def get_settings_names(self):
- return ['contrast']
-
- # no need, except for testmod.py
- def regenerate_parameters(self, complexity):
- self.contrast = 1-numpy.random.rand()*complexity
- return [self.contrast]
-
- # load an image
- def load_image(self,filename):
- image = Image.open(filename).convert('L')
- image = numpy.asarray(image)
- image = (image / 255.0).astype(numpy.float32)
- return image
-
- # save an image
- def save_image(self,array, filename):
- image = (array * 255.0).astype('int')
- image = Image.fromarray(image)
- if (filename != ''):
- image.save(filename)
- else:
- image.show()
-
- # make a random 32x32 crop of an image
- def rand_crop(self,image):
- i_w, i_h = image.shape
- x, y = random.randint(0, i_w - self.w), random.randint(0, i_h - self.h)
- return image[x:x + self.w, y:y + self.h]
-
- # select a random background image from "bg_image_file" and crops it
- def rand_bg_image(self,maximage):
- i = random.randint(0, len(self.image_files) - 1)
-
- image = self.load_image(self.bg_image_file + self.image_files[i])
- self.bg_image = self.rand_crop(image)
- maxbg = self.bg_image.max()
- self.bg_image = self.bg_image / maxbg * ( max(maximage - self.contrast,0.0) )
-
- # set "bg_image" as background to "image", based on a pixels threshold
- def set_bg(self,image):
- tensor = numpy.asarray([self.bg_image,image],dtype='float32')
- return tensor.max(0)
-
- # transform an image file and return an array
- def transform_image_from_file(self, filename):
- self.rand_bg_image()
- image = self.load_image(filename)
- image = self.set_bg(image)
- return image
-
- # standard array to array transform
- def transform_image(self, image):
- self.rand_bg_image(image.max())
- image = self.set_bg(image)
- return image
-
- # test method
- def test(self,filename):
- import time
-
- sys.stdout.write('Starting addBackground test : loading image')
- sys.stdout.flush()
-
- image = self.load_image(filename)
-
- t = 0
- n = 500
- for i in range(n):
- t0 = time.time()
- image2 = self.transform_image(image)
- t = ( i * t + (time.time() - t0) ) / (i + 1)
- sys.stdout.write('.')
- sys.stdout.flush()
-
- print "Done!\nAverage time : " + str(1000 * t) + " ms"
-
-if __name__ == '__main__':
-
- myAddBackground = AddBackground()
- myAddBackground.test('./images/0-LiberationSans-Italic.ttf.jpg')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/affine_transform.py
--- a/transformations/affine_transform.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Simple implementation of random affine transformations based on the Python
-Imaging Module affine transformations.
-
-
-Author: Razvan Pascanu
-'''
-
-import numpy, Image
-
-
-
-class AffineTransformation():
- def __init__( self, complexity = .5):
- self.shape = (32,32)
- self.complexity = complexity
- params = numpy.random.uniform(size=6) -.5
- self.a = 1. + params[0]*.6*complexity
- self.b = 0. + params[1]*.6*complexity
- self.c = params[2]*8.*complexity
- self.d = 0. + params[3]*.6*complexity
- self.e = 1. + params[4]*.6*complexity
- self.f = params[5]*8.*complexity
-
-
- def _get_current_parameters(self):
- return [self.a, self.b, self.c, self.d, self.e, self.f]
-
- def get_settings_names(self):
- return ['a','b','c','d','e','f']
-
- def regenerate_parameters(self, complexity):
- # generate random affine transformation
- # a point (x',y') of the new image corresponds to (x,y) of the old
- # image where :
- # x' = params[0]*x + params[1]*y + params[2]
- # y' = params[3]*x + params[4]*y _ params[5]
-
- # the ranges are set manually as to look acceptable
-
- self.complexity = complexity
- params = numpy.random.uniform(size=6) -.5
- self.a = 1. + params[0]*.8*complexity
- self.b = 0. + params[1]*.8*complexity
- self.c = params[2]*9.*complexity
- self.d = 0. + params[3]*.8*complexity
- self.e = 1. + params[4]*.8*complexity
- self.f = params[5]*9.*complexity
- return self._get_current_parameters()
-
-
-
-
- def transform_image(self,NIST_image):
-
- im = Image.fromarray( \
- numpy.asarray(\
- NIST_image.reshape(self.shape)*255.0, dtype='uint8'))
- nwim = im.transform( (32,32), Image.AFFINE, [self.a,self.b,self.c,self.d,self.e,self.f])
- return numpy.asarray(numpy.asarray(nwim)/255.0,dtype='float32')
-
-
-
-if __name__ =='__main__':
- print 'random test'
-
- from pylearn.io import filetensor as ft
- import pylab
-
- datapath = '/data/lisa/data/nist/by_class/'
-
- f = open(datapath+'digits/digits_train_data.ft')
- d = ft.read(f)
- f.close()
-
-
- transformer = AffineTransformation()
- id = numpy.random.randint(30)
-
- pylab.figure()
- pylab.imshow(d[id].reshape((32,32)))
- pylab.figure()
- pylab.imshow(transformer.transform_image(d[id]).reshape((32,32)))
-
- pylab.show()
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/contrast.py
--- a/transformations/contrast.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Simple implementation of random contrast. This always switch half the time the polarity.
-then it decides of a random contrast dependant of the complexity, the mean of the maximum and minimum
-pixel value stays 0 (to avoid import bias change between exemples).
-
-Author: Xavier Glorot
-'''
-
-import numpy as N
-import copy
-
-
-class Contrast():
- def __init__(self,complexity = 1):
- #---------- private attributes
- self.__nx__ = 32 #xdim of the images
- self.__ny__ = 32 #ydim of the images
- self.__Pinvert__ = 0.5 #probability to switch polarity
- self.__mincontrast__ = 0.15
- self.__resolution__ = 256
- self.__rangecontrastres__ = self.__resolution__ - N.int(self.__mincontrast__*self.__resolution__)
- #------------------------------------------------
-
- #---------- generation parameters
- self.regenerate_parameters(complexity)
- #------------------------------------------------
-
- def _get_current_parameters(self):
- return [self.invert,self.contrast]
-
- def get_settings_names(self):
- return ['invert','contrast']
-
- def regenerate_parameters(self, complexity):
- self.invert = (N.random.uniform() < self.__Pinvert__)
- self.contrast = self.__resolution__ - N.random.randint(1 + self.__rangecontrastres__ * complexity)
- return self._get_current_parameters()
-
- def transform_1_image(self,image): #the real transformation method
- maxi = image.max()
- mini = image.min()
- if self.invert:
- newimage = 1 - (self.__resolution__- self.contrast) / (2 * float(self.__resolution__)) -\
- (image - mini) / float(maxi - mini) * self.contrast / float(self.__resolution__)
- else:
- newimage = (self.__resolution__- self.contrast) / (2 * float(self.__resolution__)) +\
- (image - mini) / float(maxi - mini) * self.contrast / float(self.__resolution__)
- if image.dtype == 'uint8':
- return N.asarray(newimage*255,dtype='uint8')
- else:
- return N.asarray(newimage,dtype=image.dtype)
-
- def transform_image(self,image): #handling different format
- if image.shape == (self.__nx__,self.__ny__):
- return self.transform_1_image(image)
- if image.ndim == 3:
- newimage = copy.copy(image)
- for i in range(image.shape[0]):
- newimage[i,:,:] = self.transform_1_image(image[i,:,:])
- return newimage
- if image.ndim == 2 and image.shape != (self.__nx__,self.__ny__):
- newimage = N.reshape(image,(image.shape[0],self.__nx__,self.__ny__))
- for i in range(image.shape[0]):
- newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
- return N.reshape(newimage,image.shape)
- if image.ndim == 1:
- newimage = N.reshape(image,(self.__nx__,self.__ny__))
- newimage = self.transform_1_image(newimage)
- return N.reshape(newimage,image.shape)
- assert False #should never go there
-
-
-
-
-#test on NIST (you need pylearn and access to NIST to do that)
-
-if __name__ == '__main__':
-
- from pylearn.io import filetensor as ft
- import copy
- import pygame
- import time
- datapath = '/data/lisa/data/nist/by_class/'
- f = open(datapath+'digits/digits_train_data.ft')
- d = ft.read(f)
-
- pygame.surfarray.use_arraytype('numpy')
-
- pygame.display.init()
- screen = pygame.display.set_mode((8*2*32,8*32),0,8)
- anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
- screen.set_palette(anglcolorpalette)
-
- MyContrast = Contrast()
-
- debut=time.time()
- MyContrast.transform_image(d)
- fin=time.time()
- print '------------------------------------------------'
- print d.shape[0],' images transformed in :', fin-debut, ' seconds'
- print '------------------------------------------------'
- print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
- print '------------------------------------------------'
- print MyContrast.get_settings_names()
- print MyContrast._get_current_parameters()
- print MyContrast.regenerate_parameters(0)
- print MyContrast.regenerate_parameters(0.5)
- print MyContrast.regenerate_parameters(1)
- for i in range(10000):
- a=d[i,:]
- b=N.asarray(N.reshape(a,(32,32))).T
-
- new=pygame.surfarray.make_surface(b)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new.set_palette(anglcolorpalette)
- screen.blit(new,(0,0))
-
- print MyContrast.get_settings_names(), MyContrast.regenerate_parameters(1)
- c=MyContrast.transform_image(a)
- b=N.asarray(N.reshape(c,(32,32))).T
-
- new=pygame.surfarray.make_surface(b)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new.set_palette(anglcolorpalette)
- screen.blit(new,(8*32,0))
-
- pygame.display.update()
- raw_input('Press Enter')
-
- pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/filetensor.py
--- a/transformations/filetensor.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,232 +0,0 @@
-"""
-Read and write the matrix file format described at
-U{http://www.cs.nyu.edu/~ylclab/data/norb-v1.0/index.html}
-
-The format is for dense tensors:
-
- - magic number indicating type and endianness - 4bytes
- - rank of tensor - int32
- - dimensions - int32, int32, int32, ...
- -
-
-The number of dimensions and rank is slightly tricky:
- - for scalar: rank=0, dimensions = [1, 1, 1]
- - for vector: rank=1, dimensions = [?, 1, 1]
- - for matrix: rank=2, dimensions = [?, ?, 1]
-
-For rank >= 3, the number of dimensions matches the rank exactly.
-
-
-@todo: add complex type support
-
-"""
-import sys
-import numpy
-
-def _prod(lst):
- p = 1
- for l in lst:
- p *= l
- return p
-
-_magic_dtype = {
- 0x1E3D4C51 : ('float32', 4),
- #0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix?
- 0x1E3D4C53 : ('float64', 8),
- 0x1E3D4C54 : ('int32', 4),
- 0x1E3D4C55 : ('uint8', 1),
- 0x1E3D4C56 : ('int16', 2),
- }
-_dtype_magic = {
- 'float32': 0x1E3D4C51,
- #'packed matrix': 0x1E3D4C52,
- 'float64': 0x1E3D4C53,
- 'int32': 0x1E3D4C54,
- 'uint8': 0x1E3D4C55,
- 'int16': 0x1E3D4C56
- }
-
-def _read_int32(f):
- """unpack a 4-byte integer from the current position in file f"""
- s = f.read(4)
- s_array = numpy.fromstring(s, dtype='int32')
- return s_array.item()
-
-def _read_header(f, debug=False):
- """
- :returns: data type, element size, rank, shape, size
- """
- #what is the data type of this matrix?
- #magic_s = f.read(4)
- #magic = numpy.fromstring(magic_s, dtype='int32')
- magic = _read_int32(f)
- magic_t, elsize = _magic_dtype[magic]
- if debug:
- print 'header magic', magic, magic_t, elsize
- if magic_t == 'packed matrix':
- raise NotImplementedError('packed matrix not supported')
-
- #what is the rank of the tensor?
- ndim = _read_int32(f)
- if debug: print 'header ndim', ndim
-
- #what are the dimensions of the tensor?
- dim = numpy.fromfile(f, dtype='int32', count=max(ndim,3))[:ndim]
- dim_size = _prod(dim)
- if debug: print 'header dim', dim, dim_size
-
- return magic_t, elsize, ndim, dim, dim_size
-
-class arraylike(object):
- """Provide an array-like interface to the filetensor in f.
-
- The rank parameter to __init__ controls how this object interprets the underlying tensor.
- Its behaviour should be clear from the following example.
- Suppose the underlying tensor is MxNxK.
-
- - If rank is 0, self[i] will be a scalar and len(self) == M*N*K.
-
- - If rank is 1, self[i] is a vector of length K, and len(self) == M*N.
-
- - If rank is 3, self[i] is a 3D tensor of size MxNxK, and len(self)==1.
-
- - If rank is 5, self[i] is a 5D tensor of size 1x1xMxNxK, and len(self) == 1.
-
-
- :note: Objects of this class generally require exclusive use of the underlying file handle, because
- they call seek() every time you access an element.
- """
-
- f = None
- """File-like object"""
-
- magic_t = None
- """numpy data type of array"""
-
- elsize = None
- """number of bytes per scalar element"""
-
- ndim = None
- """Rank of underlying tensor"""
-
- dim = None
- """tuple of array dimensions (aka shape)"""
-
- dim_size = None
- """number of scalars in the tensor (prod of dim)"""
-
- f_start = None
- """The file position of the first element of the tensor"""
-
- readshape = None
- """tuple of array dimensions of the block that we read"""
-
- readsize = None
- """number of elements we must read for each block"""
-
- def __init__(self, f, rank=0, debug=False):
- self.f = f
- self.magic_t, self.elsize, self.ndim, self.dim, self.dim_size = _read_header(f,debug)
- self.f_start = f.tell()
-
- if rank <= self.ndim:
- self.readshape = tuple(self.dim[self.ndim-rank:])
- else:
- self.readshape = tuple(self.dim)
-
- #self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim)
-
- if rank <= self.ndim:
- padding = tuple()
- else:
- padding = (1,) * (rank - self.ndim)
-
- #padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim)
- self.returnshape = padding + self.readshape
- self.readsize = _prod(self.readshape)
- if debug: print 'READ PARAM', self.readshape, self.returnshape, self.readsize
-
- def __len__(self):
- return _prod(self.dim[:self.ndim-len(self.readshape)])
-
- def __getitem__(self, idx):
- if idx >= len(self):
- raise IndexError(idx)
- self.f.seek(self.f_start + idx * self.elsize * self.readsize)
- return numpy.fromfile(self.f,
- dtype=self.magic_t,
- count=self.readsize).reshape(self.returnshape)
-
-
-#
-# TODO: implement item selection:
-# e.g. load('some mat', subtensor=(:6, 2:5))
-#
-# This function should be memory efficient by:
-# - allocating an output matrix at the beginning
-# - seeking through the file, reading subtensors from multiple places
-def read(f, subtensor=None, debug=False):
- """Load all or part of file 'f' into a numpy ndarray
-
- @param f: file from which to read
- @type f: file-like object
-
- If subtensor is not None, it should be like the argument to
- numpy.ndarray.__getitem__. The following two expressions should return
- equivalent ndarray objects, but the one on the left may be faster and more
- memory efficient if the underlying file f is big.
-
- read(f, subtensor) <===> read(f)[*subtensor]
-
- Support for subtensors is currently spotty, so check the code to see if your
- particular type of subtensor is supported.
-
- """
- magic_t, elsize, ndim, dim, dim_size = _read_header(f,debug)
- f_start = f.tell()
-
- rval = None
- if subtensor is None:
- rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
- elif isinstance(subtensor, slice):
- if subtensor.step not in (None, 1):
- raise NotImplementedError('slice with step', subtensor.step)
- if subtensor.start not in (None, 0):
- bytes_per_row = _prod(dim[1:]) * elsize
- f.seek(f_start + subtensor.start * bytes_per_row)
- dim[0] = min(dim[0], subtensor.stop) - subtensor.start
- rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
- else:
- raise NotImplementedError('subtensor access not written yet:', subtensor)
-
- return rval
-
-def write(f, mat):
- """Write a numpy.ndarray to file.
-
- @param f: file into which to write
- @type f: file-like object
-
- @param mat: array to write to file
- @type mat: numpy ndarray or compatible
-
- """
- def _write_int32(f, i):
- i_array = numpy.asarray(i, dtype='int32')
- if 0: print 'writing int32', i, i_array
- i_array.tofile(f)
-
- try:
- _write_int32(f, _dtype_magic[str(mat.dtype)])
- except KeyError:
- raise TypeError('Invalid ndarray dtype for filetensor format', mat.dtype)
-
- _write_int32(f, len(mat.shape))
- shape = mat.shape
- if len(shape) < 3:
- shape = list(shape) + [1] * (3 - len(shape))
- if 0: print 'writing shape =', shape
- for sh in shape:
- _write_int32(f, sh)
- mat.tofile(f)
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/gimp_script.py
--- a/transformations/gimp_script.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-'''
-Filtres GIMP sous Python
-Auteur: Nicolas Boulanger-Lewandowski
-Date: Hiver 2010
-
-run with: gimp -i --batch-interpreter python-fu-eval --batch - < gimp_script.py
-end with: pdb.gimp_quit(0)
-
-Implémente le motionblur et le pinch
-'''
-
-from gimpfu import *
-import numpy
-
-img = gimp.Image(32, 32, GRAY)
-img.disable_undo()
-layer1 = gimp.Layer(img, "layer1", 32, 32, GRAY_IMAGE, 100, NORMAL_MODE)
-img.add_layer(layer1, 0)
-dest_rgn = layer1.get_pixel_rgn(0, 0, 32, 32, True)
-
-def setpix(image):
- dest_rgn[:,:] = (image.T*255).astype(numpy.uint8).tostring()
- layer1.flush()
- layer1.update(0, 0, 32, 32)
-
-def getpix():
- return numpy.fromstring(dest_rgn[:,:], 'UInt8').astype(numpy.float32).reshape((32,32)).T / 255.0
-
-class GIMP1():
- def get_settings_names(self):
- return ['mblur_length', 'mblur_angle', 'pinch']
-
- def regenerate_parameters(self, complexity):
- if complexity:
- self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity))))
- else:
- self.mblur_length = 0
- self.mblur_angle = int(round(numpy.random.uniform(0,360)))
- self.pinch = numpy.random.uniform(-complexity, 0.7*complexity)
-
- return [self.mblur_length, self.mblur_angle, self.pinch]
-
- def transform_image(self, image):
- if self.mblur_length or self.pinch:
- setpix(image)
- if self.mblur_length:
- pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0)
- if self.pinch:
- pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0)
- image = getpix()
-
- return image
-
-# test
-if __name__ == '__main__':
- import Image
- im = numpy.asarray(Image.open("a.bmp").convert("L")) / 255.0
-
- test = GIMP1()
- print test.get_settings_names(), '=', test.regenerate_parameters(1)
- #for i in range(1000):
- im = test.transform_image(im)
-
- import pylab
- pylab.imshow(im, pylab.matplotlib.cm.Greys_r)
- pylab.show()
-
- pdb.gimp_quit(0)
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/image_tiling.py
--- a/transformations/image_tiling.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-"""
-Illustrate filters (or data) in a grid of small image-shaped tiles.
-
-Note: taken from the pylearn codebase on Feb 4, 2010 (fsavard)
-"""
-
-import numpy
-from PIL import Image
-
-def scale_to_unit_interval(ndar,eps=1e-8):
- ndar = ndar.copy()
- ndar -= ndar.min()
- ndar *= 1.0 / (ndar.max()+eps)
- return ndar
-
-def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0,0),
- scale_rows_to_unit_interval=True,
- output_pixel_vals=True
- ):
- """
- Transform an array with one flattened image per row, into an array in which images are
- reshaped and layed out like tiles on a floor.
-
- This function is useful for visualizing datasets whose rows are images, and also columns of
- matrices for transforming those rows (such as the first layer of a neural net).
-
- :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can be 2-D ndarrays or None
- :param X: a 2-D array in which every row is a flattened image.
- :type img_shape: tuple; (height, width)
- :param img_shape: the original shape of each image
- :type tile_shape: tuple; (rows, cols)
- :param tile_shape: the number of images to tile (rows, cols)
-
- :returns: array suitable for viewing as an image. (See:`PIL.Image.fromarray`.)
- :rtype: a 2-d array with same dtype as X.
-
- """
- assert len(img_shape) == 2
- assert len(tile_shape) == 2
- assert len(tile_spacing) == 2
-
- out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
- in zip(img_shape, tile_shape, tile_spacing)]
-
- if isinstance(X, tuple):
- assert len(X) == 4
- if output_pixel_vals:
- out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype='uint8')
- else:
- out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype=X.dtype)
-
- #colors default to 0, alpha defaults to 1 (opaque)
- if output_pixel_vals:
- channel_defaults = [0,0,0,255]
- else:
- channel_defaults = [0.,0.,0.,1.]
-
- for i in xrange(4):
- if X[i] is None:
- out_array[:,:,i] = numpy.zeros(out_shape,
- dtype='uint8' if output_pixel_vals else out_array.dtype
- )+channel_defaults[i]
- else:
- out_array[:,:,i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
- return out_array
-
- else:
- H, W = img_shape
- Hs, Ws = tile_spacing
-
- out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
- for tile_row in xrange(tile_shape[0]):
- for tile_col in xrange(tile_shape[1]):
- if tile_row * tile_shape[1] + tile_col < X.shape[0]:
- if scale_rows_to_unit_interval:
- this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
- else:
- this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
- out_array[
- tile_row * (H+Hs):tile_row*(H+Hs)+H,
- tile_col * (W+Ws):tile_col*(W+Ws)+W
- ] \
- = this_img * (255 if output_pixel_vals else 1)
- return out_array
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/local_elastic_distortions.py
--- a/transformations/local_elastic_distortions.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,456 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Implementation of elastic distortions as described in
-Simard, Steinkraus, Platt, "Best Practices for Convolutional
- Neural Networks Applied to Visual Document Analysis", 2003
-
-Author: François Savard
-Date: Fall 2009, revised Winter 2010
-
-Usage: create the Distorter with proper alpha, sigma etc.
- Then each time you want to change the distortion field applied,
- call regenerate_field().
-
- (The point behind this is that regeneration takes some time,
- so we better reuse the fields a few times)
-'''
-
-import sys
-import math
-import numpy
-import numpy.random
-import scipy.signal # convolve2d
-
-_TEST_DIR = "/u/savardf/ift6266/debug_images/"
-
-def _raw_zeros(size):
- return [[0 for i in range(size[1])] for j in range(size[0])]
-
-class ElasticDistortionParams():
- def __init__(self, image_size=(32,32), alpha=0.0, sigma=0.0):
- self.image_size = image_size
- self.alpha = alpha
- self.sigma = sigma
-
- h,w = self.image_size
-
- self.matrix_tl_corners_rows = _raw_zeros((h,w))
- self.matrix_tl_corners_cols = _raw_zeros((h,w))
-
- self.matrix_tr_corners_rows = _raw_zeros((h,w))
- self.matrix_tr_corners_cols = _raw_zeros((h,w))
-
- self.matrix_bl_corners_rows = _raw_zeros((h,w))
- self.matrix_bl_corners_cols = _raw_zeros((h,w))
-
- self.matrix_br_corners_rows = _raw_zeros((h,w))
- self.matrix_br_corners_cols = _raw_zeros((h,w))
-
- # those will hold the precomputed ratios for
- # bilinear interpolation
- self.matrix_tl_multiply = numpy.zeros((h,w))
- self.matrix_tr_multiply = numpy.zeros((h,w))
- self.matrix_bl_multiply = numpy.zeros((h,w))
- self.matrix_br_multiply = numpy.zeros((h,w))
-
- def alpha_sigma(self):
- return [self.alpha, self.sigma]
-
-class LocalElasticDistorter():
- def __init__(self, image_size=(32,32)):
- self.image_size = image_size
-
- self.current_complexity_10 = 0
- self.current_complexity = 0
-
- # number of precomputed fields
- # (principle: as complexity doesn't change often, we can
- # precompute a certain number of fields for a given complexity,
- # each with its own parameters. That way, we have good
- # randomization, but we're much faster).
- self.to_precompute_per_complexity = 50
-
- # Both use ElasticDistortionParams
- self.current_params = None
- self.precomputed_params = [[] for i in range(10)]
-
- #
- self.kernel_size = None
- self.kernel = None
-
- # set some defaults
- self.regenerate_parameters(0.0)
-
- def get_settings_names(self):
- return []
-
- def _floor_complexity(self, complexity):
- return self._to_complexity_10(complexity) / 10.0
-
- def _to_complexity_10(self, complexity):
- return min(9, max(0, int(complexity * 10)))
-
- def regenerate_parameters(self, complexity):
- complexity_10 = self._to_complexity_10(complexity)
-
- if complexity_10 != self.current_complexity_10:
- self.current_complexity_10 = complexity_10
- self.current_complexity = self._floor_complexity(complexity)
-
- if len(self.precomputed_params[complexity_10]) <= self.to_precompute_per_complexity:
- # not yet enough params generated, produce one more
- # and append to list
- new_params = self._initialize_new_params()
- new_params = self._generate_fields(new_params)
- self.current_params = new_params
- self.precomputed_params[complexity_10].append(new_params)
- else:
- # if we have enough precomputed fields, just select one
- # at random and set parameters to match what they were
- # when the field was generated
- idx = numpy.random.randint(0, len(self.precomputed_params[complexity_10]))
- self.current_params = self.precomputed_params[complexity_10][idx]
-
- # don't return anything, to avoid storing deterministic parameters
- return [] # self.current_params.alpha_sigma()
-
- def get_parameters_determined_by_complexity(self, complexity):
- tmp_params = self._initialize_new_params(_floor_complexity(complexity))
- return tmp_params.alpha_sigma()
-
- def get_settings_names_determined_by_complexity(self, complexity):
- return ['alpha', 'sigma']
-
- # adapted from http://blenderartists.org/forum/showthread.php?t=163361
- def _gen_gaussian_kernel(self, sigma):
- # the kernel size can change DRAMATICALLY the time
- # for the blur operation... so even though results are better
- # with a bigger kernel, we need to compromise here
- # 1*s is very different from 2*s, but there's not much difference
- # between 2*s and 4*s
- ks = self.kernel_size
- s = sigma
- target_ks = (1.5*s, 1.5*s)
- if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
- # kernel size is good, ok, no need to regenerate
- return
- self.kernel_size = target_ks
- h,w = self.kernel_size
- a,b = h/2.0, w/2.0
- y,x = numpy.ogrid[0:w, 0:h]
- gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
- # Normalize so we don't reduce image intensity
- self.kernel = gauss/gauss.sum()
-
- def _gen_distortion_field(self, params):
- self._gen_gaussian_kernel(params.sigma)
-
- # we add kernel_size on all four sides so blurring
- # with the kernel produces a smoother result on borders
- ks0 = self.kernel_size[0]
- ks1 = self.kernel_size[1]
- sz0 = self.image_size[1] + ks0
- sz1 = self.image_size[0] + ks1
- field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
- field = scipy.signal.convolve2d(field, self.kernel, mode='same')
-
- # crop only image_size in the middle
- field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]]
-
- return params.alpha * field
-
-
- def _initialize_new_params(self, complexity=None):
- if not complexity:
- complexity = self.current_complexity
-
- params = ElasticDistortionParams(self.image_size)
-
- # pour faire progresser la complexité un peu plus vite
- # tout en gardant les extrêmes de 0.0 et 1.0
- complexity = complexity ** (1./3.)
-
- # the smaller the alpha, the closest the pixels are fetched
- # a max of 10 is reasonable
- params.alpha = complexity * 10.0
-
- # the bigger the sigma, the smoother is the distortion
- # max of 1 is "reasonable", but produces VERY noisy results
- # And the bigger the sigma, the bigger the blur kernel, and the
- # slower the field generation, btw.
- params.sigma = 10.0 - (7.0 * complexity)
-
- return params
-
- def _generate_fields(self, params):
- '''
- Here's how the code works:
- - We first generate "distortion fields" for x and y with these steps:
- - Uniform noise over [-1, 1] in a matrix of size (h,w)
- - Blur with a Gaussian kernel of spread sigma
- - Multiply by alpha
- - Then (conceptually) to compose the distorted image, we loop over each pixel
- of the new image and use the corresponding x and y distortions
- (from the matrices generated above) to identify pixels
- of the old image from which we fetch color data. As the
- coordinates are not integer, we interpolate between the
- 4 nearby pixels (top left, top right etc.).
- - That's just conceptually. Here I'm using matrix operations
- to speed up the computation. I first identify the 4 nearby
- pixels in the old image for each pixel in the distorted image.
- I can then use them as "fancy indices" to extract the proper
- pixels for each new pixel.
- - Then I multiply those extracted nearby points by precomputed
- ratios for the bilinear interpolation.
- '''
-
- p = params
-
- dist_fields = [None, None]
- dist_fields[0] = self._gen_distortion_field(params)
- dist_fields[1] = self._gen_distortion_field(params)
-
- #pylab.imshow(dist_fields[0])
- #pylab.show()
-
- # regenerate distortion index matrices
- # "_rows" are row indices
- # "_cols" are column indices
- # (separated due to the way fancy indexing works in numpy)
- h,w = p.image_size
-
- for y in range(h):
- for x in range(w):
- distort_x = dist_fields[0][y,x]
- distort_y = dist_fields[1][y,x]
-
- # the "target" is the coordinate we fetch color data from
- # (in the original image)
- # target_left and _top are the rounded coordinate on the
- # left/top of this target (float) coordinate
- target_pixel = (y+distort_y, x+distort_x)
-
- target_left = int(math.floor(x + distort_x))
- target_top = int(math.floor(y + distort_y))
-
- index_tl = [target_top, target_left]
- index_tr = [target_top, target_left+1]
- index_bl = [target_top+1, target_left]
- index_br = [target_top+1, target_left+1]
-
- # x_ratio is the ratio of importance of left pixels
- # y_ratio is the """" of top pixels
- # (in bilinear combination)
- y_ratio = 1.0 - (target_pixel[0] - target_top)
- x_ratio = 1.0 - (target_pixel[1] - target_left)
-
- # We use a default background color of 0 for displacements
- # outside of boundaries of the image.
-
- # if top left outside bounds
- if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w:
- p.matrix_tl_corners_rows[y][x] = 0
- p.matrix_tl_corners_cols[y][x] = 0
- p.matrix_tl_multiply[y,x] = 0
- else:
- p.matrix_tl_corners_rows[y][x] = index_tl[0]
- p.matrix_tl_corners_cols[y][x] = index_tl[1]
- p.matrix_tl_multiply[y,x] = x_ratio*y_ratio
-
- # if top right outside bounds
- if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
- p.matrix_tr_corners_rows[y][x] = 0
- p.matrix_tr_corners_cols[y][x] = 0
- p.matrix_tr_multiply[y,x] = 0
- else:
- p.matrix_tr_corners_rows[y][x] = index_tr[0]
- p.matrix_tr_corners_cols[y][x] = index_tr[1]
- p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
-
- # if bottom left outside bounds
- if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
- p.matrix_bl_corners_rows[y][x] = 0
- p.matrix_bl_corners_cols[y][x] = 0
- p.matrix_bl_multiply[y,x] = 0
- else:
- p.matrix_bl_corners_rows[y][x] = index_bl[0]
- p.matrix_bl_corners_cols[y][x] = index_bl[1]
- p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
-
- # if bottom right outside bounds
- if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
- p.matrix_br_corners_rows[y][x] = 0
- p.matrix_br_corners_cols[y][x] = 0
- p.matrix_br_multiply[y,x] = 0
- else:
- p.matrix_br_corners_rows[y][x] = index_br[0]
- p.matrix_br_corners_cols[y][x] = index_br[1]
- p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
-
- # not really necessary, but anyway
- return p
-
- def transform_image(self, image):
- p = self.current_params
-
- # index pixels to get the 4 corners for bilinear combination
- tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
- tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
- bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
- br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]
-
- # bilinear ratios, elemwise multiply
- tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
- tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
- bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
- br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)
-
- # sum to finish bilinear combination
- return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0).astype(numpy.float32)
-
-# TESTS ----------------------------------------------------------------------
-
-def _load_image(filepath):
- _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
- img = Image.open(filepath)
- img = numpy.asarray(img)
- if len(img.shape) > 2:
- img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
- return (img / 255.0).astype('float')
-
-def _specific_test():
- imgpath = os.path.join(_TEST_DIR, "d.png")
- img = _load_image(imgpath)
- dist = LocalElasticDistorter((32,32))
- print dist.regenerate_parameters(0.5)
- img = dist.transform_image(img)
- print dist.get_parameters_determined_by_complexity(0.4)
- pylab.imshow(img)
- pylab.show()
-
-def _complexity_tests():
- imgpath = os.path.join(_TEST_DIR, "d.png")
- dist = LocalElasticDistorter((32,32))
- orig_img = _load_image(imgpath)
- html_content = '''Original:
'''
- for complexity in numpy.arange(0.0, 1.1, 0.1):
- html_content += '
Complexity: ' + str(complexity) + '
'
- for i in range(10):
- t1 = time.time()
- dist.regenerate_parameters(complexity)
- t2 = time.time()
- print "diff", t2-t1
- img = dist.transform_image(orig_img)
- filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
- new_path = os.path.join(_TEST_DIR, filename)
- _save_image(img, new_path)
- html_content += ''
- html_content += ""
- html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
- html_file.write(html_content)
- html_file.close()
-
-def _complexity_benchmark():
- imgpath = os.path.join(_TEST_DIR, "d.png")
- dist = LocalElasticDistorter((32,32))
- orig_img = _load_image(imgpath)
-
- for cpx in (0.21, 0.35):
- # time the first 10
- t1 = time.time()
- for i in range(10):
- dist.regenerate_parameters(cpx)
- img = dist.transform_image(orig_img)
- t2 = time.time()
-
- print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10
-
- # time the next 40
- t1 = time.time()
- for i in range(40):
- dist.regenerate_parameters(cpx)
- img = dist.transform_image(orig_img)
- t2 = time.time()
-
- print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40
-
- # time the next 50
- t1 = time.time()
- for i in range(50):
- dist.regenerate_parameters(cpx)
- img = dist.transform_image(orig_img)
- t2 = time.time()
-
- print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50
-
- # time the next 1000
- t1 = time.time()
- for i in range(1000):
- dist.regenerate_parameters(cpx)
- img = dist.transform_image(orig_img)
- t2 = time.time()
-
- print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
-
- # time the next 1000 with old complexity
- t1 = time.time()
- for i in range(1000):
- dist.regenerate_parameters(0.21)
- img = dist.transform_image(orig_img)
- t2 = time.time()
-
- print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
-
-
-
-
-def _save_image(img, path):
- img2 = Image.fromarray((img * 255).astype('uint8'), "L")
- img2.save(path)
-
-# TODO: reformat to follow new class... it function of complexity now
-'''
-def _distorter_tests():
- #import pylab
- #pylab.imshow(img)
- #pylab.show()
-
- for letter in ("d", "a", "n", "o"):
- img = _load_image("tests/" + letter + ".png")
- for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
- for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
- id = LocalElasticDistorter((32,32))
- img2 = id.distort_image(img)
- img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
- img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
-'''
-
-def _benchmark():
- img = _load_image("tests/d.png")
- dist = LocalElasticDistorter((32,32))
- dist.regenerate_parameters(0.0)
- import time
- t1 = time.time()
- for i in range(10000):
- if i % 1000 == 0:
- print "-"
- dist.distort_image(img)
- t2 = time.time()
- print "t2-t1", t2-t1
- print "avg", 10000/(t2-t1)
-
-if __name__ == '__main__':
- import time
- import pylab
- import Image
- import os.path
- #_distorter_tests()
- #_benchmark()
- #_specific_test()
- #_complexity_tests()
- _complexity_benchmark()
-
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/pipeline.py
--- a/transformations/pipeline.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,391 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-from __future__ import with_statement
-
-# This is intended to be run as a GIMP script
-#from gimpfu import *
-
-import sys, os, getopt
-import numpy
-import filetensor as ft
-import random
-
-# To debug locally, also call with -s 100 (to stop after ~100)
-# (otherwise we allocate all needed memory, might be loonnng and/or crash
-# if, lucky like me, you have an age-old laptop creaking from everywhere)
-DEBUG = False
-DEBUG_X = False
-if DEBUG:
- DEBUG_X = False # Debug under X (pylab.show())
-
-DEBUG_IMAGES_PATH = None
-if DEBUG:
- # UNTESTED YET
- # To avoid loading NIST if you don't have it handy
- # (use with debug_images_iterator(), see main())
- # To use NIST, leave as = None
- DEBUG_IMAGES_PATH = None#'/home/francois/Desktop/debug_images'
-
-# Directory where to dump images to visualize results
-# (create it, otherwise it'll crash)
-DEBUG_OUTPUT_DIR = 'debug_out'
-
-DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft'
-DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft'
-DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
-DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft'
-ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE']
-
-# PARSE COMMAND LINE ARGUMENTS
-def get_argv():
- with open(ARGS_FILE) as f:
- args = [l.rstrip() for l in f.readlines()]
- return args
-
-def usage():
- print '''
-Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...]
- -m, --max-complexity: max complexity to generate for an image
- -z, --probability-zero: probability of using complexity=0 for an image
- -o, --output-file: full path to file to use for output of images
- -p, --params-output-file: path to file to output params to
- -x, --labels-output-file: path to file to output labels to
- -f, --data-file: path to filetensor (.ft) data file (NIST)
- -l, --label-file: path to filetensor (.ft) labels file (NIST labels)
- -c, --ocr-file: path to filetensor (.ft) data file (OCR)
- -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels)
- -a, --prob-font: probability of using a raw font image
- -b, --prob-captcha: probability of using a captcha image
- -g, --prob-ocr: probability of using an ocr image
- -y, --seed: the job seed
- '''
-
-try:
- opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=",
-"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="])
-except getopt.GetoptError, err:
- # print help information and exit:
- print str(err) # will print something like "option -a not recognized"
- usage()
- pdb.gimp_quit(0)
- sys.exit(2)
-
-for o, a in opts:
- if o in ('-y','--seed'):
- random.seed(int(a))
- numpy.random.seed(int(a))
-
-if DEBUG_X:
- import pylab
- pylab.ion()
-
-from PoivreSel import PoivreSel
-from thick import Thick
-from BruitGauss import BruitGauss
-from DistorsionGauss import DistorsionGauss
-from PermutPixel import PermutPixel
-from gimp_script import GIMP1
-from Rature import Rature
-from contrast import Contrast
-from local_elastic_distortions import LocalElasticDistorter
-from slant import Slant
-from Occlusion import Occlusion
-from add_background_image import AddBackground
-from affine_transform import AffineTransformation
-from ttf2jpg import ttf2jpg
-from Facade import generateCaptcha
-
-if DEBUG:
- from visualizer import Visualizer
- # Either put the visualizer as in the MODULES_INSTANCES list
- # after each module you want to visualize, or in the
- # AFTER_EACH_MODULE_HOOK list (but not both, it's redundant)
- VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR, on_screen=False)
-
-###---------------------order of transformation module
-MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
-
-# These should have a "after_transform_callback(self, image)" method
-# (called after each call to transform_image in a module)
-AFTER_EACH_MODULE_HOOK = []
-if DEBUG:
- AFTER_EACH_MODULE_HOOK = [VISUALIZER]
-
-# These should have a "end_transform_callback(self, final_image" method
-# (called after all modules have been called)
-END_TRANSFORM_HOOK = []
-if DEBUG:
- END_TRANSFORM_HOOK = [VISUALIZER]
-
-class Pipeline():
- def __init__(self, modules, num_img, image_size=(32,32)):
- self.modules = modules
- self.num_img = num_img
- self.num_params_stored = 0
- self.image_size = image_size
-
- self.init_memory()
-
- def init_num_params_stored(self):
- # just a dummy call to regenerate_parameters() to get the
- # real number of params (only those which are stored)
- self.num_params_stored = 0
- for m in self.modules:
- self.num_params_stored += len(m.regenerate_parameters(0.0))
-
- def init_memory(self):
- self.init_num_params_stored()
-
- total = self.num_img
- num_px = self.image_size[0] * self.image_size[1]
-
- self.res_data = numpy.empty((total, num_px), dtype=numpy.uint8)
- # +1 to store complexity
- self.params = numpy.empty((total, self.num_params_stored+len(self.modules)))
- self.res_labels = numpy.empty(total, dtype=numpy.int32)
-
- def run(self, img_iterator, complexity_iterator):
- img_size = self.image_size
-
- should_hook_after_each = len(AFTER_EACH_MODULE_HOOK) != 0
- should_hook_at_the_end = len(END_TRANSFORM_HOOK) != 0
-
- for img_no, (img, label) in enumerate(img_iterator):
- sys.stdout.flush()
-
- global_idx = img_no
-
- img = img.reshape(img_size)
-
- param_idx = 0
- mod_idx = 0
- for mod in self.modules:
- # This used to be done _per batch_,
- # ie. out of the "for img" loop
- complexity = complexity_iterator.next()
- #better to do a complexity sampling for each transformations in order to have more variability
- #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time))
- #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
- #complexity
- self.params[global_idx, mod_idx] = complexity
- mod_idx += 1
-
- p = mod.regenerate_parameters(complexity)
- self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p
- param_idx += len(p)
-
- img = mod.transform_image(img)
-
- if should_hook_after_each:
- for hook in AFTER_EACH_MODULE_HOOK:
- hook.after_transform_callback(img)
-
- self.res_data[global_idx] = \
- img.reshape((img_size[0] * img_size[1],))*255
- self.res_labels[global_idx] = label
-
- if should_hook_at_the_end:
- for hook in END_TRANSFORM_HOOK:
- hook.end_transform_callback(img)
-
- def write_output(self, output_file_path, params_output_file_path, labels_output_file_path):
- with open(output_file_path, 'wb') as f:
- ft.write(f, self.res_data)
-
- numpy.save(params_output_file_path, self.params)
-
- with open(labels_output_file_path, 'wb') as f:
- ft.write(f, self.res_labels)
-
-
-##############################################################################
-# COMPLEXITY ITERATORS
-# They're called once every img, to get the complexity to use for that img
-# they must be infinite (should never throw StopIteration when calling next())
-
-# probability of generating 0 complexity, otherwise
-# uniform over 0.0-max_complexity
-def range_complexity_iterator(probability_zero, max_complexity):
- assert max_complexity <= 1.0
- n = numpy.random.uniform(0.0, 1.0)
- while True:
- if n < probability_zero:
- yield 0.0
- else:
- yield numpy.random.uniform(0.0, max_complexity)
-
-##############################################################################
-# DATA ITERATORS
-# They can be used to interleave different data sources etc.
-
-'''
-# Following code (DebugImages and iterator) is untested
-
-def load_image(filepath):
- _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
- img = Image.open(filepath)
- img = numpy.asarray(img)
- if len(img.shape) > 2:
- img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
- return (img / 255.0).astype('float')
-
-class DebugImages():
- def __init__(self, images_dir_path):
- import glob, os.path
- self.filelist = glob.glob(os.path.join(images_dir_path, "*.png"))
-
-def debug_images_iterator(debug_images):
- for path in debug_images.filelist:
- yield load_image(path)
-'''
-
-class NistData():
- def __init__(self, nist_path, label_path, ocr_path, ocrlabel_path):
- self.train_data = open(nist_path, 'rb')
- self.train_labels = open(label_path, 'rb')
- self.dim = tuple(ft._read_header(self.train_data)[3])
- # in order to seek to the beginning of the file
- self.train_data.close()
- self.train_data = open(nist_path, 'rb')
- self.ocr_data = open(ocr_path, 'rb')
- self.ocr_labels = open(ocrlabel_path, 'rb')
-
-# cet iterator load tout en ram
-def nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img):
- img = ft.read(nist.train_data)
- labels = ft.read(nist.train_labels)
- if prob_ocr:
- ocr_img = ft.read(nist.ocr_data)
- ocr_labels = ft.read(nist.ocr_labels)
- ttf = ttf2jpg()
- L = [chr(ord('0')+x) for x in range(10)] + [chr(ord('A')+x) for x in range(26)] + [chr(ord('a')+x) for x in range(26)]
-
- for i in xrange(num_img):
- r = numpy.random.rand()
- if r <= prob_font:
- yield ttf.generate_image()
- elif r <=prob_font + prob_captcha:
- (arr, charac) = generateCaptcha(0,1)
- yield arr.astype(numpy.float32)/255, L.index(charac[0])
- elif r <= prob_font + prob_captcha + prob_ocr:
- j = numpy.random.randint(len(ocr_labels))
- yield ocr_img[j].astype(numpy.float32)/255, ocr_labels[j]
- else:
- j = numpy.random.randint(len(labels))
- yield img[j].astype(numpy.float32)/255, labels[j]
-
-
-# Mostly for debugging, for the moment, just to see if we can
-# reload the images and parameters.
-def reload(output_file_path, params_output_file_path):
- images_ft = open(output_file_path, 'rb')
- images_ft_dim = tuple(ft._read_header(images_ft)[3])
-
- print "Images dimensions: ", images_ft_dim
-
- params = numpy.load(params_output_file_path)
-
- print "Params dimensions: ", params.shape
- print params
-
-
-##############################################################################
-# MAIN
-
-
-# Might be called locally or through dbidispatch. In all cases it should be
-# passed to the GIMP executable to be able to use GIMP filters.
-# Ex:
-def _main():
- #global DEFAULT_NIST_PATH, DEFAULT_LABEL_PATH, DEFAULT_OCR_PATH, DEFAULT_OCRLABEL_PATH
- #global getopt, get_argv
-
- max_complexity = 0.5 # default
- probability_zero = 0.1 # default
- output_file_path = None
- params_output_file_path = None
- labels_output_file_path = None
- nist_path = DEFAULT_NIST_PATH
- label_path = DEFAULT_LABEL_PATH
- ocr_path = DEFAULT_OCR_PATH
- ocrlabel_path = DEFAULT_OCRLABEL_PATH
- prob_font = 0.0
- prob_captcha = 0.0
- prob_ocr = 0.0
- stop_after = None
- reload_mode = False
-
- for o, a in opts:
- if o in ('-m', '--max-complexity'):
- max_complexity = float(a)
- assert max_complexity >= 0.0 and max_complexity <= 1.0
- elif o in ('-r', '--reload'):
- reload_mode = True
- elif o in ("-z", "--probability-zero"):
- probability_zero = float(a)
- assert probability_zero >= 0.0 and probability_zero <= 1.0
- elif o in ("-o", "--output-file"):
- output_file_path = a
- elif o in ('-p', "--params-output-file"):
- params_output_file_path = a
- elif o in ('-x', "--labels-output-file"):
- labels_output_file_path = a
- elif o in ('-s', "--stop-after"):
- stop_after = int(a)
- elif o in ('-f', "--data-file"):
- nist_path = a
- elif o in ('-l', "--label-file"):
- label_path = a
- elif o in ('-c', "--ocr-file"):
- ocr_path = a
- elif o in ('-d', "--ocrlabel-file"):
- ocrlabel_path = a
- elif o in ('-a', "--prob-font"):
- prob_font = float(a)
- elif o in ('-b', "--prob-captcha"):
- prob_captcha = float(a)
- elif o in ('-g', "--prob-ocr"):
- prob_ocr = float(a)
- elif o in ('-y', "--seed"):
- pass
- else:
- assert False, "unhandled option"
-
- if output_file_path == None or params_output_file_path == None or labels_output_file_path == None:
- print "Must specify the three output files."
- usage()
- pdb.gimp_quit(0)
- sys.exit(2)
-
- if reload_mode:
- reload(output_file_path, params_output_file_path)
- else:
- if DEBUG_IMAGES_PATH:
- '''
- # This code is yet untested
- debug_images = DebugImages(DEBUG_IMAGES_PATH)
- num_img = len(debug_images.filelist)
- pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
- img_it = debug_images_iterator(debug_images)
- '''
- else:
- nist = NistData(nist_path, label_path, ocr_path, ocrlabel_path)
- num_img = 819200 # 800 Mb file
- if stop_after:
- num_img = stop_after
- pl = Pipeline(modules=MODULE_INSTANCES, num_img=num_img, image_size=(32,32))
- img_it = nist_supp_iterator(nist, prob_font, prob_captcha, prob_ocr, num_img)
-
- cpx_it = range_complexity_iterator(probability_zero, max_complexity)
- pl.run(img_it, cpx_it)
- pl.write_output(output_file_path, params_output_file_path, labels_output_file_path)
-
-_main()
-
-if DEBUG_X:
- pylab.ioff()
- pylab.show()
-
-pdb.gimp_quit(0)
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/run_pipeline.sh
--- a/transformations/run_pipeline.sh Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-# This is one _ugly_ hack, but I couldn't figure out how
-# to cleanly pass command line options to the script if
-# invoking using the "gimp --batch < script.py" syntax
-
-# Basically I create a temp file, put the args into it,
-# then the script gets the filename and reads back the
-# args
-
-export PIPELINE_ARGS_TMPFILE=`mktemp`
-
-for arg in "$@"
-do
- echo $arg >> $PIPELINE_ARGS_TMPFILE
-done
-
-gimp -i --batch-interpreter python-fu-eval --batch - < pipeline.py
-
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/slant.py
--- a/transformations/slant.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Author: Youssouf
-
-this module add a slant effect to the image.
-
-To obtain the slant effect, each row of the array is shifted proportionately by a step controlled by the complexity.
-
-'''
-
-import numpy
-
-
-class Slant():
- def __init__(self, complexity=1):
- #---------- private attributes
- self.direction = 1
- self.angle = 0
-
- #---------- generation parameters
- self.regenerate_parameters(complexity)
- #------------------------------------------------
-
- def _get_current_parameters(self):
- return [self.angle, self.direction]
-
- def get_settings_names(self):
- return ['angle', 'direction']
-
- def regenerate_parameters(self, complexity):
- self.angle = numpy.random.uniform(0.0, complexity)
- P = numpy.random.uniform()
- self.direction = 1;
- if P < 0.5:
- self.direction = -1;
- return self._get_current_parameters()
-
-
- def transform_image(self,image):
- if self.angle == 0:
- return image
-
- ysize, xsize = image.shape
- slant = self.direction*self.angle
-
- output = image.copy()
-
- # shift all the rows
- for i in range(ysize):
- line = image[i]
- delta = round((i*slant)) % xsize
- line1 = line[:xsize-delta]
- line2 = line[xsize-delta:xsize]
-
- output[i][delta:xsize] = line1
- output[i][0:delta] = line2
-
-
- #correction to center the image
- correction = (self.direction)*round(self.angle*ysize/2)
- correction = (xsize - correction) % xsize
-
- # center the region
- line1 = output[0:ysize,0:xsize-correction].copy()
- line2 = output[0:ysize,xsize-correction:xsize].copy()
- output[0:ysize,correction:xsize] = line1
- output[0:ysize,0:correction] = line2
-
-
- return output
-
-
-# Test function
-# Load an image in local and create several samples of the effect on the
-# original image with different parameter. All the samples are saved in a single image, the 1st image being the original.
-
-def test_slant():
- import scipy
- img_name = "test_img/mnist_0.png"
- dest_img_name = "test_img/slanted.png"
- nb_samples = 10
- im = Image.open(img_name)
- im = im.convert("L")
- image = numpy.asarray(im)
-
- image_final = image
- slant = Slant()
- for i in range(nb_samples):
- slant.regenerate_parameters(1)
- image_slant = slant.transform_image(image)
- image_final = scipy.hstack((image_final,image_slant))
-
- im = Image.fromarray(image_final.astype('uint8'), "L")
- im.save(dest_img_name)
-
-# Test
-if __name__ == '__main__':
- import sys, os, fnmatch
- import Image
-
- test_slant()
-
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/testmod.py
--- a/transformations/testmod.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-# This script is to test your modules to see if they conform to the module API
-# defined on the wiki.
-import random, numpy, gc, time, math, sys
-
-# this is an example module that does stupid image value shifting
-
-class DummyModule(object):
- def get_settings_names(self):
- return ['value']
-
- def regenerate_parameters(self, complexity):
- self._value = random.gauss(0, 0.5*complexity)
- return [self._value]
-
- def transform_image(self, image):
- return numpy.clip(image+self._value, 0, 1)
-
-#import
-
-# instanciate your class here (rather than DummyModule)
-mod = DummyModule()
-
-def error(msg):
- print "ERROR:", msg
- sys.exit(1)
-
-def warn(msg):
- print "WARNING:", msg
-
-def timeit(f, lbl):
-
- gc.disable()
- t = time.time()
- f()
- est = time.time() - t
- gc.enable()
-
- loops = max(1, int(10**math.floor(math.log(10/est, 10))))
-
- gc.disable()
- t = time.time()
- for _ in xrange(loops):
- f()
-
- print lbl, "(", loops, "loops ):", (time.time() - t)/loops, "s"
- gc.enable()
-
-########################
-# get_settings_names() #
-########################
-
-print "Testing get_settings_names()"
-
-names = mod.get_settings_names()
-
-if type(names) is not list:
- error("Must return a list")
-
-if not all(type(e) is str for e in names):
- warn("The elements of the list should be strings")
-
-###########################
-# regenerate_parameters() #
-###########################
-
-print "Testing regenerate_parameters()"
-
-params = mod.regenerate_parameters(0.2)
-
-if type(params) is not list:
- error("Must return a list")
-
-if len(params) != len(names):
- error("the returned parameter list must have the same length as the number of parameters")
-
-params2 = mod.regenerate_parameters(0.2)
-if len(names) != 0 and params == params2:
- error("the complexity parameter determines the distribution of the parameters, not their value")
-
-mod.regenerate_parameters(0.0)
-mod.regenerate_parameters(1.0)
-
-mod.regenerate_parameters(0.5)
-
-#####################
-# transform_image() #
-#####################
-
-print "Testing transform_image()"
-
-imgr = numpy.random.random_sample((32, 32)).astype(numpy.float32)
-img1 = numpy.ones((32, 32), dtype=numpy.float32)
-img0 = numpy.zeros((32, 32), dtype=numpy.float32)
-
-resr = mod.transform_image(imgr)
-
-if type(resr) is not numpy.ndarray:
- error("Must return an ndarray")
-
-if resr.shape != (32, 32):
- error("Must return 32x32 array")
-
-if resr.dtype != numpy.float32:
- error("Must return float32 array")
-
-res1 = mod.transform_image(img1)
-res0 = mod.transform_image(img0)
-
-if res1.max() > 1.0 or res0.max() > 1.0:
- error("Must keep array values between 0 and 1")
-
-if res1.min() < 0.0 or res0.min() < 0.0:
- error("Must keep array values between 0 and 1")
-
-mod.regenerate_parameters(0.0)
-mod.transform_image(imgr)
-mod.regenerate_parameters(1.0)
-mod.transform_image(imgr)
-
-print "Bonus Stage: timings"
-
-timeit(lambda: None, "empty")
-timeit(lambda: mod.regenerate_parameters(0.5), "regenerate_parameters()")
-timeit(lambda: mod.transform_image(imgr), "tranform_image()")
-
-def f():
- mod.regenerate_parameters(0.2)
- mod.transform_image(imgr)
-
-timeit(f, "regen and transform")
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/testtransformations.py
--- a/transformations/testtransformations.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,156 +0,0 @@
-#!/usr/bin/env python
-
-
-
-from pylearn.io import filetensor as ft
-import copy
-import pygame
-import time
-import numpy as N
-
-from ttf2jpg import ttf2jpg
-
-#from gimpfu import *
-
-
-from PoivreSel import PoivreSel
-from thick import Thick
-from BruitGauss import BruitGauss
-from DistorsionGauss import DistorsionGauss
-from PermutPixel import PermutPixel
-from gimp_script import GIMP1
-from Rature import Rature
-from contrast import Contrast
-from local_elastic_distortions import LocalElasticDistorter
-from slant import Slant
-from Occlusion import Occlusion
-from add_background_image import AddBackground
-from affine_transform import AffineTransformation
-
-###---------------------order of transformation module
-MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
-
-###---------------------complexity associated to each of them
-complexity = 0.7
-#complexity = [0.5]*len(MODULE_INSTANCES)
-#complexity = [0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.]
-n=100
-
-def createimage(path,d):
- for i in range(n):
- screen.fill(0)
- a=d[i,:]
- off1=4*32
- off2=0
- for u in range(n):
- b=N.asarray(N.reshape(a,(32,32)))
- c=N.asarray([N.reshape(a*255.0,(32,32))]*3).T
- new=pygame.surfarray.make_surface(c)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- #new.set_palette(anglcolorpalette)
- screen.blit(new,(0,0))
- exemple.blit(new,(0,0))
-
- offset = 4*32
- offset2 = 0
- ct = 0
- ctmp = N.random.rand()*complexity
- print u
- for j in MODULE_INSTANCES:
- #max dilation
- #ctmp = N.random.rand()*complexity[ct]
- ctmp = N.random.rand()*complexity
- #print j.get_settings_names(), j.regenerate_parameters(ctmp)
- th=j.regenerate_parameters(ctmp)
-
- b=j.transform_image(b)
- c=N.asarray([b*255]*3).T
- new=pygame.surfarray.make_surface(c)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- if u==0:
- #new.set_palette(anglcolorpalette)
- screen.blit(new,(offset,offset2))
- font = pygame.font.SysFont('liberationserif',18)
- text = font.render('%s '%(int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
- #if j.__module__ == 'Rature':
- # text = font.render('%s,%s'%(th[-1],int(ctmp*100.0)/100.0) + j.__module__,0,(255,255,255),(0,0,0))
- screen.blit(text,(offset,offset2+4*32))
- if ct == len(MODULE_INSTANCES)/2-1:
- offset = 0
- offset2 = 4*32+20
- else:
- offset += 4*32
- ct+=1
- exemple.blit(new,(off1,off2))
- if off1 != 9*4*32:
- off1+=4*32
- else:
- off1=0
- off2+=4*32
- pygame.image.save(exemple,path+'/perimages/%s.PNG'%i)
- pygame.image.save(screen,path+'/exemples/%s.PNG'%i)
-
-
-
-
-nbmodule = len(MODULE_INSTANCES)
-
-pygame.surfarray.use_arraytype('numpy')
-
-#pygame.display.init()
-screen = pygame.Surface((4*(nbmodule+1)/2*32,2*(4*32+20)),depth=32)
-exemple = pygame.Surface((N.ceil(N.sqrt(n))*4*32,N.ceil(N.sqrt(n))*4*32),depth=32)
-
-anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
-#pygame.Surface.set_palette(anglcolorpalette)
-#screen.set_palette(anglcolorpalette)
-
-pygame.font.init()
-
-d = N.zeros((n,1024))
-
-datapath = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
-f = open(datapath)
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/OCR',d)
-
-
-
-datapath = '/data/lisa/data/nist/by_class/'
-f = open(datapath+'digits_reshuffled/digits_reshuffled_train_data.ft')
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/NIST_digits',d)
-
-
-
-datapath = '/data/lisa/data/nist/by_class/'
-f = open(datapath+'upper/upper_train_data.ft')
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/NIST_upper',d)
-
-from Facade import *
-
-for i in range(n):
- d[i,:]=N.asarray(N.reshape(generateCaptcha(0.8,0),(1,1024))/255.0,dtype='float32')
-
-createimage('/u/glorotxa/transf/capcha',d)
-
-
-for i in range(n):
- myttf2jpg = ttf2jpg()
- d[i,:]=N.reshape(myttf2jpg.generate_image()[0],(1,1024))
-createimage('/u/glorotxa/transf/fonts',d)
-
-datapath = '/data/lisa/data/nist/by_class/'
-f = open(datapath+'lower/lower_train_data.ft')
-d = ft.read(f)
-d = d[0:n,:]/255.0
-createimage('/u/glorotxa/transf/NIST_lower',d)
-
-
-#pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/thick.py
--- a/transformations/thick.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,198 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-'''
-Simple implementation of random thickness deformation using morphological
-operation of scipy.
-Only one morphological operation applied (dilation or erosion), the kernel is random
-out of a list of 12 symmetric kernels. (only 5 to be chosen for erosion because it can
-hurt the recognizability of the charater and 12 for dilation).
-
-Author: Xavier Glorot
-
-'''
-
-import scipy.ndimage.morphology
-import numpy as N
-
-
-class Thick():
- def __init__(self,complexity = 1):
- #---------- private attributes
- self.__nx__ = 32 #xdim of the images
- self.__ny__ = 32 #ydim of the images
- self.__erodemax__ = 5 #nb of index max of erode structuring elements
- self.__dilatemax__ = 9 #nb of index max of dilation structuring elements
- self.__structuring_elements__ = [N.asarray([[1,1]]),N.asarray([[1],[1]]),\
- N.asarray([[1,1],[1,1]]),N.asarray([[0,1,0],[1,1,1],[0,1,0]]),\
- N.asarray([[1,1,1],[1,1,1]]),N.asarray([[1,1],[1,1],[1,1]]),\
- N.asarray([[1,1,1],[1,1,1],[1,1,1]]),\
- N.asarray([[1,1,1,1],[1,1,1,1],[1,1,1,1]]),\
- N.asarray([[1,1,1],[1,1,1],[1,1,1],[1,1,1]]),\
- N.asarray([[0,0,1,0,0],[0,1,1,1,0],[1,1,1,1,1],[0,1,1,1,0],[0,0,1,0,0]]),\
- N.asarray([[1,1,1,1],[1,1,1,1]]),N.asarray([[1,1],[1,1],[1,1],[1,1]])]
- #------------------------------------------------
-
- #---------- generation parameters
- self.regenerate_parameters(complexity)
- #------------------------------------------------
-
- def _get_current_parameters(self):
- return [self.thick_param]
-
- def get_settings_names(self):
- return ['thick_param']
-
- def regenerate_parameters(self, complexity):
- self.erodenb = N.ceil(complexity * self.__erodemax__)
- self.dilatenb = N.ceil(complexity * self.__dilatemax__)
- self.Perode = self.erodenb / (self.dilatenb + self.erodenb + 1.0)
- self.Pdilate = self.dilatenb / (self.dilatenb + self.erodenb + 1.0)
- assert (self.Perode + self.Pdilate <= 1) & (self.Perode + self.Pdilate >= 0)
- assert (complexity >= 0) & (complexity <= 1)
- P = N.random.uniform()
- if P>1-(self.Pdilate+self.Perode):
- if P>1-(self.Pdilate+self.Perode)+self.Perode:
- self.meth = 1
- self.nb=N.random.randint(self.dilatenb)
- else:
- self.meth = -1
- self.nb=N.random.randint(self.erodenb)
- else:
- self.meth = 0
- self.nb = -1
- self.thick_param = self.meth*self.nb
- return self._get_current_parameters()
-
- def transform_1_image(self,image): #the real transformation method
- if self.meth!=0:
- maxi = float(N.max(image))
- mini = float(N.min(image))
-
- imagenorm=image/maxi
-
- if self.meth==1:
- trans=scipy.ndimage.morphology.grey_dilation\
- (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
- else:
- trans=scipy.ndimage.morphology.grey_erosion\
- (imagenorm,size=self.__structuring_elements__[self.nb].shape,structure=self.__structuring_elements__[self.nb])
-
- #------renormalizing
- maxit = N.max(trans)
- minit = N.min(trans)
- trans= N.asarray((trans - (minit+mini)) / (maxit - (minit+mini)) * maxi,dtype=image.dtype)
- #--------
- return trans
- else:
- return image
-
- def transform_image(self,image): #handling different format
- if image.shape == (self.__nx__,self.__ny__):
- return self.transform_1_image(image)
- if image.ndim == 3:
- newimage = copy.copy(image)
- for i in range(image.shape[0]):
- newimage[i,:,:] = self.transform_1_image(image[i,:,:])
- return newimage
- if image.ndim == 2 and image.shape != (self.__nx__,self.__ny__):
- newimage = N.reshape(image,(image.shape[0],self.__nx__,self.__ny__))
- for i in range(image.shape[0]):
- newimage[i,:,:] = self.transform_1_image(newimage[i,:,:])
- return N.reshape(newimage,image.shape)
- if image.ndim == 1:
- newimage = N.reshape(image,(self.__nx__,self.__ny__))
- newimage = self.transform_1_image(newimage)
- return N.reshape(newimage,image.shape)
- assert False #should never go there
-
-
-
-
-#test on NIST (you need pylearn and access to NIST to do that)
-
-if __name__ == '__main__':
-
- from pylearn.io import filetensor as ft
- import copy
- import pygame
- import time
- datapath = '/data/lisa/data/nist/by_class/'
- f = open(datapath+'digits/digits_train_data.ft')
- d = ft.read(f)
-
- pygame.surfarray.use_arraytype('numpy')
-
- pygame.display.init()
- screen = pygame.display.set_mode((8*4*32,8*32),0,8)
- anglcolorpalette=[(x,x,x) for x in xrange(0,256)]
- screen.set_palette(anglcolorpalette)
-
- MyThick = Thick()
-
- #debut=time.time()
- #MyThick.transform_image(d)
- #fin=time.time()
- #print '------------------------------------------------'
- #print d.shape[0],' images transformed in :', fin-debut, ' seconds'
- #print '------------------------------------------------'
- #print (fin-debut)/d.shape[0]*1000000,' microseconds per image'
- #print '------------------------------------------------'
- #print MyThick.get_settings_names()
- #print MyThick._get_current_parameters()
- #print MyThick.regenerate_parameters(0)
- #print MyThick.regenerate_parameters(0.5)
- #print MyThick.regenerate_parameters(1)
- for i in range(10000):
- a=d[i,:]
- b=N.asarray(N.reshape(a,(32,32))).T
-
- new=pygame.surfarray.make_surface(b)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new.set_palette(anglcolorpalette)
- screen.blit(new,(0,0))
-
- #max dilation
- MyThick.meth=1
- MyThick.nb=MyThick.__dilatemax__
- c=MyThick.transform_image(a)
- b=N.asarray(N.reshape(c,(32,32))).T
-
- new=pygame.surfarray.make_surface(b)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new.set_palette(anglcolorpalette)
- screen.blit(new,(8*32,0))
-
- #max erosion
- MyThick.meth=-1
- MyThick.nb=MyThick.__erodemax__
- c=MyThick.transform_image(a)
- b=N.asarray(N.reshape(c,(32,32))).T
-
- new=pygame.surfarray.make_surface(b)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new.set_palette(anglcolorpalette)
- screen.blit(new,(8*2*32,0))
-
- #random
- print MyThick.get_settings_names(), MyThick.regenerate_parameters(1)
- c=MyThick.transform_image(a)
- b=N.asarray(N.reshape(c,(32,32))).T
-
- new=pygame.surfarray.make_surface(b)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new=pygame.transform.scale2x(new)
- new.set_palette(anglcolorpalette)
- screen.blit(new,(8*3*32,0))
-
- pygame.display.update()
- raw_input('Press Enter')
-
- pygame.display.quit()
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/ttf2jpg.py
--- a/transformations/ttf2jpg.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-#!/usr/bin/python
-# -*- coding: iso-8859-1 -*-
-
-'''
- Implementation of font image generator
- download fonts from http://www.dafont.com for exemple
-
- Author: Guillaume Sicard
-'''
-
-import sys, os, fnmatch, random
-import Image, ImageFont, ImageDraw, numpy
-
-class ttf2jpg():
- def __init__(self, font_file = ''):
- self.w = 32
- self.h = 32
- self.font_dir = '/Tmp/allfonts/'
- self.font_file = font_file
- self.image_dir = './images/'
- self.pattern = '*.ttf'
- self.char_list = []
- for i in range(0,10):
- self.char_list.append(chr(ord('0') + i) )
- for i in range(0,26):
- self.char_list.append(chr(ord('A') + i) )
- for i in range(0,26):
- self.char_list.append(chr(ord('a') + i) )
- files = os.listdir(self.font_dir)
- self.font_files = fnmatch.filter(files, '*.ttf') + fnmatch.filter(files, '*.TTF')
-
- # get font name
- def get_settings_names(self):
- return [self.font_file]
-
- # save an image
- def save_image(self,array, filename = ''):
- image = (array * 255.0).astype('int')
- image = Image.fromarray(image).convert('L')
- if (filename != ''):
- image.save(filename)
- else:
- image.show()
-
- # set a random font for character generation
- def set_random_font(self):
- i = random.randint(0, len(self.font_files) - 1)
- self.font_file = self.font_dir + self.font_files[i]
-
- # return a picture array of "text" with font "font_file"
- def create_image(self, text):
- # create a w x h black picture, and a drawing space
- image = Image.new('L', (self.w, self.h), 'Black')
- draw = ImageDraw.Draw(image)
-
- # load the font with the right size
- font = ImageFont.truetype(self.font_file, 28)
- d_w,d_h = draw.textsize(text, font=font)
-
- # write text and aligns it
- draw.text(((32 - d_w) / 2, ((32 - d_h) / 2)), text, font=font, fill='White')
-
- image = numpy.asarray(image)
- image = (image / 255.0).astype(numpy.float32)
-
- return image
-
- # write all the letters and numbers into pictures
- def process_font(self):
- for i in range(0, len(self.char_list) ):
- image = self.create_image(self.char_list[i])
- self.save_image(image, self.image_dir + self.char_list[i] + '-' + os.path.basename(self.font_file) + '.jpg')
- sys.stdout.write('.')
- sys.stdout.flush()
- return (len(self.char_list))
-
- # generate the character from the font_file and returns a numpy array
- def generate_image_from_char(self, character, font_file = ''):
- if (font_file != ''):
- self.font_file = font_file
-
- return self.create_image(character)
-
- # generate random character from random font file as a numpy array
- def generate_image(self):
- self.set_random_font()
- i = random.randint(0, len(self.char_list) - 1)
- return self.generate_image_from_char(self.char_list[i]), i
-
- # test method, create character images for all fonts in "font_dir" in dir "image_dir"
- def test(self):
- import time
-
- # look for ttf files
- files = os.listdir(self.font_dir)
- font_files = fnmatch.filter(files, self.pattern)
-
- # create "image_dir" if it doesn't exist
- if not os.path.isdir(self.image_dir):
- os.mkdir(self.image_dir)
-
- sys.stdout.write( str(len(font_files)) + ' fonts found, generating jpg images in folder ' + self.image_dir )
- sys.stdout.flush()
-
- # main loop
- t = time.time()
- n = 0
-
- for font_file in font_files:
- self.font_file = self.font_dir + font_file
- n += self.process_font()
- t = time.time() - t
-
- sys.stdout.write('\nall done!\n' + str(n) + ' images generated in ' + str(t) + 's (average : ' + str(1000 * t / n) + ' ms/im)\n')
-
-if __name__ == '__main__':
-
- myttf2jpg = ttf2jpg()
- #myttf2jpg.test()
- image, i = myttf2jpg.generate_image()
- myttf2jpg.save_image(image, '')
diff -r 17ae5a1a4dd1 -r 1f5937e9e530 transformations/visualizer.py
--- a/transformations/visualizer.py Fri Feb 26 14:03:24 2010 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-#!/usr/bin/python
-
-import numpy
-import Image
-from image_tiling import tile_raster_images
-import pylab
-import time
-
-class Visualizer():
- def __init__(self, num_columns=10, image_size=(32,32), to_dir=None, on_screen=False):
- self.list = []
- self.image_size = image_size
- self.num_columns = num_columns
-
- self.on_screen = on_screen
- self.to_dir = to_dir
-
- self.cur_grid_image = None
-
- self.cur_index = 0
-
- def visualize_stop_and_flush(self):
- self.make_grid_image()
-
- if self.on_screen:
- self.visualize()
- if self.to_dir:
- self.dump_to_disk()
-
- self.stop_and_wait()
- self.flush()
-
- self.cur_index += 1
-
- def make_grid_image(self):
- num_rows = len(self.list) / self.num_columns
- if len(self.list) % self.num_columns != 0:
- num_rows += 1
- grid_shape = (num_rows, self.num_columns)
- self.cur_grid_image = tile_raster_images(numpy.array(self.list), self.image_size, grid_shape, tile_spacing=(5,5), output_pixel_vals=False)
-
- def visualize(self):
- pylab.imshow(self.cur_grid_image)
- pylab.draw()
-
- def dump_to_disk(self):
- gi = Image.fromarray((self.cur_grid_image * 255).astype('uint8'), "L")
- gi.save(self.to_dir + "/grid_" + str(self.cur_index) + ".png")
-
- def stop_and_wait(self):
- # can't raw_input under gimp, so sleep)
- print "New image generated, sleeping 5 secs"
- time.sleep(5)
-
- def flush(self):
- self.list = []
-
- def get_parameters_names(self):
- return []
-
- def regenerate_parameters(self):
- return []
-
- def after_transform_callback(self, image):
- self.transform_image(image)
-
- def end_transform_callback(self, final_image):
- self.visualize_stop_and_flush()
-
- def transform_image(self, image):
- sz = self.image_size
- self.list.append(image.copy().reshape((sz[0] * sz[1])))
-