annotate data_generation/transformations/pycaptcha/Captcha/File.py @ 595:da46a62ce402

submitted JMLR pdf
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Tue, 05 Oct 2010 15:07:33 -0400
parents 7800be7bce66
children
rev   line source
87
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
1 """ Captcha.File
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
2
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
3 Utilities for finding and picking random files from our 'data' directory
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
4 """
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
5 #
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
6 # PyCAPTCHA Package
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
7 # Copyright (C) 2004 Micah Dowty <micah@navi.cx>
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
8 #
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
9
273
7800be7bce66 changes in ttf2jpg and pycaptcha to load a file list with cPickle (instead of doing a listdir()) in order to have always the same list order from different machine (to reproduce generation)
Xavier Glorot <glorotxa@iro.umontreal.ca>
parents: 184
diff changeset
10 import os, random, cPickle
87
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
11
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
# Location of the bundled "data" directory, which sits next to this module.
# Callers may rebind this name after import to point somewhere else.
_moduleDir = os.path.dirname(os.path.abspath(__file__))
dataDir = os.path.join(_moduleDir, "data")
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
14
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
15
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
class RandomFileFactory(object):
    """Given a list of files and/or directories, this picks a random file.

    Directories are expanded through a pickled ``filelist.pkl`` manifest
    stored inside them; only manifest entries ending with one of
    ``extensions`` are kept. Relative names are resolved against the
    module-level ``dataDir`` joined with ``basePath``; absolute names are
    used as given.

    Subclasses customise the behaviour by overriding the two class
    attributes below.
    """
    # Suffixes accepted when expanding a directory. With the default empty
    # list no manifest entry matches, so directories contribute nothing.
    extensions = []
    # Sub-directory of ``dataDir`` that relative names are resolved against.
    basePath = "."

    def __init__(self, *fileList):
        """Remember the candidate names; paths are resolved on first pick()."""
        self.fileList = fileList
        # Cache of resolved paths, filled in lazily by pick().
        self._fullPaths = None

    def _checkExtension(self, name):
        """Return True if ``name`` ends with one of ``self.extensions``."""
        for ext in self.extensions:
            if name.endswith(ext):
                return True
        return False

    def _listDirectory(self, path):
        """Return full paths of the manifest entries of ``path`` that match.

        The entries come from ``filelist.pkl`` rather than os.listdir() so
        that the order is identical on every machine.

        Raises IOError if the manifest is missing or unreadable.
        """
        # NOTE(review): unpickling executes arbitrary code; the manifest must
        # come from a trusted data directory.
        # Binary mode: pickles are byte streams, and text mode can corrupt
        # them on some platforms (and fails outright on Python 3).
        manifest = open(os.path.join(path, 'filelist.pkl'), 'rb')
        try:
            # try/finally guarantees the handle is released even when the
            # pickle is corrupt and load() raises.
            filelist = cPickle.load(manifest)
        finally:
            manifest.close()
        return [os.path.join(path, content)
                for content in filelist
                if self._checkExtension(content)]

    def _findFullPaths(self):
        """From our given file list, find a list of full paths to files.

        Plain files are passed through without checking that they exist or
        that their extension matches; only directory contents are filtered.
        """
        paths = []
        for name in self.fileList:
            # os.path.isabs() instead of name[0] == '/': same result for
            # non-empty names, but an empty name no longer raises IndexError
            # (it resolves to the base directory itself).
            if os.path.isabs(name):
                path = name
            else:
                path = os.path.join(dataDir, self.basePath, name)
            if os.path.isdir(path):
                paths.extend(self._listDirectory(path))
            else:
                paths.append(path)
        return paths

    def pick(self):
        """Return one randomly chosen full path.

        The path list is computed on the first call and reused afterwards.

        Raises IndexError if no candidate file was found.
        """
        if self._fullPaths is None:
            self._fullPaths = self._findFullPaths()
        return random.choice(self._fullPaths)
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
58
4775b4195b4b code pour la generation de captchas
goldfinger
parents:
diff changeset
59 ### The End ###