Mercurial > ift6266
annotate data_generation/transformations/pycaptcha/Captcha/File.py @ 595:da46a62ce402
submitted JMLR pdf
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Tue, 05 Oct 2010 15:07:33 -0400 |
parents | 7800be7bce66 |
children |
rev | line source |
---|---|
87 | 1 """ Captcha.File |
2 | |
3 Utilities for finding and picking random files from our 'data' directory | |
4 """ | |
5 # | |
6 # PyCAPTCHA Package | |
7 # Copyright (C) 2004 Micah Dowty <micah@navi.cx> | |
8 # | |
9 | |
273
7800be7bce66
changes in ttf2jpg and pycaptcha to load a file list with cPickle (instead of doing a listdir()) in order to have always the same list order from different machine (to reproduce generation)
Xavier Glorot <glorotxa@iro.umontreal.ca>
parents:
184
diff
changeset
|
10 import os, random, cPickle |
87 | 11 |
# Default location of the bundled data files: the "data" directory that sits
# next to this module. Callers may rebind this name after import if needed.
dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
14 | |
15 | |
class RandomFileFactory(object):
    """Given a list of files and/or directories, this picks a random file.

    Entries starting with '/' are used as-is; every other entry is resolved
    relative to the module-level ``dataDir`` joined with the subclass-specified
    ``basePath``.  An entry that is a directory is expanded through the pickled
    ``filelist.pkl`` found inside it (rather than a directory listing), keeping
    only the names that end with one of ``extensions``.  Any other entry is
    kept unchanged, without checking that it exists.

    Subclasses are expected to override ``extensions`` and ``basePath``.
    """
    # Filename suffixes accepted when expanding a directory entry.
    extensions = []
    # Sub-path of ``dataDir`` that relative entries are resolved against.
    basePath = "."

    def __init__(self, *fileList):
        """Remember the file/directory entries; paths are resolved lazily."""
        self.fileList = fileList
        # Cache filled by the first call to pick().
        self._fullPaths = None

    def _checkExtension(self, name):
        """Return True if *name* ends with one of our extensions."""
        # endswith() accepts a tuple of suffixes; an empty tuple never matches,
        # which mirrors the behaviour of looping over an empty list.
        return name.endswith(tuple(self.extensions))

    def _loadFileList(self, directory):
        """Return the unpickled content of *directory*'s ``filelist.pkl``."""
        # NOTE(security): unpickling can execute arbitrary code.  This is only
        # acceptable because filelist.pkl is expected to be a trusted file
        # shipped with the data directory; never point this at untrusted data.
        # Binary mode is what the pickle documentation asks for.
        f = open(os.path.join(directory, 'filelist.pkl'), 'rb')
        try:
            return cPickle.load(f)
        finally:
            # Release the handle even when loading fails.
            f.close()

    def _findFullPaths(self):
        """From our given file list, find a list of full paths to files.

        Raises whatever ``open`` or ``cPickle.load`` raise when a directory
        entry has a missing or unreadable ``filelist.pkl``.
        """
        paths = []
        for name in self.fileList:
            # startswith() instead of name[0] so an empty entry does not raise
            # IndexError; it is then resolved like any other relative entry.
            if name.startswith('/'):
                path = name
            else:
                path = os.path.join(dataDir, self.basePath, name)

            if os.path.isdir(path):
                for content in self._loadFileList(path):
                    if self._checkExtension(content):
                        paths.append(os.path.join(path, content))
            else:
                paths.append(path)
        return paths

    def pick(self):
        """Return one randomly chosen full path.

        The list of candidates is computed on the first call and reused
        afterwards.  ``random.choice`` raises IndexError when that list is
        empty.
        """
        if self._fullPaths is None:
            self._fullPaths = self._findFullPaths()
        return random.choice(self._fullPaths)
58 | |
59 ### The End ### |