Mercurial > ift6266
comparison data_generation/transformations/pycaptcha/Captcha/Words.py @ 167:1f5937e9e530
More moves - transformations into data_generation, added "deep" folder
author | Dumitru Erhan <dumitru.erhan@gmail.com> |
---|---|
date | Fri, 26 Feb 2010 14:15:38 -0500 |
parents | pycaptcha/Captcha/Words.py@4775b4195b4b |
children |
comparison
equal
deleted
inserted
replaced
166:17ae5a1a4dd1 | 167:1f5937e9e530 |
---|---|
1 """ Captcha.Words | |
2 | |
3 Utilities for managing word lists and finding random words | |
4 """ | |
5 # | |
6 # PyCAPTCHA Package | |
7 # Copyright (C) 2004 Micah Dowty <micah@navi.cx> | |
8 # | |
9 | |
10 import random, os | |
11 import File | |
12 | |
13 | |
class WordList(object):
    """A word list that is read from disk lazily.

    Blank lines and comment lines starting with '#' are ignored.
    Any number of words per line may be used. The list can
    optionally ignore words not within a given length range.

    The file is looked up by name inside the "words" subdirectory of
    File.dataDir, and is not touched until read() or pick() is called.
    """

    def __init__(self, fileName, minLength=None, maxLength=None):
        """Remember where the words live and how to filter them.

        fileName  -- name of the word file inside the "words" data directory
        minLength -- shortest word to keep, or None for no lower bound
        maxLength -- longest word to keep, or None for no upper bound
        """
        # None (as opposed to an empty list) means "not loaded yet".
        self.words = None
        self.fileName = fileName
        self.minLength = minLength
        self.maxLength = maxLength

    def read(self):
        """Read words from disk, replacing any previously loaded list.

        Raises whatever open() raises if the word file cannot be opened.
        """
        path = os.path.join(File.dataDir, "words", self.fileName)

        # Collect into a local list and publish it only after the whole
        # file has been read, so a failure part-way through never leaves
        # a truncated list that pick() would mistake for a loaded one.
        words = []
        # The with-block closes the file even if reading fails.
        with open(path) as f:
            # Iterating the file object yields one line at a time; this
            # replaces the long-deprecated xreadlines().
            for line in f:
                line = line.strip()
                # Skip blank lines and '#' comments.
                if not line or line[0] == '#':
                    continue
                for word in line.split():
                    if self.minLength is not None and len(word) < self.minLength:
                        continue
                    if self.maxLength is not None and len(word) > self.maxLength:
                        continue
                    words.append(word)
        self.words = words

    def pick(self):
        """Pick a random word from the list, reading it in if necessary.

        Raises IndexError (from random.choice) if no word passed the filters.
        """
        if self.words is None:
            self.read()
        return random.choice(self.words)
49 | |
50 | |
# Shared, module-level word lists. Creating a WordList only records its
# settings; the backing file is read the first time the list is used.
characters = WordList("characters")
defaultWordList = characters
basic_english = WordList("basic-english")
basic_english_restricted = WordList("basic-english", maxLength=8, minLength=5)
56 | |
57 | |
58 ### The End ### |