comparison data_generation/transformations/pycaptcha/Captcha/Words.py @ 167:1f5937e9e530

More moves - transformations into data_generation, added "deep" folder
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Fri, 26 Feb 2010 14:15:38 -0500
parents pycaptcha/Captcha/Words.py@4775b4195b4b
children
comparison
equal deleted inserted replaced
166:17ae5a1a4dd1 167:1f5937e9e530
1 """ Captcha.Words
2
3 Utilities for managing word lists and finding random words
4 """
5 #
6 # PyCAPTCHA Package
7 # Copyright (C) 2004 Micah Dowty <micah@navi.cx>
8 #
9
10 import random, os
11 import File
12
13
class WordList(object):
    """A word list that is read from disk lazily.

    The file is looked up as ``<File.dataDir>/words/<fileName>`` and is
    only opened the first time a word is requested (or when read() is
    called explicitly).

    Blank lines and comment lines starting with '#' are ignored.
    Any number of whitespace-separated words per line may be used.
    The list can optionally ignore words not within a given length range.
    """

    def __init__(self, fileName, minLength=None, maxLength=None):
        """Remember where the list lives; nothing is read from disk yet.

        fileName  -- name of the word file inside the "words" data directory
        minLength -- if not None, words shorter than this are skipped
        maxLength -- if not None, words longer than this are skipped
        """
        # None means "not loaded yet"; an empty list means "loaded, no words".
        self.words = None
        self.fileName = fileName
        self.minLength = minLength
        self.maxLength = maxLength

    def read(self):
        """Read words from disk, replacing any previously loaded list.

        Errors raised while opening or reading the file propagate to the
        caller; in that case self.words is left untouched.
        """
        path = os.path.join(File.dataDir, "words", self.fileName)

        # Collect into a local list and publish it only once the whole file
        # has been parsed, so a failure half-way through cannot leave a
        # partially filled list that pick() would mistake for a loaded one.
        loaded = []

        # The context manager guarantees the file handle is closed, and
        # iterating the file object directly replaces the long-deprecated
        # xreadlines() (which no longer exists on Python 3).
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                for word in line.split():
                    if self.minLength is not None and len(word) < self.minLength:
                        continue
                    if self.maxLength is not None and len(word) > self.maxLength:
                        continue
                    loaded.append(word)

        self.words = loaded

    def pick(self):
        """Pick a random word from the list, reading it in if necessary.

        random.choice() raises IndexError if the loaded list is empty
        (for example when the length limits filter out every word).
        """
        if self.words is None:
            self.read()
        return random.choice(self.words)
49
50
# Shared, module-level word lists.  Constructing a WordList does not touch
# the disk; each list is read the first time a word is picked from it.

# The full "basic-english" word file, with no length restriction.
basic_english = WordList(fileName="basic-english")

# The same word file, limited to words of 5 to 8 characters inclusive.
basic_english_restricted = WordList(
    fileName="basic-english",
    minLength=5,
    maxLength=8,
)

# The "characters" word file.
characters = WordList(fileName="characters")

# Alias (same object, not a copy) for the list used by default.
defaultWordList = characters
56
57
58 ### The End ###