comparison transformations/pipeline.py @ 128:ccce06590e64

Added captcha support in pipeline.py
author boulanni <nicolas_boulanger@hotmail.com>
date Thu, 18 Feb 2010 14:27:49 -0500
parents 3bec123dd75d
children 4981c729149c
comparison
legend: equal | deleted | inserted | replaced
127:3f85e8c58a9f 128:ccce06590e64
53 from slant import Slant 53 from slant import Slant
54 from Occlusion import Occlusion 54 from Occlusion import Occlusion
55 from add_background_image import AddBackground 55 from add_background_image import AddBackground
56 from affine_transform import AffineTransformation 56 from affine_transform import AffineTransformation
57 from ttf2jpg import ttf2jpg 57 from ttf2jpg import ttf2jpg
58 from ..pycaptcha.Facade import generateCaptcha
58 59
59 if DEBUG: 60 if DEBUG:
60 from visualizer import Visualizer 61 from visualizer import Visualizer
61 # Either put the visualizer as in the MODULES_INSTANCES list 62 # Either put the visualizer as in the MODULES_INSTANCES list
62 # after each module you want to visualize, or in the 63 # after each module you want to visualize, or in the
217 labels = ft.read(nist.train_labels) 218 labels = ft.read(nist.train_labels)
218 if prob_ocr: 219 if prob_ocr:
219 ocr_img = ft.read(nist.ocr_data) 220 ocr_img = ft.read(nist.ocr_data)
220 ocr_labels = ft.read(nist.ocr_labels) 221 ocr_labels = ft.read(nist.ocr_labels)
221 ttf = ttf2jpg() 222 ttf = ttf2jpg()
223 L = [chr(ord('0')+x) for x in range(10)] + [chr(ord('A')+x) for x in range(26)] + [chr(ord('a')+x) for x in range(26)]
222 224
223 for i in xrange(num_img): 225 for i in xrange(num_img):
224 r = numpy.random.rand() 226 r = numpy.random.rand()
225 if r <= prob_font: 227 if r <= prob_font:
226 yield ttf.generate_image() 228 yield ttf.generate_image()
227 elif r <= prob_font + prob_captcha: 229 elif r <= prob_font + prob_captcha:
228 pass #get captcha 230 (arr, charac) = generateCaptcha(0,1)
231 yield arr.astype(numpy.float32)/255, L.index(charac)
229 elif r <= prob_font + prob_captcha + prob_ocr: 232 elif r <= prob_font + prob_captcha + prob_ocr:
230 j = numpy.random.randint(len(ocr_labels)) 233 j = numpy.random.randint(len(ocr_labels))
231 yield ocr_img[j].astype(numpy.float32)/255, ocr_labels[j] 234 yield ocr_img[j].astype(numpy.float32)/255, ocr_labels[j]
232 else: 235 else:
233 j = numpy.random.randint(len(labels)) 236 j = numpy.random.randint(len(labels))