comparison transformations/pipeline.py @ 159:e81241cfc2de

merge
author Myriam Cote <cotemyri@iro.umontreal.ca>
date Thu, 25 Feb 2010 09:05:48 -0500
parents 6f3b866c0182
children
comparison
equal deleted inserted replaced
158:d1bb6e06497a 159:e81241cfc2de
34 DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft' 34 DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft'
35 DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft' 35 DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft'
36 DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft' 36 DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft'
37 DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft' 37 DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft'
38 ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE'] 38 ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE']
39
40 # PARSE COMMAND LINE ARGUMENTS
def get_argv():
    """Return the arguments stored in ARGS_FILE as a list of strings.

    The file is read line by line; every line becomes one argument once
    its trailing whitespace (newline included) has been stripped.
    """
    with open(ARGS_FILE) as args_file:
        return [line.rstrip() for line in args_file]
45
def usage():
    """Print the command-line help text to standard output."""
    help_text = '''
Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...]
-m, --max-complexity: max complexity to generate for an image
-z, --probability-zero: probability of using complexity=0 for an image
-o, --output-file: full path to file to use for output of images
-p, --params-output-file: path to file to output params to
-x, --labels-output-file: path to file to output labels to
-f, --data-file: path to filetensor (.ft) data file (NIST)
-l, --label-file: path to filetensor (.ft) labels file (NIST labels)
-c, --ocr-file: path to filetensor (.ft) data file (OCR)
-d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels)
-a, --prob-font: probability of using a raw font image
-b, --prob-captcha: probability of using a captcha image
-g, --prob-ocr: probability of using an ocr image
-y, --seed: the job seed
'''
    # A single parenthesised argument prints identically whether `print`
    # is the statement or the function.
    print(help_text)
63
# Parse the full option set at module level. Only the seed is acted upon
# here; `opts` and `args` stay bound as module-level names.
try:
    opts, args = getopt.getopt(
        get_argv(),
        "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:",
        ["reload", "max-complexity=", "probability-zero=",
         "output-file=", "params-output-file=", "labels-output-file=",
         "stop-after=", "data-file=", "label-file=", "ocr-file=",
         "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=",
         "seed="])
except getopt.GetoptError as err:
    # Report the problem (something like "option -a not recognized"),
    # show the help text and quit.
    print(str(err))
    usage()
    # NOTE(review): `pdb` is presumably GIMP's procedure database rather
    # than the Python debugger -- confirm against the file's imports.
    pdb.gimp_quit(0)
    sys.exit(2)

# Seed both the standard-library and the numpy random generators with the
# job seed when one was supplied.
for o, a in opts:
    if o not in ('-y', '--seed'):
        continue
    random.seed(int(a))
    numpy.random.seed(int(a))
39 78
40 if DEBUG_X: 79 if DEBUG_X:
41 import pylab 80 import pylab
42 pylab.ion() 81 pylab.ion()
43 82
53 from slant import Slant 92 from slant import Slant
54 from Occlusion import Occlusion 93 from Occlusion import Occlusion
55 from add_background_image import AddBackground 94 from add_background_image import AddBackground
56 from affine_transform import AffineTransformation 95 from affine_transform import AffineTransformation
57 from ttf2jpg import ttf2jpg 96 from ttf2jpg import ttf2jpg
58 from pycaptcha.Facade import generateCaptcha 97 from Facade import generateCaptcha
59 98
60 if DEBUG: 99 if DEBUG:
61 from visualizer import Visualizer 100 from visualizer import Visualizer
62 # Either put the visualizer as in the MODULES_INSTANCES list 101 # Either put the visualizer as in the MODULES_INSTANCES list
63 # after each module you want to visualize, or in the 102 # after each module you want to visualize, or in the
252 291
253 292
254 ############################################################################## 293 ##############################################################################
255 # MAIN 294 # MAIN
256 295
def usage():
    """Print the run_pipeline.sh help message to standard output."""
    message = '''
Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...]
-m, --max-complexity: max complexity to generate for an image
-z, --probability-zero: probability of using complexity=0 for an image
-o, --output-file: full path to file to use for output of images
-p, --params-output-file: path to file to output params to
-x, --labels-output-file: path to file to output labels to
-f, --data-file: path to filetensor (.ft) data file (NIST)
-l, --label-file: path to filetensor (.ft) labels file (NIST labels)
-c, --ocr-file: path to filetensor (.ft) data file (OCR)
-d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels)
-a, --prob-font: probability of using a raw font image
-b, --prob-captcha: probability of using a captcha image
-g, --prob-ocr: probability of using an ocr image
'''
    # One parenthesised argument: same output from the print statement
    # and the print function.
    print(message)
273
274 # See run_pipeline.py
def get_argv():
    """Read the argument list from ARGS_FILE, one argument per line.

    Trailing whitespace, including the line terminator, is removed from
    each line before it is returned.
    """
    with open(ARGS_FILE) as handle:
        stripped = [raw.rstrip() for raw in handle]
    return stripped
279 296
280 # Might be called locally or through dbidispatch. In all cases it should be 297 # Might be called locally or through dbidispatch. In all cases it should be
281 # passed to the GIMP executable to be able to use GIMP filters. 298 # passed to the GIMP executable to be able to use GIMP filters.
282 # Ex: 299 # Ex:
283 def _main(): 300 def _main():
296 prob_font = 0.0 313 prob_font = 0.0
297 prob_captcha = 0.0 314 prob_captcha = 0.0
298 prob_ocr = 0.0 315 prob_ocr = 0.0
299 stop_after = None 316 stop_after = None
300 reload_mode = False 317 reload_mode = False
301
302 try:
303 opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=",
304 "stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr="])
305 except getopt.GetoptError, err:
306 # print help information and exit:
307 print str(err) # will print something like "option -a not recognized"
308 usage()
309 pdb.gimp_quit(0)
310 sys.exit(2)
311 318
312 for o, a in opts: 319 for o, a in opts:
313 if o in ('-m', '--max-complexity'): 320 if o in ('-m', '--max-complexity'):
314 max_complexity = float(a) 321 max_complexity = float(a)
315 assert max_complexity >= 0.0 and max_complexity <= 1.0 322 assert max_complexity >= 0.0 and max_complexity <= 1.0
338 prob_font = float(a) 345 prob_font = float(a)
339 elif o in ('-b', "--prob-captcha"): 346 elif o in ('-b', "--prob-captcha"):
340 prob_captcha = float(a) 347 prob_captcha = float(a)
341 elif o in ('-g', "--prob-ocr"): 348 elif o in ('-g', "--prob-ocr"):
342 prob_ocr = float(a) 349 prob_ocr = float(a)
350 elif o in ('-y', "--seed"):
351 pass
343 else: 352 else:
344 assert False, "unhandled option" 353 assert False, "unhandled option"
345 354
346 if output_file_path == None or params_output_file_path == None or labels_output_file_path == None: 355 if output_file_path == None or params_output_file_path == None or labels_output_file_path == None:
347 print "Must specify the three output files." 356 print "Must specify the three output files."