Mercurial > ift6266
comparison transformations/pipeline.py @ 156:6f3b866c0182
On peut maintenant lancer le pipeline avec un seed donné ; les résultats sont alors déterministes
author | boulanni <nicolas_boulanger@hotmail.com> |
---|---|
date | Wed, 24 Feb 2010 19:12:01 -0500 |
parents | 4981c729149c |
children |
comparison
equal
deleted
inserted
replaced
155:7640cb31cf1f | 156:6f3b866c0182 |
---|---|
34 DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft' | 34 DEFAULT_NIST_PATH = '/data/lisa/data/ift6266h10/train_data.ft' |
35 DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft' | 35 DEFAULT_LABEL_PATH = '/data/lisa/data/ift6266h10/train_labels.ft' |
36 DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft' | 36 DEFAULT_OCR_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-shuffled.ft' |
37 DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft' | 37 DEFAULT_OCRLABEL_PATH = '/data/lisa/data/ocr_breuel/filetensor/unlv-corrected-2010-02-01-labels-shuffled.ft' |
38 ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE'] | 38 ARGS_FILE = os.environ['PIPELINE_ARGS_TMPFILE'] |
39 | |
40 # PARSE COMMAND LINE ARGUMENTS | |
41 def get_argv(): | |
42 with open(ARGS_FILE) as f: | |
43 args = [l.rstrip() for l in f.readlines()] | |
44 return args | |
45 | |
46 def usage(): | |
47 print ''' | |
48 Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...] | |
49 -m, --max-complexity: max complexity to generate for an image | |
50 -z, --probability-zero: probability of using complexity=0 for an image | |
51 -o, --output-file: full path to file to use for output of images | |
52 -p, --params-output-file: path to file to output params to | |
53 -x, --labels-output-file: path to file to output labels to | |
54 -f, --data-file: path to filetensor (.ft) data file (NIST) | |
55 -l, --label-file: path to filetensor (.ft) labels file (NIST labels) | |
56 -c, --ocr-file: path to filetensor (.ft) data file (OCR) | |
57 -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels) | |
58 -a, --prob-font: probability of using a raw font image | |
59 -b, --prob-captcha: probability of using a captcha image | |
60 -g, --prob-ocr: probability of using an ocr image | |
61 -y, --seed: the job seed | |
62 ''' | |
63 | |
64 try: | |
65 opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", | |
66 "stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="]) | |
67 except getopt.GetoptError, err: | |
68 # print help information and exit: | |
69 print str(err) # will print something like "option -a not recognized" | |
70 usage() | |
71 pdb.gimp_quit(0) | |
72 sys.exit(2) | |
73 | |
74 for o, a in opts: | |
75 if o in ('-y','--seed'): | |
76 random.seed(int(a)) | |
77 numpy.random.seed(int(a)) | |
39 | 78 |
40 if DEBUG_X: | 79 if DEBUG_X: |
41 import pylab | 80 import pylab |
42 pylab.ion() | 81 pylab.ion() |
43 | 82 |
53 from slant import Slant | 92 from slant import Slant |
54 from Occlusion import Occlusion | 93 from Occlusion import Occlusion |
55 from add_background_image import AddBackground | 94 from add_background_image import AddBackground |
56 from affine_transform import AffineTransformation | 95 from affine_transform import AffineTransformation |
57 from ttf2jpg import ttf2jpg | 96 from ttf2jpg import ttf2jpg |
58 from pycaptcha.Facade import generateCaptcha | 97 from Facade import generateCaptcha |
59 | 98 |
60 if DEBUG: | 99 if DEBUG: |
61 from visualizer import Visualizer | 100 from visualizer import Visualizer |
62 # Either put the visualizer as in the MODULES_INSTANCES list | 101 # Either put the visualizer as in the MODULES_INSTANCES list |
63 # after each module you want to visualize, or in the | 102 # after each module you want to visualize, or in the |
252 | 291 |
253 | 292 |
254 ############################################################################## | 293 ############################################################################## |
255 # MAIN | 294 # MAIN |
256 | 295 |
257 def usage(): | |
258 print ''' | |
259 Usage: run_pipeline.sh [-m ...] [-z ...] [-o ...] [-p ...] | |
260 -m, --max-complexity: max complexity to generate for an image | |
261 -z, --probability-zero: probability of using complexity=0 for an image | |
262 -o, --output-file: full path to file to use for output of images | |
263 -p, --params-output-file: path to file to output params to | |
264 -x, --labels-output-file: path to file to output labels to | |
265 -f, --data-file: path to filetensor (.ft) data file (NIST) | |
266 -l, --label-file: path to filetensor (.ft) labels file (NIST labels) | |
267 -c, --ocr-file: path to filetensor (.ft) data file (OCR) | |
268 -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels) | |
269 -a, --prob-font: probability of using a raw font image | |
270 -b, --prob-captcha: probability of using a captcha image | |
271 -g, --prob-ocr: probability of using an ocr image | |
272 ''' | |
273 | |
274 # See run_pipeline.py | |
275 def get_argv(): | |
276 with open(ARGS_FILE) as f: | |
277 args = [l.rstrip() for l in f.readlines()] | |
278 return args | |
279 | 296 |
280 # Might be called locally or through dbidispatch. In all cases it should be | 297 # Might be called locally or through dbidispatch. In all cases it should be |
281 # passed to the GIMP executable to be able to use GIMP filters. | 298 # passed to the GIMP executable to be able to use GIMP filters. |
282 # Ex: | 299 # Ex: |
283 def _main(): | 300 def _main(): |
296 prob_font = 0.0 | 313 prob_font = 0.0 |
297 prob_captcha = 0.0 | 314 prob_captcha = 0.0 |
298 prob_ocr = 0.0 | 315 prob_ocr = 0.0 |
299 stop_after = None | 316 stop_after = None |
300 reload_mode = False | 317 reload_mode = False |
301 | |
302 try: | |
303 opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", | |
304 "stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr="]) | |
305 except getopt.GetoptError, err: | |
306 # print help information and exit: | |
307 print str(err) # will print something like "option -a not recognized" | |
308 usage() | |
309 pdb.gimp_quit(0) | |
310 sys.exit(2) | |
311 | 318 |
312 for o, a in opts: | 319 for o, a in opts: |
313 if o in ('-m', '--max-complexity'): | 320 if o in ('-m', '--max-complexity'): |
314 max_complexity = float(a) | 321 max_complexity = float(a) |
315 assert max_complexity >= 0.0 and max_complexity <= 1.0 | 322 assert max_complexity >= 0.0 and max_complexity <= 1.0 |
338 prob_font = float(a) | 345 prob_font = float(a) |
339 elif o in ('-b', "--prob-captcha"): | 346 elif o in ('-b', "--prob-captcha"): |
340 prob_captcha = float(a) | 347 prob_captcha = float(a) |
341 elif o in ('-g', "--prob-ocr"): | 348 elif o in ('-g', "--prob-ocr"): |
342 prob_ocr = float(a) | 349 prob_ocr = float(a) |
350 elif o in ('-y', "--seed"): | |
351 pass | |
343 else: | 352 else: |
344 assert False, "unhandled option" | 353 assert False, "unhandled option" |
345 | 354 |
346 if output_file_path == None or params_output_file_path == None or labels_output_file_path == None: | 355 if output_file_path == None or params_output_file_path == None or labels_output_file_path == None: |
347 print "Must specify the three output files." | 356 print "Must specify the three output files." |