Mercurial > ift6266
comparison data_generation/pipeline/pipeline.py @ 261:6d16a2bf142b
Important bug fix in the pipeline: an iterator was always giving 0 complexity; 10% of the P07 dataset needs to be redone.
author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
---|---|
date | Wed, 17 Mar 2010 16:41:16 -0400 |
parents | dd2df78fcf47 |
children |
comparison
equal
deleted
inserted
replaced
256:bd7e50d56d80 | 261:6d16a2bf142b |
---|---|
62 -y, --seed: the job seed | 62 -y, --seed: the job seed |
63 -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations | 63 -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations |
64 ''' | 64 ''' |
65 | 65 |
66 try: | 66 try: |
67 opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", | 67 opts, args = getopt.getopt(get_argv(), "r:m:z:o:p:x:s:f:l:c:d:a:b:g:y:t:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", |
68 "stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="]) | 68 "stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed=","type="]) |
69 except getopt.GetoptError, err: | 69 except getopt.GetoptError, err: |
70 # print help information and exit: | 70 # print help information and exit: |
71 print str(err) # will print something like "option -a not recognized" | 71 print str(err) # will print something like "option -a not recognized" |
72 usage() | 72 usage() |
73 pdb.gimp_quit(0) | 73 pdb.gimp_quit(0) |
76 for o, a in opts: | 76 for o, a in opts: |
77 if o in ('-y','--seed'): | 77 if o in ('-y','--seed'): |
78 random.seed(int(a)) | 78 random.seed(int(a)) |
79 numpy.random.seed(int(a)) | 79 numpy.random.seed(int(a)) |
80 | 80 |
81 type_pipeline = 0 | |
81 for o, a in opts: | 82 for o, a in opts: |
82 if o in ('-t','--type'): | 83 if o in ('-t','--type'): |
83 type_pipeline = int(a) | 84 type_pipeline = int(a) |
84 else: | |
85 type_pipeline = 0 | |
86 | 85 |
87 if DEBUG_X: | 86 if DEBUG_X: |
88 import pylab | 87 import pylab |
89 pylab.ion() | 88 pylab.ion() |
90 | 89 |
179 param_idx = 0 | 178 param_idx = 0 |
180 mod_idx = 0 | 179 mod_idx = 0 |
181 for mod in self.modules: | 180 for mod in self.modules: |
182 # This used to be done _per batch_, | 181 # This used to be done _per batch_, |
183 # ie. out of the "for img" loop | 182 # ie. out of the "for img" loop |
184 complexity = complexity_iterator.next() | 183 complexity = complexity_iterator.next() |
185 #better to do a complexity sampling for each transformations in order to have more variability | 184 #better to do a complexity sampling for each transformations in order to have more variability |
186 #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time)) | 185 #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time)) |
187 #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall | 186 #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall |
188 #complexity | 187 #complexity |
189 self.params[global_idx, mod_idx] = complexity | 188 self.params[global_idx, mod_idx] = complexity |
213 hook.end_transform_callback(img) | 212 hook.end_transform_callback(img) |
214 | 213 |
215 def write_output(self, output_file_path, params_output_file_path, labels_output_file_path): | 214 def write_output(self, output_file_path, params_output_file_path, labels_output_file_path): |
216 with open(output_file_path, 'wb') as f: | 215 with open(output_file_path, 'wb') as f: |
217 ft.write(f, self.res_data) | 216 ft.write(f, self.res_data) |
218 | 217 |
218 #if type_pipeline == 0: #only needed for type 0 pipeline | |
219 numpy.save(params_output_file_path, self.params) | 219 numpy.save(params_output_file_path, self.params) |
220 | 220 |
221 with open(labels_output_file_path, 'wb') as f: | 221 with open(labels_output_file_path, 'wb') as f: |
222 ft.write(f, self.res_labels) | 222 ft.write(f, self.res_labels) |
223 | 223 |
224 | 224 |
225 ############################################################################## | 225 ############################################################################## |
230 # probability of generating 0 complexity, otherwise | 230 # probability of generating 0 complexity, otherwise |
231 # uniform over 0.0-max_complexity | 231 # uniform over 0.0-max_complexity |
232 def range_complexity_iterator(probability_zero, max_complexity): | 232 def range_complexity_iterator(probability_zero, max_complexity): |
233 assert max_complexity <= 1.0 | 233 assert max_complexity <= 1.0 |
234 n = numpy.random.uniform(0.0, 1.0) | 234 n = numpy.random.uniform(0.0, 1.0) |
235 n = 2.0 #hack to bug fix, having a min complexity is not necessary and we need the same seed... | |
235 while True: | 236 while True: |
236 if n < probability_zero: | 237 if n < probability_zero: |
237 yield 0.0 | 238 yield 0.0 |
238 else: | 239 else: |
239 yield numpy.random.uniform(0.0, max_complexity) | 240 yield numpy.random.uniform(0.0, max_complexity) |
371 elif o in ('-g', "--prob-ocr"): | 372 elif o in ('-g', "--prob-ocr"): |
372 prob_ocr = float(a) | 373 prob_ocr = float(a) |
373 elif o in ('-y', "--seed"): | 374 elif o in ('-y', "--seed"): |
374 pass | 375 pass |
375 elif o in ('-t', "--type"): | 376 elif o in ('-t', "--type"): |
376 type_pipeline = int(a) | 377 pass |
377 else: | 378 else: |
378 assert False, "unhandled option" | 379 assert False, "unhandled option" |
379 | 380 |
380 if output_file_path == None or params_output_file_path == None or labels_output_file_path == None: | 381 if output_file_path == None or params_output_file_path == None or labels_output_file_path == None: |
381 print "Must specify the three output files." | 382 print "Must specify the three output files." |