# HG changeset patch # User Xavier Glorot # Date 1268858476 14400 # Node ID 6d16a2bf142bd67440226d98fcb7703311005471 # Parent bd7e50d56d801b644d8e28e8773ec22067d22bed important bug fix in pipeline corresponding to an iterator always giving 0 complexity, need to redo 10% of the P07 dataset diff -r bd7e50d56d80 -r 6d16a2bf142b data_generation/pipeline/pipeline.py --- a/data_generation/pipeline/pipeline.py Wed Mar 17 14:04:12 2010 -0400 +++ b/data_generation/pipeline/pipeline.py Wed Mar 17 16:41:16 2010 -0400 @@ -64,8 +64,8 @@ ''' try: - opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", -"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="]) + opts, args = getopt.getopt(get_argv(), "r:m:z:o:p:x:s:f:l:c:d:a:b:g:y:t:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", +"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed=","type="]) except getopt.GetoptError, err: # print help information and exit: print str(err) # will print something like "option -a not recognized" @@ -78,11 +78,10 @@ random.seed(int(a)) numpy.random.seed(int(a)) +type_pipeline = 0 for o, a in opts: if o in ('-t','--type'): type_pipeline = int(a) - else: - type_pipeline = 0 if DEBUG_X: import pylab @@ -181,7 +180,7 @@ for mod in self.modules: # This used to be done _per batch_, # ie. out of the "for img" loop - complexity = complexity_iterator.next() + complexity = complexity_iterator.next() #better to do a complexity sampling for each transformations in order to have more variability #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time)) #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall @@ -215,9 +214,10 @@ def write_output(self, output_file_path, params_output_file_path, labels_output_file_path): with open(output_file_path, 'wb') as f: ft.write(f, self.res_data) - + + #if type_pipeline == 0: #only needed for type 0 pipeline numpy.save(params_output_file_path, self.params) - + with open(labels_output_file_path, 'wb') as f: ft.write(f, self.res_labels) @@ -232,6 +232,7 @@ def range_complexity_iterator(probability_zero, max_complexity): assert max_complexity <= 1.0 n = numpy.random.uniform(0.0, 1.0) + n = 2.0 #hack to bug fix, having a min complexity is not necessary and we need the same seed... while True: if n < probability_zero: yield 0.0 @@ -373,7 +374,7 @@ elif o in ('-y', "--seed"): pass elif o in ('-t', "--type"): - type_pipeline = int(a) + pass else: assert False, "unhandled option"