ift6266: data_generation/pipeline/pipeline.py comparison

comparison data_generation/pipeline/pipeline.py @ 261:6d16a2bf142b

Important bug fix in the pipeline for an iterator that always gave 0 complexity; 10% of the P07 dataset needs to be redone.

author	Xavier Glorot <glorotxa@iro.umontreal.ca>
date	Wed, 17 Mar 2010 16:41:16 -0400
parents	dd2df78fcf47
children

comparison

equal deleted inserted replaced

-:bd7e50d56d80
+:6d16a2bf142b
 -y, --seed: the job seed
 -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations
 '''
 try:
-opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=",
+opts, args = getopt.getopt(get_argv(), "r:m:z:o:p:x:s:f:l:c:d:a:b:g:y:t:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=",
-"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="])
+"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed=","type="])
 except getopt.GetoptError, err:
 # print help information and exit:
 print str(err) # will print something like "option -a not recognized"
 usage()
 pdb.gimp_quit(0)
 for o, a in opts:
 if o in ('-y','--seed'):
 random.seed(int(a))
 numpy.random.seed(int(a))
+type_pipeline = 0
 for o, a in opts:
 if o in ('-t','--type'):
 type_pipeline = int(a)
-else:
-type_pipeline = 0
 if DEBUG_X:
 import pylab
 pylab.ion()
 param_idx = 0
 mod_idx = 0
 for mod in self.modules:
 # This used to be done _per batch_,
 # ie. out of the "for img" loop
 complexity = complexity_iterator.next()
 #better to do a complexity sampling for each transformations in order to have more variability
 #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time))
 #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
 #complexity
 self.params[global_idx, mod_idx] = complexity
 hook.end_transform_callback(img)
 def write_output(self, output_file_path, params_output_file_path, labels_output_file_path):
 with open(output_file_path, 'wb') as f:
 ft.write(f, self.res_data)
+#if type_pipeline == 0: #only needed for type 0 pipeline
 numpy.save(params_output_file_path, self.params)
 with open(labels_output_file_path, 'wb') as f:
 ft.write(f, self.res_labels)
 ##############################################################################
 # probability of generating 0 complexity, otherwise
 # uniform over 0.0-max_complexity
 def range_complexity_iterator(probability_zero, max_complexity):
 assert max_complexity <= 1.0
 n = numpy.random.uniform(0.0, 1.0)
+n = 2.0 #hack to bug fix, having a min complexity is not necessary and we need the same seed...
 while True:
 if n < probability_zero:
 yield 0.0
 else:
 yield numpy.random.uniform(0.0, max_complexity)
 elif o in ('-g', "--prob-ocr"):
 prob_ocr = float(a)
 elif o in ('-y', "--seed"):
 pass
 elif o in ('-t', "--type"):
-type_pipeline = int(a)
+pass
 else:
 assert False, "unhandled option"
 if output_file_path == None or params_output_file_path == None or labels_output_file_path == None:
 print "Must specify the three output files."

Mercurial > ift6266

comparison data_generation/pipeline/pipeline.py @ 261:6d16a2bf142b