diff data_generation/pipeline/pipeline.py @ 261:6d16a2bf142b

Important bug fix in the pipeline for an iterator that always gave 0 complexity; 10% of the P07 dataset needs to be redone.
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 17 Mar 2010 16:41:16 -0400
parents dd2df78fcf47
children
line wrap: on
line diff
--- a/data_generation/pipeline/pipeline.py	Wed Mar 17 14:04:12 2010 -0400
+++ b/data_generation/pipeline/pipeline.py	Wed Mar 17 16:41:16 2010 -0400
@@ -64,8 +64,8 @@
     '''
 
 try:
-    opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", 
-"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="])
+    opts, args = getopt.getopt(get_argv(), "r:m:z:o:p:x:s:f:l:c:d:a:b:g:y:t:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", 
+"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed=","type="])
 except getopt.GetoptError, err:
         # print help information and exit:
         print str(err) # will print something like "option -a not recognized"
@@ -78,11 +78,10 @@
         random.seed(int(a))
         numpy.random.seed(int(a))
 
+type_pipeline = 0
 for o, a in opts:
     if o in ('-t','--type'):
         type_pipeline = int(a)
-    else:
-        type_pipeline = 0
 
 if DEBUG_X:
     import pylab
@@ -181,7 +180,7 @@
             for mod in self.modules:
                 # This used to be done _per batch_,
                 # ie. out of the "for img" loop
-                complexity = complexity_iterator.next() 
+                complexity = complexity_iterator.next()    
                 #better to do a complexity sampling for each transformations in order to have more variability
                 #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time))
                 #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
@@ -215,9 +214,10 @@
     def write_output(self, output_file_path, params_output_file_path, labels_output_file_path):
         with open(output_file_path, 'wb') as f:
             ft.write(f, self.res_data)
-
+        
+        #if type_pipeline == 0: #only needed for type 0 pipeline
         numpy.save(params_output_file_path, self.params)
-
+        
         with open(labels_output_file_path, 'wb') as f:
             ft.write(f, self.res_labels)
                 
@@ -232,6 +232,7 @@
 def range_complexity_iterator(probability_zero, max_complexity):
     assert max_complexity <= 1.0
     n = numpy.random.uniform(0.0, 1.0)
+    n = 2.0 #hack to bug fix, having a min complexity is not necessary and we need the same seed...
     while True:
         if n < probability_zero:
             yield 0.0
@@ -373,7 +374,7 @@
         elif o in ('-y', "--seed"):
             pass
         elif o in ('-t', "--type"):
-            type_pipeline = int(a)
+            pass            
         else:
             assert False, "unhandled option"