Mercurial > ift6266
diff data_generation/pipeline/pipeline.py @ 266:1e4e60ddadb1
Merge. Ah, et dans le dernier commit, j'avais oublié de mentionner que j'ai ajouté du code pour gérer l'isolation de différents clones pour rouler des expériences et modifier le code en même temps.
author | fsavard |
---|---|
date | Fri, 19 Mar 2010 10:56:16 -0400 |
parents | 6d16a2bf142b |
children |
line wrap: on
line diff
--- a/data_generation/pipeline/pipeline.py Fri Mar 19 10:54:39 2010 -0400
+++ b/data_generation/pipeline/pipeline.py Fri Mar 19 10:56:16 2010 -0400
@@ -10,6 +10,7 @@ import numpy import ift6266.data_generation.transformations.filetensor as ft import random +import copy # To debug locally, also call with -s 100 (to stop after ~100) # (otherwise we allocate all needed memory, might be loonnng and/or crash
@@ -59,11 +60,12 @@ -b, --prob-captcha: probability of using a captcha image -g, --prob-ocr: probability of using an ocr image -y, --seed: the job seed + -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations ''' try: - opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", -"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="]) + opts, args = getopt.getopt(get_argv(), "r:m:z:o:p:x:s:f:l:c:d:a:b:g:y:t:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", -"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed=","type="]) except getopt.GetoptError, err: # print help information and exit: print str(err) # will print something like "option -a not recognized"
@@ -76,6 +78,11 @@ random.seed(int(a)) numpy.random.seed(int(a)) +type_pipeline = 0 +for o, a in opts: + if o in ('-t','--type'): + type_pipeline = int(a) + if DEBUG_X: import pylab pylab.ion()
@@ -104,7 +111,17 @@ VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR, on_screen=False) ###---------------------order of transformation module -MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] +if type_pipeline == 0: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 0 +if type_pipeline == 1: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(False),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 5 + #we disable transformation corresponding to MODULE_INSTANCES[stop_idx:] but we still need to apply them on dummy images +in order to be sure to have the same random generator state than with the default pipeline. +This is not optimal (we do more calculus than necessary) but it is a quick hack to produce similar results than previous generation + + # These should have a "after_transform_callback(self, image)" method # (called after each call to transform_image in a module)
@@ -155,7 +172,7 @@ sys.stdout.flush() global_idx = img_no - + img = img.reshape(img_size) param_idx = 0
@@ -163,7 +180,7 @@ for mod in self.modules: # This used to be done _per batch_, # ie. out of the "for img" loop - complexity = complexity_iterator.next() + complexity = complexity_iterator.next() #better to do a complexity sampling for each transformations in order to have more variability #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time)) #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
@@ -174,8 +191,13 @@ p = mod.regenerate_parameters(complexity) self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p param_idx += len(p) - - img = mod.transform_image(img) + + if not(stop_idx) or stop_idx > mod_idx: + img = mod.transform_image(img) + else: + tmp = mod.transform_image(copy.copy(img)) + #this is done to be sure to have the same global random generator state + #we don't apply the transformation on the original image but on a copy in case of in-place transformations if should_hook_after_each: for hook in AFTER_EACH_MODULE_HOOK:
@@ -192,9 +214,10 @@ def write_output(self, output_file_path, params_output_file_path, labels_output_file_path): with open(output_file_path, 'wb') as f: ft.write(f, self.res_data) - + + #if type_pipeline == 0: #only needed for type 0 pipeline numpy.save(params_output_file_path, self.params) - + with open(labels_output_file_path, 'wb') as f: ft.write(f, self.res_labels)
@@ -209,6 +232,7 @@ def range_complexity_iterator(probability_zero, max_complexity): assert max_complexity <= 1.0 n = numpy.random.uniform(0.0, 1.0) + n = 2.0 #hack to bug fix, having a min complexity is not necessary and we need the same seed... while True: if n < probability_zero: yield 0.0
@@ -349,6 +373,8 @@ prob_ocr = float(a) elif o in ('-y', "--seed"): pass + elif o in ('-t', "--type"): + pass else: assert False, "unhandled option"