diff data_generation/pipeline/pipeline.py @ 254:dd2df78fcf47

added option to pipeline and gimp_script to produce NIST-friendly data
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 17 Mar 2010 13:57:15 -0400
parents 992ca8035a4d
children 6d16a2bf142b
line wrap: on
line diff
--- a/data_generation/pipeline/pipeline.py	Tue Mar 16 12:14:10 2010 -0400
+++ b/data_generation/pipeline/pipeline.py	Wed Mar 17 13:57:15 2010 -0400
@@ -10,6 +10,7 @@
 import numpy
 import ift6266.data_generation.transformations.filetensor as ft
 import random
+import copy
 
 # To debug locally, also call with -s 100 (to stop after ~100)
 # (otherwise we allocate all needed memory, might be loonnng and/or crash
@@ -59,6 +60,7 @@
     -b, --prob-captcha: probability of using a captcha image
     -g, --prob-ocr: probability of using an ocr image
     -y, --seed: the job seed
+    -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations
     '''
 
 try:
@@ -76,6 +78,12 @@
         random.seed(int(a))
         numpy.random.seed(int(a))
 
+# Default to the full pipeline (0); only an explicit -t/--type option overrides it.
+type_pipeline = 0
+for o, a in opts:
+    if o in ('-t','--type'):
+        type_pipeline = int(a)
+
 if DEBUG_X:
     import pylab
     pylab.ion()
@@ -104,7 +112,17 @@
     VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR,  on_screen=False)
 
 ###---------------------order of transformation module
-MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+if type_pipeline == 0:
+    MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+    stop_idx = 0
+elif type_pipeline == 1:
+    MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(False),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+    # The transformations in MODULE_INSTANCES[stop_idx:] are disabled, but they are still applied to dummy images so
+    # that the random generator state matches the default pipeline (a quick hack: it costs extra computation).
+    stop_idx = 5
+else:
+    # Fail here rather than leave MODULE_INSTANCES and stop_idx undefined for an unknown type.
+    raise ValueError("unknown pipeline type %r (expected 0 or 1)" % type_pipeline)
 
 # These should have a "after_transform_callback(self, image)" method
 # (called after each call to transform_image in a module)
@@ -155,7 +173,7 @@
             sys.stdout.flush()
             
             global_idx = img_no
-
+            
             img = img.reshape(img_size)
 
             param_idx = 0
@@ -174,8 +192,13 @@
                 p = mod.regenerate_parameters(complexity)
                 self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p
                 param_idx += len(p)
-
-                img = mod.transform_image(img)
+                
+                if not(stop_idx) or stop_idx > mod_idx:  
+                    img = mod.transform_image(img)
+                else:
+                    tmp = mod.transform_image(copy.copy(img)) 
+                    #this is done to be sure to have the same global random generator state
+                    #we don't apply the transformation on the original image but on a copy in case of in-place transformations
 
                 if should_hook_after_each:
                     for hook in AFTER_EACH_MODULE_HOOK:
@@ -349,6 +372,8 @@
             prob_ocr = float(a)
         elif o in ('-y', "--seed"):
             pass
+        elif o in ('-t', "--type"):
+            type_pipeline = int(a)
         else:
             assert False, "unhandled option"