changeset 256:bd7e50d56d80

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 17 Mar 2010 14:04:12 -0400
parents 17c72763d574 (diff) a491d3600a77 (current diff)
children 0c0f0b3f6a93 6d16a2bf142b
files
diffstat 3 files changed, 37 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/data_generation/pipeline/pipeline.py	Wed Mar 17 10:21:57 2010 -0400
+++ b/data_generation/pipeline/pipeline.py	Wed Mar 17 14:04:12 2010 -0400
@@ -10,6 +10,7 @@
 import numpy
 import ift6266.data_generation.transformations.filetensor as ft
 import random
+import copy
 
 # To debug locally, also call with -s 100 (to stop after ~100)
 # (otherwise we allocate all needed memory, might be loonnng and/or crash
@@ -59,6 +60,7 @@
     -b, --prob-captcha: probability of using a captcha image
     -g, --prob-ocr: probability of using an ocr image
     -y, --seed: the job seed
+    -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations
     '''
 
 try:
@@ -76,6 +78,12 @@
         random.seed(int(a))
         numpy.random.seed(int(a))
 
+# Default to the full pipeline (0); only an explicit -t/--type overrides it.
+type_pipeline = 0
+for o, a in opts:
+    if o in ('-t','--type'):
+        type_pipeline = int(a)
+
 if DEBUG_X:
     import pylab
     pylab.ion()
@@ -104,7 +112,17 @@
     VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR,  on_screen=False)
 
 ###---------------------order of transformation module
-MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+if type_pipeline == 0:
+    MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+    stop_idx = 0
+if type_pipeline == 1:
+    MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(False),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()]
+    stop_idx = 5
+    # We disable the transformations in MODULE_INSTANCES[stop_idx:], but we still need to apply them to dummy images
+    # in order to keep the same random generator state as with the default pipeline.
+    # This is not optimal (we do more computation than necessary), but it is a quick hack to produce results similar to the previous generation.
+
+
 
 # These should have a "after_transform_callback(self, image)" method
 # (called after each call to transform_image in a module)
@@ -155,7 +173,7 @@
             sys.stdout.flush()
             
             global_idx = img_no
-
+            
             img = img.reshape(img_size)
 
             param_idx = 0
@@ -174,8 +192,13 @@
                 p = mod.regenerate_parameters(complexity)
                 self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p
                 param_idx += len(p)
-
-                img = mod.transform_image(img)
+                
+                if not(stop_idx) or stop_idx > mod_idx:  
+                    img = mod.transform_image(img)
+                else:
+                    tmp = mod.transform_image(copy.copy(img)) 
+                    #this is done to be sure to have the same global random generator state
+                    #we don't apply the transformation on the original image but on a copy in case of in-place transformations
 
                 if should_hook_after_each:
                     for hook in AFTER_EACH_MODULE_HOOK:
@@ -349,6 +372,8 @@
             prob_ocr = float(a)
         elif o in ('-y', "--seed"):
             pass
+        elif o in ('-t', "--type"):
+            type_pipeline = int(a)
         else:
             assert False, "unhandled option"
 
--- a/data_generation/transformations/gimp_script.py	Wed Mar 17 10:21:57 2010 -0400
+++ b/data_generation/transformations/gimp_script.py	Wed Mar 17 14:04:12 2010 -0400
@@ -38,7 +38,7 @@
         return ['mblur_length', 'mblur_angle', 'pinch']
     
     def regenerate_parameters(self, complexity):
-        if complexity and self.blur_bool:
+        if complexity:
            self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity))))
         else:
             self.mblur_length = 0
@@ -50,7 +50,7 @@
     def transform_image(self, image):
         if self.mblur_length or self.pinch:
             setpix(image)
-            if self.mblur_length:
+            if self.mblur_length and self.blur_bool:
                 pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0)
             if self.pinch:        
                 pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0)
--- a/scripts/launch_generate100.py	Wed Mar 17 10:21:57 2010 -0400
+++ b/scripts/launch_generate100.py	Wed Mar 17 14:04:12 2010 -0400
@@ -3,12 +3,17 @@
 import os
 dir1 = "/data/lisa/data/ift6266h10/"
 
-mach = "brams0c.iro.umontreal.ca,brams02.iro.umontreal.ca,brams03.iro.umontreal.ca,maggie22.iro.umontreal.ca"
+mach = "maggie16.iro.umontreal.ca,maggie15.iro.umontreal.ca"
 
 for i,s in enumerate(['valid','test']):
     for j,c in enumerate([0.3,0.5,0.7,1]):
         l = str(c).replace('.','')
         os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (mach, dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j))
 
+#P07
 for i in range(100):
     os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i))
+
+#PNIST07
+for i in range(100):
+    os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/PNIST07_train%d_data.ft -p %sdata/PNIST07_train%d_params -x %sdata/PNIST07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d -t %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i,1))