# HG changeset patch # User Xavier Glorot # Date 1268849052 14400 # Node ID bd7e50d56d801b644d8e28e8773ec22067d22bed # Parent 17c72763d574b97ac6e138a41785c45f2d108940# Parent a491d3600a7765cc242c951cba90ee4ad88e6dd0 merge diff -r a491d3600a77 -r bd7e50d56d80 data_generation/pipeline/pipeline.py --- a/data_generation/pipeline/pipeline.py Wed Mar 17 10:21:57 2010 -0400 +++ b/data_generation/pipeline/pipeline.py Wed Mar 17 14:04:12 2010 -0400 @@ -10,6 +10,7 @@ import numpy import ift6266.data_generation.transformations.filetensor as ft import random +import copy # To debug locally, also call with -s 100 (to stop after ~100) # (otherwise we allocate all needed memory, might be loonnng and/or crash @@ -59,6 +60,7 @@ -b, --prob-captcha: probability of using a captcha image -g, --prob-ocr: probability of using an ocr image -y, --seed: the job seed + -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations ''' try: @@ -76,6 +78,12 @@ random.seed(int(a)) numpy.random.seed(int(a)) +for o, a in opts: + if o in ('-t','--type'): + type_pipeline = int(a) + else: + type_pipeline = 0 + if DEBUG_X: import pylab pylab.ion() @@ -104,7 +112,17 @@ VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR, on_screen=False) ###---------------------order of transformation module -MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] +if type_pipeline == 0: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 0 +if type_pipeline == 1: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(False),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 5 + #we disable transformation corresponding to MODULE_INSTANCES[stop_idx:] but we still need to apply them on dummy images + #in order to be sure to have the same random generator state than with the default pipeline. + #This is not optimal (we do more calculus than necessary) but it is a quick hack to produce similar results than previous generation + + # These should have a "after_transform_callback(self, image)" method # (called after each call to transform_image in a module) @@ -155,7 +173,7 @@ sys.stdout.flush() global_idx = img_no - + img = img.reshape(img_size) param_idx = 0 @@ -174,8 +192,13 @@ p = mod.regenerate_parameters(complexity) self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p param_idx += len(p) - - img = mod.transform_image(img) + + if not(stop_idx) or stop_idx > mod_idx: + img = mod.transform_image(img) + else: + tmp = mod.transform_image(copy.copy(img)) + #this is done to be sure to have the same global random generator state + #we don't apply the transformation on the original image but on a copy in case of in-place transformations if should_hook_after_each: for hook in AFTER_EACH_MODULE_HOOK: @@ -349,6 +372,8 @@ prob_ocr = float(a) elif o in ('-y', "--seed"): pass + elif o in ('-t', "--type"): + type_pipeline = int(a) else: assert False, "unhandled option" diff -r a491d3600a77 -r bd7e50d56d80 data_generation/transformations/gimp_script.py --- a/data_generation/transformations/gimp_script.py Wed Mar 17 10:21:57 2010 -0400 +++ b/data_generation/transformations/gimp_script.py Wed Mar 17 14:04:12 2010 -0400 @@ -38,7 +38,7 @@ return ['mblur_length', 'mblur_angle', 'pinch'] def regenerate_parameters(self, complexity): - if complexity and self.blur_bool: + if complexity: self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity)))) else: self.mblur_length = 0 @@ -50,7 +50,7 @@ def transform_image(self, image): if self.mblur_length or self.pinch: setpix(image) - if self.mblur_length: + if self.mblur_length and self.blur_bool: pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0) if self.pinch: pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0) diff -r a491d3600a77 -r bd7e50d56d80 scripts/launch_generate100.py --- a/scripts/launch_generate100.py Wed Mar 17 10:21:57 2010 -0400 +++ b/scripts/launch_generate100.py Wed Mar 17 14:04:12 2010 -0400 @@ -3,12 +3,17 @@ import os dir1 = "/data/lisa/data/ift6266h10/" -mach = "brams0c.iro.umontreal.ca,brams02.iro.umontreal.ca,brams03.iro.umontreal.ca,maggie22.iro.umontreal.ca" +mach = "maggie16.iro.umontreal.ca,maggie15.iro.umontreal.ca" for i,s in enumerate(['valid','test']): for j,c in enumerate([0.3,0.5,0.7,1]): l = str(c).replace('.','') os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P%s_%s_data.ft -p %sdata/P%s_%s_params -x %sdata/P%s_%s_labels.ft -f %s%s_data.ft -l %s%s_labels.ft -c %socr_%s_data.ft -d %socr_%s_labels.ft -m 0.3 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s %d -y %d" % (mach, dir1, l, s, dir1, l, s, dir1, l, s, dir1, s, dir1, s, dir1, s, dir1, s, [20000,80000][i], 200+i*4+j)) +#P07 for i in range(100): os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/P07_train%d_data.ft -p %sdata/P07_train%d_params -x %sdata/P07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i)) + +#PNIST07 +for i in range(100): + os.system("dbidispatch --condor --os=fc4,fc7,fc9 --machine=%s ./run_pipeline.sh -o %sdata/PNIST07_train%d_data.ft -p %sdata/PNIST07_train%d_params -x %sdata/PNIST07_train%d_labels.ft -f %strain_data.ft -l %strain_labels.ft -c %socr_train_data.ft -d %socr_train_labels.ft -m 0.7 -z 0.1 -a 0.1 -b 0.25 -g 0.25 -s 819200 -y %d -t %d" % (mach, dir1, i, dir1, i, dir1, i, dir1, dir1, dir1, dir1, 100+i,1))