# HG changeset patch # User Xavier Glorot # Date 1268848635 14400 # Node ID dd2df78fcf47be786b7b577402c2b5b387a5392c # Parent 2024368a8d3d0ab3e7b359b0a3bd7ed74e40ebee added option to pipeline and gimp_script to produce NIST-friendly data diff -r 2024368a8d3d -r dd2df78fcf47 data_generation/pipeline/pipeline.py --- a/data_generation/pipeline/pipeline.py Tue Mar 16 12:14:10 2010 -0400 +++ b/data_generation/pipeline/pipeline.py Wed Mar 17 13:57:15 2010 -0400 @@ -10,6 +10,7 @@ import numpy import ift6266.data_generation.transformations.filetensor as ft import random +import copy # To debug locally, also call with -s 100 (to stop after ~100) # (otherwise we allocate all needed memory, might be loonnng and/or crash @@ -59,6 +60,7 @@ -b, --prob-captcha: probability of using a captcha image -g, --prob-ocr: probability of using an ocr image -y, --seed: the job seed + -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations ''' try: @@ -76,6 +78,12 @@ random.seed(int(a)) numpy.random.seed(int(a)) +for o, a in opts: + if o in ('-t','--type'): + type_pipeline = int(a) + else: + type_pipeline = 0 + if DEBUG_X: import pylab pylab.ion() @@ -104,7 +112,17 @@ VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR, on_screen=False) ###---------------------order of transformation module -MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] +if type_pipeline == 0: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 0 +if type_pipeline == 1: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(False),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 5 + 
#we disable the transformations corresponding to MODULE_INSTANCES[stop_idx:] but we still need to apply them to dummy images + #in order to be sure to have the same random generator state as with the default pipeline. + #This is not optimal (we do more computation than necessary) but it is a quick hack to produce results similar to the previous generation + + # These should have a "after_transform_callback(self, image)" method # (called after each call to transform_image in a module) @@ -155,7 +173,7 @@ sys.stdout.flush() global_idx = img_no - + img = img.reshape(img_size) param_idx = 0 @@ -174,8 +192,13 @@ p = mod.regenerate_parameters(complexity) self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p param_idx += len(p) - - img = mod.transform_image(img) + + if not(stop_idx) or stop_idx > mod_idx: + img = mod.transform_image(img) + else: + tmp = mod.transform_image(copy.copy(img)) + #this is done to be sure to have the same global random generator state + #we don't apply the transformation on the original image but on a copy in case of in-place transformations if should_hook_after_each: for hook in AFTER_EACH_MODULE_HOOK: @@ -349,6 +372,8 @@ prob_ocr = float(a) elif o in ('-y', "--seed"): pass + elif o in ('-t', "--type"): + type_pipeline = int(a) else: assert False, "unhandled option" diff -r 2024368a8d3d -r dd2df78fcf47 data_generation/transformations/gimp_script.py --- a/data_generation/transformations/gimp_script.py Tue Mar 16 12:14:10 2010 -0400 +++ b/data_generation/transformations/gimp_script.py Wed Mar 17 13:57:15 2010 -0400 @@ -38,7 +38,7 @@ return ['mblur_length', 'mblur_angle', 'pinch'] def regenerate_parameters(self, complexity): - if complexity and self.blur_bool: + if complexity: self.mblur_length = abs(int(round(numpy.random.normal(0, 3*complexity)))) else: self.mblur_length = 0 @@ -50,7 +50,7 @@ def transform_image(self, image): if self.mblur_length or self.pinch: setpix(image) - if self.mblur_length: + if 
self.mblur_length and self.blur_bool: pdb.plug_in_mblur(img, layer1, 0, self.mblur_length, self.mblur_angle, 0, 0) if self.pinch: pdb.plug_in_whirl_pinch(img, layer1, 0.0, self.pinch, 1.0)