Mercurial > ift6266
diff data_generation/pipeline/pipeline.py @ 266:1e4e60ddadb1
Merge. Ah, et dans le dernier commit, j'avais oublié de mentionner que j'ai ajouté du code pour gérer l'isolation de différents clones pour rouler des expériences et modifier le code en même temps.
author | fsavard |
---|---|
date | Fri, 19 Mar 2010 10:56:16 -0400 |
parents | 6d16a2bf142b |
children |
line wrap: on
line diff
--- a/data_generation/pipeline/pipeline.py Fri Mar 19 10:54:39 2010 -0400
+++ b/data_generation/pipeline/pipeline.py Fri Mar 19 10:56:16 2010 -0400
@@ -10,6 +10,7 @@ import numpy import ift6266.data_generation.transformations.filetensor as ft import random +import copy # To debug locally, also call with -s 100 (to stop after ~100) # (otherwise we allocate all needed memory, might be loonnng and/or crash
@@ -59,11 +60,12 @@ -b, --prob-captcha: probability of using a captcha image -g, --prob-ocr: probability of using an ocr image -y, --seed: the job seed + -t, --type: [default: 0:full transformations], 1:Nist-friendly transformations ''' try: - opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:y:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", -"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed="]) + opts, args = getopt.getopt(get_argv(), "r:m:z:o:p:x:s:f:l:c:d:a:b:g:y:t:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", -"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr=", "seed=","type="]) except getopt.GetoptError, err: # print help information and exit: print str(err) # will print something like "option -a not recognized"
@@ -76,6 +78,11 @@ random.seed(int(a)) numpy.random.seed(int(a)) +type_pipeline = 0 +for o, a in opts: + if o in ('-t','--type'): + type_pipeline = int(a) + if DEBUG_X: import pylab pylab.ion()
@@ -104,7 +111,17 @@ VISUALIZER = Visualizer(to_dir=DEBUG_OUTPUT_DIR, on_screen=False) ###---------------------order of transformation module -MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] +if type_pipeline == 0: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 0 +if type_pipeline == 1: + MODULE_INSTANCES = [Slant(),Thick(),AffineTransformation(),LocalElasticDistorter(),GIMP1(False),Rature(),Occlusion(), PermutPixel(),DistorsionGauss(),AddBackground(), PoivreSel(), BruitGauss(), Contrast()] + stop_idx = 5 + #we disable transformation corresponding to MODULE_INSTANCES[stop_idx:] but we still need to apply them on dummy images +in order to be sure to have the same random generator state than with the default pipeline. +This is not optimal (we do more calculus than necessary) but it is a quick hack to produce similar results than previous generation + + # These should have a "after_transform_callback(self, image)" method # (called after each call to transform_image in a module)
@@ -155,7 +172,7 @@ sys.stdout.flush() global_idx = img_no - + img = img.reshape(img_size) param_idx = 0
@@ -163,7 +180,7 @@ for mod in self.modules: # This used to be done _per batch_, # ie. out of the "for img" loop - complexity = complexity_iterator.next() + complexity = complexity_iterator.next() #better to do a complexity sampling for each transformations in order to have more variability #otherwise a lot of images similar to the source are generated (i.e. when complexity is close to 0 (1/8 of the time)) #we need to save the complexity of each transformations and the sum of these complexity is a good indicator of the overall
@@ -174,8 +191,13 @@ p = mod.regenerate_parameters(complexity) self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p param_idx += len(p) - - img = mod.transform_image(img) + + if not(stop_idx) or stop_idx > mod_idx: + img = mod.transform_image(img) + else: + tmp = mod.transform_image(copy.copy(img)) + #this is done to be sure to have the same global random generator state + #we don't apply the transformation on the original image but on a copy in case of in-place transformations if should_hook_after_each: for hook in AFTER_EACH_MODULE_HOOK:
@@ -192,9 +214,10 @@ def write_output(self, output_file_path, params_output_file_path, labels_output_file_path): with open(output_file_path, 'wb') as f: ft.write(f, self.res_data) - + + #if type_pipeline == 0: #only needed for type 0 pipeline numpy.save(params_output_file_path, self.params) - + with open(labels_output_file_path, 'wb') as f: ft.write(f, self.res_labels)
@@ -209,6 +232,7 @@ def range_complexity_iterator(probability_zero, max_complexity): assert max_complexity <= 1.0 n = numpy.random.uniform(0.0, 1.0) + n = 2.0 #hack to bug fix, having a min complexity is not necessary and we need the same seed... while True: if n < probability_zero: yield 0.0
@@ -349,6 +373,8 @@ prob_ocr = float(a) elif o in ('-y', "--seed"): pass + elif o in ('-t', "--type"): + pass else: assert False, "unhandled option"