diff transformations/pipeline.py @ 144:c958941c1b9d

merge
author XavierMuller
date Tue, 23 Feb 2010 18:16:55 -0500
parents 4981c729149c
children 6f3b866c0182
line wrap: on
line diff
--- a/transformations/pipeline.py	Tue Feb 23 18:08:11 2010 -0500
+++ b/transformations/pipeline.py	Tue Feb 23 18:16:55 2010 -0500
@@ -55,6 +55,7 @@
 from add_background_image import AddBackground
 from affine_transform import AffineTransformation
 from ttf2jpg import ttf2jpg
+from pycaptcha.Facade import generateCaptcha
 
 if DEBUG:
     from visualizer import Visualizer
@@ -102,7 +103,7 @@
 
         self.res_data = numpy.empty((total, num_px), dtype=numpy.uint8)
         # +1 to store complexity
-        self.params = numpy.empty((total, self.num_params_stored+1))
+        self.params = numpy.empty((total, self.num_params_stored+len(self.modules)))
         self.res_labels = numpy.empty(total, dtype=numpy.int32)
 
     def run(self, img_iterator, complexity_iterator):
@@ -113,20 +114,26 @@
 
         for img_no, (img, label) in enumerate(img_iterator):
             sys.stdout.flush()
-            complexity = complexity_iterator.next()
-
+            
             global_idx = img_no
 
             img = img.reshape(img_size)
 
-            param_idx = 1
-            # store complexity along with other params
-            self.params[global_idx, 0] = complexity
+            param_idx = 0
+            mod_idx = 0
             for mod in self.modules:
                 # This used to be done _per batch_,
-                # ie. out of the "for img" loop                   
+                # ie. out of the "for img" loop
+                complexity = complexity_iterator.next() 
+                # Sample a complexity for each transformation in order to have more variability;
+                # otherwise a lot of images similar to the source are generated (i.e. when the complexity is close to 0, which happens 1/8 of the time).
+                # We save the complexity of each transformation; the sum of these complexities is a good indicator of the overall
+                # complexity.
+                self.params[global_idx, mod_idx] = complexity
+                mod_idx += 1
+                 
                 p = mod.regenerate_parameters(complexity)
-                self.params[global_idx, param_idx:param_idx+len(p)] = p
+                self.params[global_idx, param_idx+len(self.modules):param_idx+len(p)+len(self.modules)] = p
                 param_idx += len(p)
 
                 img = mod.transform_image(img)
@@ -213,13 +220,15 @@
         ocr_img = ft.read(nist.ocr_data)
         ocr_labels = ft.read(nist.ocr_labels)
     ttf = ttf2jpg()
+    L = [chr(ord('0')+x) for x in range(10)] + [chr(ord('A')+x) for x in range(26)] + [chr(ord('a')+x) for x in range(26)]
 
     for i in xrange(num_img):
         r = numpy.random.rand()
         if r <= prob_font:
             yield ttf.generate_image()
-        elif r <= prob_font + prob_captcha:
-            pass #get captcha
+        elif r <=prob_font + prob_captcha:
+            (arr, charac) = generateCaptcha(0,1)
+            yield arr.astype(numpy.float32)/255, L.index(charac[0])
         elif r <= prob_font + prob_captcha + prob_ocr:
             j = numpy.random.randint(len(ocr_labels))
             yield ocr_img[j].astype(numpy.float32)/255, ocr_labels[j]
@@ -259,7 +268,7 @@
     -d, --ocrlabel-file: path to filetensor (.ft) labels file (OCR labels)
     -a, --prob-font: probability of using a raw font image
     -b, --prob-captcha: probability of using a captcha image
-    -e, --prob-ocr: probability of using an ocr image
+    -g, --prob-ocr: probability of using an ocr image
     '''
 
 # See run_pipeline.py
@@ -291,7 +300,8 @@
     reload_mode = False
 
     try:
-        opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:e:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", "stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr="])
+        opts, args = getopt.getopt(get_argv(), "rm:z:o:p:x:s:f:l:c:d:a:b:g:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "labels-output-file=", 
+"stop-after=", "data-file=", "label-file=", "ocr-file=", "ocrlabel-file=", "prob-font=", "prob-captcha=", "prob-ocr="])
     except getopt.GetoptError, err:
         # print help information and exit:
         print str(err) # will print something like "option -a not recognized"
@@ -328,7 +338,7 @@
             prob_font = float(a)
         elif o in ('-b', "--prob-captcha"):
             prob_captcha = float(a)
-        elif o in ('-e', "--prob-ocr"):
+        elif o in ('-g', "--prob-ocr"):
             prob_ocr = float(a)
         else:
             assert False, "unhandled option"