diff transformations/local_elastic_distortions.py @ 24:010e826b41e8

Modifications to elastic distortions: fixed an important bug in the distortions themselves (the result is now much nicer visually), made the interface conform to the Transformation standard, and added the ability to save a certain number of distortion fields so they can be reused when the complexity doesn't change
author fsavard <francois.savard@polymtl.ca>
date Fri, 29 Jan 2010 13:37:52 -0500
parents 8d1c37190122
children b67d729ebfe3
line wrap: on
line diff
--- a/transformations/local_elastic_distortions.py	Thu Jan 28 23:03:44 2010 -0600
+++ b/transformations/local_elastic_distortions.py	Fri Jan 29 13:37:52 2010 -0500
@@ -23,36 +23,151 @@
 import numpy.random
 import scipy.signal # convolve2d
 
-def raw_zeros(size):
+_TEST_DIR = "/home/francois/Desktop/dist_tests/"
+
+def _raw_zeros(size):
     return [[0 for i in range(size[1])] for j in range(size[0])]
 
+class ElasticDistortionParams():
+    def __init__(self, image_size, alpha=0.0, sigma=0.0):
+        self.image_size = image_size
+        self.alpha = alpha
+        self.sigma = sigma
+
+        h,w = self.image_size
+
+        self.matrix_tl_corners_rows = _raw_zeros((h,w))
+        self.matrix_tl_corners_cols = _raw_zeros((h,w))
+
+        self.matrix_tr_corners_rows = _raw_zeros((h,w))
+        self.matrix_tr_corners_cols = _raw_zeros((h,w))
+
+        self.matrix_bl_corners_rows = _raw_zeros((h,w))
+        self.matrix_bl_corners_cols = _raw_zeros((h,w))
+
+        self.matrix_br_corners_rows = _raw_zeros((h,w))
+        self.matrix_br_corners_cols = _raw_zeros((h,w))
+
+        # those will hold the precomputed ratios for
+        # bilinear interpolation
+        self.matrix_tl_multiply = numpy.zeros((h,w))
+        self.matrix_tr_multiply = numpy.zeros((h,w))
+        self.matrix_bl_multiply = numpy.zeros((h,w))
+        self.matrix_br_multiply = numpy.zeros((h,w))
+
+    def alpha_sigma(self):
+        return [self.alpha, self.sigma]
+
 class LocalElasticDistorter():
-    def __init__(self, image_size, kernel_size, sigma, alpha):
+    def __init__(self, image_size):
         self.image_size = image_size
-        self.kernel_size = kernel_size
-        self.sigma = sigma
-        self.alpha = alpha
-        self.c_alpha = int(math.ceil(alpha))
+
+        self.current_complexity = 0.0
+
+        # number of precomputed fields
+        # (principle: as complexity doesn't change often, we can
+        # precompute a certain number of fields for a given complexity,
+        # each with its own parameters. That way, we have good
+        # randomization, but we're much faster).
+        self.to_precompute = 50
+
+        # Both use ElasticDistortionParams
+        self.current_params = None
+        self.precomputed_params = []
+
+        # 
+        self.kernel_size = None
+        self.kernel = None
+
+        # set some defaults
+        self.regenerate_parameters(0.0)
 
-        self.kernel = self.gen_gaussian_kernel()
-        self.fields = None
-        self.regenerate_fields()
+    def get_settings_names(self):
+        return ['alpha', 'sigma']
+
+    def regenerate_parameters(self, complexity):
+        if abs(complexity - self.current_complexity) > 1e-4:
+            self.current_complexity = complexity
+
+            # complexity changed, fields must be regenerated
+            self.precomputed_params = []
+
+        if len(self.precomputed_params) <= self.to_precompute:
+            # not yet enough params generated, produce one more
+            # and append to list
+            new_params = self._initialize_new_params()
+            new_params = self._generate_fields(new_params)
+            self.current_params = new_params
+            self.precomputed_params.append(new_params)
+        else:
+            # if we have enough precomputed fields, just select one
+            # at random and set parameters to match what they were
+            # when the field was generated
+            idx = numpy.random.randint(0, len(self.precomputed_params))
+            self.current_params = self.precomputed_params[idx]
+
+        return self.current_params.alpha_sigma()
 
     # adapted from http://blenderartists.org/forum/showthread.php?t=163361
-    def gen_gaussian_kernel(self):
+    def _gen_gaussian_kernel(self, sigma):
+        # the kernel size DRAMATICALLY affects the time taken by
+        # the blur operation... so even though results are better
+        # with a bigger kernel, we need to compromise here:
+        # 1*s is very different from 2*s, but there's not much difference
+        # between 2*s and 4*s
+        ks = self.kernel_size
+        s = sigma
+        target_ks = (1.5*s, 1.5*s)
+        if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
+            # kernel size is good, ok, no need to regenerate
+            return
+        self.kernel_size = target_ks
         h,w = self.kernel_size
         a,b = h/2.0, w/2.0
         y,x = numpy.ogrid[0:w, 0:h]
-        s = self.sigma
         gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
         # Normalize so we don't reduce image intensity
-        return gauss/gauss.sum()
+        self.kernel = gauss/gauss.sum()
+
+    def _gen_distortion_field(self, params):
+        self._gen_gaussian_kernel(params.sigma)
+
+        # we add kernel_size on all four sides so blurring
+        # with the kernel produces a smoother result on borders
+        ks0 = self.kernel_size[0]
+        ks1 = self.kernel_size[1]
+        sz0 = self.image_size[1] + ks0
+        sz1 = self.image_size[0] + ks1
+        field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
+        field = scipy.signal.convolve2d(field, self.kernel, mode='same')
+
+        # crop only image_size in the middle
+        field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]]
+
+        return params.alpha * field
+        
 
-    def gen_distortion_field(self):
-        field = numpy.random.uniform(-1.0, 1.0, self.image_size)
-        return scipy.signal.convolve2d(field, self.kernel, mode='same')
+    def _initialize_new_params(self):
+        params = ElasticDistortionParams(self.image_size)
+
+        cpx = self.current_complexity
+        # make the complexity progress a bit faster
+        # while keeping the extremes at 0.0 and 1.0
+        cpx = cpx ** (1./3.)
 
-    def regenerate_fields(self):
+        # the smaller the alpha, the closer to their own position the
+        # pixels are fetched from; a max of 10 is reasonable
+        params.alpha = cpx * 10.0
+
+        # the bigger the sigma, the smoother is the distortion
+        # max of 1 is "reasonable", but produces VERY noisy results
+        # And the bigger the sigma, the bigger the blur kernel, and the
+        # slower the field generation, btw.
+        params.sigma = 10.0 - (7.0 * cpx)
+
+        return params
+
+    def _generate_fields(self, params):
         '''
         Here's how the code works:
         - We first generate "distortion fields" for x and y with these steps:
@@ -74,110 +189,106 @@
             ratios for the bilinear interpolation.
         '''
 
-        self.fields = [None, None]
-        self.fields[0] = self.alpha*self.gen_distortion_field()
-        self.fields[1] = self.alpha*self.gen_distortion_field()
+        p = params
 
-        #import pylab
-        #pylab.imshow(self.fields[0])
+        dist_fields = [None, None]
+        dist_fields[0] = self._gen_distortion_field(params)
+        dist_fields[1] = self._gen_distortion_field(params)
+
+        #pylab.imshow(dist_fields[0])
         #pylab.show()
 
         # regenerate distortion index matrices
         # "_rows" are row indices
         # "_cols" are column indices
         # (separated due to the way fancy indexing works in numpy)
-        h,w = self.image_size
-
-        self.matrix_tl_corners_rows = raw_zeros((h,w))
-        self.matrix_tl_corners_cols = raw_zeros((h,w))
-
-        self.matrix_tr_corners_rows = raw_zeros((h,w))
-        self.matrix_tr_corners_cols = raw_zeros((h,w))
-
-        self.matrix_bl_corners_rows = raw_zeros((h,w))
-        self.matrix_bl_corners_cols = raw_zeros((h,w))
-
-        self.matrix_br_corners_rows = raw_zeros((h,w))
-        self.matrix_br_corners_cols = raw_zeros((h,w))
-
-        # those will hold the precomputed ratios for
-        # bilinear interpolation
-        self.matrix_tl_multiply = numpy.zeros((h,w))
-        self.matrix_tr_multiply = numpy.zeros((h,w))
-        self.matrix_bl_multiply = numpy.zeros((h,w))
-        self.matrix_br_multiply = numpy.zeros((h,w))
+        h,w = p.image_size
 
         for y in range(h):
-            for x in range(w):
-                distort_x = self.fields[0][y,x]
-                distort_y = self.fields[1][y,x]
-                f_dy = int(math.floor(distort_y))
-                f_dx = int(math.floor(distort_x))
-                y0 = y+f_dy
-                x0 = x+f_dx
-                index_tl = [y0, x0]
-                index_tr = [y0, x0+1]
-                index_bl = [y0+1, x0]
-                index_br = [y0+1, x0+1]
-                x_ratio = abs(distort_x-f_dx) # ratio of left vs right (for bilinear)
-                y_ratio = abs(distort_y-f_dy) # ratio of top vs bottom
+            for x in range(w): 
+                distort_x = dist_fields[0][y,x]
+                distort_y = dist_fields[1][y,x]
+
+                # the "target" is the coordinate we fetch color data from
+                # (in the original image)
+                # target_left and _top are the rounded coordinate on the
+                # left/top of this target (float) coordinate
+                target_pixel = (y+distort_y, x+distort_x)
+
+                target_left = int(math.floor(x + distort_x))
+                target_top = int(math.floor(y + distort_y))
+
+                index_tl = [target_top, target_left]
+                index_tr = [target_top, target_left+1]
+                index_bl = [target_top+1, target_left]
+                index_br = [target_top+1, target_left+1]
+
+                # x_ratio is the weight given to the left pixels
+                # y_ratio is the weight given to the top pixels
+                # (in the bilinear combination)
+                y_ratio = 1.0 - (target_pixel[0] - target_top)
+                x_ratio = 1.0 - (target_pixel[1] - target_left)
 
                 # We use a default background color of 0 for displacements
                 # outside of boundaries of the image.
 
                 # if top left outside bounds
                 if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w: 
-                    self.matrix_tl_corners_rows[y][x] = 0
-                    self.matrix_tl_corners_cols[y][x] = 0
-                    self.matrix_tl_multiply[y,x] = 0
+                    p.matrix_tl_corners_rows[y][x] = 0
+                    p.matrix_tl_corners_cols[y][x] = 0
+                    p.matrix_tl_multiply[y,x] = 0
                 else:
-                    self.matrix_tl_corners_rows[y][x] = index_tl[0]
-                    self.matrix_tl_corners_cols[y][x] = index_tl[1]
-                    self.matrix_tl_multiply[y,x] = x_ratio*y_ratio
-
+                    p.matrix_tl_corners_rows[y][x] = index_tl[0]
+                    p.matrix_tl_corners_cols[y][x] = index_tl[1]
+                    p.matrix_tl_multiply[y,x] = x_ratio*y_ratio
 
                 # if top right outside bounds
                 if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
-                    self.matrix_tr_corners_rows[y][x] = 0
-                    self.matrix_tr_corners_cols[y][x] = 0
-                    self.matrix_tr_multiply[y,x] = 0
+                    p.matrix_tr_corners_rows[y][x] = 0
+                    p.matrix_tr_corners_cols[y][x] = 0
+                    p.matrix_tr_multiply[y,x] = 0
                 else:
-                    self.matrix_tr_corners_rows[y][x] = index_tr[0]
-                    self.matrix_tr_corners_cols[y][x] = index_tr[1]
-                    self.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
+                    p.matrix_tr_corners_rows[y][x] = index_tr[0]
+                    p.matrix_tr_corners_cols[y][x] = index_tr[1]
+                    p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
 
                 # if bottom left outside bounds
                 if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
-                    self.matrix_bl_corners_rows[y][x] = 0
-                    self.matrix_bl_corners_cols[y][x] = 0
-                    self.matrix_bl_multiply[y,x] = 0
+                    p.matrix_bl_corners_rows[y][x] = 0
+                    p.matrix_bl_corners_cols[y][x] = 0
+                    p.matrix_bl_multiply[y,x] = 0
                 else:
-                    self.matrix_bl_corners_rows[y][x] = index_bl[0]
-                    self.matrix_bl_corners_cols[y][x] = index_bl[1]
-                    self.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
+                    p.matrix_bl_corners_rows[y][x] = index_bl[0]
+                    p.matrix_bl_corners_cols[y][x] = index_bl[1]
+                    p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
 
                 # if bottom right outside bounds
                 if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
-                    self.matrix_br_corners_rows[y][x] = 0
-                    self.matrix_br_corners_cols[y][x] = 0
-                    self.matrix_br_multiply[y,x] = 0
+                    p.matrix_br_corners_rows[y][x] = 0
+                    p.matrix_br_corners_cols[y][x] = 0
+                    p.matrix_br_multiply[y,x] = 0
                 else:
-                    self.matrix_br_corners_rows[y][x] = index_br[0]
-                    self.matrix_br_corners_cols[y][x] = index_br[1]
-                    self.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
+                    p.matrix_br_corners_rows[y][x] = index_br[0]
+                    p.matrix_br_corners_cols[y][x] = index_br[1]
+                    p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
+
+        # not really necessary, but anyway
+        return p
 
-    def distort_image(self, image):
+    def transform_image(self, image):
+        p = self.current_params
+
         # index pixels to get the 4 corners for bilinear combination
-        tl_pixels = image[self.matrix_tl_corners_rows, self.matrix_tl_corners_cols]
-        tr_pixels = image[self.matrix_tr_corners_rows, self.matrix_tr_corners_cols]
-        bl_pixels = image[self.matrix_bl_corners_rows, self.matrix_bl_corners_cols]
-        br_pixels = image[self.matrix_br_corners_rows, self.matrix_br_corners_cols]
+        tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
+        tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
+        bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
+        br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]
 
         # bilinear ratios, elemwise multiply
-        tl_pixels = numpy.multiply(tl_pixels, self.matrix_tl_multiply)
-        tr_pixels = numpy.multiply(tr_pixels, self.matrix_tr_multiply)
-        bl_pixels = numpy.multiply(bl_pixels, self.matrix_bl_multiply)
-        br_pixels = numpy.multiply(br_pixels, self.matrix_br_multiply)
+        tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
+        tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
+        bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
+        br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)
 
         # sum to finish bilinear combination
         return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0)
@@ -193,10 +304,85 @@
     return (img / 255.0).astype('float')
 
 def _specific_test():
-    img = _load_image("tests/d.png")
-    dist = LocalElasticDistorter((32,32), (15,15), 9.0, 5.0)
-    dist.distort_image(img)
+    imgpath = os.path.join(_TEST_DIR, "d.png")
+    img = _load_image(imgpath)
+    dist = LocalElasticDistorter((32,32))
+    print dist.regenerate_parameters(0.5)
+    img = dist.distort_image(img)
+    pylab.imshow(img)
+    pylab.show()
+
+def _complexity_tests():
+    imgpath = os.path.join(_TEST_DIR, "d.png")
+    dist = LocalElasticDistorter((32,32))
+    orig_img = _load_image(imgpath)
+    html_content = '''<html><body>Original:<br/><img src='d.png'>'''
+    for complexity in numpy.arange(0.0, 1.1, 0.1):
+        html_content += '<br/>Complexity: ' + str(complexity) + '<br/>'
+        for i in range(10):
+            t1 = time.time()
+            dist.regenerate_parameters(complexity)
+            t2 = time.time()
+            print "diff", t2-t1
+            img = dist.transform_image(orig_img)
+            filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
+            new_path = os.path.join(_TEST_DIR, filename)
+            _save_image(img, new_path)
+            html_content += '<img src="' + filename + '">'
+    html_content += "</body></html>"
+    html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
+    html_file.write(html_content)
+    html_file.close()
+    
+def _complexity_benchmark():
+    imgpath = os.path.join(_TEST_DIR, "d.png")
+    dist = LocalElasticDistorter((32,32))
+    orig_img = _load_image(imgpath)
 
+    # time the first 10
+    t1 = time.time()
+    for i in range(10):
+        dist.regenerate_parameters(0.2)
+        img = dist.transform_image(orig_img)
+    t2 = time.time()
+
+    print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10
+
+    # time the next 40
+    t1 = time.time()
+    for i in range(40):
+        dist.regenerate_parameters(0.2)
+        img = dist.transform_image(orig_img)
+    t2 = time.time()
+   
+    print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40
+
+    # time the next 50
+    t1 = time.time()
+    for i in range(50):
+        dist.regenerate_parameters(0.2)
+        img = dist.transform_image(orig_img)
+    t2 = time.time()
+   
+    print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50
+
+    # time the next 1000 
+    t1 = time.time()
+    for i in range(1000):
+        dist.regenerate_parameters(0.2)
+        img = dist.transform_image(orig_img)
+    t2 = time.time()
+   
+    print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
+
+
+
+def _save_image(img, path):
+    img2 = Image.fromarray((img * 255).astype('uint8'), "L")
+    img2.save(path)
+
+# TODO: reformat to follow the new class... it is a function of complexity now
+'''
 def _distorter_tests():
     #import pylab
     #pylab.imshow(img)
@@ -206,14 +392,16 @@
         img = _load_image("tests/" + letter + ".png")
         for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
             for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
-                id = LocalElasticDistorter((32,32), (15,15), sigma, alpha)
+                id = LocalElasticDistorter((32,32))
                 img2 = id.distort_image(img)
                 img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
                 img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
+'''
 
 def _benchmark():
     img = _load_image("tests/d.png")
-    dist = LocalElasticDistorter((32,32), (10,10), 5.0, 5.0)
+    dist = LocalElasticDistorter((32,32))
+    dist.regenerate_parameters(0.0)
     import time
     t1 = time.time()
     for i in range(10000):
@@ -225,9 +413,14 @@
     print "avg", 10000/(t2-t1)
 
 if __name__ == '__main__':
+    import time
+    import pylab
     import Image
-    _distorter_tests()
+    import os.path
+    #_distorter_tests()
     #_benchmark()
     #_specific_test()
+    #_complexity_tests()
+    _complexity_benchmark()