ift6266: transformations/local_elastic_distortions.py comparison

comparison transformations/local_elastic_distortions.py @ 24:010e826b41e8

Modifications to elastic distortions: fixed an important bug in the distortions themselves (the result is now much nicer visually), made the interface conform to the Transformation standard, and added the ability to save a certain number of distortion fields so they can be reused when the complexity doesn't change

author	fsavard <francois.savard@polymtl.ca>
date	Fri, 29 Jan 2010 13:37:52 -0500
parents	8d1c37190122
children	b67d729ebfe3

comparison

equal deleted inserted replaced

-:afdd41db8152
+:010e826b41e8
 import math
 import numpy
 import numpy.random
 import scipy.signal # convolve2d
-def raw_zeros(size):
+_TEST_DIR = "/home/francois/Desktop/dist_tests/"
+def _raw_zeros(size):
 # Build a size[0] x size[1] nested Python list (list of rows) filled
 # with the integer 0.
 return [[0 for i in range(size[1])] for j in range(size[0])]
+class ElasticDistortionParams():
 # Container for one distortion setting (alpha, sigma) plus the per-pixel
 # lookup tables built for it. For each of the four corners used in the
 # bilinear combination (tl, tr, bl, br) it holds a table of row indices,
 # a table of column indices and a matrix of weights, all of size
 # image_size.
+def __init__(self, image_size, alpha=0.0, sigma=0.0):
+self.image_size = image_size
+self.alpha = alpha
+self.sigma = sigma
+h,w = self.image_size
 # index tables are nested Python lists, initialised to 0
+self.matrix_tl_corners_rows = _raw_zeros((h,w))
+self.matrix_tl_corners_cols = _raw_zeros((h,w))
+self.matrix_tr_corners_rows = _raw_zeros((h,w))
+self.matrix_tr_corners_cols = _raw_zeros((h,w))
+self.matrix_bl_corners_rows = _raw_zeros((h,w))
+self.matrix_bl_corners_cols = _raw_zeros((h,w))
+self.matrix_br_corners_rows = _raw_zeros((h,w))
+self.matrix_br_corners_cols = _raw_zeros((h,w))
+# those will hold the precomputed ratios (weights) for
+# bilinear interpolation; numpy arrays, initialised to 0
+self.matrix_tl_multiply = numpy.zeros((h,w))
+self.matrix_tr_multiply = numpy.zeros((h,w))
+self.matrix_bl_multiply = numpy.zeros((h,w))
+self.matrix_br_multiply = numpy.zeros((h,w))
+def alpha_sigma(self):
 # Return the two settings as the list [alpha, sigma].
+return [self.alpha, self.sigma]
 class LocalElasticDistorter():
-def __init__(self, image_size, kernel_size, sigma, alpha):
+def __init__(self, image_size):
 # New revision: the constructor takes only image_size. The kernel
 # attributes start as None, the cache of precomputed params starts
 # empty, and regenerate_parameters(0.0) is called at the end to install
 # a first set of parameters.
 self.image_size = image_size
-self.kernel_size = kernel_size
-self.sigma = sigma
+self.current_complexity = 0.0
-self.alpha = alpha
-self.c_alpha = int(math.ceil(alpha))
+# number of precomputed fields
+# (principle: as complexity doesn't change often, we can
-self.kernel = self.gen_gaussian_kernel()
+# precompute a certain number of fields for a given complexity,
-self.fields = None
+# each with its own parameters. That way, we have good
-self.regenerate_fields()
+# randomization, but we're much faster).
+self.to_precompute = 50
+# Both hold ElasticDistortionParams (a single one / a list of them)
+self.current_params = None
+self.precomputed_params = []
+# kernel state, filled in later by _gen_gaussian_kernel
+self.kernel_size = None
+self.kernel = None
+# set some defaults
+self.regenerate_parameters(0.0)
+def get_settings_names(self):
 # Names of the two values returned by regenerate_parameters(), in the
 # same order as ElasticDistortionParams.alpha_sigma().
+return ['alpha', 'sigma']
+def regenerate_parameters(self, complexity):
 # Choose the ElasticDistortionParams stored in self.current_params for
 # the given complexity and return its [alpha, sigma] list.
 # A complexity that differs from the current one by more than 1e-4
 # empties the cache of precomputed params.
+if abs(complexity - self.current_complexity) > 1e-4:
+self.current_complexity = complexity
+# complexity changed, fields must be regenerated
+self.precomputed_params = []
 # NOTE(review): because of "<=", the cache grows to to_precompute + 1
 # entries before the reuse branch below is taken - confirm intended.
+if len(self.precomputed_params) <= self.to_precompute:
+# not yet enough params generated, produce one more
+# and append to list
+new_params = self._initialize_new_params()
+new_params = self._generate_fields(new_params)
+self.current_params = new_params
+self.precomputed_params.append(new_params)
+else:
+# if we have enough precomputed fields, just select one
+# at random and set parameters to match what they were
+# when the field was generated
+idx = numpy.random.randint(0, len(self.precomputed_params))
+self.current_params = self.precomputed_params[idx]
+return self.current_params.alpha_sigma()
 # adapted from http://blenderartists.org/forum/showthread.php?t=163361
-def gen_gaussian_kernel(self):
+def _gen_gaussian_kernel(self, sigma):
 # New revision: stores the normalized kernel in self.kernel instead of
 # returning it. Returns early, leaving self.kernel untouched, when
 # self.kernel_size already equals (1.5*sigma, 1.5*sigma).
+# the kernel size can change DRAMATICALLY the time
+# for the blur operation... so even though results are better
+# with a bigger kernel, we need to compromise here
+# 1*s is very different from 2*s, but there's not much difference
+# between 2*s and 4*s
+ks = self.kernel_size
+s = sigma
 # NOTE(review): target_ks holds floats (1.5*s); they are used below as
 # numpy.ogrid slice bounds and later as kernel_size by the caller.
+target_ks = (1.5*s, 1.5*s)
+if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
+# kernel size is good, ok, no need to regenerate
+return
+self.kernel_size = target_ks
 h,w = self.kernel_size
 a,b = h/2.0, w/2.0
 y,x = numpy.ogrid[0:w, 0:h]
-s = self.sigma
 gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
 # Normalize so we don't reduce image intensity
-return gauss/gauss.sum()
+self.kernel = gauss/gauss.sum()
-def gen_distortion_field(self):
+def _gen_distortion_field(self, params):
 # New revision: makes sure the kernel matches params.sigma, draws
 # uniform noise in [-1, 1] on an enlarged grid, blurs it with
 # self.kernel, crops a window from the result and returns it scaled by
 # params.alpha.
-field = numpy.random.uniform(-1.0, 1.0, self.image_size)
+self._gen_gaussian_kernel(params.sigma)
-return scipy.signal.convolve2d(field, self.kernel, mode='same')
+# we add kernel_size on all four sides so blurring
-def regenerate_fields(self):
+# with the kernel produces a smoother result on borders
+ks0 = self.kernel_size[0]
+ks1 = self.kernel_size[1]
 # NOTE(review): the kernel size is added once per axis here, not once
 # per side, and sz0 is built from image_size[1] while the crop below
 # indexes the first axis with image_size[0] - confirm, especially for
 # non-square images.
+sz0 = self.image_size[1] + ks0
+sz1 = self.image_size[0] + ks1
+field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
+field = scipy.signal.convolve2d(field, self.kernel, mode='same')
+# crop only image_size in the middle
+field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]]
+return params.alpha * field
+def _initialize_new_params(self):
 # Create a fresh ElasticDistortionParams for self.image_size and derive
 # its alpha and sigma from self.current_complexity.
+params = ElasticDistortionParams(self.image_size)
+cpx = self.current_complexity
+# make the complexity progress a bit faster
+# while keeping the extremes at 0.0 and 1.0
+cpx = cpx ** (1./3.)
+# the smaller the alpha, the closer the fetched pixels are
+# a max of 10 is reasonable
+params.alpha = cpx * 10.0
+# the bigger the sigma, the smoother is the distortion
+# max of 1 is "reasonable", but produces VERY noisy results
+# And the bigger the sigma, the bigger the blur kernel, and the
+# slower the field generation, btw.
+params.sigma = 10.0 - (7.0 * cpx)
+return params
+def _generate_fields(self, params):
 '''
 Here's how the code works:
 - We first generate "distortion fields" for x and y with these steps:
 - Uniform noise over [-1, 1] in a matrix of size (h,w)
 - Blur with a Gaussian kernel of spread sigma
 pixels for each new pixel.
 - Then I multiply those extracted nearby points by precomputed
 ratios for the bilinear interpolation.
 '''
 # New revision: the lookup tables are written into the
 # ElasticDistortionParams object passed in (instead of attributes on
 # self), and that same object is returned.
-self.fields = [None, None]
+p = params
-self.fields[0] = self.alpha*self.gen_distortion_field()
-self.fields[1] = self.alpha*self.gen_distortion_field()
+dist_fields = [None, None]
+dist_fields[0] = self._gen_distortion_field(params)
-#import pylab
+dist_fields[1] = self._gen_distortion_field(params)
-#pylab.imshow(self.fields[0])
+#pylab.imshow(dist_fields[0])
 #pylab.show()
 # regenerate distortion index matrices
 # "_rows" are row indices
 # "_cols" are column indices
 # (separated due to the way fancy indexing works in numpy)
-h,w = self.image_size
+h,w = p.image_size
-self.matrix_tl_corners_rows = raw_zeros((h,w))
-self.matrix_tl_corners_cols = raw_zeros((h,w))
-self.matrix_tr_corners_rows = raw_zeros((h,w))
-self.matrix_tr_corners_cols = raw_zeros((h,w))
-self.matrix_bl_corners_rows = raw_zeros((h,w))
-self.matrix_bl_corners_cols = raw_zeros((h,w))
-self.matrix_br_corners_rows = raw_zeros((h,w))
-self.matrix_br_corners_cols = raw_zeros((h,w))
-# those will hold the precomputed ratios for
-# bilinear interpolation
-self.matrix_tl_multiply = numpy.zeros((h,w))
-self.matrix_tr_multiply = numpy.zeros((h,w))
-self.matrix_bl_multiply = numpy.zeros((h,w))
-self.matrix_br_multiply = numpy.zeros((h,w))
 for y in range(h):
 for x in range(w):
-distort_x = self.fields[0][y,x]
+distort_x = dist_fields[0][y,x]
-distort_y = self.fields[1][y,x]
+distort_y = dist_fields[1][y,x]
-f_dy = int(math.floor(distort_y))
-f_dx = int(math.floor(distort_x))
+# the "target" is the coordinate we fetch color data from
-y0 = y+f_dy
+# (in the original image)
-x0 = x+f_dx
+# target_left and _top are the rounded coordinate on the
-index_tl = [y0, x0]
+# left/top of this target (float) coordinate
-index_tr = [y0, x0+1]
+target_pixel = (y+distort_y, x+distort_x)
-index_bl = [y0+1, x0]
-index_br = [y0+1, x0+1]
+target_left = int(math.floor(x + distort_x))
-x_ratio = abs(distort_x-f_dx) # ratio of left vs right (for bilinear)
+target_top = int(math.floor(y + distort_y))
-y_ratio = abs(distort_y-f_dy) # ratio of top vs bottom
+index_tl = [target_top, target_left]
+index_tr = [target_top, target_left+1]
+index_bl = [target_top+1, target_left]
+index_br = [target_top+1, target_left+1]
+# x_ratio is the ratio of importance of left pixels
+# y_ratio is the ratio of importance of top pixels
+# (in bilinear combination)
+y_ratio = 1.0 - (target_pixel[0] - target_top)
+x_ratio = 1.0 - (target_pixel[1] - target_left)
 # We use a default background color of 0 for displacements
 # outside of boundaries of the image.
 # (an out-of-bounds corner is pointed at pixel (0, 0) with weight 0,
 # so it adds nothing to the final sum)
 # if top left outside bounds
 if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w:
-self.matrix_tl_corners_rows[y][x] = 0
+p.matrix_tl_corners_rows[y][x] = 0
-self.matrix_tl_corners_cols[y][x] = 0
+p.matrix_tl_corners_cols[y][x] = 0
-self.matrix_tl_multiply[y,x] = 0
+p.matrix_tl_multiply[y,x] = 0
 else:
-self.matrix_tl_corners_rows[y][x] = index_tl[0]
+p.matrix_tl_corners_rows[y][x] = index_tl[0]
-self.matrix_tl_corners_cols[y][x] = index_tl[1]
+p.matrix_tl_corners_cols[y][x] = index_tl[1]
-self.matrix_tl_multiply[y,x] = x_ratio*y_ratio
+p.matrix_tl_multiply[y,x] = x_ratio*y_ratio
 # if top right outside bounds
 if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
-self.matrix_tr_corners_rows[y][x] = 0
+p.matrix_tr_corners_rows[y][x] = 0
-self.matrix_tr_corners_cols[y][x] = 0
+p.matrix_tr_corners_cols[y][x] = 0
-self.matrix_tr_multiply[y,x] = 0
+p.matrix_tr_multiply[y,x] = 0
 else:
-self.matrix_tr_corners_rows[y][x] = index_tr[0]
+p.matrix_tr_corners_rows[y][x] = index_tr[0]
-self.matrix_tr_corners_cols[y][x] = index_tr[1]
+p.matrix_tr_corners_cols[y][x] = index_tr[1]
-self.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
+p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
 # if bottom left outside bounds
 if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
-self.matrix_bl_corners_rows[y][x] = 0
+p.matrix_bl_corners_rows[y][x] = 0
-self.matrix_bl_corners_cols[y][x] = 0
+p.matrix_bl_corners_cols[y][x] = 0
-self.matrix_bl_multiply[y,x] = 0
+p.matrix_bl_multiply[y,x] = 0
 else:
-self.matrix_bl_corners_rows[y][x] = index_bl[0]
+p.matrix_bl_corners_rows[y][x] = index_bl[0]
-self.matrix_bl_corners_cols[y][x] = index_bl[1]
+p.matrix_bl_corners_cols[y][x] = index_bl[1]
-self.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
+p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
 # if bottom right outside bounds
 if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
-self.matrix_br_corners_rows[y][x] = 0
+p.matrix_br_corners_rows[y][x] = 0
-self.matrix_br_corners_cols[y][x] = 0
+p.matrix_br_corners_cols[y][x] = 0
-self.matrix_br_multiply[y,x] = 0
+p.matrix_br_multiply[y,x] = 0
 else:
-self.matrix_br_corners_rows[y][x] = index_br[0]
+p.matrix_br_corners_rows[y][x] = index_br[0]
-self.matrix_br_corners_cols[y][x] = index_br[1]
+p.matrix_br_corners_cols[y][x] = index_br[1]
-self.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
+p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
-def distort_image(self, image):
+# not really necessary (p is the params object that was passed in), but anyway
+return p
+def transform_image(self, image):
 # Apply the distortion stored in self.current_params to image and
 # return the result: the four corner pixels fetched through the
 # precomputed index tables are weighted by their multiply matrices and
 # summed element-wise.
 # assumes image is a 2-D numpy array matching image_size - TODO confirm
+p = self.current_params
 # index pixels to get the 4 corners for bilinear combination
-tl_pixels = image[self.matrix_tl_corners_rows, self.matrix_tl_corners_cols]
+tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
-tr_pixels = image[self.matrix_tr_corners_rows, self.matrix_tr_corners_cols]
+tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
-bl_pixels = image[self.matrix_bl_corners_rows, self.matrix_bl_corners_cols]
+bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
-br_pixels = image[self.matrix_br_corners_rows, self.matrix_br_corners_cols]
+br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]
 # bilinear ratios, elemwise multiply
-tl_pixels = numpy.multiply(tl_pixels, self.matrix_tl_multiply)
+tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
-tr_pixels = numpy.multiply(tr_pixels, self.matrix_tr_multiply)
+tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
-bl_pixels = numpy.multiply(bl_pixels, self.matrix_bl_multiply)
+bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
-br_pixels = numpy.multiply(br_pixels, self.matrix_br_multiply)
+br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)
 # sum to finish bilinear combination
 return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0)
 # TESTS ----------------------------------------------------------------------
 if len(img.shape) > 2:
 img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
 return (img / 255.0).astype('float')
 def _specific_test():
 # Manual check: load d.png from _TEST_DIR, pick parameters for
 # complexity 0.5 (printing the returned [alpha, sigma]), distort the
 # image and display it with pylab.
-img = _load_image("tests/d.png")
+imgpath = os.path.join(_TEST_DIR, "d.png")
-dist = LocalElasticDistorter((32,32), (15,15), 9.0, 5.0)
+img = _load_image(imgpath)
-dist.distort_image(img)
+dist = LocalElasticDistorter((32,32))
+print dist.regenerate_parameters(0.5)
 # NOTE(review): this revision renames distort_image to transform_image,
 # but the call below still uses the old name - confirm.
+img = dist.distort_image(img)
+pylab.imshow(img)
+pylab.show()
+def _complexity_tests():
 # For each complexity in numpy.arange(0.0, 1.1, 0.1), distort d.png ten
 # times, save each result as a PNG in _TEST_DIR and list the images in
 # complexity.html (also written to _TEST_DIR). The time taken by each
 # regenerate_parameters() call is printed.
+imgpath = os.path.join(_TEST_DIR, "d.png")
+dist = LocalElasticDistorter((32,32))
+orig_img = _load_image(imgpath)
+html_content = '''<html><body>Original:<br/><img src='d.png'>'''
+for complexity in numpy.arange(0.0, 1.1, 0.1):
+html_content += '<br/>Complexity: ' + str(complexity) + '<br/>'
+for i in range(10):
+t1 = time.time()
+dist.regenerate_parameters(complexity)
+t2 = time.time()
+print "diff", t2-t1
+img = dist.transform_image(orig_img)
+filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
+new_path = os.path.join(_TEST_DIR, filename)
+_save_image(img, new_path)
+html_content += '<img src="' + filename + '">'
+html_content += "</body></html>"
+html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
+html_file.write(html_content)
+html_file.close()
+def _complexity_benchmark():
 # Time regenerate_parameters(0.2) followed by transform_image() in four
 # successive batches (10, 40, 50 and 1000 calls) on the same distorter,
 # printing the total and the average time of each batch.
+imgpath = os.path.join(_TEST_DIR, "d.png")
+dist = LocalElasticDistorter((32,32))
+orig_img = _load_image(imgpath)
+# time the first 10 (each call generates a new field)
+t1 = time.time()
+for i in range(10):
+dist.regenerate_parameters(0.2)
+img = dist.transform_image(orig_img)
+t2 = time.time()
+print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10
+# time the next 40 (still generating new fields)
+t1 = time.time()
+for i in range(40):
+dist.regenerate_parameters(0.2)
+img = dist.transform_image(orig_img)
+t2 = time.time()
+print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40
+# time the next 50 (the precomputed cache fills up at the start of this batch)
+t1 = time.time()
+for i in range(50):
+dist.regenerate_parameters(0.2)
+img = dist.transform_image(orig_img)
+t2 = time.time()
+print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50
+# time the next 1000 (parameters are picked from the precomputed cache)
+t1 = time.time()
+for i in range(1000):
+dist.regenerate_parameters(0.2)
+img = dist.transform_image(orig_img)
+t2 = time.time()
+print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
+def _save_image(img, path):
 # Multiply img by 255, cast to uint8 and save it at path as a mode "L"
 # image.
 # assumes img holds values in [0, 1] - TODO confirm
+img2 = Image.fromarray((img * 255).astype('uint8'), "L")
+img2.save(path)
+# TODO: reformat to follow the new class... it's a function of complexity now
+'''
 def _distorter_tests():
 #import pylab
 #pylab.imshow(img)
 #pylab.show()
 for letter in ("d", "a", "n", "o"):
 img = _load_image("tests/" + letter + ".png")
 for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
 for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
-id = LocalElasticDistorter((32,32), (15,15), sigma, alpha)
+id = LocalElasticDistorter((32,32))
 img2 = id.distort_image(img)
 img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
 img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
+'''
 def _benchmark():
 # Time a 10000-iteration loop, printing "-" every 1000 iterations, then
 # print the elapsed time and the number of iterations per second.
 # NOTE(review): the comparison view may elide unchanged lines; only the
 # progress print is visible in the loop body here - check the full file.
 img = _load_image("tests/d.png")
-dist = LocalElasticDistorter((32,32), (10,10), 5.0, 5.0)
+dist = LocalElasticDistorter((32,32))
+dist.regenerate_parameters(0.0)
 import time
 t1 = time.time()
 for i in range(10000):
 if i % 1000 == 0:
 print "-"
 t2 = time.time()
 print "t2-t1", t2-t1
 print "avg", 10000/(t2-t1)
 if __name__ == '__main__':
 # Script entry point: import the modules the test helpers use (time,
 # pylab, Image, os.path) and run _complexity_benchmark(); the other
 # helpers are left commented out.
+import time
+import pylab
 import Image
-_distorter_tests()
+import os.path
+#_distorter_tests()
 #_benchmark()
 #_specific_test()
+#_complexity_tests()
+_complexity_benchmark()

Mercurial > ift6266

comparison transformations/local_elastic_distortions.py @ 24:010e826b41e8