Mercurial > ift6266
changeset 24:010e826b41e8
Modifications to elastic distortions: fixed an important bug with the distortions themselves (the result is now much nicer visually), made the interface conform to the Transformation standard, and added the ability to save a certain number of distortion fields so they can be reused if the complexity doesn't change
author | fsavard <francois.savard@polymtl.ca> |
---|---|
date | Fri, 29 Jan 2010 13:37:52 -0500 |
parents | afdd41db8152 |
children | a679c1ffb5c5 |
files | transformations/local_elastic_distortions.py |
diffstat | 1 files changed, 287 insertions(+), 94 deletions(-) [+] |
line wrap: on
line diff
--- a/transformations/local_elastic_distortions.py Thu Jan 28 23:03:44 2010 -0600 +++ b/transformations/local_elastic_distortions.py Fri Jan 29 13:37:52 2010 -0500 @@ -23,36 +23,151 @@ import numpy.random import scipy.signal # convolve2d -def raw_zeros(size): +_TEST_DIR = "/home/francois/Desktop/dist_tests/" + +def _raw_zeros(size): return [[0 for i in range(size[1])] for j in range(size[0])] +class ElasticDistortionParams(): + def __init__(self, image_size, alpha=0.0, sigma=0.0): + self.image_size = image_size + self.alpha = alpha + self.sigma = sigma + + h,w = self.image_size + + self.matrix_tl_corners_rows = _raw_zeros((h,w)) + self.matrix_tl_corners_cols = _raw_zeros((h,w)) + + self.matrix_tr_corners_rows = _raw_zeros((h,w)) + self.matrix_tr_corners_cols = _raw_zeros((h,w)) + + self.matrix_bl_corners_rows = _raw_zeros((h,w)) + self.matrix_bl_corners_cols = _raw_zeros((h,w)) + + self.matrix_br_corners_rows = _raw_zeros((h,w)) + self.matrix_br_corners_cols = _raw_zeros((h,w)) + + # those will hold the precomputed ratios for + # bilinear interpolation + self.matrix_tl_multiply = numpy.zeros((h,w)) + self.matrix_tr_multiply = numpy.zeros((h,w)) + self.matrix_bl_multiply = numpy.zeros((h,w)) + self.matrix_br_multiply = numpy.zeros((h,w)) + + def alpha_sigma(self): + return [self.alpha, self.sigma] + class LocalElasticDistorter(): - def __init__(self, image_size, kernel_size, sigma, alpha): + def __init__(self, image_size): self.image_size = image_size - self.kernel_size = kernel_size - self.sigma = sigma - self.alpha = alpha - self.c_alpha = int(math.ceil(alpha)) + + self.current_complexity = 0.0 + + # number of precomputed fields + # (principle: as complexity doesn't change often, we can + # precompute a certain number of fields for a given complexity, + # each with its own parameters. That way, we have good + # randomization, but we're much faster). 
+ self.to_precompute = 50 + + # Both use ElasticDistortionParams + self.current_params = None + self.precomputed_params = [] + + # + self.kernel_size = None + self.kernel = None + + # set some defaults + self.regenerate_parameters(0.0) - self.kernel = self.gen_gaussian_kernel() - self.fields = None - self.regenerate_fields() + def get_settings_names(self): + return ['alpha', 'sigma'] + + def regenerate_parameters(self, complexity): + if abs(complexity - self.current_complexity) > 1e-4: + self.current_complexity = complexity + + # complexity changed, fields must be regenerated + self.precomputed_params = [] + + if len(self.precomputed_params) <= self.to_precompute: + # not yet enough params generated, produce one more + # and append to list + new_params = self._initialize_new_params() + new_params = self._generate_fields(new_params) + self.current_params = new_params + self.precomputed_params.append(new_params) + else: + # if we have enough precomputed fields, just select one + # at random and set parameters to match what they were + # when the field was generated + idx = numpy.random.randint(0, len(self.precomputed_params)) + self.current_params = self.precomputed_params[idx] + + return self.current_params.alpha_sigma() # adapted from http://blenderartists.org/forum/showthread.php?t=163361 - def gen_gaussian_kernel(self): + def _gen_gaussian_kernel(self, sigma): + # the kernel size can change DRAMATICALLY the time + # for the blur operation... 
so even though results are better + # with a bigger kernel, we need to compromise here + # 1*s is very different from 2*s, but there's not much difference + # between 2*s and 4*s + ks = self.kernel_size + s = sigma + target_ks = (1.5*s, 1.5*s) + if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]: + # kernel size is good, ok, no need to regenerate + return + self.kernel_size = target_ks h,w = self.kernel_size a,b = h/2.0, w/2.0 y,x = numpy.ogrid[0:w, 0:h] - s = self.sigma gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s)) # Normalize so we don't reduce image intensity - return gauss/gauss.sum() + self.kernel = gauss/gauss.sum() + + def _gen_distortion_field(self, params): + self._gen_gaussian_kernel(params.sigma) + + # we add kernel_size on all four sides so blurring + # with the kernel produces a smoother result on borders + ks0 = self.kernel_size[0] + ks1 = self.kernel_size[1] + sz0 = self.image_size[1] + ks0 + sz1 = self.image_size[0] + ks1 + field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1)) + field = scipy.signal.convolve2d(field, self.kernel, mode='same') + + # crop only image_size in the middle + field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]] + + return params.alpha * field + - def gen_distortion_field(self): - field = numpy.random.uniform(-1.0, 1.0, self.image_size) - return scipy.signal.convolve2d(field, self.kernel, mode='same') + def _initialize_new_params(self): + params = ElasticDistortionParams(self.image_size) + + cpx = self.current_complexity + # pour faire progresser la complexité un peu plus vite + # tout en gardant les extrêmes de 0.0 et 1.0 + cpx = cpx ** (1./3.) 
- def regenerate_fields(self): + # the smaller the alpha, the closest the pixels are fetched + # a max of 10 is reasonable + params.alpha = cpx * 10.0 + + # the bigger the sigma, the smoother is the distortion + # max of 1 is "reasonable", but produces VERY noisy results + # And the bigger the sigma, the bigger the blur kernel, and the + # slower the field generation, btw. + params.sigma = 10.0 - (7.0 * cpx) + + return params + + def _generate_fields(self, params): ''' Here's how the code works: - We first generate "distortion fields" for x and y with these steps: @@ -74,110 +189,106 @@ ratios for the bilinear interpolation. ''' - self.fields = [None, None] - self.fields[0] = self.alpha*self.gen_distortion_field() - self.fields[1] = self.alpha*self.gen_distortion_field() + p = params - #import pylab - #pylab.imshow(self.fields[0]) + dist_fields = [None, None] + dist_fields[0] = self._gen_distortion_field(params) + dist_fields[1] = self._gen_distortion_field(params) + + #pylab.imshow(dist_fields[0]) #pylab.show() # regenerate distortion index matrices # "_rows" are row indices # "_cols" are column indices # (separated due to the way fancy indexing works in numpy) - h,w = self.image_size - - self.matrix_tl_corners_rows = raw_zeros((h,w)) - self.matrix_tl_corners_cols = raw_zeros((h,w)) - - self.matrix_tr_corners_rows = raw_zeros((h,w)) - self.matrix_tr_corners_cols = raw_zeros((h,w)) - - self.matrix_bl_corners_rows = raw_zeros((h,w)) - self.matrix_bl_corners_cols = raw_zeros((h,w)) - - self.matrix_br_corners_rows = raw_zeros((h,w)) - self.matrix_br_corners_cols = raw_zeros((h,w)) - - # those will hold the precomputed ratios for - # bilinear interpolation - self.matrix_tl_multiply = numpy.zeros((h,w)) - self.matrix_tr_multiply = numpy.zeros((h,w)) - self.matrix_bl_multiply = numpy.zeros((h,w)) - self.matrix_br_multiply = numpy.zeros((h,w)) + h,w = p.image_size for y in range(h): - for x in range(w): - distort_x = self.fields[0][y,x] - distort_y = self.fields[1][y,x] - 
f_dy = int(math.floor(distort_y)) - f_dx = int(math.floor(distort_x)) - y0 = y+f_dy - x0 = x+f_dx - index_tl = [y0, x0] - index_tr = [y0, x0+1] - index_bl = [y0+1, x0] - index_br = [y0+1, x0+1] - x_ratio = abs(distort_x-f_dx) # ratio of left vs right (for bilinear) - y_ratio = abs(distort_y-f_dy) # ratio of top vs bottom + for x in range(w): + distort_x = dist_fields[0][y,x] + distort_y = dist_fields[1][y,x] + + # the "target" is the coordinate we fetch color data from + # (in the original image) + # target_left and _top are the rounded coordinate on the + # left/top of this target (float) coordinate + target_pixel = (y+distort_y, x+distort_x) + + target_left = int(math.floor(x + distort_x)) + target_top = int(math.floor(y + distort_y)) + + index_tl = [target_top, target_left] + index_tr = [target_top, target_left+1] + index_bl = [target_top+1, target_left] + index_br = [target_top+1, target_left+1] + + # x_ratio is the ratio of importance of left pixels + # y_ratio is the """" of top pixels + # (in bilinear combination) + y_ratio = 1.0 - (target_pixel[0] - target_top) + x_ratio = 1.0 - (target_pixel[1] - target_left) # We use a default background color of 0 for displacements # outside of boundaries of the image. 
# if top left outside bounds if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w: - self.matrix_tl_corners_rows[y][x] = 0 - self.matrix_tl_corners_cols[y][x] = 0 - self.matrix_tl_multiply[y,x] = 0 + p.matrix_tl_corners_rows[y][x] = 0 + p.matrix_tl_corners_cols[y][x] = 0 + p.matrix_tl_multiply[y,x] = 0 else: - self.matrix_tl_corners_rows[y][x] = index_tl[0] - self.matrix_tl_corners_cols[y][x] = index_tl[1] - self.matrix_tl_multiply[y,x] = x_ratio*y_ratio - + p.matrix_tl_corners_rows[y][x] = index_tl[0] + p.matrix_tl_corners_cols[y][x] = index_tl[1] + p.matrix_tl_multiply[y,x] = x_ratio*y_ratio # if top right outside bounds if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w: - self.matrix_tr_corners_rows[y][x] = 0 - self.matrix_tr_corners_cols[y][x] = 0 - self.matrix_tr_multiply[y,x] = 0 + p.matrix_tr_corners_rows[y][x] = 0 + p.matrix_tr_corners_cols[y][x] = 0 + p.matrix_tr_multiply[y,x] = 0 else: - self.matrix_tr_corners_rows[y][x] = index_tr[0] - self.matrix_tr_corners_cols[y][x] = index_tr[1] - self.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio + p.matrix_tr_corners_rows[y][x] = index_tr[0] + p.matrix_tr_corners_cols[y][x] = index_tr[1] + p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio # if bottom left outside bounds if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w: - self.matrix_bl_corners_rows[y][x] = 0 - self.matrix_bl_corners_cols[y][x] = 0 - self.matrix_bl_multiply[y,x] = 0 + p.matrix_bl_corners_rows[y][x] = 0 + p.matrix_bl_corners_cols[y][x] = 0 + p.matrix_bl_multiply[y,x] = 0 else: - self.matrix_bl_corners_rows[y][x] = index_bl[0] - self.matrix_bl_corners_cols[y][x] = index_bl[1] - self.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio) + p.matrix_bl_corners_rows[y][x] = index_bl[0] + p.matrix_bl_corners_cols[y][x] = index_bl[1] + p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio) # if bottom right outside bounds if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or 
index_br[1] >= w: - self.matrix_br_corners_rows[y][x] = 0 - self.matrix_br_corners_cols[y][x] = 0 - self.matrix_br_multiply[y,x] = 0 + p.matrix_br_corners_rows[y][x] = 0 + p.matrix_br_corners_cols[y][x] = 0 + p.matrix_br_multiply[y,x] = 0 else: - self.matrix_br_corners_rows[y][x] = index_br[0] - self.matrix_br_corners_cols[y][x] = index_br[1] - self.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio) + p.matrix_br_corners_rows[y][x] = index_br[0] + p.matrix_br_corners_cols[y][x] = index_br[1] + p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio) + + # not really necessary, but anyway + return p - def distort_image(self, image): + def transform_image(self, image): + p = self.current_params + # index pixels to get the 4 corners for bilinear combination - tl_pixels = image[self.matrix_tl_corners_rows, self.matrix_tl_corners_cols] - tr_pixels = image[self.matrix_tr_corners_rows, self.matrix_tr_corners_cols] - bl_pixels = image[self.matrix_bl_corners_rows, self.matrix_bl_corners_cols] - br_pixels = image[self.matrix_br_corners_rows, self.matrix_br_corners_cols] + tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols] + tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols] + bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols] + br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols] # bilinear ratios, elemwise multiply - tl_pixels = numpy.multiply(tl_pixels, self.matrix_tl_multiply) - tr_pixels = numpy.multiply(tr_pixels, self.matrix_tr_multiply) - bl_pixels = numpy.multiply(bl_pixels, self.matrix_bl_multiply) - br_pixels = numpy.multiply(br_pixels, self.matrix_br_multiply) + tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply) + tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply) + bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply) + br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply) # sum to finish bilinear combination return 
numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0) @@ -193,10 +304,85 @@ return (img / 255.0).astype('float') def _specific_test(): - img = _load_image("tests/d.png") - dist = LocalElasticDistorter((32,32), (15,15), 9.0, 5.0) - dist.distort_image(img) + imgpath = os.path.join(_TEST_DIR, "d.png") + img = _load_image(imgpath) + dist = LocalElasticDistorter((32,32)) + print dist.regenerate_parameters(0.5) + img = dist.distort_image(img) + pylab.imshow(img) + pylab.show() + +def _complexity_tests(): + imgpath = os.path.join(_TEST_DIR, "d.png") + dist = LocalElasticDistorter((32,32)) + orig_img = _load_image(imgpath) + html_content = '''<html><body>Original:<br/><img src='d.png'>''' + for complexity in numpy.arange(0.0, 1.1, 0.1): + html_content += '<br/>Complexity: ' + str(complexity) + '<br/>' + for i in range(10): + t1 = time.time() + dist.regenerate_parameters(complexity) + t2 = time.time() + print "diff", t2-t1 + img = dist.transform_image(orig_img) + filename = "complexity_" + str(complexity) + "_" + str(i) + ".png" + new_path = os.path.join(_TEST_DIR, filename) + _save_image(img, new_path) + html_content += '<img src="' + filename + '">' + html_content += "</body></html>" + html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w") + html_file.write(html_content) + html_file.close() + +def _complexity_benchmark(): + imgpath = os.path.join(_TEST_DIR, "d.png") + dist = LocalElasticDistorter((32,32)) + orig_img = _load_image(imgpath) + # time the first 10 + t1 = time.time() + for i in range(10): + dist.regenerate_parameters(0.2) + img = dist.transform_image(orig_img) + t2 = time.time() + + print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10 + + # time the next 40 + t1 = time.time() + for i in range(40): + dist.regenerate_parameters(0.2) + img = dist.transform_image(orig_img) + t2 = time.time() + + print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40 + + # time the next 50 + t1 = time.time() + for i in range(50): + 
dist.regenerate_parameters(0.2) + img = dist.transform_image(orig_img) + t2 = time.time() + + print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50 + + # time the next 1000 + t1 = time.time() + for i in range(1000): + dist.regenerate_parameters(0.2) + img = dist.transform_image(orig_img) + t2 = time.time() + + print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000 + + + +def _save_image(img, path): + img2 = Image.fromarray((img * 255).astype('uint8'), "L") + img2.save(path) + +# TODO: reformat to follow new class... it function of complexity now +''' def _distorter_tests(): #import pylab #pylab.imshow(img) @@ -206,14 +392,16 @@ img = _load_image("tests/" + letter + ".png") for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0): for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0): - id = LocalElasticDistorter((32,32), (15,15), sigma, alpha) + id = LocalElasticDistorter((32,32)) img2 = id.distort_image(img) img2 = Image.fromarray((img2 * 255).astype('uint8'), "L") img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png") +''' def _benchmark(): img = _load_image("tests/d.png") - dist = LocalElasticDistorter((32,32), (10,10), 5.0, 5.0) + dist = LocalElasticDistorter((32,32)) + dist.regenerate_parameters(0.0) import time t1 = time.time() for i in range(10000): @@ -225,9 +413,14 @@ print "avg", 10000/(t2-t1) if __name__ == '__main__': + import time + import pylab import Image - _distorter_tests() + import os.path + #_distorter_tests() #_benchmark() #_specific_test() + #_complexity_tests() + _complexity_benchmark()