Mercurial > ift6266
view data_generation/transformations/local_elastic_distortions.py @ 379:a21a174c1c18
added writeup skeleton
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Mon, 26 Apr 2010 14:56:06 -0400 |
parents | 1f5937e9e530 |
children |
line wrap: on
line source
#!/usr/bin/python # coding: utf-8 ''' Implementation of elastic distortions as described in Simard, Steinkraus, Platt, "Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis", 2003 Author: François Savard Date: Fall 2009, revised Winter 2010 Usage: create the Distorter with proper alpha, sigma etc. Then each time you want to change the distortion field applied, call regenerate_field(). (The point behind this is that regeneration takes some time, so we better reuse the fields a few times) ''' import sys import math import numpy import numpy.random import scipy.signal # convolve2d _TEST_DIR = "/u/savardf/ift6266/debug_images/" def _raw_zeros(size): return [[0 for i in range(size[1])] for j in range(size[0])] class ElasticDistortionParams(): def __init__(self, image_size=(32,32), alpha=0.0, sigma=0.0): self.image_size = image_size self.alpha = alpha self.sigma = sigma h,w = self.image_size self.matrix_tl_corners_rows = _raw_zeros((h,w)) self.matrix_tl_corners_cols = _raw_zeros((h,w)) self.matrix_tr_corners_rows = _raw_zeros((h,w)) self.matrix_tr_corners_cols = _raw_zeros((h,w)) self.matrix_bl_corners_rows = _raw_zeros((h,w)) self.matrix_bl_corners_cols = _raw_zeros((h,w)) self.matrix_br_corners_rows = _raw_zeros((h,w)) self.matrix_br_corners_cols = _raw_zeros((h,w)) # those will hold the precomputed ratios for # bilinear interpolation self.matrix_tl_multiply = numpy.zeros((h,w)) self.matrix_tr_multiply = numpy.zeros((h,w)) self.matrix_bl_multiply = numpy.zeros((h,w)) self.matrix_br_multiply = numpy.zeros((h,w)) def alpha_sigma(self): return [self.alpha, self.sigma] class LocalElasticDistorter(): def __init__(self, image_size=(32,32)): self.image_size = image_size self.current_complexity_10 = 0 self.current_complexity = 0 # number of precomputed fields # (principle: as complexity doesn't change often, we can # precompute a certain number of fields for a given complexity, # each with its own parameters. That way, we have good # randomization, but we're much faster). self.to_precompute_per_complexity = 50 # Both use ElasticDistortionParams self.current_params = None self.precomputed_params = [[] for i in range(10)] # self.kernel_size = None self.kernel = None # set some defaults self.regenerate_parameters(0.0) def get_settings_names(self): return [] def _floor_complexity(self, complexity): return self._to_complexity_10(complexity) / 10.0 def _to_complexity_10(self, complexity): return min(9, max(0, int(complexity * 10))) def regenerate_parameters(self, complexity): complexity_10 = self._to_complexity_10(complexity) if complexity_10 != self.current_complexity_10: self.current_complexity_10 = complexity_10 self.current_complexity = self._floor_complexity(complexity) if len(self.precomputed_params[complexity_10]) <= self.to_precompute_per_complexity: # not yet enough params generated, produce one more # and append to list new_params = self._initialize_new_params() new_params = self._generate_fields(new_params) self.current_params = new_params self.precomputed_params[complexity_10].append(new_params) else: # if we have enough precomputed fields, just select one # at random and set parameters to match what they were # when the field was generated idx = numpy.random.randint(0, len(self.precomputed_params[complexity_10])) self.current_params = self.precomputed_params[complexity_10][idx] # don't return anything, to avoid storing deterministic parameters return [] # self.current_params.alpha_sigma() def get_parameters_determined_by_complexity(self, complexity): tmp_params = self._initialize_new_params(_floor_complexity(complexity)) return tmp_params.alpha_sigma() def get_settings_names_determined_by_complexity(self, complexity): return ['alpha', 'sigma'] # adapted from http://blenderartists.org/forum/showthread.php?t=163361 def _gen_gaussian_kernel(self, sigma): # the kernel size can change DRAMATICALLY the time # for the blur operation... so even though results are better # with a bigger kernel, we need to compromise here # 1*s is very different from 2*s, but there's not much difference # between 2*s and 4*s ks = self.kernel_size s = sigma target_ks = (1.5*s, 1.5*s) if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]: # kernel size is good, ok, no need to regenerate return self.kernel_size = target_ks h,w = self.kernel_size a,b = h/2.0, w/2.0 y,x = numpy.ogrid[0:w, 0:h] gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s)) # Normalize so we don't reduce image intensity self.kernel = gauss/gauss.sum() def _gen_distortion_field(self, params): self._gen_gaussian_kernel(params.sigma) # we add kernel_size on all four sides so blurring # with the kernel produces a smoother result on borders ks0 = self.kernel_size[0] ks1 = self.kernel_size[1] sz0 = self.image_size[1] + ks0 sz1 = self.image_size[0] + ks1 field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1)) field = scipy.signal.convolve2d(field, self.kernel, mode='same') # crop only image_size in the middle field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]] return params.alpha * field def _initialize_new_params(self, complexity=None): if not complexity: complexity = self.current_complexity params = ElasticDistortionParams(self.image_size) # pour faire progresser la complexité un peu plus vite # tout en gardant les extrêmes de 0.0 et 1.0 complexity = complexity ** (1./3.) # the smaller the alpha, the closest the pixels are fetched # a max of 10 is reasonable params.alpha = complexity * 10.0 # the bigger the sigma, the smoother is the distortion # max of 1 is "reasonable", but produces VERY noisy results # And the bigger the sigma, the bigger the blur kernel, and the # slower the field generation, btw. params.sigma = 10.0 - (7.0 * complexity) return params def _generate_fields(self, params): ''' Here's how the code works: - We first generate "distortion fields" for x and y with these steps: - Uniform noise over [-1, 1] in a matrix of size (h,w) - Blur with a Gaussian kernel of spread sigma - Multiply by alpha - Then (conceptually) to compose the distorted image, we loop over each pixel of the new image and use the corresponding x and y distortions (from the matrices generated above) to identify pixels of the old image from which we fetch color data. As the coordinates are not integer, we interpolate between the 4 nearby pixels (top left, top right etc.). - That's just conceptually. Here I'm using matrix operations to speed up the computation. I first identify the 4 nearby pixels in the old image for each pixel in the distorted image. I can then use them as "fancy indices" to extract the proper pixels for each new pixel. - Then I multiply those extracted nearby points by precomputed ratios for the bilinear interpolation. ''' p = params dist_fields = [None, None] dist_fields[0] = self._gen_distortion_field(params) dist_fields[1] = self._gen_distortion_field(params) #pylab.imshow(dist_fields[0]) #pylab.show() # regenerate distortion index matrices # "_rows" are row indices # "_cols" are column indices # (separated due to the way fancy indexing works in numpy) h,w = p.image_size for y in range(h): for x in range(w): distort_x = dist_fields[0][y,x] distort_y = dist_fields[1][y,x] # the "target" is the coordinate we fetch color data from # (in the original image) # target_left and _top are the rounded coordinate on the # left/top of this target (float) coordinate target_pixel = (y+distort_y, x+distort_x) target_left = int(math.floor(x + distort_x)) target_top = int(math.floor(y + distort_y)) index_tl = [target_top, target_left] index_tr = [target_top, target_left+1] index_bl = [target_top+1, target_left] index_br = [target_top+1, target_left+1] # x_ratio is the ratio of importance of left pixels # y_ratio is the """" of top pixels # (in bilinear combination) y_ratio = 1.0 - (target_pixel[0] - target_top) x_ratio = 1.0 - (target_pixel[1] - target_left) # We use a default background color of 0 for displacements # outside of boundaries of the image. # if top left outside bounds if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w: p.matrix_tl_corners_rows[y][x] = 0 p.matrix_tl_corners_cols[y][x] = 0 p.matrix_tl_multiply[y,x] = 0 else: p.matrix_tl_corners_rows[y][x] = index_tl[0] p.matrix_tl_corners_cols[y][x] = index_tl[1] p.matrix_tl_multiply[y,x] = x_ratio*y_ratio # if top right outside bounds if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w: p.matrix_tr_corners_rows[y][x] = 0 p.matrix_tr_corners_cols[y][x] = 0 p.matrix_tr_multiply[y,x] = 0 else: p.matrix_tr_corners_rows[y][x] = index_tr[0] p.matrix_tr_corners_cols[y][x] = index_tr[1] p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio # if bottom left outside bounds if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w: p.matrix_bl_corners_rows[y][x] = 0 p.matrix_bl_corners_cols[y][x] = 0 p.matrix_bl_multiply[y,x] = 0 else: p.matrix_bl_corners_rows[y][x] = index_bl[0] p.matrix_bl_corners_cols[y][x] = index_bl[1] p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio) # if bottom right outside bounds if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w: p.matrix_br_corners_rows[y][x] = 0 p.matrix_br_corners_cols[y][x] = 0 p.matrix_br_multiply[y,x] = 0 else: p.matrix_br_corners_rows[y][x] = index_br[0] p.matrix_br_corners_cols[y][x] = index_br[1] p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio) # not really necessary, but anyway return p def transform_image(self, image): p = self.current_params # index pixels to get the 4 corners for bilinear combination tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols] tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols] bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols] br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols] # bilinear ratios, elemwise multiply tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply) tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply) bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply) br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply) # sum to finish bilinear combination return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0).astype(numpy.float32) # TESTS ---------------------------------------------------------------------- def _load_image(filepath): _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0] img = Image.open(filepath) img = numpy.asarray(img) if len(img.shape) > 2: img = (img * _RGB_TO_GRAYSCALE).sum(axis=2) return (img / 255.0).astype('float') def _specific_test(): imgpath = os.path.join(_TEST_DIR, "d.png") img = _load_image(imgpath) dist = LocalElasticDistorter((32,32)) print dist.regenerate_parameters(0.5) img = dist.transform_image(img) print dist.get_parameters_determined_by_complexity(0.4) pylab.imshow(img) pylab.show() def _complexity_tests(): imgpath = os.path.join(_TEST_DIR, "d.png") dist = LocalElasticDistorter((32,32)) orig_img = _load_image(imgpath) html_content = '''<html><body>Original:<br/><img src='d.png'>''' for complexity in numpy.arange(0.0, 1.1, 0.1): html_content += '<br/>Complexity: ' + str(complexity) + '<br/>' for i in range(10): t1 = time.time() dist.regenerate_parameters(complexity) t2 = time.time() print "diff", t2-t1 img = dist.transform_image(orig_img) filename = "complexity_" + str(complexity) + "_" + str(i) + ".png" new_path = os.path.join(_TEST_DIR, filename) _save_image(img, new_path) html_content += '<img src="' + filename + '">' html_content += "</body></html>" html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w") html_file.write(html_content) html_file.close() def _complexity_benchmark(): imgpath = os.path.join(_TEST_DIR, "d.png") dist = LocalElasticDistorter((32,32)) orig_img = _load_image(imgpath) for cpx in (0.21, 0.35): # time the first 10 t1 = time.time() for i in range(10): dist.regenerate_parameters(cpx) img = dist.transform_image(orig_img) t2 = time.time() print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10 # time the next 40 t1 = time.time() for i in range(40): dist.regenerate_parameters(cpx) img = dist.transform_image(orig_img) t2 = time.time() print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40 # time the next 50 t1 = time.time() for i in range(50): dist.regenerate_parameters(cpx) img = dist.transform_image(orig_img) t2 = time.time() print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50 # time the next 1000 t1 = time.time() for i in range(1000): dist.regenerate_parameters(cpx) img = dist.transform_image(orig_img) t2 = time.time() print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000 # time the next 1000 with old complexity t1 = time.time() for i in range(1000): dist.regenerate_parameters(0.21) img = dist.transform_image(orig_img) t2 = time.time() print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000 def _save_image(img, path): img2 = Image.fromarray((img * 255).astype('uint8'), "L") img2.save(path) # TODO: reformat to follow new class... it function of complexity now ''' def _distorter_tests(): #import pylab #pylab.imshow(img) #pylab.show() for letter in ("d", "a", "n", "o"): img = _load_image("tests/" + letter + ".png") for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0): for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0): id = LocalElasticDistorter((32,32)) img2 = id.distort_image(img) img2 = Image.fromarray((img2 * 255).astype('uint8'), "L") img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png") ''' def _benchmark(): img = _load_image("tests/d.png") dist = LocalElasticDistorter((32,32)) dist.regenerate_parameters(0.0) import time t1 = time.time() for i in range(10000): if i % 1000 == 0: print "-" dist.distort_image(img) t2 = time.time() print "t2-t1", t2-t1 print "avg", 10000/(t2-t1) if __name__ == '__main__': import time import pylab import Image import os.path #_distorter_tests() #_benchmark() #_specific_test() #_complexity_tests() _complexity_benchmark()