Mercurial > ift6266
view transformations/local_elastic_distortions.py @ 22:cb47cbc95a21
I fixed a bug in the computation of L1 and L2 regularizations
author | Razvan Pascanu <r.pascanu@gmail.com> |
---|---|
date | Fri, 29 Jan 2010 11:01:39 -0500 |
parents | 8d1c37190122 |
children | 010e826b41e8 |
line wrap: on
line source
#!/usr/bin/python
# coding: utf-8

'''
Implementation of elastic distortions as described in
Simard, Steinkraus, Platt, "Best Practices for Convolutional
Neural Networks Applied to Visual Document Analysis", 2003

Author: François Savard
Date: Fall 2009, revised Winter 2010

Usage: create the Distorter with proper alpha, sigma etc.
    Then each time you want to change the distortion field applied,
    call regenerate_fields().

    (The point behind this is that regeneration takes some time,
    so we better reuse the fields a few times)
'''

import sys
import math

import numpy
import numpy.random
import scipy.signal # convolve2d


def raw_zeros(size):
    '''
    Return a nested list of integer zeros with size[0] rows and
    size[1] columns.

    Kept for callers that still rely on it; the distorter itself now
    stores its index tables as numpy integer arrays.
    '''
    return [[0 for i in range(size[1])] for j in range(size[0])]


class LocalElasticDistorter(object):
    '''
    Applies a random, smooth ("elastic") displacement field to 2-D images.

    Parameters
    ----------
    image_size : (h, w) pair, shape of the images that will be distorted
    kernel_size : (h, w) pair, shape of the Gaussian blur kernel
    sigma : spread of the Gaussian kernel (see gen_gaussian_kernel)
    alpha : factor the blurred noise is multiplied by, i.e. the
        strength of the displacements (in pixels)

    The constructor generates a first pair of displacement fields; call
    regenerate_fields() to draw new ones.
    '''

    def __init__(self, image_size, kernel_size, sigma, alpha):
        self.image_size = image_size
        self.kernel_size = kernel_size
        self.sigma = sigma
        self.alpha = alpha

        self.c_alpha = int(math.ceil(alpha))

        self.kernel = self.gen_gaussian_kernel()
        self.fields = None

        self.regenerate_fields()

    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
    def gen_gaussian_kernel(self):
        '''
        Build the normalised blur kernel of shape self.kernel_size.

        The kernel is centred on the middle of the index range,
        (n-1)/2 along each axis, so that it is symmetric for both odd
        and even sizes.

        NOTE: the exponent is -(d/sigma)**2 (no factor 2), so the
        standard deviation of this Gaussian is sigma/sqrt(2).  This is
        kept as is so existing sigma settings keep their meaning.
        '''
        h, w = self.kernel_size
        center_row, center_col = (h - 1) / 2.0, (w - 1) / 2.0
        # rows has shape (h, 1) and cols has shape (1, w); their
        # product broadcasts to an (h, w) kernel.
        rows, cols = numpy.ogrid[0:h, 0:w]
        s = self.sigma
        gauss = (numpy.exp(-numpy.square((rows - center_row) / s))
                 * numpy.exp(-numpy.square((cols - center_col) / s)))
        # Normalize so we don't reduce image intensity
        return gauss / gauss.sum()

    def gen_distortion_field(self):
        '''
        Return uniform noise over [-1, 1] of shape self.image_size,
        blurred with self.kernel.
        '''
        field = numpy.random.uniform(-1.0, 1.0, self.image_size)
        return scipy.signal.convolve2d(field, self.kernel, mode='same')

    def _corner_lookup(self, rows, cols, weights):
        '''
        Clamp one set of corner coordinates to the image.

        Corners that fall outside the image are redirected to pixel
        (0, 0) and given a weight of 0, which amounts to a background
        color of 0 for displacements outside of the image boundaries.

        Returns (row indices, column indices, weights).
        '''
        h, w = self.image_size
        inside = (rows >= 0) & (rows < h) & (cols >= 0) & (cols < w)
        return (numpy.where(inside, rows, 0),
                numpy.where(inside, cols, 0),
                numpy.where(inside, weights, 0.0))

    def regenerate_fields(self):
        '''
        Draw new displacement fields and rebuild the lookup tables
        used by distort_image().

        Here's how the code works:
        - We first generate "distortion fields" for x and y with these steps:
            - Uniform noise over [-1, 1] in a matrix of size (h,w)
            - Blur with a Gaussian kernel of spread sigma
            - Multiply by alpha
        - Then (conceptually) to compose the distorted image, we loop over
            each pixel of the new image and use the corresponding x and y
            distortions (from the matrices generated above) to identify
            pixels of the old image from which we fetch color data. As the
            coordinates are not integer, we interpolate between the
            4 nearby pixels (top left, top right etc.).
        - That's just conceptually. Here matrix operations are used: the
            4 nearby pixels in the old image are identified for every
            pixel of the distorted image at once and stored as
            "fancy indices", together with the precomputed ratios
            for the bilinear interpolation.
        '''
        self.fields = [self.alpha * self.gen_distortion_field(),
                       self.alpha * self.gen_distortion_field()]
        # Debugging aid: pylab.imshow(self.fields[0]); pylab.show()

        distort_x, distort_y = self.fields
        floor_dx = numpy.floor(distort_x)
        floor_dy = numpy.floor(distort_y)

        # Fractional part of each displacement, in [0, 1): how far the
        # sampled point lies from the top-left corner towards the
        # right (x_ratio) and towards the bottom (y_ratio).
        x_ratio = distort_x - floor_dx
        y_ratio = distort_y - floor_dy

        h, w = self.image_size
        rows, cols = numpy.indices((h, w))
        top = rows + floor_dy.astype(int)
        left = cols + floor_dx.astype(int)
        bottom = top + 1
        right = left + 1

        # "_rows" are row indices, "_cols" are column indices
        # (separated due to the way fancy indexing works in numpy).
        # "_multiply" holds the bilinear weights: the closer the sampled
        # point is to a corner, the larger that corner's weight.
        (self.matrix_tl_corners_rows,
         self.matrix_tl_corners_cols,
         self.matrix_tl_multiply) = self._corner_lookup(
            top, left, (1.0 - x_ratio) * (1.0 - y_ratio))

        (self.matrix_tr_corners_rows,
         self.matrix_tr_corners_cols,
         self.matrix_tr_multiply) = self._corner_lookup(
            top, right, x_ratio * (1.0 - y_ratio))

        (self.matrix_bl_corners_rows,
         self.matrix_bl_corners_cols,
         self.matrix_bl_multiply) = self._corner_lookup(
            bottom, left, (1.0 - x_ratio) * y_ratio)

        (self.matrix_br_corners_rows,
         self.matrix_br_corners_cols,
         self.matrix_br_multiply) = self._corner_lookup(
            bottom, right, x_ratio * y_ratio)

    def distort_image(self, image):
        '''
        Return the image warped by the current displacement fields.

        image is indexed with the precomputed (row, column) tables, so
        it must be a numpy array covering at least self.image_size.
        '''
        # index pixels to get the 4 corners for bilinear combination
        tl_pixels = image[self.matrix_tl_corners_rows,
                          self.matrix_tl_corners_cols]
        tr_pixels = image[self.matrix_tr_corners_rows,
                          self.matrix_tr_corners_cols]
        bl_pixels = image[self.matrix_bl_corners_rows,
                          self.matrix_bl_corners_cols]
        br_pixels = image[self.matrix_br_corners_rows,
                          self.matrix_br_corners_cols]

        # bilinear ratios, elemwise multiply, then sum to finish the
        # bilinear combination
        return (tl_pixels * self.matrix_tl_multiply
                + tr_pixels * self.matrix_tr_multiply
                + bl_pixels * self.matrix_bl_multiply
                + br_pixels * self.matrix_br_multiply)


# TESTS ----------------------------------------------------------------------

def _pil_image_module():
    '''Import and return the PIL Image module (only the demos need it).'''
    try:
        from PIL import Image
    except ImportError:
        import Image  # legacy stand-alone PIL layout
    return Image


def _load_image(filepath):
    '''Load an image file as a 2-D float array with values divided by 255.'''
    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
    img = _pil_image_module().open(filepath)
    img = numpy.asarray(img)
    if len(img.shape) > 2:
        # Plain RGB input has no alpha channel: drop the alpha weight
        # so that the weights broadcast against the last axis.
        weights = _RGB_TO_GRAYSCALE
        if img.shape[2] == 3:
            weights = _RGB_TO_GRAYSCALE[:3]
        img = (img * weights).sum(axis=2)
    return (img / 255.0).astype('float')


def _specific_test():
    '''Distort a single sample image once (smoke test).'''
    img = _load_image("tests/d.png")
    dist = LocalElasticDistorter((32,32), (15,15), 9.0, 5.0)
    dist.distort_image(img)


def _distorter_tests():
    '''
    Write distorted versions of a few sample letters for every
    alpha/sigma combination in 1.0 .. 9.0 into the tests/ directory.
    '''
    Image = _pil_image_module()
    strengths = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
    for letter in ("d", "a", "n", "o"):
        img = _load_image("tests/" + letter + ".png")
        for alpha in strengths:
            for sigma in strengths:
                distorter = LocalElasticDistorter((32,32), (15,15), sigma, alpha)
                img2 = distorter.distort_image(img)
                img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
                img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")


def _benchmark():
    '''Time 10000 distortions of one image with fixed fields.'''
    img = _load_image("tests/d.png")
    dist = LocalElasticDistorter((32,32), (10,10), 5.0, 5.0)
    import time
    t1 = time.time()
    for i in range(10000):
        if i % 1000 == 0:
            print("-")
        dist.distort_image(img)
    t2 = time.time()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("t2-t1 %s" % (t2 - t1))
    print("avg %s" % (10000 / (t2 - t1)))


if __name__ == '__main__':
    _distorter_tests()
    #_benchmark()
    #_specific_test()