Mercurial > ift6266
diff transformations/local_elastic_distortions.py @ 5:8d1c37190122
Ajouté code de déformations élastiques locales, adapté depuis un travail que j'ai fait la session dernière
author | fsavard <francois.savard@polymtl.ca> |
---|---|
date | Tue, 26 Jan 2010 14:21:40 -0500 |
parents | |
children | 010e826b41e8 |
line wrap: on
line diff
#!/usr/bin/python
# coding: utf-8
# Reconstructed from the changeset diff
#   --- /dev/null
#   +++ b/transformations/local_elastic_distortions.py  Tue Jan 26 14:21:40 2010 -0500

'''
Implementation of elastic distortions as described in
Simard, Steinkraus, Platt, "Best Practices for Convolutional
    Neural Networks Applied to Visual Document Analysis", 2003

Author: François Savard
Date: Fall 2009, revised Winter 2010

Usage: create the Distorter with proper alpha, sigma etc.
    Then each time you want to change the distortion field applied,
    call regenerate_fields().

    (The point behind this is that regeneration takes some time,
    so we better reuse the fields a few times)
'''

import sys
import math
import numpy
import numpy.random
import scipy.signal # convolve2d


def raw_zeros(size):
    '''
    Return a list of lists of integer zeros with size[0] rows and
    size[1] columns (plain Python lists, not a numpy array).
    '''
    return [[0] * size[1] for _ in range(size[0])]


class LocalElasticDistorter(object):
    '''
    Distorts 2D images with a random, Gaussian-smoothed displacement field.

    image_size  -- (height, width) of the images that will be distorted
    kernel_size -- (height, width) of the Gaussian smoothing kernel
    sigma       -- spread of the Gaussian kernel
    alpha       -- multiplier applied to the smoothed noise (displacement
                   amplitude, in pixels)

    The fields and the lookup tables derived from them are computed once in
    regenerate_fields() and reused by every call to distort_image().
    '''

    def __init__(self, image_size, kernel_size, sigma, alpha):
        self.image_size = image_size
        self.kernel_size = kernel_size
        self.sigma = sigma
        self.alpha = alpha
        # Not used anywhere in this class; kept so existing readers of the
        # attribute keep working.
        self.c_alpha = int(math.ceil(alpha))

        self.kernel = self.gen_gaussian_kernel()
        self.fields = None
        self.regenerate_fields()

    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
    def gen_gaussian_kernel(self):
        '''
        Return a normalized (sums to 1) Gaussian kernel of shape kernel_size.

        The kernel is centred on the middle of the grid, (n-1)/2 along each
        axis, so it is symmetric and does not shift the blurred field.
        The exponent is -(d/sigma)**2, i.e. without the usual factor 2; this
        is kept as is so existing sigma settings keep their meaning.
        '''
        h, w = self.kernel_size
        center_y, center_x = (h - 1) / 2.0, (w - 1) / 2.0
        # rows follow the kernel height, columns the kernel width
        y, x = numpy.ogrid[0:h, 0:w]
        s = float(self.sigma)
        gauss = numpy.exp(-numpy.square((x - center_x) / s)) \
            * numpy.exp(-numpy.square((y - center_y) / s))
        # Normalize so we don't reduce image intensity
        return gauss / gauss.sum()

    def gen_distortion_field(self):
        '''
        Return one smoothed noise field of shape image_size: uniform noise
        over [-1, 1] convolved with the Gaussian kernel (not yet scaled by
        alpha).
        '''
        field = numpy.random.uniform(-1.0, 1.0, self.image_size)
        return scipy.signal.convolve2d(field, self.kernel, mode='same')

    @staticmethod
    def _mask_corner(rows, cols, weights, h, w):
        '''
        Neutralize corner pixels that fall outside an (h, w) image: their
        index becomes (0, 0) and their weight 0, so they contribute the
        default background color of 0.
        '''
        inside = (rows >= 0) & (rows < h) & (cols >= 0) & (cols < w)
        return (numpy.where(inside, rows, 0),
                numpy.where(inside, cols, 0),
                numpy.where(inside, weights, 0.0))

    def regenerate_fields(self):
        '''
        Draw new distortion fields and rebuild the interpolation tables.

        Here's how the code works:
        - We first generate "distortion fields" for x and y with these steps:
            - Uniform noise over [-1, 1] in a matrix of size (h,w)
            - Blur with a Gaussian kernel of spread sigma
            - Multiply by alpha
        - Then (conceptually) to compose the distorted image, we loop over
          each pixel of the new image and use the corresponding x and y
          distortions to identify the position in the old image from which
          we fetch color data. As the coordinates are not integer, we
          interpolate between the 4 nearby pixels (top left, top right etc.).
        - That's just conceptually. Matrix operations are used instead:
          for each pixel of the distorted image we store the row and column
          of its 4 nearby source pixels (usable as "fancy indices") and the
          bilinear weight of each of them.

        Sets self.fields ([x field, y field]) and, for each corner c in
        (tl, tr, bl, br), self.matrix_<c>_corners_rows,
        self.matrix_<c>_corners_cols and self.matrix_<c>_multiply,
        all of shape (h, w).
        '''
        # x field first, then y field (same draw order as before)
        self.fields = [self.alpha * self.gen_distortion_field(),
                       self.alpha * self.gen_distortion_field()]
        distort_x, distort_y = self.fields

        h, w = self.image_size

        floor_dx = numpy.floor(distort_x)
        floor_dy = numpy.floor(distort_y)
        # Fractional offset of the source position from its top-left pixel,
        # always in [0, 1).
        x_ratio = distort_x - floor_dx
        y_ratio = distort_y - floor_dy

        # "_rows" are row indices, "_cols" are column indices
        # (separated due to the way fancy indexing works in numpy)
        grid_rows, grid_cols = numpy.ogrid[0:h, 0:w]
        top = grid_rows + floor_dy.astype(int)
        left = grid_cols + floor_dx.astype(int)
        bottom = top + 1
        right = left + 1

        # Bilinear weights: the closer the source position is to a corner,
        # the larger that corner's weight. A zero offset must therefore give
        # the top-left pixel a weight of 1.
        (self.matrix_tl_corners_rows,
         self.matrix_tl_corners_cols,
         self.matrix_tl_multiply) = self._mask_corner(
            top, left, (1.0 - x_ratio) * (1.0 - y_ratio), h, w)

        (self.matrix_tr_corners_rows,
         self.matrix_tr_corners_cols,
         self.matrix_tr_multiply) = self._mask_corner(
            top, right, x_ratio * (1.0 - y_ratio), h, w)

        (self.matrix_bl_corners_rows,
         self.matrix_bl_corners_cols,
         self.matrix_bl_multiply) = self._mask_corner(
            bottom, left, (1.0 - x_ratio) * y_ratio, h, w)

        (self.matrix_br_corners_rows,
         self.matrix_br_corners_cols,
         self.matrix_br_multiply) = self._mask_corner(
            bottom, right, x_ratio * y_ratio, h, w)

    def distort_image(self, image):
        '''
        Return the distorted version of a 2D image, using the tables built
        by the last call to regenerate_fields().

        Pixels whose source falls outside the image get the background
        value 0.
        '''
        image = numpy.asarray(image)

        # index pixels to get the 4 corners for bilinear combination
        tl_pixels = image[self.matrix_tl_corners_rows, self.matrix_tl_corners_cols]
        tr_pixels = image[self.matrix_tr_corners_rows, self.matrix_tr_corners_cols]
        bl_pixels = image[self.matrix_bl_corners_rows, self.matrix_bl_corners_cols]
        br_pixels = image[self.matrix_br_corners_rows, self.matrix_br_corners_cols]

        # bilinear ratios (elemwise multiply), summed to finish the
        # bilinear combination
        return (tl_pixels * self.matrix_tl_multiply
                + tr_pixels * self.matrix_tr_multiply
                + bl_pixels * self.matrix_bl_multiply
                + br_pixels * self.matrix_br_multiply)

# TESTS ----------------------------------------------------------------------

def _image_module():
    '''
    Return the PIL Image module, importing it on first use so the helpers
    below also work when this file is imported rather than run as a script.
    '''
    try:
        from PIL import Image
    except ImportError:
        # very old PIL installs expose Image as a top-level module
        import Image
    return Image


def _load_image(filepath):
    '''
    Load an image file as a 2D float array scaled by 1/255.
    Multi-channel images are reduced to grayscale with fixed weights.
    '''
    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
    img = _image_module().open(filepath)
    img = numpy.asarray(img)
    if len(img.shape) > 2:
        # 3-channel images have no fourth channel to weight
        if img.shape[2] == 3:
            weights = _RGB_TO_GRAYSCALE[:3]
        else:
            weights = _RGB_TO_GRAYSCALE
        img = (img * weights).sum(axis=2)
    return (img / 255.0).astype('float')


def _specific_test():
    '''Distort a single test image once (smoke test).'''
    img = _load_image("tests/d.png")
    dist = LocalElasticDistorter((32,32), (15,15), 9.0, 5.0)
    dist.distort_image(img)


def _distorter_tests():
    '''
    Write distorted versions of the test letters for a grid of alpha and
    sigma values, for visual inspection.
    '''
    Image = _image_module()
    values = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

    for letter in ("d", "a", "n", "o"):
        img = _load_image("tests/" + letter + ".png")
        for alpha in values:
            for sigma in values:
                distorter = LocalElasticDistorter((32,32), (15,15), sigma, alpha)
                img2 = distorter.distort_image(img)
                img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
                img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")


def _benchmark():
    '''Time 10000 distortions of one image with a fixed field.'''
    img = _load_image("tests/d.png")
    dist = LocalElasticDistorter((32,32), (10,10), 5.0, 5.0)
    import time
    t1 = time.time()
    for i in range(10000):
        if i % 1000 == 0:
            print("-")
        dist.distort_image(img)
    t2 = time.time()
    print("t2-t1 %s" % (t2 - t1))
    print("avg %s" % (10000 / (t2 - t1)))


if __name__ == '__main__':
    _distorter_tests()
    # alternative entry points:
    #_benchmark()
    #_specific_test()