Mercurial > ift6266
comparison transformations/local_elastic_distortions.py @ 5:8d1c37190122
Ajouté code de déformations élastiques locales, adapté depuis un travail que j'ai fait la session dernière
author | fsavard <francois.savard@polymtl.ca> |
---|---|
date | Tue, 26 Jan 2010 14:21:40 -0500 |
parents | |
children | 010e826b41e8 |
comparison
equal
deleted
inserted
replaced
4:7fb10ae0efec | 5:8d1c37190122 |
---|---|
1 #!/usr/bin/python | |
2 # coding: utf-8 | |
3 | |
4 ''' | |
5 Implementation of elastic distortions as described in | |
6 Simard, Steinkraus, Platt, "Best Practices for Convolutional | |
7 Neural Networks Applied to Visual Document Analysis", 2003 | |
8 | |
9 Author: François Savard | |
10 Date: Fall 2009, revised Winter 2010 | |
11 | |
12 Usage: create the Distorter with proper alpha, sigma etc. | |
13 Then each time you want to change the distortion field applied, | |
14 call regenerate_fields(). | |
15 | |
16 (The point behind this is that regeneration takes some time, | |
17 so we had better reuse the same fields for a few images) | |
18 ''' | |
19 | |
20 import sys | |
21 import math | |
22 import numpy | |
23 import numpy.random | |
24 import scipy.signal # convolve2d | |
25 | |
def raw_zeros(size):
    '''
    Build a plain nested list of integer zeros.

    size is a (rows, cols) pair; the result has size[0] independent
    inner lists, each holding size[1] zeros.
    '''
    n_rows, n_cols = size[0], size[1]
    # One fresh inner list per row so that rows never alias each other.
    return [[0] * n_cols for _ in range(n_rows)]
28 | |
class LocalElasticDistorter(object):
    '''
    Reusable random local elastic distortion for 2-D images.

    Two displacement fields (x and y) are drawn as uniform noise over
    [-1, 1], blurred with a normalized Gaussian kernel and multiplied
    by alpha. From them, index and weight matrices for a bilinear
    resampling are precomputed once, so distort_image() only performs
    four fancy-index lookups and a weighted sum.

    Constructor arguments:
    - image_size: (h, w) of the distortion fields / output image
    - kernel_size: (h, w) of the Gaussian blur kernel
    - sigma: spread of the kernel, used as exp(-((d)/sigma)**2)
    - alpha: multiplier applied to the blurred noise (displacement scale)
    '''

    def __init__(self, image_size, kernel_size, sigma, alpha):
        self.image_size = image_size
        self.kernel_size = kernel_size
        self.sigma = sigma
        self.alpha = alpha
        # Not read anywhere in this class; kept because it is a public attribute.
        self.c_alpha = int(math.ceil(alpha))

        self.kernel = self.gen_gaussian_kernel()
        self.fields = None
        self.regenerate_fields()

    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
    def gen_gaussian_kernel(self):
        '''
        Return a Gaussian kernel of shape self.kernel_size summing to 1.

        The kernel is centred on the middle of the index range
        ((n-1)/2 for n samples) so that it is symmetric, and its shape
        follows kernel_size as (h, w).
        '''
        h, w = self.kernel_size
        centre_y, centre_x = (h - 1) / 2.0, (w - 1) / 2.0
        # y is a column vector (h,1), x a row vector (1,w); their
        # product broadcasts to the full (h,w) kernel.
        y, x = numpy.ogrid[0:h, 0:w]
        s = self.sigma
        gauss = (numpy.exp(-numpy.square((y - centre_y) / s))
                 * numpy.exp(-numpy.square((x - centre_x) / s)))
        # Normalize so we don't reduce image intensity
        return gauss / gauss.sum()

    def gen_distortion_field(self):
        '''
        Return one blurred noise field of shape self.image_size
        (uniform noise over [-1, 1] convolved with self.kernel).
        '''
        field = numpy.random.uniform(-1.0, 1.0, self.image_size)
        return scipy.signal.convolve2d(field, self.kernel, mode='same')

    def _corner_lookup(self, rows, cols, weight):
        '''
        Clamp one corner's (rows, cols, weight) matrices to the image.

        Positions outside the image get index (0, 0) with weight 0, so
        they contribute the default background color of 0.
        '''
        h, w = self.image_size
        inside = (rows >= 0) & (rows < h) & (cols >= 0) & (cols < w)
        return (numpy.where(inside, rows, 0),
                numpy.where(inside, cols, 0),
                numpy.where(inside, weight, 0.0))

    def regenerate_fields(self):
        '''
        Draw new distortion fields and rebuild the lookup matrices.

        Here's how the code works:
        - We first generate "distortion fields" for x and y with these steps:
            - Uniform noise over [-1, 1] in a matrix of size (h,w)
            - Blur with a Gaussian kernel of spread sigma
            - Multiply by alpha
        - Each pixel (y, x) of the distorted image samples the old image
          at (y + distort_y, x + distort_x). As those coordinates are
          not integer, we interpolate between the 4 nearby pixels
          (top left, top right etc.).
        - The 4 nearby pixels are stored as integer index matrices
          ("_rows" / "_cols", separated due to the way fancy indexing
          works in numpy) and the bilinear ratios as "_multiply"
          matrices, all of shape (h,w).
        - A corner that is fractionally closer to the sample point gets
          the larger weight: the top-left weight is
          (1-x_ratio)*(1-y_ratio), so a zero displacement reproduces
          the image unchanged.
        '''
        self.fields = [None, None]
        self.fields[0] = self.alpha * self.gen_distortion_field()
        self.fields[1] = self.alpha * self.gen_distortion_field()

        h, w = self.image_size
        distort_x, distort_y = self.fields[0], self.fields[1]

        floor_dx = numpy.floor(distort_x)
        floor_dy = numpy.floor(distort_y)
        # Fractional part of the displacement, always in [0, 1).
        x_ratio = distort_x - floor_dx
        y_ratio = distort_y - floor_dy

        # (h,1) and (1,w) index vectors broadcast against the (h,w) offsets.
        rows, cols = numpy.ogrid[0:h, 0:w]
        y0 = rows + floor_dy.astype(int)
        x0 = cols + floor_dx.astype(int)

        (self.matrix_tl_corners_rows,
         self.matrix_tl_corners_cols,
         self.matrix_tl_multiply) = self._corner_lookup(
            y0, x0, (1.0 - x_ratio) * (1.0 - y_ratio))

        (self.matrix_tr_corners_rows,
         self.matrix_tr_corners_cols,
         self.matrix_tr_multiply) = self._corner_lookup(
            y0, x0 + 1, x_ratio * (1.0 - y_ratio))

        (self.matrix_bl_corners_rows,
         self.matrix_bl_corners_cols,
         self.matrix_bl_multiply) = self._corner_lookup(
            y0 + 1, x0, (1.0 - x_ratio) * y_ratio)

        (self.matrix_br_corners_rows,
         self.matrix_br_corners_cols,
         self.matrix_br_multiply) = self._corner_lookup(
            y0 + 1, x0 + 1, x_ratio * y_ratio)

    def distort_image(self, image):
        '''
        Return the distorted version of image using the current fields.

        image is indexed with the precomputed (row, col) matrices, so it
        must cover at least self.image_size; the result has shape
        self.image_size.
        '''
        # index pixels to get the 4 corners for bilinear combination
        tl_pixels = image[self.matrix_tl_corners_rows, self.matrix_tl_corners_cols]
        tr_pixels = image[self.matrix_tr_corners_rows, self.matrix_tr_corners_cols]
        bl_pixels = image[self.matrix_bl_corners_rows, self.matrix_bl_corners_cols]
        br_pixels = image[self.matrix_br_corners_rows, self.matrix_br_corners_cols]

        # bilinear ratios (elementwise), then sum to finish the combination
        return (tl_pixels * self.matrix_tl_multiply
                + tr_pixels * self.matrix_tr_multiply
                + bl_pixels * self.matrix_bl_multiply
                + br_pixels * self.matrix_br_multiply)
184 | |
185 # TESTS ---------------------------------------------------------------------- | |
186 | |
def _load_image(filepath):
    '''
    Load an image file as a 2-D float array scaled by 1/255.

    Multi-channel images are reduced to grayscale: with 3 or more
    channels the first three are combined with weights 0.3/0.59/0.11
    and any further channel (e.g. alpha) is ignored; with 2 channels
    the first one is used as is.

    Relies on the module-level name Image, which this file only binds
    inside its __main__ guard.
    '''
    rgb_weights = numpy.array([0.3, 0.59, 0.11])
    img = numpy.asarray(Image.open(filepath))
    if img.ndim > 2:
        if img.shape[2] >= 3:
            # Slice to three channels so RGB and RGBA both broadcast
            # against the weights.
            img = (img[:, :, :3] * rgb_weights).sum(axis=2)
        else:
            img = img[:, :, 0]
    return (img / 255.0).astype('float')
194 | |
def _specific_test():
    '''
    Smoke test: distort tests/d.png once with a 32x32 distorter
    (15x15 kernel, sigma 9.0, alpha 5.0); the result is discarded.
    '''
    letter_image = _load_image("tests/d.png")
    distorter = LocalElasticDistorter((32,32), (15,15), 9.0, 5.0)
    distorter.distort_image(letter_image)
199 | |
def _distorter_tests():
    '''
    Write one distorted PNG per (letter, alpha, sigma) combination.

    For each of the letters d, a, n, o the image tests/<letter>.png is
    loaded and distorted with every alpha and sigma in 1.0 .. 9.0; each
    result is saved next to it with alpha and sigma in the file name.
    Relies on the module-level name Image for saving.
    '''
    levels = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

    for letter in ("d", "a", "n", "o"):
        source = _load_image("tests/" + letter + ".png")
        for alpha in levels:
            for sigma in levels:
                # A fresh distorter per combination, hence fresh random fields.
                distorter = LocalElasticDistorter((32,32), (15,15), sigma, alpha)
                distorted = distorter.distort_image(source)
                as_bytes = (distorted * 255).astype('uint8')
                picture = Image.fromarray(as_bytes, "L")
                picture.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
213 | |
def _benchmark():
    '''
    Time 10000 calls of distort_image() on tests/d.png with one
    distorter (fields generated once, before timing starts).

    Prints a "-" every 1000 iterations, then the elapsed seconds
    ("t2-t1") and the number of distortions per second ("avg").
    The print calls take a single pre-formatted string so that they
    emit the same text under Python 2 and Python 3.
    '''
    import time

    iterations = 10000
    img = _load_image("tests/d.png")
    dist = LocalElasticDistorter((32,32), (10,10), 5.0, 5.0)

    t1 = time.time()
    for i in range(iterations):
        if i % 1000 == 0:
            print("-")
        dist.distort_image(img)
    t2 = time.time()

    elapsed = t2 - t1
    print("t2-t1 %s" % (elapsed,))
    print("avg %s" % (iterations / elapsed,))
226 | |
if __name__ == '__main__':
    # _load_image() and _distorter_tests() look up Image as a module
    # global, so it is bound here before they run. Try the legacy
    # top-level PIL module first, then the PIL package namespace.
    try:
        import Image
    except ImportError:
        from PIL import Image

    _distorter_tests()
    # Alternative entry points, enabled by hand:
    #_benchmark()
    #_specific_test()
232 | |
233 |