comparison transformations/local_elastic_distortions.py @ 5:8d1c37190122

Ajouté code de déformations élastiques locales, adapté depuis un travail que j'ai fait la session dernière
author fsavard <francois.savard@polymtl.ca>
date Tue, 26 Jan 2010 14:21:40 -0500
parents
children 010e826b41e8
comparison
equal deleted inserted replaced
4:7fb10ae0efec 5:8d1c37190122
1 #!/usr/bin/python
2 # coding: utf-8
3
4 '''
5 Implementation of elastic distortions as described in
6 Simard, Steinkraus, Platt, "Best Practices for Convolutional
7 Neural Networks Applied to Visual Document Analysis", 2003
8
9 Author: François Savard
10 Date: Fall 2009, revised Winter 2010
11
12 Usage: create the LocalElasticDistorter with proper alpha, sigma etc.
13 Then each time you want to change the distortion fields applied,
14 call regenerate_fields().
15
16 (The point behind this is that regeneration takes some time,
17 so it is better to reuse the fields a few times)
18 '''
19
20 import sys
21 import math
22 import numpy
23 import numpy.random
24 import scipy.signal # convolve2d
25
def raw_zeros(size):
    """Return a nested list of integer zeros with ``size[0]`` rows and
    ``size[1]`` columns.

    Every row is a distinct list object, so writing to one row never
    affects another.
    """
    n_rows, n_cols = size[0], size[1]
    grid = []
    for _ in range(n_rows):
        grid.append([0] * n_cols)
    return grid
28
class LocalElasticDistorter():
    """Apply random local elastic distortions to fixed-size 2-D images.

    A pair of smooth random displacement fields (one for x, one for y) is
    generated once and turned into precomputed lookup tables; the same
    tables are then reused by every call to distort_image() until
    regenerate_fields() is called again.

    Parameters:
        image_size  -- (height, width) of the images that will be distorted
        kernel_size -- (height, width) of the Gaussian smoothing kernel
        sigma       -- spread of the Gaussian kernel (see gen_gaussian_kernel)
        alpha       -- factor multiplying the smoothed noise, i.e. the
                       strength of the displacement
    """

    def __init__(self, image_size, kernel_size, sigma, alpha):
        self.image_size = image_size
        self.kernel_size = kernel_size
        self.sigma = sigma
        self.alpha = alpha
        self.c_alpha = int(math.ceil(alpha))

        self.kernel = self.gen_gaussian_kernel()
        self.fields = None
        self.regenerate_fields()

    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
    def gen_gaussian_kernel(self):
        """Return a normalized Gaussian kernel of shape ``kernel_size``.

        The kernel is exp(-(d/sigma)**2) along each axis, where d is the
        distance to the kernel centre, i.e. a Gaussian whose standard
        deviation is sigma/sqrt(2). It is centred on (size-1)/2 so that it
        is symmetric, and rows correspond to the first entry of
        ``kernel_size``.
        """
        h, w = self.kernel_size
        center_y, center_x = (h - 1) / 2.0, (w - 1) / 2.0
        y, x = numpy.ogrid[0:h, 0:w]
        s = self.sigma
        gauss = (numpy.exp(-numpy.square((x - center_x) / s))
                 * numpy.exp(-numpy.square((y - center_y) / s)))
        # Normalize so we don't reduce image intensity
        return gauss / gauss.sum()

    def gen_distortion_field(self):
        """Return uniform noise over [-1, 1) of shape ``image_size``,
        blurred with the Gaussian kernel."""
        field = numpy.random.uniform(-1.0, 1.0, self.image_size)
        return scipy.signal.convolve2d(field, self.kernel, mode='same')

    def _corner_lookup(self, rows, cols, weights):
        """Build the lookup tables for one of the four bilinear corners.

        Returns (row indices, column indices, weights). Wherever the corner
        falls outside the image, the indices are reset to 0 (any valid
        index would do) and the weight is set to 0, which amounts to a
        background color of 0 outside the image boundaries.
        """
        h, w = self.image_size
        inside = (rows >= 0) & (rows < h) & (cols >= 0) & (cols < w)
        return (numpy.where(inside, rows, 0),
                numpy.where(inside, cols, 0),
                numpy.where(inside, weights, 0.0))

    def regenerate_fields(self):
        '''
        Draw new distortion fields and rebuild the lookup tables used by
        distort_image().

        Here's how the code works:
        - We first generate "distortion fields" for x and y with these steps:
            - Uniform noise over [-1, 1] in a matrix of size (h,w)
            - Blur with a Gaussian kernel of spread sigma
            - Multiply by alpha
        - Then (conceptually) to compose the distorted image, we loop over
          each pixel of the new image and use the corresponding x and y
          distortions (from the matrices generated above) to identify the
          position in the old image from which we fetch color data. As the
          coordinates are not integer, we interpolate between the
          4 nearby pixels (top left, top right etc.).
        - That's just conceptually. Matrix operations are used to speed up
          the computation: the 4 nearby pixels in the old image are
          identified for every pixel of the distorted image at once, and
          stored as integer index arrays ("fancy indices").
        - The extracted nearby pixels are later multiplied by the bilinear
          interpolation weights precomputed here.

        Sets self.fields ([x field, y field]) and, for each corner
        (tl, tr, bl, br), self.matrix_<corner>_corners_rows,
        self.matrix_<corner>_corners_cols and self.matrix_<corner>_multiply,
        all arrays of shape (h, w).
        '''
        h, w = self.image_size

        # fields[0] displaces along x (columns), fields[1] along y (rows)
        self.fields = [None, None]
        self.fields[0] = self.alpha * self.gen_distortion_field()
        self.fields[1] = self.alpha * self.gen_distortion_field()
        distort_x, distort_y = self.fields

        # Split each displacement into an integer step and a fractional
        # remainder in [0, 1]; the remainder drives the interpolation.
        floor_dx = numpy.floor(distort_x)
        floor_dy = numpy.floor(distort_y)
        x_ratio = distort_x - floor_dx
        y_ratio = distort_y - floor_dy

        # "rows" are row indices, "cols" are column indices
        # (separated due to the way fancy indexing works in numpy)
        rows, cols = numpy.indices((h, w))
        top = rows + floor_dy.astype(int)
        left = cols + floor_dx.astype(int)
        bottom = top + 1
        right = left + 1

        # Bilinear weights: the closer the sampled position is to a corner,
        # the larger that corner's weight. A zero remainder therefore puts
        # all the weight on the top-left corner.
        (self.matrix_tl_corners_rows,
         self.matrix_tl_corners_cols,
         self.matrix_tl_multiply) = self._corner_lookup(
            top, left, (1.0 - x_ratio) * (1.0 - y_ratio))

        (self.matrix_tr_corners_rows,
         self.matrix_tr_corners_cols,
         self.matrix_tr_multiply) = self._corner_lookup(
            top, right, x_ratio * (1.0 - y_ratio))

        (self.matrix_bl_corners_rows,
         self.matrix_bl_corners_cols,
         self.matrix_bl_multiply) = self._corner_lookup(
            bottom, left, (1.0 - x_ratio) * y_ratio)

        (self.matrix_br_corners_rows,
         self.matrix_br_corners_cols,
         self.matrix_br_multiply) = self._corner_lookup(
            bottom, right, x_ratio * y_ratio)

    def distort_image(self, image):
        """Return ``image`` distorted by the current fields.

        ``image`` is indexed with the precomputed (h, w) index tables, so it
        must be a numpy array covering at least ``image_size`` pixels along
        its first two axes. The result is the weighted sum of the four
        neighbouring source pixels of every output pixel.
        """
        # index pixels to get the 4 corners for bilinear combination
        tl_pixels = image[self.matrix_tl_corners_rows, self.matrix_tl_corners_cols]
        tr_pixels = image[self.matrix_tr_corners_rows, self.matrix_tr_corners_cols]
        bl_pixels = image[self.matrix_bl_corners_rows, self.matrix_bl_corners_cols]
        br_pixels = image[self.matrix_br_corners_rows, self.matrix_br_corners_cols]

        # bilinear ratios, elemwise multiply
        tl_pixels = numpy.multiply(tl_pixels, self.matrix_tl_multiply)
        tr_pixels = numpy.multiply(tr_pixels, self.matrix_tr_multiply)
        bl_pixels = numpy.multiply(bl_pixels, self.matrix_bl_multiply)
        br_pixels = numpy.multiply(br_pixels, self.matrix_br_multiply)

        # sum to finish bilinear combination
        return numpy.sum([tl_pixels, tr_pixels, bl_pixels, br_pixels], axis=0)
184
185 # TESTS ----------------------------------------------------------------------
186
def _load_image(filepath):
    """Load the image at ``filepath`` as a 2-D float array scaled by 1/255.

    Images with three or more channels are converted to grayscale with the
    weights 0.3 / 0.59 / 0.11 applied to the first three channels; any
    further channel (e.g. alpha) is ignored. For other multi-channel images
    the first channel is used as-is.

    The division by 255 assumes 8-bit channel values -- TODO confirm for
    the test images in use.
    """
    # Imported locally so the helper does not depend on the script entry
    # point having bound ``Image`` beforehand.
    try:
        from PIL import Image
    except ImportError:
        import Image

    rgb_to_grayscale = numpy.array([0.3, 0.59, 0.11])
    img = numpy.asarray(Image.open(filepath))
    if len(img.shape) > 2:
        if img.shape[2] >= 3:
            # Slice to the color channels so RGB and RGBA both broadcast.
            img = (img[:, :, :3] * rgb_to_grayscale).sum(axis=2)
        else:
            img = img[:, :, 0]
    return (img / 255.0).astype('float')
194
def _specific_test():
    """Smoke test: distort the sample image "tests/d.png" once with a
    32x32 distorter (15x15 kernel, sigma 9.0, alpha 5.0). The result is
    discarded."""
    sample = _load_image("tests/d.png")
    distorter = LocalElasticDistorter(
        image_size=(32, 32),
        kernel_size=(15, 15),
        sigma=9.0,
        alpha=5.0,
    )
    distorter.distort_image(sample)
199
def _distorter_tests():
    """Write distorted versions of the sample letters for a grid of
    alpha/sigma values.

    For each letter image "tests/<letter>.png" and each (alpha, sigma) pair
    taken from 1.0 .. 9.0, a fresh distorter is built, the image is
    distorted, and the result is saved as an 8-bit grayscale PNG named
    "tests/<letter>_alpha<alpha>_sigma<sigma>.png".
    """
    # Imported locally so the helper does not depend on the script entry
    # point having bound ``Image`` beforehand.
    try:
        from PIL import Image
    except ImportError:
        import Image

    levels = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

    for letter in ("d", "a", "n", "o"):
        img = _load_image("tests/" + letter + ".png")
        for alpha in levels:
            for sigma in levels:
                distorter = LocalElasticDistorter((32, 32), (15, 15), sigma, alpha)
                distorted = distorter.distort_image(img)
                # Back to 8-bit grayscale ("L" mode) for saving.
                out_img = Image.fromarray((distorted * 255).astype('uint8'), "L")
                out_img.save("tests/" + letter + "_alpha" + str(alpha)
                             + "_sigma" + str(sigma) + ".png")
213
def _benchmark():
    """Time 10000 calls to distort_image() on "tests/d.png".

    Prints a "-" every 1000 iterations, then the elapsed wall-clock time in
    seconds ("t2-t1") and the number of distortions per second ("avg").
    """
    import time

    img = _load_image("tests/d.png")
    dist = LocalElasticDistorter((32, 32), (10, 10), 5.0, 5.0)
    n_runs = 10000

    t1 = time.time()
    for i in range(n_runs):
        if i % 1000 == 0:
            print("-")
        dist.distort_image(img)
    t2 = time.time()

    # Single-argument print(...) with %-formatting emits the same text
    # whether print is a statement or a function.
    print("t2-t1 %s" % (t2 - t1,))
    print("avg %s" % (n_runs / (t2 - t1),))
226
if __name__ == '__main__':
    # Prefer the packaged import; the bare top-level ``Image`` module only
    # exists in legacy PIL installs. Either way ``Image`` ends up bound as
    # a module-level name.
    try:
        from PIL import Image
    except ImportError:
        import Image
    _distorter_tests()
    #_benchmark()
    #_specific_test()
232
233