comparison data_generation/transformations/local_elastic_distortions.py @ 167:1f5937e9e530

More moves - transformations into data_generation, added "deep" folder
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Fri, 26 Feb 2010 14:15:38 -0500
parents transformations/local_elastic_distortions.py@b3d76ebf2fac
children
comparison
equal deleted inserted replaced
166:17ae5a1a4dd1 167:1f5937e9e530
1 #!/usr/bin/python
2 # coding: utf-8
3
4 '''
5 Implementation of elastic distortions as described in
6 Simard, Steinkraus, Platt, "Best Practices for Convolutional
7 Neural Networks Applied to Visual Document Analysis", 2003
8
9 Author: François Savard
10 Date: Fall 2009, revised Winter 2010
11
12 Usage: create the Distorter with proper alpha, sigma etc.
13 Then each time you want to change the distortion field applied,
14 call regenerate_field().
15
16 (The point behind this is that regeneration takes some time,
17 so we better reuse the fields a few times)
18 '''
19
20 import sys
21 import math
22 import numpy
23 import numpy.random
24 import scipy.signal # convolve2d
25
26 _TEST_DIR = "/u/savardf/ift6266/debug_images/"
27
28 def _raw_zeros(size):
29 return [[0 for i in range(size[1])] for j in range(size[0])]
30
31 class ElasticDistortionParams():
32 def __init__(self, image_size=(32,32), alpha=0.0, sigma=0.0):
33 self.image_size = image_size
34 self.alpha = alpha
35 self.sigma = sigma
36
37 h,w = self.image_size
38
39 self.matrix_tl_corners_rows = _raw_zeros((h,w))
40 self.matrix_tl_corners_cols = _raw_zeros((h,w))
41
42 self.matrix_tr_corners_rows = _raw_zeros((h,w))
43 self.matrix_tr_corners_cols = _raw_zeros((h,w))
44
45 self.matrix_bl_corners_rows = _raw_zeros((h,w))
46 self.matrix_bl_corners_cols = _raw_zeros((h,w))
47
48 self.matrix_br_corners_rows = _raw_zeros((h,w))
49 self.matrix_br_corners_cols = _raw_zeros((h,w))
50
51 # those will hold the precomputed ratios for
52 # bilinear interpolation
53 self.matrix_tl_multiply = numpy.zeros((h,w))
54 self.matrix_tr_multiply = numpy.zeros((h,w))
55 self.matrix_bl_multiply = numpy.zeros((h,w))
56 self.matrix_br_multiply = numpy.zeros((h,w))
57
58 def alpha_sigma(self):
59 return [self.alpha, self.sigma]
60
61 class LocalElasticDistorter():
62 def __init__(self, image_size=(32,32)):
63 self.image_size = image_size
64
65 self.current_complexity_10 = 0
66 self.current_complexity = 0
67
68 # number of precomputed fields
69 # (principle: as complexity doesn't change often, we can
70 # precompute a certain number of fields for a given complexity,
71 # each with its own parameters. That way, we have good
72 # randomization, but we're much faster).
73 self.to_precompute_per_complexity = 50
74
75 # Both use ElasticDistortionParams
76 self.current_params = None
77 self.precomputed_params = [[] for i in range(10)]
78
79 #
80 self.kernel_size = None
81 self.kernel = None
82
83 # set some defaults
84 self.regenerate_parameters(0.0)
85
86 def get_settings_names(self):
87 return []
88
89 def _floor_complexity(self, complexity):
90 return self._to_complexity_10(complexity) / 10.0
91
92 def _to_complexity_10(self, complexity):
93 return min(9, max(0, int(complexity * 10)))
94
95 def regenerate_parameters(self, complexity):
96 complexity_10 = self._to_complexity_10(complexity)
97
98 if complexity_10 != self.current_complexity_10:
99 self.current_complexity_10 = complexity_10
100 self.current_complexity = self._floor_complexity(complexity)
101
102 if len(self.precomputed_params[complexity_10]) <= self.to_precompute_per_complexity:
103 # not yet enough params generated, produce one more
104 # and append to list
105 new_params = self._initialize_new_params()
106 new_params = self._generate_fields(new_params)
107 self.current_params = new_params
108 self.precomputed_params[complexity_10].append(new_params)
109 else:
110 # if we have enough precomputed fields, just select one
111 # at random and set parameters to match what they were
112 # when the field was generated
113 idx = numpy.random.randint(0, len(self.precomputed_params[complexity_10]))
114 self.current_params = self.precomputed_params[complexity_10][idx]
115
116 # don't return anything, to avoid storing deterministic parameters
117 return [] # self.current_params.alpha_sigma()
118
119 def get_parameters_determined_by_complexity(self, complexity):
120 tmp_params = self._initialize_new_params(_floor_complexity(complexity))
121 return tmp_params.alpha_sigma()
122
123 def get_settings_names_determined_by_complexity(self, complexity):
124 return ['alpha', 'sigma']
125
126 # adapted from http://blenderartists.org/forum/showthread.php?t=163361
127 def _gen_gaussian_kernel(self, sigma):
128 # the kernel size can change DRAMATICALLY the time
129 # for the blur operation... so even though results are better
130 # with a bigger kernel, we need to compromise here
131 # 1*s is very different from 2*s, but there's not much difference
132 # between 2*s and 4*s
133 ks = self.kernel_size
134 s = sigma
135 target_ks = (1.5*s, 1.5*s)
136 if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
137 # kernel size is good, ok, no need to regenerate
138 return
139 self.kernel_size = target_ks
140 h,w = self.kernel_size
141 a,b = h/2.0, w/2.0
142 y,x = numpy.ogrid[0:w, 0:h]
143 gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
144 # Normalize so we don't reduce image intensity
145 self.kernel = gauss/gauss.sum()
146
147 def _gen_distortion_field(self, params):
148 self._gen_gaussian_kernel(params.sigma)
149
150 # we add kernel_size on all four sides so blurring
151 # with the kernel produces a smoother result on borders
152 ks0 = self.kernel_size[0]
153 ks1 = self.kernel_size[1]
154 sz0 = self.image_size[1] + ks0
155 sz1 = self.image_size[0] + ks1
156 field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
157 field = scipy.signal.convolve2d(field, self.kernel, mode='same')
158
159 # crop only image_size in the middle
160 field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]]
161
162 return params.alpha * field
163
164
165 def _initialize_new_params(self, complexity=None):
166 if not complexity:
167 complexity = self.current_complexity
168
169 params = ElasticDistortionParams(self.image_size)
170
171 # pour faire progresser la complexité un peu plus vite
172 # tout en gardant les extrêmes de 0.0 et 1.0
173 complexity = complexity ** (1./3.)
174
175 # the smaller the alpha, the closest the pixels are fetched
176 # a max of 10 is reasonable
177 params.alpha = complexity * 10.0
178
179 # the bigger the sigma, the smoother is the distortion
180 # max of 1 is "reasonable", but produces VERY noisy results
181 # And the bigger the sigma, the bigger the blur kernel, and the
182 # slower the field generation, btw.
183 params.sigma = 10.0 - (7.0 * complexity)
184
185 return params
186
187 def _generate_fields(self, params):
188 '''
189 Here's how the code works:
190 - We first generate "distortion fields" for x and y with these steps:
191 - Uniform noise over [-1, 1] in a matrix of size (h,w)
192 - Blur with a Gaussian kernel of spread sigma
193 - Multiply by alpha
194 - Then (conceptually) to compose the distorted image, we loop over each pixel
195 of the new image and use the corresponding x and y distortions
196 (from the matrices generated above) to identify pixels
197 of the old image from which we fetch color data. As the
198 coordinates are not integer, we interpolate between the
199 4 nearby pixels (top left, top right etc.).
200 - That's just conceptually. Here I'm using matrix operations
201 to speed up the computation. I first identify the 4 nearby
202 pixels in the old image for each pixel in the distorted image.
203 I can then use them as "fancy indices" to extract the proper
204 pixels for each new pixel.
205 - Then I multiply those extracted nearby points by precomputed
206 ratios for the bilinear interpolation.
207 '''
208
209 p = params
210
211 dist_fields = [None, None]
212 dist_fields[0] = self._gen_distortion_field(params)
213 dist_fields[1] = self._gen_distortion_field(params)
214
215 #pylab.imshow(dist_fields[0])
216 #pylab.show()
217
218 # regenerate distortion index matrices
219 # "_rows" are row indices
220 # "_cols" are column indices
221 # (separated due to the way fancy indexing works in numpy)
222 h,w = p.image_size
223
224 for y in range(h):
225 for x in range(w):
226 distort_x = dist_fields[0][y,x]
227 distort_y = dist_fields[1][y,x]
228
229 # the "target" is the coordinate we fetch color data from
230 # (in the original image)
231 # target_left and _top are the rounded coordinate on the
232 # left/top of this target (float) coordinate
233 target_pixel = (y+distort_y, x+distort_x)
234
235 target_left = int(math.floor(x + distort_x))
236 target_top = int(math.floor(y + distort_y))
237
238 index_tl = [target_top, target_left]
239 index_tr = [target_top, target_left+1]
240 index_bl = [target_top+1, target_left]
241 index_br = [target_top+1, target_left+1]
242
243 # x_ratio is the ratio of importance of left pixels
244 # y_ratio is the """" of top pixels
245 # (in bilinear combination)
246 y_ratio = 1.0 - (target_pixel[0] - target_top)
247 x_ratio = 1.0 - (target_pixel[1] - target_left)
248
249 # We use a default background color of 0 for displacements
250 # outside of boundaries of the image.
251
252 # if top left outside bounds
253 if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w:
254 p.matrix_tl_corners_rows[y][x] = 0
255 p.matrix_tl_corners_cols[y][x] = 0
256 p.matrix_tl_multiply[y,x] = 0
257 else:
258 p.matrix_tl_corners_rows[y][x] = index_tl[0]
259 p.matrix_tl_corners_cols[y][x] = index_tl[1]
260 p.matrix_tl_multiply[y,x] = x_ratio*y_ratio
261
262 # if top right outside bounds
263 if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
264 p.matrix_tr_corners_rows[y][x] = 0
265 p.matrix_tr_corners_cols[y][x] = 0
266 p.matrix_tr_multiply[y,x] = 0
267 else:
268 p.matrix_tr_corners_rows[y][x] = index_tr[0]
269 p.matrix_tr_corners_cols[y][x] = index_tr[1]
270 p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio
271
272 # if bottom left outside bounds
273 if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
274 p.matrix_bl_corners_rows[y][x] = 0
275 p.matrix_bl_corners_cols[y][x] = 0
276 p.matrix_bl_multiply[y,x] = 0
277 else:
278 p.matrix_bl_corners_rows[y][x] = index_bl[0]
279 p.matrix_bl_corners_cols[y][x] = index_bl[1]
280 p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)
281
282 # if bottom right outside bounds
283 if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
284 p.matrix_br_corners_rows[y][x] = 0
285 p.matrix_br_corners_cols[y][x] = 0
286 p.matrix_br_multiply[y,x] = 0
287 else:
288 p.matrix_br_corners_rows[y][x] = index_br[0]
289 p.matrix_br_corners_cols[y][x] = index_br[1]
290 p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)
291
292 # not really necessary, but anyway
293 return p
294
295 def transform_image(self, image):
296 p = self.current_params
297
298 # index pixels to get the 4 corners for bilinear combination
299 tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
300 tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
301 bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
302 br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]
303
304 # bilinear ratios, elemwise multiply
305 tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
306 tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
307 bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
308 br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)
309
310 # sum to finish bilinear combination
311 return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0).astype(numpy.float32)
312
313 # TESTS ----------------------------------------------------------------------
314
315 def _load_image(filepath):
316 _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
317 img = Image.open(filepath)
318 img = numpy.asarray(img)
319 if len(img.shape) > 2:
320 img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
321 return (img / 255.0).astype('float')
322
323 def _specific_test():
324 imgpath = os.path.join(_TEST_DIR, "d.png")
325 img = _load_image(imgpath)
326 dist = LocalElasticDistorter((32,32))
327 print dist.regenerate_parameters(0.5)
328 img = dist.transform_image(img)
329 print dist.get_parameters_determined_by_complexity(0.4)
330 pylab.imshow(img)
331 pylab.show()
332
333 def _complexity_tests():
334 imgpath = os.path.join(_TEST_DIR, "d.png")
335 dist = LocalElasticDistorter((32,32))
336 orig_img = _load_image(imgpath)
337 html_content = '''<html><body>Original:<br/><img src='d.png'>'''
338 for complexity in numpy.arange(0.0, 1.1, 0.1):
339 html_content += '<br/>Complexity: ' + str(complexity) + '<br/>'
340 for i in range(10):
341 t1 = time.time()
342 dist.regenerate_parameters(complexity)
343 t2 = time.time()
344 print "diff", t2-t1
345 img = dist.transform_image(orig_img)
346 filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
347 new_path = os.path.join(_TEST_DIR, filename)
348 _save_image(img, new_path)
349 html_content += '<img src="' + filename + '">'
350 html_content += "</body></html>"
351 html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
352 html_file.write(html_content)
353 html_file.close()
354
355 def _complexity_benchmark():
356 imgpath = os.path.join(_TEST_DIR, "d.png")
357 dist = LocalElasticDistorter((32,32))
358 orig_img = _load_image(imgpath)
359
360 for cpx in (0.21, 0.35):
361 # time the first 10
362 t1 = time.time()
363 for i in range(10):
364 dist.regenerate_parameters(cpx)
365 img = dist.transform_image(orig_img)
366 t2 = time.time()
367
368 print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10
369
370 # time the next 40
371 t1 = time.time()
372 for i in range(40):
373 dist.regenerate_parameters(cpx)
374 img = dist.transform_image(orig_img)
375 t2 = time.time()
376
377 print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40
378
379 # time the next 50
380 t1 = time.time()
381 for i in range(50):
382 dist.regenerate_parameters(cpx)
383 img = dist.transform_image(orig_img)
384 t2 = time.time()
385
386 print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50
387
388 # time the next 1000
389 t1 = time.time()
390 for i in range(1000):
391 dist.regenerate_parameters(cpx)
392 img = dist.transform_image(orig_img)
393 t2 = time.time()
394
395 print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
396
397 # time the next 1000 with old complexity
398 t1 = time.time()
399 for i in range(1000):
400 dist.regenerate_parameters(0.21)
401 img = dist.transform_image(orig_img)
402 t2 = time.time()
403
404 print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
405
406
407
408
409 def _save_image(img, path):
410 img2 = Image.fromarray((img * 255).astype('uint8'), "L")
411 img2.save(path)
412
413 # TODO: reformat to follow new class... it function of complexity now
414 '''
415 def _distorter_tests():
416 #import pylab
417 #pylab.imshow(img)
418 #pylab.show()
419
420 for letter in ("d", "a", "n", "o"):
421 img = _load_image("tests/" + letter + ".png")
422 for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
423 for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
424 id = LocalElasticDistorter((32,32))
425 img2 = id.distort_image(img)
426 img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
427 img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
428 '''
429
430 def _benchmark():
431 img = _load_image("tests/d.png")
432 dist = LocalElasticDistorter((32,32))
433 dist.regenerate_parameters(0.0)
434 import time
435 t1 = time.time()
436 for i in range(10000):
437 if i % 1000 == 0:
438 print "-"
439 dist.distort_image(img)
440 t2 = time.time()
441 print "t2-t1", t2-t1
442 print "avg", 10000/(t2-t1)
443
444 if __name__ == '__main__':
445 import time
446 import pylab
447 import Image
448 import os.path
449 #_distorter_tests()
450 #_benchmark()
451 #_specific_test()
452 #_complexity_tests()
453 _complexity_benchmark()
454
455
456