Mercurial > ift6266
comparison data_generation/transformations/local_elastic_distortions.py @ 167:1f5937e9e530
More moves - transformations into data_generation, added "deep" folder
author | Dumitru Erhan <dumitru.erhan@gmail.com> |
---|---|
date | Fri, 26 Feb 2010 14:15:38 -0500 |
parents | transformations/local_elastic_distortions.py@b3d76ebf2fac |
children |
comparison
equal
deleted
inserted
replaced
166:17ae5a1a4dd1 | 167:1f5937e9e530 |
---|---|
1 #!/usr/bin/python | |
2 # coding: utf-8 | |
3 | |
4 ''' | |
5 Implementation of elastic distortions as described in | |
6 Simard, Steinkraus, Platt, "Best Practices for Convolutional | |
7 Neural Networks Applied to Visual Document Analysis", 2003 | |
8 | |
9 Author: François Savard | |
10 Date: Fall 2009, revised Winter 2010 | |
11 | |
12 Usage: create the Distorter with proper alpha, sigma etc. | |
13 Then each time you want to change the distortion field applied, | |
14 call regenerate_field(). | |
15 | |
16 (The point behind this is that regeneration takes some time, | |
17 so we better reuse the fields a few times) | |
18 ''' | |
19 | |
20 import sys | |
21 import math | |
22 import numpy | |
23 import numpy.random | |
24 import scipy.signal # convolve2d | |
25 | |
26 _TEST_DIR = "/u/savardf/ift6266/debug_images/" | |
27 | |
28 def _raw_zeros(size): | |
29 return [[0 for i in range(size[1])] for j in range(size[0])] | |
30 | |
class ElasticDistortionParams():
    '''
    Container for one elastic distortion field: the alpha/sigma
    parameters (as in Simard et al. 2003) plus the precomputed
    corner-index and bilinear-ratio matrices used to apply it.
    '''

    def __init__(self, image_size=(32,32), alpha=0.0, sigma=0.0):
        self.image_size = image_size
        self.alpha = alpha
        self.sigma = sigma

        h, w = self.image_size

        def fresh_grid():
            # plain nested lists of zeros; filled later with the
            # row/column indices used for numpy fancy indexing
            return [[0 for _ in range(w)] for _ in range(h)]

        # row and column indices of the four neighbouring source pixels
        # (top-left, top-right, bottom-left, bottom-right)
        self.matrix_tl_corners_rows = fresh_grid()
        self.matrix_tl_corners_cols = fresh_grid()

        self.matrix_tr_corners_rows = fresh_grid()
        self.matrix_tr_corners_cols = fresh_grid()

        self.matrix_bl_corners_rows = fresh_grid()
        self.matrix_bl_corners_cols = fresh_grid()

        self.matrix_br_corners_rows = fresh_grid()
        self.matrix_br_corners_cols = fresh_grid()

        # precomputed ratios for the bilinear interpolation,
        # one weight matrix per corner
        self.matrix_tl_multiply = numpy.zeros((h,w))
        self.matrix_tr_multiply = numpy.zeros((h,w))
        self.matrix_bl_multiply = numpy.zeros((h,w))
        self.matrix_br_multiply = numpy.zeros((h,w))

    def alpha_sigma(self):
        """Return the distortion parameters as a [alpha, sigma] list."""
        return [self.alpha, self.sigma]
60 | |
class LocalElasticDistorter():
    '''
    Applies local elastic distortions to images, as in Simard,
    Steinkraus & Platt, "Best Practices for Convolutional Neural
    Networks Applied to Visual Document Analysis", 2003.

    Usage: call regenerate_parameters(complexity) to pick a distortion
    field, then transform_image(image) to apply it. Since field
    generation is slow, up to `to_precompute_per_complexity` fields are
    cached per complexity decile and afterwards reused at random.
    '''

    def __init__(self, image_size=(32,32)):
        # (height, width) of the images this distorter will transform
        self.image_size = image_size

        # current complexity, both as a decile index (0..9) and as the
        # corresponding floored float (0.0, 0.1, ..., 0.9)
        self.current_complexity_10 = 0
        self.current_complexity = 0

        # number of precomputed fields
        # (principle: as complexity doesn't change often, we can
        # precompute a certain number of fields for a given complexity,
        # each with its own parameters. That way, we have good
        # randomization, but we're much faster).
        self.to_precompute_per_complexity = 50

        # Both use ElasticDistortionParams
        self.current_params = None
        self.precomputed_params = [[] for i in range(10)]

        # cached Gaussian blur kernel; regenerated when sigma changes
        self.kernel_size = None
        self.kernel = None

        # set some defaults
        self.regenerate_parameters(0.0)

    def get_settings_names(self):
        # no externally-tweakable settings: everything derives
        # from complexity
        return []

    def _floor_complexity(self, complexity):
        # floor complexity to its decile, e.g. 0.37 -> 0.3
        return self._to_complexity_10(complexity) / 10.0

    def _to_complexity_10(self, complexity):
        # map complexity in [0,1] to an integer decile index in [0,9]
        return min(9, max(0, int(complexity * 10)))

    def regenerate_parameters(self, complexity):
        '''
        Select the distortion field used by transform_image().

        While fewer than to_precompute_per_complexity fields exist for
        this complexity decile, a fresh field is generated and cached;
        afterwards a cached one is picked at random.
        '''
        complexity_10 = self._to_complexity_10(complexity)

        if complexity_10 != self.current_complexity_10:
            self.current_complexity_10 = complexity_10
            self.current_complexity = self._floor_complexity(complexity)

        # fix: was "<=", which precomputed one field more than intended
        if len(self.precomputed_params[complexity_10]) < self.to_precompute_per_complexity:
            # not yet enough params generated, produce one more
            # and append to list
            new_params = self._initialize_new_params()
            new_params = self._generate_fields(new_params)
            self.current_params = new_params
            self.precomputed_params[complexity_10].append(new_params)
        else:
            # if we have enough precomputed fields, just select one
            # at random and set parameters to match what they were
            # when the field was generated
            idx = numpy.random.randint(0, len(self.precomputed_params[complexity_10]))
            self.current_params = self.precomputed_params[complexity_10][idx]

        # don't return anything, to avoid storing deterministic parameters
        return [] # self.current_params.alpha_sigma()

    def get_parameters_determined_by_complexity(self, complexity):
        '''Return [alpha, sigma] as they would be for this complexity.'''
        # fix: _floor_complexity is a method and must be called through
        # self (was a bare name -> NameError)
        tmp_params = self._initialize_new_params(self._floor_complexity(complexity))
        return tmp_params.alpha_sigma()

    def get_settings_names_determined_by_complexity(self, complexity):
        return ['alpha', 'sigma']

    # adapted from http://blenderartists.org/forum/showthread.php?t=163361
    def _gen_gaussian_kernel(self, sigma):
        '''
        (Re)generate the normalized Gaussian blur kernel for `sigma`,
        caching it so repeated calls with the same sigma are free.
        '''
        # the kernel size can change DRAMATICALLY the time
        # for the blur operation... so even though results are better
        # with a bigger kernel, we need to compromise here
        # 1*s is very different from 2*s, but there's not much difference
        # between 2*s and 4*s
        ks = self.kernel_size
        s = sigma
        target_ks = (1.5*s, 1.5*s)
        if not ks is None and ks[0] == target_ks[0] and ks[1] == target_ks[1]:
            # kernel size is good, ok, no need to regenerate
            return
        self.kernel_size = target_ks
        h,w = self.kernel_size
        a,b = h/2.0, w/2.0
        # fix: grid bounds must be integers (older numpy silently
        # truncated the floats; modern numpy raises)
        y,x = numpy.ogrid[0:int(w), 0:int(h)]
        gauss = numpy.exp(-numpy.square((x-a)/s))*numpy.exp(-numpy.square((y-b)/s))
        # Normalize so we don't reduce image intensity
        self.kernel = gauss/gauss.sum()

    def _gen_distortion_field(self, params):
        '''
        Generate one smooth random displacement field of shape
        image_size: uniform noise in [-1,1], blurred with the Gaussian
        kernel, then scaled by alpha.
        '''
        self._gen_gaussian_kernel(params.sigma)

        # we pad by kernel_size on every side so blurring with the
        # kernel produces a smoother result on borders.
        # fixes: kernel sizes are floats and must be truncated before
        # being used as sizes/indices; the original also swapped the
        # row/column image dimensions and padded only one side, which
        # only happened to work for square images
        ks0 = int(self.kernel_size[0])
        ks1 = int(self.kernel_size[1])
        sz0 = self.image_size[0] + 2*ks0
        sz1 = self.image_size[1] + 2*ks1
        field = numpy.random.uniform(-1.0, 1.0, (sz0, sz1))
        field = scipy.signal.convolve2d(field, self.kernel, mode='same')

        # crop only image_size in the middle
        field = field[ks0:ks0+self.image_size[0], ks1:ks1+self.image_size[1]]

        return params.alpha * field


    def _initialize_new_params(self, complexity=None):
        '''Create ElasticDistortionParams with alpha/sigma derived
        from complexity (None means "use the current complexity").'''
        # fix: compare against None explicitly; "not complexity" also
        # discarded a legitimate complexity of 0.0
        if complexity is None:
            complexity = self.current_complexity

        params = ElasticDistortionParams(self.image_size)

        # cube root so complexity progresses a bit faster at the low
        # end while keeping the extremes of 0.0 and 1.0
        complexity = complexity ** (1./3.)

        # the smaller the alpha, the closest the pixels are fetched
        # a max of 10 is reasonable
        params.alpha = complexity * 10.0

        # the bigger the sigma, the smoother is the distortion
        # max of 1 is "reasonable", but produces VERY noisy results
        # And the bigger the sigma, the bigger the blur kernel, and the
        # slower the field generation, btw.
        params.sigma = 10.0 - (7.0 * complexity)

        return params

    def _generate_fields(self, params):
        '''
        Here's how the code works:
        - We first generate "distortion fields" for x and y with these steps:
            - Uniform noise over [-1, 1] in a matrix of size (h,w)
            - Blur with a Gaussian kernel of spread sigma
            - Multiply by alpha
        - Then (conceptually) to compose the distorted image, we loop over each pixel
            of the new image and use the corresponding x and y distortions
            (from the matrices generated above) to identify pixels
            of the old image from which we fetch color data. As the
            coordinates are not integer, we interpolate between the
            4 nearby pixels (top left, top right etc.).
        - That's just conceptually. Here I'm using matrix operations
            to speed up the computation. I first identify the 4 nearby
            pixels in the old image for each pixel in the distorted image.
            I can then use them as "fancy indices" to extract the proper
            pixels for each new pixel.
        - Then I multiply those extracted nearby points by precomputed
            ratios for the bilinear interpolation.
        '''

        p = params

        dist_fields = [None, None]
        dist_fields[0] = self._gen_distortion_field(params)
        dist_fields[1] = self._gen_distortion_field(params)

        #pylab.imshow(dist_fields[0])
        #pylab.show()

        # regenerate distortion index matrices
        # "_rows" are row indices
        # "_cols" are column indices
        # (separated due to the way fancy indexing works in numpy)
        h,w = p.image_size

        for y in range(h):
            for x in range(w):
                distort_x = dist_fields[0][y,x]
                distort_y = dist_fields[1][y,x]

                # the "target" is the coordinate we fetch color data from
                # (in the original image)
                # target_left and _top are the rounded coordinate on the
                # left/top of this target (float) coordinate
                target_pixel = (y+distort_y, x+distort_x)

                target_left = int(math.floor(x + distort_x))
                target_top = int(math.floor(y + distort_y))

                index_tl = [target_top, target_left]
                index_tr = [target_top, target_left+1]
                index_bl = [target_top+1, target_left]
                index_br = [target_top+1, target_left+1]

                # x_ratio is the ratio of importance of left pixels
                # y_ratio is the same for top pixels
                # (in bilinear combination)
                y_ratio = 1.0 - (target_pixel[0] - target_top)
                x_ratio = 1.0 - (target_pixel[1] - target_left)

                # We use a default background color of 0 for displacements
                # outside of boundaries of the image.

                # if top left outside bounds
                if index_tl[0] < 0 or index_tl[0] >= h or index_tl[1] < 0 or index_tl[1] >= w:
                    p.matrix_tl_corners_rows[y][x] = 0
                    p.matrix_tl_corners_cols[y][x] = 0
                    p.matrix_tl_multiply[y,x] = 0
                else:
                    p.matrix_tl_corners_rows[y][x] = index_tl[0]
                    p.matrix_tl_corners_cols[y][x] = index_tl[1]
                    p.matrix_tl_multiply[y,x] = x_ratio*y_ratio

                # if top right outside bounds
                if index_tr[0] < 0 or index_tr[0] >= h or index_tr[1] < 0 or index_tr[1] >= w:
                    p.matrix_tr_corners_rows[y][x] = 0
                    p.matrix_tr_corners_cols[y][x] = 0
                    p.matrix_tr_multiply[y,x] = 0
                else:
                    p.matrix_tr_corners_rows[y][x] = index_tr[0]
                    p.matrix_tr_corners_cols[y][x] = index_tr[1]
                    p.matrix_tr_multiply[y,x] = (1.0-x_ratio)*y_ratio

                # if bottom left outside bounds
                if index_bl[0] < 0 or index_bl[0] >= h or index_bl[1] < 0 or index_bl[1] >= w:
                    p.matrix_bl_corners_rows[y][x] = 0
                    p.matrix_bl_corners_cols[y][x] = 0
                    p.matrix_bl_multiply[y,x] = 0
                else:
                    p.matrix_bl_corners_rows[y][x] = index_bl[0]
                    p.matrix_bl_corners_cols[y][x] = index_bl[1]
                    p.matrix_bl_multiply[y,x] = x_ratio*(1.0-y_ratio)

                # if bottom right outside bounds
                if index_br[0] < 0 or index_br[0] >= h or index_br[1] < 0 or index_br[1] >= w:
                    p.matrix_br_corners_rows[y][x] = 0
                    p.matrix_br_corners_cols[y][x] = 0
                    p.matrix_br_multiply[y,x] = 0
                else:
                    p.matrix_br_corners_rows[y][x] = index_br[0]
                    p.matrix_br_corners_cols[y][x] = index_br[1]
                    p.matrix_br_multiply[y,x] = (1.0-x_ratio)*(1.0-y_ratio)

        # not really necessary, but anyway
        return p

    def transform_image(self, image):
        '''Apply the currently selected distortion field to `image`
        (a (h,w) array) via precomputed bilinear interpolation.'''
        p = self.current_params

        # index pixels to get the 4 corners for bilinear combination
        tl_pixels = image[p.matrix_tl_corners_rows, p.matrix_tl_corners_cols]
        tr_pixels = image[p.matrix_tr_corners_rows, p.matrix_tr_corners_cols]
        bl_pixels = image[p.matrix_bl_corners_rows, p.matrix_bl_corners_cols]
        br_pixels = image[p.matrix_br_corners_rows, p.matrix_br_corners_cols]

        # bilinear ratios, elemwise multiply
        tl_pixels = numpy.multiply(tl_pixels, p.matrix_tl_multiply)
        tr_pixels = numpy.multiply(tr_pixels, p.matrix_tr_multiply)
        bl_pixels = numpy.multiply(bl_pixels, p.matrix_bl_multiply)
        br_pixels = numpy.multiply(br_pixels, p.matrix_br_multiply)

        # sum to finish bilinear combination
        return numpy.sum([tl_pixels,tr_pixels,bl_pixels,br_pixels], axis=0).astype(numpy.float32)
312 | |
313 # TESTS ---------------------------------------------------------------------- | |
314 | |
def _load_image(filepath):
    """Load an image file as a grayscale float array scaled to [0, 1].

    Color images are collapsed to grayscale with fixed RGB(A) weights;
    the alpha channel, if present, is ignored.
    """
    # weights for R, G, B and (ignored) alpha channels
    _RGB_TO_GRAYSCALE = [0.3, 0.59, 0.11, 0.0]
    img = numpy.asarray(Image.open(filepath))
    if len(img.shape) > 2:
        # multi-channel image: weighted sum over the channel axis
        img = (img * _RGB_TO_GRAYSCALE).sum(axis=2)
    return (img / 255.0).astype('float')
322 | |
def _specific_test():
    '''Manual smoke test: distort one image at complexity 0.5 and show it.

    Requires PIL (Image) and pylab, imported under __main__, and a
    test image at _TEST_DIR/d.png.
    '''
    imgpath = os.path.join(_TEST_DIR, "d.png")
    img = _load_image(imgpath)
    dist = LocalElasticDistorter((32,32))
    print dist.regenerate_parameters(0.5)
    img = dist.transform_image(img)
    # NOTE(review): this passes a raw complexity (0.4); the method is
    # expected to floor it to its decile internally
    print dist.get_parameters_determined_by_complexity(0.4)
    pylab.imshow(img)
    pylab.show()
332 | |
def _complexity_tests():
    '''Render 10 distorted samples of a test image at each complexity
    in 0.0..1.0 and write an HTML gallery to _TEST_DIR/complexity.html.

    Requires PIL (imported under __main__) and _TEST_DIR/d.png.
    '''
    imgpath = os.path.join(_TEST_DIR, "d.png")
    dist = LocalElasticDistorter((32,32))
    orig_img = _load_image(imgpath)
    html_content = '''<html><body>Original:<br/><img src='d.png'>'''
    for complexity in numpy.arange(0.0, 1.1, 0.1):
        html_content += '<br/>Complexity: ' + str(complexity) + '<br/>'
        for i in range(10):
            # time field regeneration: the expensive step being cached
            t1 = time.time()
            dist.regenerate_parameters(complexity)
            t2 = time.time()
            print "diff", t2-t1
            img = dist.transform_image(orig_img)
            filename = "complexity_" + str(complexity) + "_" + str(i) + ".png"
            new_path = os.path.join(_TEST_DIR, filename)
            _save_image(img, new_path)
            html_content += '<img src="' + filename + '">'
    html_content += "</body></html>"
    html_file = open(os.path.join(_TEST_DIR, "complexity.html"), "w")
    html_file.write(html_content)
    html_file.close()
354 | |
def _complexity_benchmark():
    '''Benchmark regenerate_parameters() + transform_image() throughput.

    The first batches per complexity are slow (fields still being
    precomputed and cached); once the per-decile cache is full, calls
    should be much faster as cached fields are reused. The successive
    timing windows below are meant to show that effect.
    '''
    imgpath = os.path.join(_TEST_DIR, "d.png")
    dist = LocalElasticDistorter((32,32))
    orig_img = _load_image(imgpath)

    for cpx in (0.21, 0.35):
        # time the first 10
        t1 = time.time()
        for i in range(10):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "first 10, total = ", t2-t1, ", avg=", (t2-t1)/10

        # time the next 40
        t1 = time.time()
        for i in range(40):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 40, total = ", t2-t1, ", avg=", (t2-t1)/40

        # time the next 50
        t1 = time.time()
        for i in range(50):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 50, total = ", t2-t1, ", avg=", (t2-t1)/50

        # time the next 1000 (cache should now be warm)
        t1 = time.time()
        for i in range(1000):
            dist.regenerate_parameters(cpx)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000

        # time the next 1000 with old complexity (fully cached)
        t1 = time.time()
        for i in range(1000):
            dist.regenerate_parameters(0.21)
            img = dist.transform_image(orig_img)
        t2 = time.time()

        print "next 1000, total = ", t2-t1, ", avg=", (t2-t1)/1000
405 | |
406 | |
407 | |
408 | |
def _save_image(img, path):
    """Save a [0, 1] float grayscale array as an 8-bit image at `path`."""
    eight_bit = (img * 255).astype('uint8')
    Image.fromarray(eight_bit, "L").save(path)
412 | |
# TODO: reformat this disabled test to follow the new class API
# (distortion parameters are now a function of complexity, and the
# method was renamed distort_image -> transform_image)
'''
def _distorter_tests():
    #import pylab
    #pylab.imshow(img)
    #pylab.show()

    for letter in ("d", "a", "n", "o"):
        img = _load_image("tests/" + letter + ".png")
        for alpha in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
            for sigma in (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0):
                id = LocalElasticDistorter((32,32))
                img2 = id.distort_image(img)
                img2 = Image.fromarray((img2 * 255).astype('uint8'), "L")
                img2.save("tests/"+letter+"_alpha"+str(alpha)+"_sigma"+str(sigma)+".png")
'''
429 | |
430 def _benchmark(): | |
431 img = _load_image("tests/d.png") | |
432 dist = LocalElasticDistorter((32,32)) | |
433 dist.regenerate_parameters(0.0) | |
434 import time | |
435 t1 = time.time() | |
436 for i in range(10000): | |
437 if i % 1000 == 0: | |
438 print "-" | |
439 dist.distort_image(img) | |
440 t2 = time.time() | |
441 print "t2-t1", t2-t1 | |
442 print "avg", 10000/(t2-t1) | |
443 | |
if __name__ == '__main__':
    # test-only imports: kept here so the module itself does not
    # depend on PIL (Image) or pylab when used as a library
    import time
    import pylab
    import Image
    import os.path
    # uncomment one of the drivers below to run it
    #_distorter_tests()
    #_benchmark()
    #_specific_test()
    #_complexity_tests()
    _complexity_benchmark()
454 | |
455 | |
456 |