annotate transformations/pipeline.py @ 24:010e826b41e8

Modifications to elastic distortions: fixed an important bug with distortions themselves (now result is much nicer visually), made interface to conform to Transformation standard, and added ability to save a certain amount of distortion fields to reuse them if complexity doesn't change
author fsavard <francois.savard@polymtl.ca>
date Fri, 29 Jan 2010 13:37:52 -0500
parents f6b6c74bb82f
children fdb0e0870fb4
rev   line source
10
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
1 from __future__ import with_statement
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
2
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
3 import sys, os
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
4 import numpy
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
5 import filetensor as ft
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
6 import random
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
7
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
8 BATCH_SIZE = 100
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
9
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
10 #import <modules> and stuff them in mods below
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
11
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
12 mods = []
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
13
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
14 # DANGER: HIGH VOLTAGE -- DO NOT EDIT BELOW THIS LINE
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
15 # -----------------------------------------------------------
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
16
15
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
17 outf = sys.argv[1]
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
18 paramsf = sys.argv[2]
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
19 dataf = '/data/lisa/data/nist/by_class/all/all_train_data.ft'
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
20 if len(sys.argv) >= 4:
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
21 dataf = sys.argv[3]
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
22
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
23 train_data = open(dataf, 'rb')
10
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
24
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
25 dim = tuple(ft._read_header(train_data)[3])
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
26
15
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
27 res_data = numpy.empty(dim, dtype=numpy.int8)
10
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
28
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
29 all_settings = ['complexity']
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
30
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
31 for mod in mods:
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
32 all_settings += mod.get_settings_names()
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
33
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
34 params = numpy.empty(((dim[0]/BATCH_SIZE)+1, len(all_settings)))
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
35
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
36 for i in xrange(0, dim[0], BATCH_SIZE):
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
37 train_data.seek(0)
15
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
38 imgs = ft.read(train_data, slice(i, i+BATCH_SIZE)).astype(numpy.float32)/255
10
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
39
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
40 complexity = random.random()
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
41 p = i/BATCH_SIZE
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
42 j = 1
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
43 for mod in mods:
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
44 par = mod.regenerate_parameters(complexity)
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
45 params[p, j:j+len(par)] = par
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
46 j += len(par)
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
47
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
48 for k in range(imgs.shape[0]):
15
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
49 c = imgs[k].reshape((32, 32))
10
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
50 for mod in mods:
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
51 c = mod.transform_image(c)
15
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
52 res_data[i+k] = c.reshape((1024,))*255
10
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
53
15
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
54 with open(outf, 'wb') as f:
10
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
55 ft.write(f, res_data)
faacc76d21c2 Basic new pipeline script for the images tranforms
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
56
15
f6b6c74bb82f Fix the datatypes.
Arnaud Bergeron <abergeron@gmail.com>
parents: 10
diff changeset
57 numpy.save(paramsf, params)