ift6266: comparison of deep/convolutional_dae/scdae.py @ 298:a222af1d0598
- Adapt scdae to the input_shape change in pynnet
- Use the proper dataset in run_exp
author:   Arnaud Bergeron <abergeron@gmail.com>
date:     Mon, 29 Mar 2010 17:36:22 -0400
parents:  8108d271c30c
children: be45e7db7cd4
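The first bullet refers to a pynnet change where a layer's image shape is no longer fixed in its constructor but is passed to `build()` through an optional `input_shape` argument. A minimal before/after sketch, with constructor arguments and shapes taken from this file (the direct `ConvAutoencoder` import and the example batch size of 32 are assumptions for illustration):

```python
import numpy
import theano.tensor as T
from pynnet import ConvAutoencoder  # the file uses `from pynnet import *`; this path is assumed

x = T.tensor4('x')

# Before the change: the image shape was fixed at construction time.
#   layer = ConvAutoencoder(filter_size=(5, 5), num_filt=4, num_in=1,
#                           noisyness=0.2, dtype=numpy.float32,
#                           image_shape=(32, 1, 32, 32))
#   layer.build(x)

# After the change: the constructor no longer takes image_shape; the shape
# is supplied (optionally) at build time instead.
layer = ConvAutoencoder(filter_size=(5, 5), num_filt=4, num_in=1,
                        noisyness=0.2, dtype=numpy.float32)
layer.build(x, input_shape=(32, 1, 32, 32))  # shape known: specialized graph
layer.build(x)                               # shape omitted: generic graph
```

This is why `cdae_out_size` and the `img_shape`/`in_size` plumbing disappear in the new revision: once `build()` knows its input shape, `scdae_net` reads `ls.output_shape` directly, and `build_funcs` builds the network twice, once with `input_shape` for the optimized (`_opt`) functions and once without for the shape-generic (`_reg`) ones.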
295:a6b6b1140de9 | 298:a222af1d0598 |
---|---|
1 from pynnet import * | 1 from pynnet import * |
2 # use hacks also | |
3 from pynnet.utils import * | |
4 | 2 |
5 import numpy | 3 import numpy |
6 import theano | 4 import theano |
7 import theano.tensor as T | 5 import theano.tensor as T |
8 | 6 |
9 from itertools import izip | 7 from itertools import izip |
10 from ift6266.utils.seriestables import * | 8 from ift6266.utils.seriestables import * |
11 | 9 |
12 class cdae(LayerStack): | 10 class cdae(LayerStack): |
13 def __init__(self, filter_size, num_filt, num_in, subsampling, corruption, | 11 def __init__(self, filter_size, num_filt, num_in, subsampling, corruption, |
14 dtype, img_shape): | 12 dtype): |
15 LayerStack.__init__(self, [ConvAutoencoder(filter_size=filter_size, | 13 LayerStack.__init__(self, [ConvAutoencoder(filter_size=filter_size, |
16 num_filt=num_filt, | 14 num_filt=num_filt, |
17 num_in=num_in, | 15 num_in=num_in, |
18 noisyness=corruption, | 16 noisyness=corruption, |
19 dtype=dtype, | 17 dtype=dtype), |
20 image_shape=img_shape), | |
21 MaxPoolLayer(subsampling)]) | 18 MaxPoolLayer(subsampling)]) |
22 | 19 |
23 def build(self, input): | 20 def build(self, input, input_shape=None): |
24 LayerStack.build(self, input) | 21 LayerStack.build(self, input, input_shape) |
25 self.cost = self.layers[0].cost | 22 self.cost = self.layers[0].cost |
26 | 23 self.pre_params = self.layers[0].pre_params |
27 def cdae_out_size(in_size, filt_size, num_filt, num_in, subs): | 24 |
28 out = [None] * 3 | 25 def scdae(filter_sizes, num_filts, subsamplings, corruptions, dtype): |
29 out[0] = num_filt | |
30 out[1] = (in_size[1]-filt_size[0]+1)/subs[0] | |
31 out[2] = (in_size[2]-filt_size[1]+1)/subs[1] | |
32 return out | |
33 | |
34 def scdae(in_size, num_in, filter_sizes, num_filts, | |
35 subsamplings, corruptions, dtype): | |
36 layers = [] | 26 layers = [] |
37 old_nfilt = 1 | 27 old_nfilt = 1 |
38 for fsize, nfilt, subs, corr in izip(filter_sizes, num_filts, | 28 for fsize, nfilt, subs, corr in izip(filter_sizes, num_filts, |
39 subsamplings, corruptions): | 29 subsamplings, corruptions): |
40 layers.append(cdae(fsize, nfilt, old_nfilt, subs, corr, dtype, | 30 layers.append(cdae(fsize, nfilt, old_nfilt, subs, corr, dtype)) |
41 (num_in, in_size[0], in_size[1], in_size[2]))) | |
42 in_size = cdae_out_size(in_size, fsize, nfilt, old_nfilt, subs) | |
43 old_nfilt = nfilt | 31 old_nfilt = nfilt |
44 return LayerStack(layers), in_size | 32 return LayerStack(layers) |
45 | 33 |
46 def mlp(layer_sizes, dtype): | 34 def mlp(layer_sizes, dtype): |
47 layers = [] | 35 layers = [] |
48 old_size = layer_sizes[0] | 36 old_size = layer_sizes[0] |
49 for size in layer_sizes[1:]: | 37 for size in layer_sizes[1:]: |
51 dtype=dtype)) | 39 dtype=dtype)) |
52 old_size = size | 40 old_size = size |
53 return LayerStack(layers) | 41 return LayerStack(layers) |
54 | 42 |
55 def scdae_net(in_size, num_in, filter_sizes, num_filts, subsamplings, | 43 def scdae_net(in_size, num_in, filter_sizes, num_filts, subsamplings, |
56 corruptions, layer_sizes, out_size, dtype, batch_size): | 44 corruptions, layer_sizes, out_size, dtype): |
57 rl1 = ReshapeLayer((None,)+in_size) | 45 rl1 = ReshapeLayer((None,)+in_size) |
58 ls, outs = scdae(in_size, num_in, filter_sizes, num_filts, subsamplings, | 46 ls = scdae(num_in, filter_sizes, num_filts, subsamplings, |
59 corruptions, dtype) | 47 corruptions, dtype) |
60 outs = numpy.prod(outs) | 48 x = T.tensor4() |
49 ls.build(x, input_shape=(1,)+in_size) | |
50 outs = numpy.prod(ls.output_shape) | |
61 rl2 = ReshapeLayer((None, outs)) | 51 rl2 = ReshapeLayer((None, outs)) |
62 layer_sizes = [outs]+layer_sizes | 52 layer_sizes = [outs]+layer_sizes |
63 ls2 = mlp(layer_sizes, dtype) | 53 ls2 = mlp(layer_sizes, dtype) |
64 lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax) | 54 lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax) |
65 return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll) | 55 return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll) |
66 | 56 |
67 def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs, | 57 def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs, |
68 noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr): | 58 noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr): |
69 | 59 |
70 n = scdae_net((1,)+img_size, batch_size, filter_sizes, num_filters, subs, | 60 n = scdae_net((1,)+img_size, batch_size, filter_sizes, num_filters, subs, |
71 noise, mlp_sizes, out_size, dtype, batch_size) | 61 noise, mlp_sizes, out_size, dtype) |
72 | 62 |
73 n.save('start.net') | 63 n.save('start.net') |
74 | 64 |
75 x = T.fmatrix('x') | 65 x = T.fmatrix('x') |
76 y = T.ivector('y') | 66 y = T.ivector('y') |
77 | 67 |
78 def pretrainfunc(net, alpha): | 68 def pretrainfunc(net, alpha): |
79 up = trainers.get_updates(net.params, net.cost, alpha) | 69 up = trainers.get_updates(net.pre_params, net.cost, alpha) |
80 return theano.function([x], net.cost, updates=up) | 70 return theano.function([x], net.cost, updates=up) |
81 | 71 |
82 def trainfunc(net, alpha): | 72 def trainfunc(net, alpha): |
83 up = trainers.get_updates(net.params, net.cost, alpha) | 73 up = trainers.get_updates(net.params, net.cost, alpha) |
84 return theano.function([x, y], net.cost, updates=up) | 74 return theano.function([x, y], net.cost, updates=up) |
85 | 75 |
86 n.build(x, y) | 76 n.build(x, y, input_shape=(bsize, 1)+img_size) |
87 pretrain_funcs_opt = [pretrainfunc(l, pretrain_lr) for l in n.layers[1].layers] | 77 pretrain_funcs_opt = [pretrainfunc(l, pretrain_lr) for l in n.layers[1].layers] |
88 trainf_opt = trainfunc(n, train_lr) | 78 trainf_opt = trainfunc(n, train_lr) |
89 evalf_opt = theano.function([x, y], errors.class_error(n.output, y)) | 79 evalf_opt = theano.function([x, y], errors.class_error(n.output, y)) |
90 | 80 |
91 clear_imgshape(n) | |
92 n.build(x, y) | 81 n.build(x, y) |
93 pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers] | 82 pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers] |
94 trainf_reg = trainfunc(n, 0.1) | 83 trainf_reg = trainfunc(n, 0.1) |
95 evalf_reg = theano.function([x, y], errors.class_error(n.output, y)) | 84 evalf_reg = theano.function([x, y], errors.class_error(n.output, y)) |
96 | 85 |
119 def do_pretrain(pretrain_funcs, pretrain_epochs, serie): | 108 def do_pretrain(pretrain_funcs, pretrain_epochs, serie): |
120 for layer, f in enumerate(pretrain_funcs): | 109 for layer, f in enumerate(pretrain_funcs): |
121 for epoch in xrange(pretrain_epochs): | 110 for epoch in xrange(pretrain_epochs): |
122 serie.append((layer, epoch), f()) | 111 serie.append((layer, epoch), f()) |
123 | 112 |
124 def massage_funcs(train_it, dset, batch_size, pretrain_funcs, trainf, evalf): | 113 def massage_funcs(pretrain_it, train_it, dset, batch_size, pretrain_funcs, |
114 trainf, evalf): | |
125 def pretrain_f(f): | 115 def pretrain_f(f): |
126 def res(): | 116 def res(): |
127 for x, y in train_it: | 117 for x, y in pretrain_it: |
128 yield f(x) | 118 yield f(x) |
129 it = res() | 119 it = res() |
130 return lambda: it.next() | 120 return lambda: it.next() |
131 | 121 |
132 pretrain_fs = map(pretrain_f, pretrain_funcs) | 122 pretrain_fs = map(pretrain_f, pretrain_funcs) |
194 index_names=('iter',), | 184 index_names=('iter',), |
195 title='Test error (class)') | 185 title='Test error (class)') |
196 | 186 |
197 return series | 187 return series |
198 | 188 |
199 def run_exp(state, channel): | |
200 from ift6266 import datasets | |
201 from sgd_opt import sgd_opt | |
202 import sys, time | |
203 | |
204 # params: bsize, pretrain_lr, train_lr, nfilts1, nfilts2, nftils3, nfilts4 | |
205 # pretrain_rounds | |
206 | |
207 pylearn.version.record_versions(state, [theano,ift6266,pylearn]) | |
208 # TODO: maybe record pynnet version? | |
209 channel.save() | |
210 | |
211 dset = dataset.nist_all(1000) | |
212 | |
213 nfilts = [] | |
214 if state.nfilts1 != 0: | |
215 nfilts.append(state.nfilts1) | |
216 if state.nfilts2 != 0: | |
217 nfilts.append(state.nfilts2) | |
218 if state.nfilts3 != 0: | |
219 nfilts.append(state.nfilts3) | |
220 if state.nfilts4 != 0: | |
221 nfilts.append(state.nfilts4) | |
222 | |
223 fsizes = [(5,5)]*len(nfilts) | |
224 subs = [(2,2)]*len(nfilts) | |
225 noise = [state.noise]*len(nfilts) | |
226 | |
227 pretrain_funcs, trainf, evalf, net = build_funcs( | |
228 img_size=(32, 32), | |
229 batch_size=state.bsize, | |
230 filter_sizes=fsizes, | |
231 num_filters=nfilts, | |
232 subs=subs, | |
233 noise=noise, | |
234 mlp_sizes=[state.mlp_sz], | |
235 out_size=62, | |
236 dtype=numpy.float32, | |
237 pretrain_lr=state.pretrain_lr, | |
238 train_lr=state.train_lr) | |
239 | |
240 pretrain_fs, train, valid, test = massage_funcs( | |
241 state.bsize, dset, pretrain_funcs, trainf, evalf) | |
242 | |
243 series = create_series() | |
244 | |
245 do_pretrain(pretrain_fs, state.pretrain_rounds, series['recons_error']) | |
246 | |
247 sgd_opt(train, valid, test, training_epochs=100000, patience=10000, | |
248 patience_increase=2., improvement_threshold=0.995, | |
249 validation_frequency=2500, series=series, net=net) | |
250 | |
251 if __name__ == '__main__': | 189 if __name__ == '__main__': |
252 from ift6266 import datasets | 190 from ift6266 import datasets |
253 from sgd_opt import sgd_opt | 191 from sgd_opt import sgd_opt |
254 import sys, time | 192 import sys, time |
255 | 193 |
261 batch_size=batch_size, filter_sizes=[(5,5), (3,3)], | 199 batch_size=batch_size, filter_sizes=[(5,5), (3,3)], |
262 num_filters=[4, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2], | 200 num_filters=[4, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2], |
263 mlp_sizes=[500], out_size=10, dtype=numpy.float32, | 201 mlp_sizes=[500], out_size=10, dtype=numpy.float32, |
264 pretrain_lr=0.01, train_lr=0.1) | 202 pretrain_lr=0.01, train_lr=0.1) |
265 | 203 |
204 t_it = repeat_itf(dset.train, batch_size) | |
266 pretrain_fs, train, valid, test = massage_funcs( | 205 pretrain_fs, train, valid, test = massage_funcs( |
267 repeat_itf(dset.train, batch_size), | 206 t_it, t_it, dset, batch_size, |
268 dset, batch_size, | |
269 pretrain_funcs, trainf, evalf) | 207 pretrain_funcs, trainf, evalf) |
270 | 208 |
271 print "pretraining ...", | 209 print "pretraining ...", |
272 sys.stdout.flush() | 210 sys.stdout.flush() |
273 start = time.time() | 211 start = time.time() |