comparison deep/convolutional_dae/scdae.py @ 298:a222af1d0598

- Adapt scdae to the input_shape change in pynnet
- Use the proper dataset in run_exp
author Arnaud Bergeron <abergeron@gmail.com>
date Mon, 29 Mar 2010 17:36:22 -0400
parents 8108d271c30c
children be45e7db7cd4
comparison
equal deleted inserted replaced
295:a6b6b1140de9 298:a222af1d0598
1 from pynnet import * 1 from pynnet import *
2 # use hacks also
3 from pynnet.utils import *
4 2
5 import numpy 3 import numpy
6 import theano 4 import theano
7 import theano.tensor as T 5 import theano.tensor as T
8 6
9 from itertools import izip 7 from itertools import izip
10 from ift6266.utils.seriestables import * 8 from ift6266.utils.seriestables import *
11 9
12 class cdae(LayerStack): 10 class cdae(LayerStack):
13 def __init__(self, filter_size, num_filt, num_in, subsampling, corruption, 11 def __init__(self, filter_size, num_filt, num_in, subsampling, corruption,
14 dtype, img_shape): 12 dtype):
15 LayerStack.__init__(self, [ConvAutoencoder(filter_size=filter_size, 13 LayerStack.__init__(self, [ConvAutoencoder(filter_size=filter_size,
16 num_filt=num_filt, 14 num_filt=num_filt,
17 num_in=num_in, 15 num_in=num_in,
18 noisyness=corruption, 16 noisyness=corruption,
19 dtype=dtype, 17 dtype=dtype),
20 image_shape=img_shape),
21 MaxPoolLayer(subsampling)]) 18 MaxPoolLayer(subsampling)])
22 19
23 def build(self, input): 20 def build(self, input, input_shape=None):
24 LayerStack.build(self, input) 21 LayerStack.build(self, input, input_shape)
25 self.cost = self.layers[0].cost 22 self.cost = self.layers[0].cost
26 23 self.pre_params = self.layers[0].pre_params
27 def cdae_out_size(in_size, filt_size, num_filt, num_in, subs): 24
28 out = [None] * 3 25 def scdae(filter_sizes, num_filts, subsamplings, corruptions, dtype):
29 out[0] = num_filt
30 out[1] = (in_size[1]-filt_size[0]+1)/subs[0]
31 out[2] = (in_size[2]-filt_size[1]+1)/subs[1]
32 return out
33
34 def scdae(in_size, num_in, filter_sizes, num_filts,
35 subsamplings, corruptions, dtype):
36 layers = [] 26 layers = []
37 old_nfilt = 1 27 old_nfilt = 1
38 for fsize, nfilt, subs, corr in izip(filter_sizes, num_filts, 28 for fsize, nfilt, subs, corr in izip(filter_sizes, num_filts,
39 subsamplings, corruptions): 29 subsamplings, corruptions):
40 layers.append(cdae(fsize, nfilt, old_nfilt, subs, corr, dtype, 30 layers.append(cdae(fsize, nfilt, old_nfilt, subs, corr, dtype))
41 (num_in, in_size[0], in_size[1], in_size[2])))
42 in_size = cdae_out_size(in_size, fsize, nfilt, old_nfilt, subs)
43 old_nfilt = nfilt 31 old_nfilt = nfilt
44 return LayerStack(layers), in_size 32 return LayerStack(layers)
45 33
46 def mlp(layer_sizes, dtype): 34 def mlp(layer_sizes, dtype):
47 layers = [] 35 layers = []
48 old_size = layer_sizes[0] 36 old_size = layer_sizes[0]
49 for size in layer_sizes[1:]: 37 for size in layer_sizes[1:]:
51 dtype=dtype)) 39 dtype=dtype))
52 old_size = size 40 old_size = size
53 return LayerStack(layers) 41 return LayerStack(layers)
54 42
55 def scdae_net(in_size, num_in, filter_sizes, num_filts, subsamplings, 43 def scdae_net(in_size, num_in, filter_sizes, num_filts, subsamplings,
56 corruptions, layer_sizes, out_size, dtype, batch_size): 44 corruptions, layer_sizes, out_size, dtype):
57 rl1 = ReshapeLayer((None,)+in_size) 45 rl1 = ReshapeLayer((None,)+in_size)
58 ls, outs = scdae(in_size, num_in, filter_sizes, num_filts, subsamplings, 46 ls = scdae(num_in, filter_sizes, num_filts, subsamplings,
59 corruptions, dtype) 47 corruptions, dtype)
60 outs = numpy.prod(outs) 48 x = T.tensor4()
49 ls.build(x, input_shape=(1,)+in_size)
50 outs = numpy.prod(ls.output_shape)
61 rl2 = ReshapeLayer((None, outs)) 51 rl2 = ReshapeLayer((None, outs))
62 layer_sizes = [outs]+layer_sizes 52 layer_sizes = [outs]+layer_sizes
63 ls2 = mlp(layer_sizes, dtype) 53 ls2 = mlp(layer_sizes, dtype)
64 lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax) 54 lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax)
65 return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll) 55 return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll)
66 56
67 def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs, 57 def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs,
68 noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr): 58 noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr):
69 59
70 n = scdae_net((1,)+img_size, batch_size, filter_sizes, num_filters, subs, 60 n = scdae_net((1,)+img_size, batch_size, filter_sizes, num_filters, subs,
71 noise, mlp_sizes, out_size, dtype, batch_size) 61 noise, mlp_sizes, out_size, dtype)
72 62
73 n.save('start.net') 63 n.save('start.net')
74 64
75 x = T.fmatrix('x') 65 x = T.fmatrix('x')
76 y = T.ivector('y') 66 y = T.ivector('y')
77 67
78 def pretrainfunc(net, alpha): 68 def pretrainfunc(net, alpha):
79 up = trainers.get_updates(net.params, net.cost, alpha) 69 up = trainers.get_updates(net.pre_params, net.cost, alpha)
80 return theano.function([x], net.cost, updates=up) 70 return theano.function([x], net.cost, updates=up)
81 71
82 def trainfunc(net, alpha): 72 def trainfunc(net, alpha):
83 up = trainers.get_updates(net.params, net.cost, alpha) 73 up = trainers.get_updates(net.params, net.cost, alpha)
84 return theano.function([x, y], net.cost, updates=up) 74 return theano.function([x, y], net.cost, updates=up)
85 75
86 n.build(x, y) 76 n.build(x, y, input_shape=(bsize, 1)+img_size)
87 pretrain_funcs_opt = [pretrainfunc(l, pretrain_lr) for l in n.layers[1].layers] 77 pretrain_funcs_opt = [pretrainfunc(l, pretrain_lr) for l in n.layers[1].layers]
88 trainf_opt = trainfunc(n, train_lr) 78 trainf_opt = trainfunc(n, train_lr)
89 evalf_opt = theano.function([x, y], errors.class_error(n.output, y)) 79 evalf_opt = theano.function([x, y], errors.class_error(n.output, y))
90 80
91 clear_imgshape(n)
92 n.build(x, y) 81 n.build(x, y)
93 pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers] 82 pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers]
94 trainf_reg = trainfunc(n, 0.1) 83 trainf_reg = trainfunc(n, 0.1)
95 evalf_reg = theano.function([x, y], errors.class_error(n.output, y)) 84 evalf_reg = theano.function([x, y], errors.class_error(n.output, y))
96 85
119 def do_pretrain(pretrain_funcs, pretrain_epochs, serie): 108 def do_pretrain(pretrain_funcs, pretrain_epochs, serie):
120 for layer, f in enumerate(pretrain_funcs): 109 for layer, f in enumerate(pretrain_funcs):
121 for epoch in xrange(pretrain_epochs): 110 for epoch in xrange(pretrain_epochs):
122 serie.append((layer, epoch), f()) 111 serie.append((layer, epoch), f())
123 112
124 def massage_funcs(train_it, dset, batch_size, pretrain_funcs, trainf, evalf): 113 def massage_funcs(pretrain_it, train_it, dset, batch_size, pretrain_funcs,
114 trainf, evalf):
125 def pretrain_f(f): 115 def pretrain_f(f):
126 def res(): 116 def res():
127 for x, y in train_it: 117 for x, y in pretrain_it:
128 yield f(x) 118 yield f(x)
129 it = res() 119 it = res()
130 return lambda: it.next() 120 return lambda: it.next()
131 121
132 pretrain_fs = map(pretrain_f, pretrain_funcs) 122 pretrain_fs = map(pretrain_f, pretrain_funcs)
194 index_names=('iter',), 184 index_names=('iter',),
195 title='Test error (class)') 185 title='Test error (class)')
196 186
197 return series 187 return series
198 188
199 def run_exp(state, channel):
200 from ift6266 import datasets
201 from sgd_opt import sgd_opt
202 import sys, time
203
204 # params: bsize, pretrain_lr, train_lr, nfilts1, nfilts2, nfilts3, nfilts4
205 # pretrain_rounds
206
207 pylearn.version.record_versions(state, [theano,ift6266,pylearn])
208 # TODO: maybe record pynnet version?
209 channel.save()
210
211 dset = dataset.nist_all(1000)
212
213 nfilts = []
214 if state.nfilts1 != 0:
215 nfilts.append(state.nfilts1)
216 if state.nfilts2 != 0:
217 nfilts.append(state.nfilts2)
218 if state.nfilts3 != 0:
219 nfilts.append(state.nfilts3)
220 if state.nfilts4 != 0:
221 nfilts.append(state.nfilts4)
222
223 fsizes = [(5,5)]*len(nfilts)
224 subs = [(2,2)]*len(nfilts)
225 noise = [state.noise]*len(nfilts)
226
227 pretrain_funcs, trainf, evalf, net = build_funcs(
228 img_size=(32, 32),
229 batch_size=state.bsize,
230 filter_sizes=fsizes,
231 num_filters=nfilts,
232 subs=subs,
233 noise=noise,
234 mlp_sizes=[state.mlp_sz],
235 out_size=62,
236 dtype=numpy.float32,
237 pretrain_lr=state.pretrain_lr,
238 train_lr=state.train_lr)
239
240 pretrain_fs, train, valid, test = massage_funcs(
241 state.bsize, dset, pretrain_funcs, trainf, evalf)
242
243 series = create_series()
244
245 do_pretrain(pretrain_fs, state.pretrain_rounds, series['recons_error'])
246
247 sgd_opt(train, valid, test, training_epochs=100000, patience=10000,
248 patience_increase=2., improvement_threshold=0.995,
249 validation_frequency=2500, series=series, net=net)
250
251 if __name__ == '__main__': 189 if __name__ == '__main__':
252 from ift6266 import datasets 190 from ift6266 import datasets
253 from sgd_opt import sgd_opt 191 from sgd_opt import sgd_opt
254 import sys, time 192 import sys, time
255 193
261 batch_size=batch_size, filter_sizes=[(5,5), (3,3)], 199 batch_size=batch_size, filter_sizes=[(5,5), (3,3)],
262 num_filters=[4, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2], 200 num_filters=[4, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2],
263 mlp_sizes=[500], out_size=10, dtype=numpy.float32, 201 mlp_sizes=[500], out_size=10, dtype=numpy.float32,
264 pretrain_lr=0.01, train_lr=0.1) 202 pretrain_lr=0.01, train_lr=0.1)
265 203
204 t_it = repeat_itf(dset.train, batch_size)
266 pretrain_fs, train, valid, test = massage_funcs( 205 pretrain_fs, train, valid, test = massage_funcs(
267 repeat_itf(dset.train, batch_size), 206 t_it, t_it, dset, batch_size,
268 dset, batch_size,
269 pretrain_funcs, trainf, evalf) 207 pretrain_funcs, trainf, evalf)
270 208
271 print "pretraining ...", 209 print "pretraining ...",
272 sys.stdout.flush() 210 sys.stdout.flush()
273 start = time.time() 211 start = time.time()