annotate deep/convolutional_dae/scdae.py @ 612:21d53fd07f6e

reviews AISTATS
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Mon, 20 Dec 2010 11:54:35 -0500
parents 5ddb1878dfbc
children
rev   line source
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
1 from pynnet import *
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
2
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
3 import numpy
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
4 import theano
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
5 import theano.tensor as T
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
6
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
7 from itertools import izip
288
80ee63c3e749 Add net saving (only the best model) and error saving using SeriesTable
Arnaud Bergeron <abergeron@gmail.com>
parents: 277
diff changeset
8 from ift6266.utils.seriestables import *
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
9
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
class cdae(LayerStack):
    """Two-layer stack: a ``ConvAutoencoder`` followed by a ``MaxPoolLayer``.

    The auto-encoder is created with a sigmoid non-linearity
    (``nlins.sigmoid``) and a cross-entropy error (``errors.cross_entropy``).
    After ``build`` the stack re-exports the auto-encoder's ``cost`` and
    ``pre_params`` so the stack can be pretrained directly.
    """

    def __init__(self, filter_size, num_filt, num_in, subsampling, corruption,
                 dtype):
        """Create the auto-encoder/pooling pair.

        filter_size, num_filt, num_in and dtype are forwarded unchanged to
        ``ConvAutoencoder``; ``corruption`` is passed as its ``noise``
        argument; ``subsampling`` is the sole argument of ``MaxPoolLayer``.
        """
        autoencoder = ConvAutoencoder(filter_size=filter_size,
                                      num_filt=num_filt,
                                      num_in=num_in,
                                      noise=corruption,
                                      err=errors.cross_entropy,
                                      nlin=nlins.sigmoid,
                                      dtype=dtype)
        pooling = MaxPoolLayer(subsampling)
        LayerStack.__init__(self, [autoencoder, pooling])

    def build(self, input, input_shape=None):
        """Build the stack, then expose the auto-encoder's pretraining handles."""
        LayerStack.build(self, input, input_shape)
        # layers[0] is the ConvAutoencoder handed to LayerStack.__init__
        # (assumes LayerStack keeps the layers in the order given).
        autoencoder = self.layers[0]
        self.cost = autoencoder.cost
        self.pre_params = autoencoder.pre_params
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
26
298
a222af1d0598 - Adapt to scdae to input_shape change in pynnet
Arnaud Bergeron <abergeron@gmail.com>
parents: 292
diff changeset
def scdae(filter_sizes, num_filts, subsamplings, corruptions, dtype):
    """Return a ``LayerStack`` named 'scdae' made of chained ``cdae`` stages.

    One stage is created per position of the four parallel sequences
    (iteration stops at the shortest one).  The first stage is created with
    ``num_in=1``; every later stage receives the previous stage's filter
    count as its ``num_in``.
    """
    stages = []
    prev_filters = 1
    specs = izip(filter_sizes, num_filts, subsamplings, corruptions)
    for filter_size, n_filters, subsampling, corruption in specs:
        stage = cdae(filter_size, n_filters, prev_filters, subsampling,
                     corruption, dtype)
        stages.append(stage)
        prev_filters = n_filters
    return LayerStack(stages, name='scdae')
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
35
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
def mlp(layer_sizes, dtype):
    """Return a ``LayerStack`` named 'mlp' of tanh ``SimpleLayer`` objects.

    ``layer_sizes[0]`` is the input width; each following entry adds one
    layer mapping the previous width to that entry.  An empty
    ``layer_sizes`` raises ``IndexError``.
    """
    n_in = layer_sizes[0]
    stack = []
    for n_out in layer_sizes[1:]:
        hidden = SimpleLayer(n_in, n_out, activation=nlins.tanh, dtype=dtype)
        stack.append(hidden)
        n_in = n_out
    return LayerStack(stack, name='mlp')
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
44
301
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
def scdae_net(in_size, filter_sizes, num_filts, subsamplings,
              corruptions, layer_sizes, out_size, dtype):
    """Assemble the full classifier: reshape -> scdae -> reshape -> mlp -> softmax.

    Parameters:
      in_size       -- shape of one example, without the batch dimension
                       (any sequence; it is appended to ``(None,)``).
      filter_sizes, num_filts, subsamplings, corruptions
                    -- per-stage settings forwarded to ``scdae``.
      layer_sizes   -- widths of the hidden layers of the MLP (any sequence;
                       the caller's object is not modified).
      out_size      -- width of the softmax output layer.
      dtype         -- dtype forwarded to every layer that accepts one.

    Returns an ``NNet`` built from the five layers with ``errors.nll`` as
    its error.
    """
    in_size = tuple(in_size)
    rl1 = ReshapeLayer((None,) + in_size)
    ls = scdae(filter_sizes, num_filts, subsamplings,
               corruptions, dtype)

    # Build the convolutional stack once on a throw-away symbolic input with
    # a batch dimension of 1, only to learn its output shape; the product of
    # that shape is then the flattened width fed to the MLP.
    x = T.ftensor4()
    ls.build(x, input_shape=(1,) + in_size)
    outs = numpy.prod(ls.output_shape)

    rl2 = ReshapeLayer((None, outs))
    # list(...) lets callers pass a tuple; the original list concatenation
    # raised TypeError for anything but a list.
    layer_sizes = [outs] + list(layer_sizes)
    ls2 = mlp(layer_sizes, dtype)
    # The output layer now receives the same dtype as the rest of the
    # network instead of silently falling back to SimpleLayer's default.
    lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax,
                      dtype=dtype, name='output')
    return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll)
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
59
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs,
                noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr):
    """Create the network and compile its pretrain/train/eval functions.

    The network is built and compiled twice: once with a fixed input shape
    of ``(batch_size, prod(img_size))`` and once with no shape information.
    Every returned callable looks at ``x.shape[0]`` of the batch it is given
    and uses the fixed-shape version when it equals ``batch_size``, the
    shape-agnostic version otherwise.

    Both versions use ``pretrain_lr`` and ``train_lr``.  (The shape-agnostic
    versions used to hard-code 0.01 and 0.1, so a batch of a different size
    was trained with a different learning rate than requested.)

    Side effect: calls ``n.save('start.net')`` on the freshly created
    network before anything is compiled.

    Returns ``(pretrain_funcs, trainf, evalf, n)`` where ``pretrain_funcs``
    holds one callable ``f(x)`` per layer of ``n.layers[1]``, ``trainf`` and
    ``evalf`` are callables ``f(x, y)``, and ``n`` is the network (left in
    the state of the second, shape-agnostic build).
    """
    n = scdae_net((1,)+img_size, filter_sizes, num_filters, subs,
                  noise, mlp_sizes, out_size, dtype)

    n.save('start.net')

    x = T.fmatrix('x')
    y = T.ivector('y')

    def pretrainfunc(net, alpha):
        # Unsupervised step: only the layer's pretraining parameters move.
        up = trainers.get_updates(net.pre_params, net.cost, alpha)
        return theano.function([x], net.cost, updates=up)

    def trainfunc(net, alpha):
        # Supervised step over all parameters of the network.
        up = trainers.get_updates(net.params, net.cost, alpha)
        return theano.function([x, y], net.cost, updates=up)

    def compile_current_build():
        # n.layers[1] is the scdae stack (second entry of the list given to
        # NNet in scdae_net); its .layers are the individual cdae stages.
        pretrain = [pretrainfunc(l, pretrain_lr) for l in n.layers[1].layers]
        train = trainfunc(n, train_lr)
        evaluate = theano.function([x, y], errors.class_error(n.output, y))
        return pretrain, train, evaluate

    def select_by_batch(f_fixed, f_any):
        # Route each call on the number of rows in the first argument.
        def f(x, *rest):
            if x.shape[0] == batch_size:
                return f_fixed(x, *rest)
            return f_any(x, *rest)
        return f

    n.build(x, y, input_shape=(batch_size, numpy.prod(img_size)))
    pretrain_funcs_opt, trainf_opt, evalf_opt = compile_current_build()

    n.build(x, y)
    pretrain_funcs_reg, trainf_reg, evalf_reg = compile_current_build()

    pretrain_funcs = [select_by_batch(p_opt, p_reg)
                      for p_opt, p_reg in zip(pretrain_funcs_opt,
                                              pretrain_funcs_reg)]
    trainf = select_by_batch(trainf_opt, trainf_reg)
    evalf = select_by_batch(evalf_opt, evalf_reg)
    return pretrain_funcs, trainf, evalf, n
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
110
288
80ee63c3e749 Add net saving (only the best model) and error saving using SeriesTable
Arnaud Bergeron <abergeron@gmail.com>
parents: 277
diff changeset
def do_pretrain(pretrain_funcs, pretrain_epochs, serie):
    """Run every pretraining function ``pretrain_epochs`` times, in order.

    Each entry of ``pretrain_funcs`` is called with no arguments; its return
    value is recorded with ``serie.append((layer_index, epoch), value)``.
    A layer is finished completely before the next one starts.
    """
    for layer_idx, step in enumerate(pretrain_funcs):
        for epoch in range(pretrain_epochs):
            serie.append((layer_idx, epoch), step())
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
115
298
a222af1d0598 - Adapt to scdae to input_shape change in pynnet
Arnaud Bergeron <abergeron@gmail.com>
parents: 292
diff changeset
def massage_funcs(pretrain_it, train_it, dset, batch_size, pretrain_funcs,
                  trainf, evalf):
    """Bind the compiled functions to their data so they take no arguments.

    Returns ``(pretrain_fs, train, valid, test)``:

    * ``pretrain_fs`` -- a list with one zero-argument callable per entry of
      ``pretrain_funcs``.  Each call takes the next ``(x, y)`` pair from
      ``pretrain_it`` and returns ``f(x)``.  All of them iterate over the
      same ``pretrain_it`` object.
    * ``train`` -- zero-argument callable; each call takes the next
      ``(x, y)`` pair from ``train_it`` and returns ``trainf(x, y)``.
    * ``valid`` / ``test`` -- zero-argument callables that walk
      ``dset.valid(batch_size)`` / ``dset.test(batch_size)`` and return the
      average of ``evalf(x, y)`` weighted by ``x.shape[0]``.

    The pretrain/train callables raise ``StopIteration`` once their
    iterator is exhausted.
    """
    def make_pretrain_step(step):
        def costs():
            for inputs, _targets in pretrain_it:
                yield step(inputs)
        stream = costs()
        return lambda: next(stream)

    pretrain_fs = [make_pretrain_step(step) for step in pretrain_funcs]

    def make_train_step(step):
        def costs():
            for inputs, targets in train_it:
                yield step(inputs, targets)
        stream = costs()
        return lambda: next(stream)

    train = make_train_step(trainf)

    def make_evaluator(score, batches_for):
        def evaluate():
            weighted_sum = 0
            n_examples = 0
            for inputs, targets in batches_for(batch_size):
                n_examples += inputs.shape[0]
                # Weight each batch score by its row count so a smaller
                # final batch does not skew the average.
                weighted_sum += score(inputs, targets)*inputs.shape[0]
            return weighted_sum/n_examples
        return evaluate

    test = make_evaluator(evalf, dset.test)
    valid = make_evaluator(evalf, dset.valid)

    return pretrain_fs, train, valid, test
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
150
277
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
def repeat_itf(itf, *args, **kwargs):
    """Yield the elements of ``itf(*args, **kwargs)`` over and over.

    ``itf`` is called again each time its previous result is exhausted, so
    the generator is endless as long as every pass yields at least one
    element.  If a pass yields nothing the generator stops; without this
    check an empty source made the loop spin forever without ever yielding.
    """
    while True:
        produced = False
        for e in itf(*args, **kwargs):
            produced = True
            yield e
        if not produced:
            # Empty pass: there is nothing to repeat, so end the iteration.
            return
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
155
288
80ee63c3e749 Add net saving (only the best model) and error saving using SeriesTable
Arnaud Bergeron <abergeron@gmail.com>
parents: 277
diff changeset
def create_series():
    """Open 'series.h5' for writing and return the error series stored in it.

    Returns a dict with the keys 'recons_error', 'train_error',
    'valid_error' and 'test_error'.  The first two are wrapped in
    ``AccumulatorSeriesWrapper`` with ``reduce_every=100``; 'recons_error'
    additionally prints every entry that reaches its underlying series.

    NOTE(review): the HDF5 file handle is neither closed nor returned here;
    it stays reachable only through the series objects that were given it.
    """
    import tables

    h5f = tables.openFile('series.h5', 'w')

    class PrintWrap(object):
        """Proxy that prints ``idx value`` and then forwards the append."""

        def __init__(self, series):
            self.series = series

        def append(self, idx, value):
            # Same output as the Python 2 statement `print idx, value`.
            print("%s %s" % (idx, value))
            self.series.append(idx, value)

    def error_table(name, index_names, title):
        # Every series in this function uses the same string for both
        # error_name and table_name.
        return ErrorSeries(error_name=name,
                           table_name=name,
                           hdf5_file=h5f,
                           index_names=index_names,
                           title=title)

    series = {}

    recons_table = error_table('reconstruction_error', ('layer', 'epoch'),
                               "Reconstruction error (mse)")
    series['recons_error'] = AccumulatorSeriesWrapper(
        base_series=PrintWrap(recons_table),
        reduce_every=100)

    train_table = error_table('training_error', ('iter',),
                              'Training error (nll)')
    series['train_error'] = AccumulatorSeriesWrapper(
        base_series=train_table,
        reduce_every=100)

    series['valid_error'] = error_table('valid_error', ('iter',),
                                        'Validation error (class)')

    series['test_error'] = error_table('test_error', ('iter',),
                                       'Test error (class)')

    return series
288
80ee63c3e749 Add net saving (only the best model) and error saving using SeriesTable
Arnaud Bergeron <abergeron@gmail.com>
parents: 277
diff changeset
198
301
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
199 class PrintSeries(object):
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
200 def append(self, idx, v):
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
201 print idx, v
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
202
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
203 if __name__ == '__main__':
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
204 from ift6266 import datasets
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
205 from sgd_opt import sgd_opt
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
206 import sys, time
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
207
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
208 batch_size = 100
301
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
209 dset = datasets.nist_digits(1000)
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
210
288
80ee63c3e749 Add net saving (only the best model) and error saving using SeriesTable
Arnaud Bergeron <abergeron@gmail.com>
parents: 277
diff changeset
211 pretrain_funcs, trainf, evalf, net = build_funcs(
301
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
212 img_size = (32, 32),
277
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
213 batch_size=batch_size, filter_sizes=[(5,5), (3,3)],
314
2937f2a421aa Print the error sometimes in the pretrain loop.
Arnaud Bergeron <abergeron@gmail.com>
parents: 303
diff changeset
214 num_filters=[20, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2],
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
215 mlp_sizes=[500], out_size=10, dtype=numpy.float32,
303
ef28cbb5f464 Use sigmoids with cross-entropy cost in the ConvAutoencoders.
Arnaud Bergeron <abergeron@gmail.com>
parents: 301
diff changeset
216 pretrain_lr=0.001, train_lr=0.1)
277
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
217
298
a222af1d0598 - Adapt to scdae to input_shape change in pynnet
Arnaud Bergeron <abergeron@gmail.com>
parents: 292
diff changeset
218 t_it = repeat_itf(dset.train, batch_size)
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
219 pretrain_fs, train, valid, test = massage_funcs(
298
a222af1d0598 - Adapt to scdae to input_shape change in pynnet
Arnaud Bergeron <abergeron@gmail.com>
parents: 292
diff changeset
220 t_it, t_it, dset, batch_size,
277
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
221 pretrain_funcs, trainf, evalf)
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
222
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
223 print "pretraining ...",
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
224 sys.stdout.flush()
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
225 start = time.time()
301
be45e7db7cd4 Fix last-minute bugs in the code.
Arnaud Bergeron <abergeron@gmail.com>
parents: 298
diff changeset
226 do_pretrain(pretrain_fs, 1000, PrintSeries())
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
227 end = time.time()
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
228 print "done (in", end-start, "s)"
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
229
277
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
230 sgd_opt(train, valid, test, training_epochs=10000, patience=1000,
276
727ed56fad12 Add reworked code for convolutional auto-encoder.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
231 patience_increase=2., improvement_threshold=0.995,
277
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
232 validation_frequency=250)
20ebc1f2a9fe Use softmax for the output layer and rework the dset iterator stuff.
Arnaud Bergeron <abergeron@gmail.com>
parents: 276
diff changeset
233