comparison deep/convolutional_dae/scdae.py @ 277:20ebc1f2a9fe

Use softmax for the output layer and rework the dset iterator handling (massage_funcs now takes a training iterator, and a repeat_itf helper is added).
author Arnaud Bergeron <abergeron@gmail.com>
date Mon, 22 Mar 2010 16:37:34 -0400
parents 727ed56fad12
children 80ee63c3e749
comparison
equal deleted inserted replaced
276:727ed56fad12 277:20ebc1f2a9fe
58 corruptions, dtype) 58 corruptions, dtype)
59 outs = numpy.prod(outs) 59 outs = numpy.prod(outs)
60 rl2 = ReshapeLayer((None, outs)) 60 rl2 = ReshapeLayer((None, outs))
61 layer_sizes = [outs]+layer_sizes 61 layer_sizes = [outs]+layer_sizes
62 ls2 = mlp(layer_sizes, dtype) 62 ls2 = mlp(layer_sizes, dtype)
63 lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.sigmoid) 63 lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax)
64 return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll) 64 return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll)
65 65
66 def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs, 66 def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs,
67 noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr): 67 noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr):
68 68
87 clear_imgshape(n) 87 clear_imgshape(n)
88 n.build(x, y) 88 n.build(x, y)
89 pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers] 89 pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers]
90 trainf_reg = trainfunc(n, 0.1) 90 trainf_reg = trainfunc(n, 0.1)
91 evalf_reg = theano.function([x, y], errors.class_error(n.output, y)) 91 evalf_reg = theano.function([x, y], errors.class_error(n.output, y))
92 92
93 def select_f(f1, f2, bsize): 93 def select_f(f1, f2, bsize):
94 def f(x): 94 def f(x):
95 if x.shape[0] == bsize: 95 if x.shape[0] == bsize:
96 return f1(x) 96 return f1(x)
97 else: 97 else:
115 def do_pretrain(pretrain_funcs, pretrain_epochs): 115 def do_pretrain(pretrain_funcs, pretrain_epochs):
116 for f in pretrain_funcs: 116 for f in pretrain_funcs:
117 for i in xrange(pretrain_epochs): 117 for i in xrange(pretrain_epochs):
118 f() 118 f()
119 119
120 def massage_funcs(batch_size, dset, pretrain_funcs, trainf, evalf): 120 def massage_funcs(train_it, dset, batch_size, pretrain_funcs, trainf, evalf):
121 def pretrain_f(f): 121 def pretrain_f(f):
122 def res(): 122 def res():
123 for x, y in dset.train(batch_size): 123 for x, y in train_it:
124 print "pretrain:", f(x) 124 yield f(x)
125 return res 125 it = res()
126 return lambda: it.next()
126 127
127 pretrain_fs = map(pretrain_f, pretrain_funcs) 128 pretrain_fs = map(pretrain_f, pretrain_funcs)
128 129
129 def train_f(f, dsetf): 130 def train_f(f):
130 def dset_it(): 131 def dset_it():
131 while True: 132 for x, y in train_it:
132 for x, y in dsetf(batch_size): 133 yield f(x, y)
133 yield f(x, y)
134 it = dset_it() 134 it = dset_it()
135 return lambda: it.next() 135 return lambda: it.next()
136 136
137 train = train_f(trainf, dset.train) 137 train = train_f(trainf)
138 138
139 def eval_f(f, dsetf): 139 def eval_f(f, dsetf):
140 def res(): 140 def res():
141 c = 0 141 c = 0
142 i = 0 142 i = 0
143 for x, y in dsetf(batch_size): 143 for x, y in dsetf(batch_size):
148 148
149 test = eval_f(evalf, dset.test) 149 test = eval_f(evalf, dset.test)
150 valid = eval_f(evalf, dset.valid) 150 valid = eval_f(evalf, dset.valid)
151 151
152 return pretrain_fs, train, valid, test 152 return pretrain_fs, train, valid, test
153
154 def repeat_itf(itf, *args, **kwargs):
155 while True:
156 for e in itf(*args, **kwargs):
157 yield e
153 158
154 def run_exp(state, channel): 159 def run_exp(state, channel):
155 from ift6266 import datasets 160 from ift6266 import datasets
156 from sgd_opt import sgd_opt 161 from sgd_opt import sgd_opt
157 import sys, time 162 import sys, time
203 from ift6266 import datasets 208 from ift6266 import datasets
204 from sgd_opt import sgd_opt 209 from sgd_opt import sgd_opt
205 import sys, time 210 import sys, time
206 211
207 batch_size = 100 212 batch_size = 100
208 dset = datasets.mnist(200) 213 dset = datasets.mnist()
209 214
210 pretrain_funcs, trainf, evalf = build_funcs( 215 pretrain_funcs, trainf, evalf = build_funcs(
211 img_size = (28, 28), 216 img_size = (28, 28),
212 batch_size=batch_size, filter_sizes=[(5,5), (5,5)], 217 batch_size=batch_size, filter_sizes=[(5,5), (3,3)],
213 num_filters=[4, 3], subs=[(2,2), (2,2)], noise=[0.2, 0.2], 218 num_filters=[4, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2],
214 mlp_sizes=[500], out_size=10, dtype=numpy.float32, 219 mlp_sizes=[500], out_size=10, dtype=numpy.float32,
215 pretrain_lr=0.01, train_lr=0.1) 220 pretrain_lr=0.01, train_lr=0.1)
216 221
217 pretrain_fs, train, valid, test = massage_funcs( 222 pretrain_fs, train, valid, test = massage_funcs(
218 batch_size, dset, pretrain_funcs, trainf, evalf) 223 repeat_itf(dset.train, batch_size),
224 dset, batch_size,
225 pretrain_funcs, trainf, evalf)
219 226
220 print "pretraining ...", 227 print "pretraining ...",
221 sys.stdout.flush() 228 sys.stdout.flush()
222 start = time.time() 229 start = time.time()
223 do_pretrain(pretrain_fs, 0) 230 do_pretrain(pretrain_fs, 2500)
224 end = time.time() 231 end = time.time()
225 print "done (in", end-start, "s)" 232 print "done (in", end-start, "s)"
226 233
227 sgd_opt(train, valid, test, training_epochs=1000, patience=1000, 234 sgd_opt(train, valid, test, training_epochs=10000, patience=1000,
228 patience_increase=2., improvement_threshold=0.995, 235 patience_increase=2., improvement_threshold=0.995,
229 validation_frequency=500) 236 validation_frequency=250)
237