Mercurial > ift6266
view deep/convolutional_dae/scdae.py @ 644:e63d23c7c9fb
reviews aistats finales
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Thu, 24 Mar 2011 17:05:05 -0400 |
parents | 5ddb1878dfbc |
children |
line wrap: on
line source
from pynnet import * import numpy import theano import theano.tensor as T from itertools import izip from ift6266.utils.seriestables import * class cdae(LayerStack): def __init__(self, filter_size, num_filt, num_in, subsampling, corruption, dtype): LayerStack.__init__(self, [ConvAutoencoder(filter_size=filter_size, num_filt=num_filt, num_in=num_in, noise=corruption, err=errors.cross_entropy, nlin=nlins.sigmoid, dtype=dtype), MaxPoolLayer(subsampling)]) def build(self, input, input_shape=None): LayerStack.build(self, input, input_shape) self.cost = self.layers[0].cost self.pre_params = self.layers[0].pre_params def scdae(filter_sizes, num_filts, subsamplings, corruptions, dtype): layers = [] old_nfilt = 1 for fsize, nfilt, subs, corr in izip(filter_sizes, num_filts, subsamplings, corruptions): layers.append(cdae(fsize, nfilt, old_nfilt, subs, corr, dtype)) old_nfilt = nfilt return LayerStack(layers, name='scdae') def mlp(layer_sizes, dtype): layers = [] old_size = layer_sizes[0] for size in layer_sizes[1:]: layers.append(SimpleLayer(old_size, size, activation=nlins.tanh, dtype=dtype)) old_size = size return LayerStack(layers, name='mlp') def scdae_net(in_size, filter_sizes, num_filts, subsamplings, corruptions, layer_sizes, out_size, dtype): rl1 = ReshapeLayer((None,)+in_size) ls = scdae(filter_sizes, num_filts, subsamplings, corruptions, dtype) x = T.ftensor4() ls.build(x, input_shape=(1,)+in_size) outs = numpy.prod(ls.output_shape) rl2 = ReshapeLayer((None, outs)) layer_sizes = [outs]+layer_sizes ls2 = mlp(layer_sizes, dtype) lrl = SimpleLayer(layer_sizes[-1], out_size, activation=nlins.softmax, name='output') return NNet([rl1, ls, rl2, ls2, lrl], error=errors.nll) def build_funcs(batch_size, img_size, filter_sizes, num_filters, subs, noise, mlp_sizes, out_size, dtype, pretrain_lr, train_lr): n = scdae_net((1,)+img_size, filter_sizes, num_filters, subs, noise, mlp_sizes, out_size, dtype) n.save('start.net') x = T.fmatrix('x') y = T.ivector('y') def pretrainfunc(net, alpha): up = trainers.get_updates(net.pre_params, net.cost, alpha) return theano.function([x], net.cost, updates=up) def trainfunc(net, alpha): up = trainers.get_updates(net.params, net.cost, alpha) return theano.function([x, y], net.cost, updates=up) n.build(x, y, input_shape=(batch_size, numpy.prod(img_size))) pretrain_funcs_opt = [pretrainfunc(l, pretrain_lr) for l in n.layers[1].layers] trainf_opt = trainfunc(n, train_lr) evalf_opt = theano.function([x, y], errors.class_error(n.output, y)) n.build(x, y) pretrain_funcs_reg = [pretrainfunc(l, 0.01) for l in n.layers[1].layers] trainf_reg = trainfunc(n, 0.1) evalf_reg = theano.function([x, y], errors.class_error(n.output, y)) def select_f(f1, f2, bsize): def f(x): if x.shape[0] == bsize: return f1(x) else: return f2(x) return f pretrain_funcs = [select_f(p_opt, p_reg, batch_size) for p_opt, p_reg in zip(pretrain_funcs_opt, pretrain_funcs_reg)] def select_f2(f1, f2, bsize): def f(x, y): if x.shape[0] == bsize: return f1(x, y) else: return f2(x, y) return f trainf = select_f2(trainf_opt, trainf_reg, batch_size) evalf = select_f2(evalf_opt, evalf_reg, batch_size) return pretrain_funcs, trainf, evalf, n def do_pretrain(pretrain_funcs, pretrain_epochs, serie): for layer, f in enumerate(pretrain_funcs): for epoch in xrange(pretrain_epochs): serie.append((layer, epoch), f()) def massage_funcs(pretrain_it, train_it, dset, batch_size, pretrain_funcs, trainf, evalf): def pretrain_f(f): def res(): for x, y in pretrain_it: yield f(x) it = res() return lambda: it.next() pretrain_fs = map(pretrain_f, pretrain_funcs) def train_f(f): def dset_it(): for x, y in train_it: yield f(x, y) it = dset_it() return lambda: it.next() train = train_f(trainf) def eval_f(f, dsetf): def res(): c = 0 i = 0 for x, y in dsetf(batch_size): i += x.shape[0] c += f(x, y)*x.shape[0] return c/i return res test = eval_f(evalf, dset.test) valid = eval_f(evalf, dset.valid) return pretrain_fs, train, valid, test def repeat_itf(itf, *args, **kwargs): while True: for e in itf(*args, **kwargs): yield e def create_series(): import tables series = {} h5f = tables.openFile('series.h5', 'w') class PrintWrap(object): def __init__(self, series): self.series = series def append(self, idx, value): print idx, value self.series.append(idx, value) series['recons_error'] = AccumulatorSeriesWrapper( base_series=PrintWrap(ErrorSeries(error_name='reconstruction_error', table_name='reconstruction_error', hdf5_file=h5f, index_names=('layer', 'epoch'), title="Reconstruction error (mse)")), reduce_every=100) series['train_error'] = AccumulatorSeriesWrapper( base_series=ErrorSeries(error_name='training_error', table_name='training_error', hdf5_file=h5f, index_names=('iter',), title='Training error (nll)'), reduce_every=100) series['valid_error'] = ErrorSeries(error_name='valid_error', table_name='valid_error', hdf5_file=h5f, index_names=('iter',), title='Validation error (class)') series['test_error'] = ErrorSeries(error_name='test_error', table_name='test_error', hdf5_file=h5f, index_names=('iter',), title='Test error (class)') return series class PrintSeries(object): def append(self, idx, v): print idx, v if __name__ == '__main__': from ift6266 import datasets from sgd_opt import sgd_opt import sys, time batch_size = 100 dset = datasets.nist_digits(1000) pretrain_funcs, trainf, evalf, net = build_funcs( img_size = (32, 32), batch_size=batch_size, filter_sizes=[(5,5), (3,3)], num_filters=[20, 4], subs=[(2,2), (2,2)], noise=[0.2, 0.2], mlp_sizes=[500], out_size=10, dtype=numpy.float32, pretrain_lr=0.001, train_lr=0.1) t_it = repeat_itf(dset.train, batch_size) pretrain_fs, train, valid, test = massage_funcs( t_it, t_it, dset, batch_size, pretrain_funcs, trainf, evalf) print "pretraining ...", sys.stdout.flush() start = time.time() do_pretrain(pretrain_fs, 1000, PrintSeries()) end = time.time() print "done (in", end-start, "s)" sgd_opt(train, valid, test, training_epochs=10000, patience=1000, patience_increase=2., improvement_threshold=0.995, validation_frequency=250)