diff code_tutoriel/DBN.py @ 165:4bc5eeec6394
Updating the tutorial code to the latest revisions.
author   | Dumitru Erhan <dumitru.erhan@gmail.com>
date     | Fri, 26 Feb 2010 13:55:27 -0500
parents  |
children |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code_tutoriel/DBN.py	Fri Feb 26 13:55:27 2010 -0500
@@ -0,0 +1,384 @@
"""Deep Belief Network (DBN) tutorial code.

A DBN is built by stacking RBMs: each hidden layer is pretrained greedily
as an RBM, then the whole network is finetuned as an MLP with a logistic
regression layer on top.
"""
import os

import numpy, time, cPickle, gzip

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from rbm import RBM


class DBN(object):
    """Deep Belief Network: a stack of sigmoid layers, each sharing its
    weights with an RBM, topped by a logistic regression layer."""

    def __init__(self, numpy_rng, theano_rng = None, n_ins = 784,
                 hidden_layers_sizes = [500, 500], n_outs = 10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                          weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: sizes of the intermediate layers; must
                                    contain at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers     = []
        self.params         = []
        self.n_layers       = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y') # the labels are presented as a 1D vector of
                                # [int] labels

        # The DBN is an MLP for which all weights of the intermediate layers
        # are shared with a different RBM. We first construct the DBN as a
        # deep multilayer perceptron, and while constructing each sigmoidal
        # layer we also construct an RBM that shares weights with that layer.
        # During pretraining we train these RBMs (which also changes the
        # weights of the MLP); during finetuning we finish training the DBN
        # by doing stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i-1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the DBN if we are on the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng = numpy_rng,
                                        input = layer_input,
                                        n_in  = input_size,
                                        n_out = hidden_layers_sizes[i],
                                        activation = T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # It is arguably a philosophical question, but we only declare
            # the parameters of the sigmoid layers to be parameters of the
            # DBN. The visible biases in the RBMs are parameters of those
            # RBMs, but not of the DBN.
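            # Worked example: with the defaults n_ins = 784 and
            # hidden_layers_sizes = [500, 500], this loop wires up
            #   layer 0: 784 -> 500, input = self.x
            #   layer 1: 500 -> 500, input = self.sigmoid_layers[0].output
            # and each RBM built below reuses that layer's W and b, so
            # pretraining the RBM moves the MLP's own weights.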
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng,
                            input = layer_input,
                            n_visible = input_size,
                            n_hidden  = hidden_layers_sizes[i],
                            W = sigmoid_layer.W,
                            hbias = sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                            input = self.sigmoid_layers[-1].output,
                            n_in  = hidden_layers_sizes[-1], n_out = n_outs)
        self.params.extend(self.logLayer.params)

        # the cost for the second phase of training, defined as the negative
        # log likelihood; it is used to build a function that implements one
        # step of finetuning
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size):
        '''Generates a list of functions, each performing one step of
        gradient descent for a given layer. The functions require as input
        the minibatch index; to pretrain an RBM you just iterate, calling
        the corresponding function on all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints
                            used for training the RBMs
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''

        # index to a [mini]batch
        index         = T.lscalar('index') # index to a minibatch
        learning_rate = T.scalar('lr')     # learning rate to use

        # number of batches
        n_batches   = train_set_x.value.shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch, given `index`
        batch_end   = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.cd(learning_rate, persistent=None)

            # compile the theano function
            fn = theano.function(inputs = [index,
                            theano.Param(learning_rate, default = 0.1)],
                    outputs = cost,
                    updates = updates,
                    givens  = {self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a batch
        from the validation set, and a function `test` that computes the
        error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: a list that contains all the datasets; it has to
                         contain three pairs, `train`, `valid`, `test`, in
                         this order, where each pair is formed of two Theano
                         variables, one for the datapoints, the other for
                         the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during the finetune stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x , test_set_y ) = datasets[2]

        # compute number of minibatches for validation and testing
        n_valid_batches = valid_set_x.value.shape[0] / batch_size
        n_test_batches  = test_set_x.value.shape[0]  / batch_size
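        # The finetuning step built below is plain minibatch SGD: for every
        # parameter p with gradient g = d(finetune_cost)/dp, the update is
        #   p <- p - learning_rate * g
        # e.g. with learning_rate = 0.1, a gradient entry of 0.5 moves the
        # corresponding weight by -0.05.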
        index = T.lscalar('index') # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = {}
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * learning_rate

        train_fn = theano.function(inputs = [index],
              outputs = self.finetune_cost,
              updates = updates,
              givens  = {
                self.x: train_set_x[index*batch_size:(index+1)*batch_size],
                self.y: train_set_y[index*batch_size:(index+1)*batch_size]})

        test_score_i = theano.function([index], self.errors,
              givens = {
                self.x: test_set_x[index*batch_size:(index+1)*batch_size],
                self.y: test_set_y[index*batch_size:(index+1)*batch_size]})

        valid_score_i = theano.function([index], self.errors,
              givens = {
                self.x: valid_set_x[index*batch_size:(index+1)*batch_size],
                self.y: valid_set_y[index*batch_size:(index+1)*batch_size]})

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score


def test_DBN(finetune_lr = 0.1, pretraining_epochs = 10,
             pretrain_lr = 0.1, training_epochs = 1000,
             dataset = 'mnist.pkl.gz'):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type training_epochs: int
    :param training_epochs: maximal number of epochs to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    """

    print 'finetune_lr = ', finetune_lr
    print 'pretrain_lr = ', pretrain_lr

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x , test_set_y  = datasets[2]

    batch_size = 20 # size of the minibatch

    # compute number of minibatches for training
    n_train_batches = train_set_x.value.shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28*28,
              hidden_layers_sizes = [1000, 1000, 1000],
              n_outs = 10)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(
                            train_set_x = train_set_x,
                            batch_size  = batch_size)
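    # Usage sketch: each element of pretraining_fns takes a minibatch index
    # plus an optional learning rate (default 0.1) and returns the
    # pretraining cost for that minibatch, e.g.
    #   cost_0 = pretraining_fns[0](index = 0, lr = 0.1)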
    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index = batch_index,
                                            lr = pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), numpy.mean(c)

    end_time = time.clock()

    print 'Pretraining took %f minutes' % ((end_time - start_time) / 60.)

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing functions for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
                datasets = datasets, batch_size = batch_size,
                learning_rate = finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10000              # look at this many examples regardless
    patience_increase = 2.        # wait this much longer when a new best
                                  # is found
    improvement_threshold = 0.995 # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience/2)
                                  # go through this many minibatches before
                                  # checking the network on the validation
                                  # set; in this case we check every epoch

    best_params          = None
    best_validation_loss = float('inf')
    test_score           = 0.
    start_time           = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            # iteration number; `epoch` starts at 1, hence the (epoch - 1)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                                              improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score  = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %f minutes' % ((end_time - start_time) / 60.)


if __name__ == '__main__':
    pretrain_lr = numpy.float(os.sys.argv[1])
    finetune_lr = numpy.float(os.sys.argv[2])
    test_DBN(pretrain_lr = pretrain_lr, finetune_lr = finetune_lr)
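# Usage sketch (assumes mnist.pkl.gz and the companion tutorial modules
# logistic_sgd.py, mlp.py and rbm.py are available on the path):
#   python DBN.py 0.01 0.1
# where the first argument is pretrain_lr and the second is finetune_lr.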