Mercurial > ift6266
view code_tutoriel/DBN.py @ 494:405cabc08c92
merge
author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
---|---|
date | Tue, 01 Jun 2010 07:56:00 -0400 |
parents | 4bc5eeec6394 |
children |
line wrap: on
line source
""" """ import os import numpy, time, cPickle, gzip import theano import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams from logistic_sgd import LogisticRegression, load_data from mlp import HiddenLayer from rbm import RBM class DBN(object): """ """ def __init__(self, numpy_rng, theano_rng = None, n_ins = 784, hidden_layers_sizes = [500,500], n_outs = 10): """This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermidiate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2**30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # The DBN is an MLP, for which all weights of intermidiate layers are shared with a # different RBM. We will first construct the DBN as a deep multilayer perceptron, and # when constructing each sigmoidal layer we also construct an RBM that shares weights # with that layer. During pretraining we will train these RBMs (which will lead # to chainging the weights of the MLP as well) During finetuning we will finish # training the DBN by doing stochastic gradient descent on the MLP. for i in xrange( self.n_layers ): # construct the sigmoidal layer # the size of the input is either the number of hidden units of the layer below or # the input size if we are on the first layer if i == 0 : input_size = n_ins else: input_size = hidden_layers_sizes[i-1] # the input to this layer is either the activation of the hidden layer below or the # input of the DBN if you are on the first layer if i == 0 : layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng = numpy_rng, input = layer_input, n_in = input_size, n_out = hidden_layers_sizes[i], activation = T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... but we are going to only declare that # the parameters of the sigmoid_layers are parameters of the DBN. The visible # biases in the RBM are parameters of those RBMs, but not of the DBN. self.params.extend(sigmoid_layer.params) # Construct an RBM that shared weights with this layer rbm_layer = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, n_visible = input_size, n_hidden = hidden_layers_sizes[i], W = sigmoid_layer.W, hbias = sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression(\ input = self.sigmoid_layers[-1].output,\ n_in = hidden_layers_sizes[-1], n_out = n_outs) self.params.extend(self.logLayer.params) # construct a function that implements one step of fine-tuning compute the cost for # second phase of training, defined as the negative log likelihood self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, train_set_x, batch_size): ''' Generates a list of functions, for performing one step of gradient descent at a given layer. The function will require as input the minibatch index, and to train an RBM you just need to iterate, calling the corresponding function on all minibatch indexes. :type train_set_x: theano.tensor.TensorType :param train_set_x: Shared var. that contains all datapoints used for training the RBM :type batch_size: int :param batch_size: size of a [mini]batch ''' # index to a [mini]batch index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('lr') # learning rate to use # number of batches n_batches = train_set_x.value.shape[0] / batch_size # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin+batch_size pretrain_fns = [] for rbm in self.rbm_layers: # get the cost and the updates list # TODO: change cost function to reconstruction error cost,updates = rbm.cd(learning_rate, persistent=None) # compile the theano function fn = theano.function(inputs = [index, theano.Param(learning_rate, default = 0.1)], outputs = cost, updates = updates, givens = {self.x :train_set_x[batch_begin:batch_end]}) # append `fn` to the list of functions pretrain_fns.append(fn) return pretrain_fns def build_finetune_functions(self, datasets, batch_size, learning_rate): '''Generates a function `train` that implements one step of finetuning, a function `validate` that computes the error on a batch from the validation set, and a function `test` that computes the error on a batch from the testing set :type datasets: list of pairs of theano.tensor.TensorType :param datasets: It is a list that contain all the datasets; the has to contain three pairs, `train`, `valid`, `test` in this order, where each pair is formed of two Theano variables, one for the datapoints, the other for the labels :type batch_size: int :param batch_size: size of a minibatch :type learning_rate: float :param learning_rate: learning rate used during finetune stage ''' (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x , test_set_y ) = datasets[2] # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.value.shape[0] / batch_size n_test_batches = test_set_x.value.shape[0] / batch_size index = T.lscalar('index') # index to a [mini]batch # compute the gradients with respect to the model parameters gparams = T.grad(self.finetune_cost, self.params) # compute list of fine-tuning updates updates = {} for param, gparam in zip(self.params, gparams): updates[param] = param - gparam*learning_rate train_fn = theano.function(inputs = [index], outputs = self.finetune_cost, updates = updates, givens = { self.x : train_set_x[index*batch_size:(index+1)*batch_size], self.y : train_set_y[index*batch_size:(index+1)*batch_size]}) test_score_i = theano.function([index], self.errors, givens = { self.x: test_set_x[index*batch_size:(index+1)*batch_size], self.y: test_set_y[index*batch_size:(index+1)*batch_size]}) valid_score_i = theano.function([index], self.errors, givens = { self.x: valid_set_x[index*batch_size:(index+1)*batch_size], self.y: valid_set_y[index*batch_size:(index+1)*batch_size]}) # Create a function that scans the entire validation set def valid_score(): return [valid_score_i(i) for i in xrange(n_valid_batches)] # Create a function that scans the entire test set def test_score(): return [test_score_i(i) for i in xrange(n_test_batches)] return train_fn, valid_score, test_score def test_DBN( finetune_lr = 0.1, pretraining_epochs = 10, \ pretrain_lr = 0.1, training_epochs = 1000, \ dataset='mnist.pkl.gz'): """ Demonstrates how to train and test a Deep Belief Network. This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type n_iter: int :param n_iter: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset """ print 'finetune_lr = ', finetune_lr print 'pretrain_lr = ', pretrain_lr datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x , test_set_y = datasets[2] batch_size = 20 # size of the minibatch # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.value.shape[0] / batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network dbn = DBN(numpy_rng = numpy_rng, n_ins = 28*28, hidden_layers_sizes = [1000,1000,1000], n_outs = 10) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions( train_set_x = train_set_x, batch_size = batch_size ) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(dbn.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index = batch_index, lr = pretrain_lr ) ) print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),numpy.mean(c) end_time = time.clock() print ('Pretraining took %f minutes' %((end_time-start_time)/60.)) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model = dbn.build_finetune_functions ( datasets = datasets, batch_size = batch_size, learning_rate = finetune_lr) print '... finetunning the model' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience/2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = float('inf') test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = epoch * n_train_batches + minibatch_index if (iter+1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index+1, n_train_batches, \ this_validation_loss*100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold : patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index+1, n_train_batches, test_score*100.)) if patience <= iter : done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score*100.)) print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) if __name__ == '__main__': pretrain_lr = numpy.float(os.sys.argv[1]) finetune_lr = numpy.float(os.sys.argv[2]) test_DBN(pretrain_lr=pretrain_lr, finetune_lr=finetune_lr)