diff code_tutoriel/DBN.py @ 165:4bc5eeec6394

Updating the tutorial code to the latest revisions.
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Fri, 26 Feb 2010 13:55:27 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code_tutoriel/DBN.py	Fri Feb 26 13:55:27 2010 -0500
@@ -0,0 +1,384 @@
+"""
+"""
+import os
+
+import numpy, time, cPickle, gzip 
+
+import theano
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+from logistic_sgd import LogisticRegression, load_data
+from mlp import HiddenLayer
+from rbm import RBM
+
+
+
+class DBN(object):
+    """
+    """
+
+    def __init__(self, numpy_rng, theano_rng = None, n_ins = 784, 
+                 hidden_layers_sizes = [500,500], n_outs = 10):
+        """This class is made to support a variable number of layers. 
+
+        :type numpy_rng: numpy.random.RandomState
+        :param numpy_rng: numpy random number generator used to draw initial 
+                    weights
+
+        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
+        :param theano_rng: Theano random generator; if None is given, one is
+                           generated based on a seed drawn from `numpy_rng`
+
+        :type n_ins: int
+        :param n_ins: dimension of the input to the DBN
+
+        :type hidden_layers_sizes: list of ints
+        :param hidden_layers_sizes: sizes of the intermediate layers; must
+                                    contain at least one value
+
+        :type n_outs: int
+        :param n_outs: dimension of the output of the network
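+
+        A construction sketch (using the same settings as ``test_DBN`` below)::
+
+            numpy_rng = numpy.random.RandomState(123)
+            dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
+                      hidden_layers_sizes=[1000, 1000, 1000], n_outs=10)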
+        """
+        
+        self.sigmoid_layers = []
+        self.rbm_layers     = []
+        self.params         = []
+        self.n_layers       = len(hidden_layers_sizes)
+
+        assert self.n_layers > 0
+
+        if not theano_rng:
+            theano_rng = RandomStreams(numpy_rng.randint(2**30))
+
+        # allocate symbolic variables for the data
+        self.x  = T.matrix('x')  # the data is presented as rasterized images
+        self.y  = T.ivector('y') # the labels are presented as 1D vector of 
+                                 # [int] labels
+
+        # The DBN is an MLP for which the weights of each intermediate layer
+        # are shared with a different RBM. We will first construct the DBN as
+        # a deep multilayer perceptron, and when constructing each sigmoidal
+        # layer we also construct an RBM that shares weights with that layer.
+        # During pretraining we will train these RBMs (which will also change
+        # the weights of the MLP); during finetuning we will finish training
+        # the DBN by doing stochastic gradient descent on the MLP.
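+        # For example, with the default n_ins = 784 and
+        # hidden_layers_sizes = [500, 500], the loop below builds two sigmoid
+        # layers (784 -> 500 and 500 -> 500), each paired with an RBM that
+        # shares its W and hidden bias; a 500 -> n_outs logistic regression
+        # layer is then stacked on top after the loop.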
+
+        for i in xrange( self.n_layers ):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of the layer below or
+            # the input size if we are on the first layer
+            if i == 0 :
+                input_size = n_ins
+            else:
+                input_size = hidden_layers_sizes[i-1]
+
+            # the input to this layer is either the activation of the hidden
+            # layer below or the input of the DBN if we are on the first layer
+            if i == 0 : 
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].output
+
+            sigmoid_layer = HiddenLayer(rng   = numpy_rng, 
+                                           input = layer_input, 
+                                           n_in  = input_size, 
+                                           n_out = hidden_layers_sizes[i],
+                                           activation = T.nnet.sigmoid)
+            
+            # add the layer to our list of layers 
+            self.sigmoid_layers.append(sigmoid_layer)
+
+            # it is arguably a philosophical question... but we will only
+            # declare the parameters of the sigmoid_layers to be parameters of
+            # the DBN. The visible biases of the RBMs are parameters of those
+            # RBMs, but not of the DBN.
+            self.params.extend(sigmoid_layer.params)
+        
+            # Construct an RBM that shares weights with this layer
+            rbm_layer = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng, 
+                          input = layer_input, 
+                          n_visible = input_size, 
+                          n_hidden  = hidden_layers_sizes[i],  
+                          W = sigmoid_layer.W, 
+                          hbias = sigmoid_layer.b)
+            self.rbm_layers.append(rbm_layer)        
+
+        
+        # We now need to add a logistic layer on top of the MLP
+        self.logLayer = LogisticRegression(\
+                         input = self.sigmoid_layers[-1].output,\
+                         n_in = hidden_layers_sizes[-1], n_out = n_outs)
+        self.params.extend(self.logLayer.params)
+
+        # compute the cost for the second phase of training (finetuning),
+        # defined as the negative log likelihood of the logistic regression
+        # (output) layer
+        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
+
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+        self.errors = self.logLayer.errors(self.y)
+
+    def pretraining_functions(self, train_set_x, batch_size):
+        ''' Generates a list of functions, each performing one step of
+        gradient descent on the RBM at a given layer. Each function takes the
+        minibatch index as input; to pretrain an RBM you simply iterate,
+        calling the corresponding function on all minibatch indexes.
+
+        :type train_set_x: theano.tensor.TensorType
+        :param train_set_x: Shared var. that contains all datapoints used for training the RBM
+        :type batch_size: int
+        :param batch_size: size of a [mini]batch
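+
+        A minimal usage sketch (assuming an instance ``dbn`` and variables
+        ``n_train_batches`` and ``pretrain_lr`` defined as in ``test_DBN``
+        below)::
+
+            fns = dbn.pretraining_functions(train_set_x, batch_size)
+            for layer_fn in fns:
+                for batch_index in xrange(n_train_batches):
+                    layer_fn(index=batch_index, lr=pretrain_lr)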
+        '''
+
+        index            = T.lscalar('index')   # index to a [mini]batch
+        learning_rate    = T.scalar('lr')       # learning rate to use
+
+        # number of batches
+        n_batches = train_set_x.value.shape[0] / batch_size
+        # beginning of a batch, given `index`
+        batch_begin = index * batch_size
+        # ending of a batch given `index`
+        batch_end = batch_begin+batch_size
+
+        pretrain_fns = []
+        for rbm in self.rbm_layers:
+
+            # get the cost and the updates list
+            # TODO: change cost function to reconstruction error
+            cost,updates = rbm.cd(learning_rate, persistent=None)
+
+            # compile the theano function    
+            fn = theano.function(inputs = [index, 
+                              theano.Param(learning_rate, default = 0.1)], 
+                    outputs = cost, 
+                    updates = updates,
+                    givens  = {self.x :train_set_x[batch_begin:batch_end]})
+            # append `fn` to the list of functions
+            pretrain_fns.append(fn)
+
+        return pretrain_fns
+ 
+
+    def build_finetune_functions(self, datasets, batch_size, learning_rate):
+        '''Generates a function `train` that implements one step of
+        finetuning, a function `validate` that computes the error on the whole
+        validation set, and a function `test` that computes the error on the
+        whole testing set
+
+        :type datasets: list of pairs of theano.tensor.TensorType
+        :param datasets: a list containing all the datasets; it has to contain
+        three pairs, `train`, `valid`, `test`, in this order, where each pair
+        is formed of two Theano variables, one for the datapoints and one for
+        the labels
+        :type batch_size: int
+        :param batch_size: size of a minibatch
+        :type learning_rate: float
+        :param learning_rate: learning rate used during finetune stage
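+
+        A minimal usage sketch (assuming an instance ``dbn`` and the
+        ``datasets``, ``batch_size`` and ``finetune_lr`` values used in
+        ``test_DBN`` below)::
+
+            train_fn, valid_score, test_score = dbn.build_finetune_functions(
+                datasets, batch_size, finetune_lr)
+            cost = train_fn(0)                  # one SGD step on minibatch 0
+            err  = numpy.mean(valid_score())    # mean error over the validation set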
+        '''
+
+        (train_set_x, train_set_y) = datasets[0]
+        (valid_set_x, valid_set_y) = datasets[1]
+        (test_set_x , test_set_y ) = datasets[2]
+
+        # compute number of minibatches for training, validation and testing
+        n_valid_batches = valid_set_x.value.shape[0] / batch_size
+        n_test_batches  = test_set_x.value.shape[0]  / batch_size
+
+        index   = T.lscalar('index')    # index to a [mini]batch 
+
+        # compute the gradients with respect to the model parameters
+        gparams = T.grad(self.finetune_cost, self.params)
+
+        # compute list of fine-tuning updates
+        updates = {}
+        for param, gparam in zip(self.params, gparams):
+            updates[param] = param - gparam*learning_rate
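+        # (this is plain SGD: each parameter takes a step of size
+        # `learning_rate` down its own gradient)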
+
+        train_fn = theano.function(inputs = [index], 
+              outputs =   self.finetune_cost, 
+              updates = updates,
+              givens  = {
+                self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+                self.y : train_set_y[index*batch_size:(index+1)*batch_size]})
+
+        test_score_i = theano.function([index], self.errors,
+                 givens = {
+                   self.x: test_set_x[index*batch_size:(index+1)*batch_size],
+                   self.y: test_set_y[index*batch_size:(index+1)*batch_size]})
+
+        valid_score_i = theano.function([index], self.errors,
+              givens = {
+                 self.x: valid_set_x[index*batch_size:(index+1)*batch_size],
+                 self.y: valid_set_y[index*batch_size:(index+1)*batch_size]})
+
+        # Create a function that scans the entire validation set
+        def valid_score():
+            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+
+        # Create a function that scans the entire test set
+        def test_score():
+            return [test_score_i(i) for i in xrange(n_test_batches)]
+
+        return train_fn, valid_score, test_score
+
+
+
+
+
+
+def test_DBN( finetune_lr = 0.1, pretraining_epochs = 10, \
+              pretrain_lr = 0.1, training_epochs = 1000, \
+              dataset='mnist.pkl.gz'):
+    """
+    Demonstrates how to train and test a Deep Belief Network.
+
+    This is demonstrated on MNIST.
+
+    :type finetune_lr: float
+    :param finetune_lr: learning rate used in the finetune stage
+    :type pretraining_epochs: int
+    :param pretraining_epochs: number of epochs to do pretraining
+    :type pretrain_lr: float
+    :param pretrain_lr: learning rate to be used during pre-training
+    :type training_epochs: int
+    :param training_epochs: maximal number of iterations to run the optimizer
+    :type dataset: string
+    :param dataset: path to the pickled dataset
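+
+    A typical invocation (assuming ``mnist.pkl.gz`` can be found by
+    ``load_data``)::
+
+        test_DBN(pretrain_lr=0.1, finetune_lr=0.1, training_epochs=1000)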
+    """
+
+    print 'finetune_lr = ', finetune_lr
+    print 'pretrain_lr = ', pretrain_lr
+
+    datasets = load_data(dataset)
+
+    train_set_x, train_set_y = datasets[0]
+    valid_set_x, valid_set_y = datasets[1]
+    test_set_x , test_set_y  = datasets[2]
+
+
+    batch_size = 20    # size of the minibatch
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.value.shape[0] / batch_size
+
+    # numpy random generator
+    numpy_rng = numpy.random.RandomState(123)
+    print '... building the model'
+    # construct the Deep Belief Network
+    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28*28, 
+              hidden_layers_sizes = [1000,1000,1000],
+              n_outs = 10)
+    
+
+    #########################
+    # PRETRAINING THE MODEL #
+    #########################
+    print '... getting the pretraining functions'
+    pretraining_fns = dbn.pretraining_functions(
+            train_set_x   = train_set_x, 
+            batch_size    = batch_size ) 
+
+    print '... pre-training the model'
+    start_time = time.clock()  
+    ## Pre-train layer-wise 
+    for i in xrange(dbn.n_layers):
+        # go through pretraining epochs 
+        for epoch in xrange(pretraining_epochs):
+            # go through the training set
+            c = []
+            for batch_index in xrange(n_train_batches):
+                c.append(pretraining_fns[i](index = batch_index, 
+                         lr = pretrain_lr ) )
+            print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),numpy.mean(c)
+ 
+    end_time = time.clock()
+
+    print ('Pretraining took %f minutes' %((end_time-start_time)/60.))
+    
+    ########################
+    # FINETUNING THE MODEL #
+    ########################
+
+    # get the training, validation and testing function for the model
+    print '... getting the finetuning functions'
+    train_fn, validate_model, test_model = dbn.build_finetune_functions ( 
+                datasets = datasets, batch_size = batch_size, 
+                learning_rate = finetune_lr) 
+
+    print '... finetuning the model'
+    # early-stopping parameters
+    patience              = 10000 # look at this many examples regardless
+    patience_increase     = 2.    # wait this much longer when a new best is 
+                                  # found
+    improvement_threshold = 0.995 # a relative improvement of this much is 
+                                  # considered significant
+    validation_frequency  = min(n_train_batches, patience/2)
+                                  # go through this many 
+                                  # minibatches before checking the network 
+                                  # on the validation set; in this case we 
+                                  # check every epoch 
+
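+    # How patience evolves: with patience = 10000 and patience_increase = 2, a
+    # sufficiently improved validation score at iteration 6000 raises patience
+    # to max(10000, 6000 * 2) = 12000, so training runs for at least 12000
+    # minibatch updates in total.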
+
+    best_params          = None
+    best_validation_loss = float('inf')
+    test_score           = 0.
+    start_time = time.clock()
+
+    done_looping = False
+    epoch = 0
+
+    while (epoch < training_epochs) and (not done_looping):
+      epoch = epoch + 1
+      for minibatch_index in xrange(n_train_batches):
+
+        minibatch_avg_cost = train_fn(minibatch_index)
+        # iteration number (minibatch updates done so far); epoch starts at 1
+        iter    = (epoch - 1) * n_train_batches + minibatch_index
+
+        if (iter+1) % validation_frequency == 0: 
+            
+            validation_losses = validate_model()
+            this_validation_loss = numpy.mean(validation_losses)
+            print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                   (epoch, minibatch_index+1, n_train_batches, \
+                    this_validation_loss*100.))
+
+
+            # if we got the best validation score until now
+            if this_validation_loss < best_validation_loss:
+
+                #improve patience if loss improvement is good enough
+                if this_validation_loss < best_validation_loss *  \
+                       improvement_threshold :
+                    patience = max(patience, iter * patience_increase)
+
+                # save best validation score and iteration number
+                best_validation_loss = this_validation_loss
+                best_iter = iter
+
+                # test it on the test set
+                test_losses = test_model()
+                test_score = numpy.mean(test_losses)
+                print(('     epoch %i, minibatch %i/%i, test error of best '
+                      'model %f %%') % 
+                             (epoch, minibatch_index+1, n_train_batches,
+                              test_score*100.))
+
+
+        if patience <= iter :
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%, '
+           'with test performance %f %%') %  
+                 (best_validation_loss * 100., test_score*100.))
+    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+
+
+
+
+
+if __name__ == '__main__':
+    pretrain_lr = numpy.float(os.sys.argv[1])
+    finetune_lr = numpy.float(os.sys.argv[2])
+    test_DBN(pretrain_lr=pretrain_lr, finetune_lr=finetune_lr)