ift6266: diff deep/deep_mlp/logistic_sgd.py @ 626:75dbbe409578
Added code for the deep MLP, and experiment code to go along with it. Also added the code I used to filter the P07 / PNIST07 datasets to keep only digits.
author      fsavard
date        Wed, 16 Mar 2011 13:43:32 -0400
parents
children
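The commit message mentions code used to filter the P07 / PNIST07 datasets down to digits only, but that script is not part of this diff. Purely as an illustration of the idea (not the author's actual filtering code), here is a minimal sketch assuming each split is a pair of numpy arrays (examples, labels) and that digit classes occupy labels 0-9 while letter classes use higher label values:

import numpy

def keep_digits_only(x, y, n_digit_classes=10):
    # Hypothetical helper, not part of this commit. It assumes digits are
    # labelled 0 .. n_digit_classes-1 and letter classes use larger labels;
    # the actual P07 / PNIST07 label layout should be checked.
    keep = y < n_digit_classes
    return x[keep], y[keep]

# example usage: train_x_digits, train_y_digits = keep_digits_only(train_x, train_y)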
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/deep_mlp/logistic_sgd.py	Wed Mar 16 13:43:32 2011 -0400
@@ -0,0 +1,223 @@
import numpy, time, cPickle, gzip, sys, os

import theano
import theano.tensor as T

class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        # weight matrix W (n_in x n_out) and bias vector b, initialized to zero
        self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                 dtype=theano.config.floatX),
                               name='W')
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b')

        # class-membership probabilities: softmax over the linear projection
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # predicted class: the one with the highest probability
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        # mean negative log-probability of the correct labels
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        # fraction of misclassified examples in the minibatch
        if y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', y.type, 'y_pred', self.y_pred.type))

        if y.dtype.startswith('int'):
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############
    print '... loading data'

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    def shared_dataset(data_xy):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into GPU memory (when the code is run on a GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time one is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
        # When storing data on the GPU it has to be stored as floats,
        # therefore we store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use the labels as indices, and if they
        # were floats that wouldn't make sense), so instead of returning
        # ``shared_y`` directly we cast it to int. This little hack
        # lets us get around the issue.
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval


def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='../data/mnist.pkl.gz',
                           batch_size=600):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.value.shape[0] / batch_size
    n_valid_batches = valid_set_x.value.shape[0] / batch_size
    n_test_batches = test_set_x.value.shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.ivector('y')     # the labels are presented as a 1D vector of
                           # [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic form
    cost = classifier.negative_log_likelihood(y)

    # compile a Theano function that computes the mistakes made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a dictionary
    updates = {classifier.W: classifier.W - learning_rate * g_W,
               classifier.b: classifier.b - learning_rate * g_b}

    # compile a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000                # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is
                                   # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))

if __name__ == '__main__':
    sgd_optimization_mnist()
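The deep MLP and experiment code mentioned in the commit message live in separate files; the LogisticRegression class above is the kind of building block such code typically reuses as its output layer. The sketch below only illustrates that pattern; HiddenLayer, DeepMLP, the tanh activation and the initialization scheme are all hypothetical here, not taken from this commit, and the import assumes the sketch sits next to logistic_sgd.py.

import numpy
import theano
import theano.tensor as T
from logistic_sgd import LogisticRegression  # assumes same directory as this file

class HiddenLayer(object):
    # hypothetical tanh hidden layer (illustration only)
    def __init__(self, rng, input, n_in, n_out):
        bound = numpy.sqrt(6. / (n_in + n_out))
        W_values = numpy.asarray(rng.uniform(low=-bound, high=bound,
                                             size=(n_in, n_out)),
                                 dtype=theano.config.floatX)
        self.W = theano.shared(value=W_values, name='W')
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b')
        self.output = T.tanh(T.dot(input, self.W) + self.b)
        self.params = [self.W, self.b]

class DeepMLP(object):
    # hypothetical stack of hidden layers feeding a LogisticRegression output layer
    def __init__(self, rng, input, n_in, hidden_sizes, n_out):
        self.hidden_layers = []
        layer_input, layer_n_in = input, n_in
        for n_hidden in hidden_sizes:
            layer = HiddenLayer(rng, layer_input, layer_n_in, n_hidden)
            self.hidden_layers.append(layer)
            layer_input, layer_n_in = layer.output, n_hidden
        self.log_layer = LogisticRegression(layer_input, layer_n_in, n_out)
        self.negative_log_likelihood = self.log_layer.negative_log_likelihood
        self.errors = self.log_layer.errors
        self.params = sum([l.params for l in self.hidden_layers], []) \
                      + self.log_layer.params

Training such a stack would then proceed much as in sgd_optimization_mnist above: take T.grad of the negative log-likelihood with respect to each entry of self.params and apply the same learning-rate update rule.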