ift6266: diff code_tutoriel/convolutional_mlp.py @ 0:fda5f787baa6

description: initial commit
author:      Dumitru Erhan <dumitru.erhan@gmail.com>
date:        Thu, 21 Jan 2010 11:26:43 -0500
parents:
children:

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/code_tutoriel/convolutional_mlp.py	Thu Jan 21 11:26:43 2010 -0500
@@ -0,0 +1,230 @@

"""
This tutorial introduces the LeNet5 neural network architecture using Theano.  LeNet5 is a
convolutional neural network, well suited to classifying images.  This tutorial shows how to
build the architecture, and comes with all the hyper-parameters you need to reproduce the
paper's MNIST results.

The best results are obtained after X iterations of the main program loop, which takes ***
minutes on my workstation (an Intel Core i7, circa July 2009), and *** minutes on my GPU (an
NVIDIA GTX 285 graphics processor).

This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters.

 - LeNetConvPool implements max-pooling rather than average-pooling.

 - Digit classification is done with a logistic regression rather than an RBF network.

 - The second convolutional layer here is fully connected to all feature maps of the first;
   the original LeNet5 used a sparse connection table instead.

References:

 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import numpy
import theano
from theano.compile.sandbox import shared, pfunc
from theano import tensor
from theano.tensor import nnet
# NOTE: the module paths of ConvOp and DownsampleFactorMax below are an assumption; they have
# moved between Theano versions, but both Ops are used in this file and must be importable.
from theano.tensor.nnet.conv import ConvOp
from theano.tensor.signal.downsample import DownsampleFactorMax
# NOTE: the LogisticRegression and SigmoidalLayer classes defined below shadow these imports.
from pylearn.shared.layers import LogisticRegression, SigmoidalLayer
import theano.sandbox.softsign
import pylearn.datasets.MNIST


try:
    # this tells theano to use the GPU if possible
    from theano.sandbox.cuda import use
    use()
except Exception, e:
    print('Warning: Attempt to use GPU resulted in error "%s"' % str(e))


class LeNetConvPool(object):
    """WRITEME

    Math of what the layer does, and what symbolic variables are created by the class (w, b,
    output).
    """

    # TODO: implement biases & scales properly. There are supposed to be more parameters:
    #  - one bias & scale per filter
    #  - one bias & scale per downsampled feature location (a 2d bias)
    #  - more?

    def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape=(5, 5),
                 poolsize=(2, 2)):
        """
        Allocate a LeNetConvPool layer with shared-variable internal parameters.

        :param rng: a random number generator used to initialize weights

        :param input: symbolic images.  Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])

        :param n_examples: input's shape[0] at runtime

        :param n_imgs: input's shape[1] at runtime

        :param img_shape: input's shape[2:4] at runtime

        :param n_filters: the number of filters to apply to the image

        :param filter_shape: the size of the filters to apply
        :type filter_shape: pair (rows, cols)

        :param poolsize: the downsampling (pooling) factor
        :type poolsize: pair (rows, cols)
        """

        # TODO: make a simpler convolution constructor!
        #  - make dx and dy optional
        #  - why do we have to pass shapes?  (Can we make them optional at least?)
        conv_op = ConvOp((n_imgs,) + img_shape, filter_shape, n_filters, n_examples,
                         dx=1, dy=1, output_mode='valid')

        #  - why is poolsize an op parameter here?
        #  - can we just have a maxpool function that creates this Op internally?
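        # DownsampleFactorMax performs max-pooling over non-overlapping poolsize windows;
        # with ignore_border=True, any partial window at the right/bottom edge is discarded.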
        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)

        # the filter tensor that we will apply is a 4D tensor
        w_shp = (n_filters, n_imgs) + filter_shape

        # the bias we add is a 1D tensor
        b_shp = (n_filters,)

        self.w = shared(
            numpy.asarray(
                rng.uniform(
                    low=-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                    high=1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                    size=w_shp),
                dtype=input.dtype))
        self.b = shared(
            numpy.asarray(
                rng.uniform(low=-.0, high=0., size=b_shp),
                dtype=input.dtype))

        self.input = input
        conv_out = conv_op(input, self.w)
        self.output = tensor.tanh(ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.w, self.b]


class SigmoidalLayer(object):
    def __init__(self, rng, input, n_in, n_out):
        """
        :param rng: a random number generator used to initialize the weights

        :param input: a symbolic tensor of shape (n_examples, n_in)

        :param n_in: number of input units

        :param n_out: number of output units
        """
        self.input = input
        self.w = shared(
            numpy.asarray(
                rng.uniform(low=-2 / numpy.sqrt(n_in), high=2 / numpy.sqrt(n_in),
                            size=(n_in, n_out)),
                dtype=input.dtype))
        self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
        self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
        self.params = [self.w, self.b]


class LogisticRegression(object):
    """WRITEME"""

    def __init__(self, input, n_in, n_out):
        self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
        self.b = shared(numpy.zeros((n_out,), dtype=input.dtype))
        self.l1 = abs(self.w).sum()
        self.l2_sqr = (self.w ** 2).sum()
        self.output = nnet.softmax(theano.dot(input, self.w) + self.b)
        self.argmax = theano.tensor.argmax(self.output, axis=1)
        self.params = [self.w, self.b]

    def nll(self, target):
        """Return the negative log-likelihood of the prediction of this model under a given
        target distribution.  Passing symbolic integers here means 1-hot.
        WRITEME
        """
        return nnet.categorical_crossentropy(self.output, target)

    def errors(self, target):
        """Return a vector of 0s and 1s, with a 1 for every example that was mis-classified.
        """
        if target.ndim != self.argmax.ndim:
            raise TypeError('target should have the same shape as self.argmax',
                            ('target', target.type, 'argmax', self.argmax.type))
        if target.dtype.startswith('int'):
            return theano.tensor.neq(self.argmax, target)
        else:
            raise NotImplementedError()


def evaluate_lenet5(batch_size=30, n_iter=1000):
    rng = numpy.random.RandomState(23455)

    mnist = pylearn.datasets.MNIST.train_valid_test()

    ishape = (28, 28)  # this is the size of MNIST images

    # allocate symbolic variables for the data
    x = tensor.fmatrix()  # the data is presented as rasterized images
    y = tensor.lvector()  # the labels are presented as a 1D vector of [long int] labels

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPool(rng, input=x.reshape((batch_size, 1, 28, 28)), n_examples=batch_size,
                           n_imgs=1, img_shape=ishape,
                           n_filters=6, filter_shape=(5, 5),
                           poolsize=(2, 2))

    # construct the second convolutional pooling layer
    layer1 = LeNetConvPool(rng, input=layer0.output, n_examples=batch_size,
                           n_imgs=6, img_shape=(12, 12),
                           n_filters=16, filter_shape=(5, 5),
                           poolsize=(2, 2))

    # construct a fully-connected sigmoidal layer
    layer2 = SigmoidalLayer(rng, input=layer1.output.flatten(2), n_in=16*16, n_out=128)  # 128 ?
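    # Shape bookkeeping for the layers above:
    #   28x28 input -> layer0 conv 5x5 ('valid'): 24x24 -> 2x2 max-pool: 12x12
    #   12x12       -> layer1 conv 5x5 ('valid'):  8x8  -> 2x2 max-pool:  4x4
    # so layer1.output.flatten(2) carries 16 feature maps of 4x4 = 256 values per example,
    # which is what n_in=16*16 amounts to.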
    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=128, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.nll(y).mean()

    # create a function to compute the mistakes that are made by the model
    test_model = pfunc([x, y], layer3.errors(y))

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    learning_rate = numpy.asarray(0.01, dtype='float32')

    # train_model is a function that updates the model parameters by SGD
    train_model = pfunc([x, y], cost,
                        updates=[(p, p - learning_rate * gp)
                                 for p, gp in zip(params, tensor.grad(cost, params))])

    # IS IT MORE SIMPLE TO USE A MINIMIZER OR THE DIRECT CODE?

    best_valid_score = float('inf')
    for i in xrange(n_iter):
        for j in xrange(len(mnist.train.x) / batch_size):
            cost_ij = train_model(
                mnist.train.x[j * batch_size:(j + 1) * batch_size],
                mnist.train.y[j * batch_size:(j + 1) * batch_size])
            # if 0 == j % 100:
            #     print('epoch %i:%i, training error %f' % (i, j * batch_size, cost_ij))
        valid_score = numpy.mean([test_model(
            mnist.valid.x[j * batch_size:(j + 1) * batch_size],
            mnist.valid.y[j * batch_size:(j + 1) * batch_size])
            for j in xrange(len(mnist.valid.x) / batch_size)])
        print('epoch %i, validation error %f' % (i, valid_score))
        if valid_score < best_valid_score:
            best_valid_score = valid_score
            test_score = numpy.mean([test_model(
                mnist.test.x[j * batch_size:(j + 1) * batch_size],
                mnist.test.y[j * batch_size:(j + 1) * batch_size])
                for j in xrange(len(mnist.test.x) / batch_size)])
            print('epoch %i, test error of best model %f' % (i, test_score))


if __name__ == '__main__':
    evaluate_lenet5()
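
For readers who want to see concretely what one LeNetConvPool layer computes, here is a minimal
numpy-only sketch of a 'valid' 2D convolution followed by non-overlapping 2x2 max-pooling.  The
helper names (conv2d_valid, max_pool_2x2) are made up for illustration and are not part of the
tutorial file or of Theano; the tutorial itself builds these operations with ConvOp and
DownsampleFactorMax.

import numpy

def conv2d_valid(img, filt):
    # 'valid' correlation of a single 2D image with a single 2D filter (no padding)
    fr, fc = filt.shape
    out_r = img.shape[0] - fr + 1
    out_c = img.shape[1] - fc + 1
    out = numpy.empty((out_r, out_c), dtype=img.dtype)
    for r in xrange(out_r):
        for c in xrange(out_c):
            out[r, c] = (img[r:r + fr, c:c + fc] * filt).sum()
    return out

def max_pool_2x2(x):
    # non-overlapping 2x2 max-pooling, dropping any leftover border row/column
    # (the ignore_border=True behaviour)
    r, c = (x.shape[0] // 2) * 2, (x.shape[1] // 2) * 2
    x = x[:r, :c]
    return x.reshape(r // 2, 2, c // 2, 2).max(axis=3).max(axis=1)

img = numpy.random.rand(28, 28)
filt = numpy.random.rand(5, 5)
pooled = max_pool_2x2(conv2d_valid(img, filt))
print(pooled.shape)   # (12, 12), the per-feature-map shape produced by layer0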