comparison code_tutoriel/convolutional_mlp.py @ 0:fda5f787baa6

commit initial
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Thu, 21 Jan 2010 11:26:43 -0500
parents
children
comparison
-1:000000000000 0:fda5f787baa6

"""
This tutorial introduces the LeNet5 neural network architecture using Theano. LeNet5 is a
convolutional neural network, good for classifying images. This tutorial shows how to build the
architecture, and comes with all the hyper-parameters you need to reproduce the paper's MNIST
results.

The best results are obtained after X iterations of the main program loop, which takes ***
minutes on my workstation (an Intel Core i7, circa July 2009), and *** minutes on my GPU (an
NVIDIA GTX 285 graphics processor).

This implementation simplifies the model in the following ways:

- LeNetConvPool doesn't implement location-specific gain and bias parameters.

- LeNetConvPool implements max-pooling rather than average pooling.

- Digit classification is implemented with a logistic regression rather than an RBF network.

- LeNet5 did not use fully-connected convolutions at the second layer; this implementation does.

References:

- Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: Gradient-Based Learning Applied to Document
  Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
  http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import numpy
from theano import tensor
from theano.tensor import nnet
from theano.compile.sandbox import shared, pfunc
import theano.sandbox.softsign
import pylearn.datasets.MNIST

# These Ops have moved around between Theano versions; here they are expected to be
# importable from the following locations.
from theano.tensor.nnet.conv import ConvOp
from theano.tensor.signal.downsample import DownsampleFactorMax


try:
    # this tells theano to use the GPU if possible
    from theano.sandbox.cuda import use
    use()
except Exception, e:
    print('Warning: Attempt to use GPU resulted in error "%s"' % str(e))

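# The shape bookkeeping in this tutorial is easy to get wrong, so here is a small,
# self-contained helper (an illustrative sketch; the function name is not part of the
# tutorial's API) that computes the feature-map size produced by one conv/pool stage
# of the kind built by LeNetConvPool below.
def _lenet5_feature_map_shape(img_shape=(28, 28), filter_shape=(5, 5), poolsize=(2, 2)):
    """Feature-map size after one 'valid' convolution followed by max-pooling.

    A 'valid' convolution with a (kr, kc) filter maps an (r, c) image to
    (r - kr + 1, c - kc + 1); (pr, pc) pooling with ignore_border=True then
    divides each dimension by the pool size (discarding any remainder).
    """
    conv_rows = img_shape[0] - filter_shape[0] + 1
    conv_cols = img_shape[1] - filter_shape[1] + 1
    return (conv_rows // poolsize[0], conv_cols // poolsize[1])

# On 28x28 MNIST images: (28, 28) -> (12, 12) after the first stage, then
# (12, 12) -> (4, 4) after the second, so the flattened input to the
# fully-connected layer has 16 * 4 * 4 = 256 values per example.
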
class LeNetConvPool(object):
    """A convolutional layer followed by max-pooling.

    The layer applies n_filters filters to its 4D input with a 'valid' convolution,
    max-pools the result over non-overlapping poolsize regions, adds a per-filter
    bias, and squashes with tanh:

        output = tanh( maxpool( conv(input, w) ) + b )

    The class creates the shared variables w and b and the symbolic variable output.
    """

    #TODO: implement biases & scales properly. There are supposed to be more parameters.
    #  - one bias & scale per filter
    #  - one bias & scale per downsample feature location (a 2d bias)
    #  - more?

    def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape=(5,5),
            poolsize=(2,2)):
        """
        Allocate a LeNetConvPool layer with shared variable internal parameters.

        :param rng: a random number generator used to initialize weights

        :param input: symbolic images. Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])

        :param n_examples: input's shape[0] at runtime

        :param n_imgs: input's shape[1] at runtime

        :param img_shape: input's shape[2:4] at runtime

        :param n_filters: the number of filters to apply to the image

        :param filter_shape: the size of the filters to apply
        :type filter_shape: pair (rows, cols)

        :param poolsize: the downsampling (pooling) factor
        :type poolsize: pair (rows, cols)
        """

        #TODO: make a simpler convolution constructor!!
        #  - make dx and dy optional
        #  - why do we have to pass shapes? (Can we make them optional at least?)
        conv_op = ConvOp((n_imgs,)+img_shape, filter_shape, n_filters, n_examples,
                dx=1, dy=1, output_mode='valid')

        # - why is poolsize an op parameter here?
        # - can we just have a maxpool function that creates this Op internally?
        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)

        # the filter tensor that we will apply is a 4D tensor
        w_shp = (n_filters, n_imgs) + filter_shape

        # the bias we add is a 1D tensor, one bias per output feature map
        b_shp = (n_filters,)

        # weights are drawn uniformly from +/- 1/sqrt(fan-in), where the fan-in of a
        # hidden unit is the number of input values that feed into it
        self.w = shared(
                numpy.asarray(
                    rng.uniform(
                        low=-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                        high=1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                        size=w_shp),
                    dtype=input.dtype))
        # the biases start out at zero
        self.b = shared(numpy.zeros(b_shp, dtype=input.dtype))

        self.input = input
        conv_out = conv_op(input, self.w)
        self.output = tensor.tanh(ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.w, self.b]

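# A minimal usage sketch for LeNetConvPool (illustrative only; the helper name is not
# part of the tutorial, and it assumes the ConvOp / DownsampleFactorMax imports above
# resolve in this Theano version). It compiles a single layer on a batch of ten 28x28
# images and prints the resulting output shape, which should be (10, 6, 12, 12).
def _demo_lenet_conv_pool():
    rng = numpy.random.RandomState(0)
    x = tensor.fmatrix()
    layer = LeNetConvPool(rng, input=x.reshape((10, 1, 28, 28)), n_examples=10,
            n_imgs=1, img_shape=(28, 28), n_filters=6,
            filter_shape=(5, 5), poolsize=(2, 2))
    f = pfunc([x], layer.output)
    batch = numpy.asarray(numpy.random.rand(10, 28 * 28), dtype='float32')
    print(f(batch).shape)
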
class SigmoidalLayer(object):
    def __init__(self, rng, input, n_in, n_out):
        """
        A fully-connected hidden layer: output = tanh(dot(input, w) + b)

        :param rng: a random number generator used to initialize weights
        :param input: a symbolic tensor of shape (n_examples, n_in)
        :param n_in: dimensionality of the input
        :param n_out: number of hidden units
        """
        self.input = input
        self.w = shared(
                numpy.asarray(
                    rng.uniform(low=-2/numpy.sqrt(n_in), high=2/numpy.sqrt(n_in),
                        size=(n_in, n_out)), dtype=input.dtype))
        self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
        self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
        self.params = [self.w, self.b]

class LogisticRegression(object):
    """Multi-class logistic regression: softmax(dot(input, w) + b)"""

    def __init__(self, input, n_in, n_out):
        self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
        self.b = shared(numpy.zeros((n_out,), dtype=input.dtype))
        self.l1 = abs(self.w).sum()
        self.l2_sqr = (self.w**2).sum()
        self.output = nnet.softmax(tensor.dot(input, self.w) + self.b)
        self.argmax = tensor.argmax(self.output, axis=1)
        self.params = [self.w, self.b]

    def nll(self, target):
        """Return the negative log-likelihood of the prediction of this model under a given
        target distribution. Passing symbolic integers here means the target distribution
        is 1-hot.
        """
        return nnet.categorical_crossentropy(self.output, target)

    def errors(self, target):
        """Return a vector of 0s and 1s, with a 1 on every line that was mis-classified.
        """
        if target.ndim != self.argmax.ndim:
            raise TypeError('target should have the same shape as self.argmax',
                    ('target', target.type, 'argmax', self.argmax.type))
        if target.dtype.startswith('int'):
            return tensor.neq(self.argmax, target)
        else:
            raise NotImplementedError()

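# The training cost below is the mean of this per-example negative log-likelihood.
# The numpy-only helper below (an illustrative sketch; not used by the tutorial code)
# shows what categorical_crossentropy computes for integer targets: minus the log of
# the probability the softmax assigned to the correct class.
def _demo_negative_log_likelihood():
    probs = numpy.asarray([[0.7, 0.2, 0.1],
                           [0.1, 0.8, 0.1]])   # softmax outputs for a batch of 2 examples
    targets = numpy.asarray([0, 1])            # correct class for each example
    per_example_nll = -numpy.log(probs[numpy.arange(len(targets)), targets])
    print(per_example_nll)          # approximately [0.357, 0.223]
    print(per_example_nll.mean())   # this mean is what evaluate_lenet5 minimizes
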
def evaluate_lenet5(batch_size=30, n_iter=1000):
    rng = numpy.random.RandomState(23455)

    mnist = pylearn.datasets.MNIST.train_valid_test()

    ishape = (28, 28)     # this is the size of MNIST images

    # allocate symbolic variables for the data
    x = tensor.fmatrix()  # the data is presented as rasterized images
    y = tensor.lvector()  # the labels are presented as a 1D vector of [long int] labels

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPool(rng, input=x.reshape((batch_size,1,28,28)), n_examples=batch_size,
            n_imgs=1, img_shape=ishape,
            n_filters=6, filter_shape=(5,5),
            poolsize=(2,2))

    # construct the second convolutional pooling layer
    # layer0 produces 6 feature maps of size 12x12 ('valid' 5x5 conv: 28->24, 2x2 pooling: 24->12)
    layer1 = LeNetConvPool(rng, input=layer0.output, n_examples=batch_size,
            n_imgs=6, img_shape=(12,12),
            n_filters=16, filter_shape=(5,5),
            poolsize=(2,2))

    # construct a fully-connected sigmoidal layer
    # layer1 produces 16 feature maps of size 4x4, i.e. 16*4*4 = 256 values per example
    layer2 = SigmoidalLayer(rng, input=layer1.output.flatten(2), n_in=16*4*4, n_out=128)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=128, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.nll(y).mean()

    # create a function to compute the mistakes that are made by the model
    test_model = pfunc([x, y], layer3.errors(y))

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    learning_rate = numpy.asarray(0.01, dtype='float32')

    # train_model is a function that updates the model parameters by SGD:
    # each parameter p is replaced by p - learning_rate * d(cost)/dp
    train_model = pfunc([x, y], cost,
            updates=[(p, p - learning_rate*gp) for p, gp in zip(params, tensor.grad(cost, params))])

    # IS IT SIMPLER TO USE A MINIMIZER OR THE DIRECT CODE?

    best_valid_score = float('inf')
    for i in xrange(n_iter):
        # one epoch of training over the whole training set, one minibatch at a time
        for j in xrange(len(mnist.train.x)/batch_size):
            cost_ij = train_model(
                    mnist.train.x[j*batch_size:(j+1)*batch_size],
                    mnist.train.y[j*batch_size:(j+1)*batch_size])
            #if 0 == j % 100:
            #    print('epoch %i:%i, training error %f' % (i, j*batch_size, cost_ij))

        # measure zero-one error on the validation set after each epoch
        valid_score = numpy.mean([test_model(
            mnist.valid.x[j*batch_size:(j+1)*batch_size],
            mnist.valid.y[j*batch_size:(j+1)*batch_size])
            for j in xrange(len(mnist.valid.x)/batch_size)])
        print('epoch %i, validation error %f' % (i, valid_score))

        # report the test error of the model with the best validation error so far
        if valid_score < best_valid_score:
            best_valid_score = valid_score
            test_score = numpy.mean([test_model(
                mnist.test.x[j*batch_size:(j+1)*batch_size],
                mnist.test.y[j*batch_size:(j+1)*batch_size])
                for j in xrange(len(mnist.test.x)/batch_size)])
            print('epoch %i, test error of best model %f' % (i, test_score))

if __name__ == '__main__':
    evaluate_lenet5()