Mercurial > ift6266
comparison code_tutoriel/convolutional_mlp.py @ 0:fda5f787baa6
initial commit
author    Dumitru Erhan <dumitru.erhan@gmail.com>
date      Thu, 21 Jan 2010 11:26:43 -0500
parents
children
comparison: -1:000000000000 -> 0:fda5f787baa6

"""
This tutorial introduces the LeNet5 neural network architecture using Theano. LeNet5 is a
convolutional neural network, good for classifying images. This tutorial shows how to build the
architecture, and comes with all the hyper-parameters you need to reproduce the paper's MNIST
results.

The best results are obtained after X iterations of the main program loop, which takes ***
minutes on my workstation (an Intel Core i7, circa July 2009), and *** minutes on my GPU (an
NVIDIA GTX 285 graphics processor).

This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters

 - LeNetConvPool doesn't implement pooling by average, it implements pooling by max.

 - Digit classification is implemented with a logistic regression rather than an RBF network

 - LeNet5's second convolutional layer was not fully connected to all first-layer feature maps
   (it used a sparse connection pattern); this implementation uses fully-connected convolutions

References:

 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf


"""
import numpy
import theano                    # used below as theano.dot / theano.tensor.argmax
from theano.compile.sandbox import shared, pfunc
from theano import tensor
from theano.tensor import nnet   # used below as nnet.softmax / nnet.categorical_crossentropy
from pylearn.shared.layers import LogisticRegression, SigmoidalLayer
import theano.sandbox.softsign
import pylearn.datasets.MNIST

# ConvOp and DownsampleFactorMax are used below but were not imported in the original file;
# these import paths assume the Theano layout of this era -- adjust if your version differs.
from theano.tensor.nnet.conv import ConvOp
from theano.tensor.signal.downsample import DownsampleFactorMax


try:
    # this tells theano to use the GPU if possible
    from theano.sandbox.cuda import use
    use()
except Exception, e:
    print('Warning: Attempt to use GPU resulted in error "%s"' % str(e))

class LeNetConvPool(object):
    """WRITEME

    Math of what the layer does, and what symbolic variables are created by the class (w, b,
    output).

    """

    #TODO: implement biases & scales properly. There are supposed to be more parameters.
    #    - one bias & scale per filter
    #    - one bias & scale per downsample feature location (a 2d bias)
    #    - more?

    def __init__(self, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape=(5,5),
            poolsize=(2,2)):
        """
        Allocate a LeNetConvPool layer with shared variable internal parameters.

        :param rng: a random number generator used to initialize weights

        :param input: symbolic images.  Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])

        :param n_examples: input's shape[0] at runtime

        :param n_imgs: input's shape[1] at runtime

        :param img_shape: input's shape[2:4] at runtime

        :param n_filters: the number of filters to apply to the image.

        :param filter_shape: the size of the filters to apply
        :type filter_shape: pair (rows, cols)

        :param poolsize: the downsampling (pooling) factor
        :type poolsize: pair (rows, cols)
        """

        #TODO: make a simpler convolution constructor!!
        #    - make dx and dy optional
        #    - why do we have to pass shapes? (Can we make them optional at least?)
        conv_op = ConvOp((n_imgs,) + img_shape, filter_shape, n_filters, n_examples,
                dx=1, dy=1, output_mode='valid')
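        # with output_mode='valid', the convolution output has shape
        # (n_examples, n_filters, img_shape[0]-filter_shape[0]+1, img_shape[1]-filter_shape[1]+1)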

        # - why is poolsize an op parameter here?
        # - can we just have a maxpool function that creates this Op internally?
        ds_op = DownsampleFactorMax(poolsize, ignore_border=True)
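        # max-pooling divides each spatial dimension by the corresponding poolsize factor;
        # with ignore_border=True any leftover rows/columns at the border are discarded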

        # the filter tensor that we will apply is a 4D tensor
        w_shp = (n_filters, n_imgs) + filter_shape

        # the bias we add is a 1D tensor
        b_shp = (n_filters,)

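        # weights are drawn uniformly from +/- 1/sqrt(fan-in), where the fan-in of each
        # filter is n_imgs * filter_shape[0] * filter_shape[1]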
        self.w = shared(
                numpy.asarray(
                    rng.uniform(
                        low=-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                        high=1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
                        size=w_shp),
                    dtype=input.dtype))
        self.b = shared(
                numpy.asarray(
                    rng.uniform(low=-.0, high=0., size=b_shp),
                    dtype=input.dtype))

        self.input = input
        conv_out = conv_op(input, self.w)
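        # dimshuffle('x', 0, 'x', 'x') turns the (n_filters,) bias into a broadcastable
        # (1, n_filters, 1, 1) tensor so that one bias is added per output feature map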
        self.output = tensor.tanh(ds_op(conv_out) + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.w, self.b]

class SigmoidalLayer(object):
    def __init__(self, rng, input, n_in, n_out):
        """
        :param rng: a random number generator used to initialize weights
        :param input: a symbolic tensor of shape (n_examples, n_in)
        :param n_in: dimensionality of the input
        :param n_out: number of hidden units
        """
        self.input = input
        self.w = shared(
                numpy.asarray(
                    rng.uniform(low=-2/numpy.sqrt(n_in), high=2/numpy.sqrt(n_in),
                        size=(n_in, n_out)), dtype=input.dtype))
        self.b = shared(numpy.asarray(numpy.zeros(n_out), dtype=input.dtype))
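        # note: despite the class name, the squashing function applied here is tanh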
        self.output = tensor.tanh(tensor.dot(input, self.w) + self.b)
        self.params = [self.w, self.b]

class LogisticRegression(object):
    """WRITEME"""

    def __init__(self, input, n_in, n_out):
        self.w = shared(numpy.zeros((n_in, n_out), dtype=input.dtype))
        self.b = shared(numpy.zeros((n_out,), dtype=input.dtype))
        self.l1 = abs(self.w).sum()
        self.l2_sqr = (self.w ** 2).sum()
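        # p(y|x) is the softmax of an affine transformation of the input;
        # argmax over classes gives the model's prediction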
        self.output = nnet.softmax(theano.dot(input, self.w) + self.b)
        self.argmax = theano.tensor.argmax(self.output, axis=1)
        self.params = [self.w, self.b]

    def nll(self, target):
        """Return the negative log-likelihood of the prediction of this model under a given
        target distribution.  Passing symbolic integers here means 1-hot.
        WRITEME
        """
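        # categorical_crossentropy with integer targets is exactly -log p(target | input),
        # i.e. the negative log-likelihood of the correct class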
        return nnet.categorical_crossentropy(self.output, target)

    def errors(self, target):
        """Return a vector of 0s and 1s, with 1s on every line that was mis-classified.
        """
        if target.ndim != self.argmax.ndim:
            raise TypeError('target should have the same shape as self.argmax',
                    ('target', target.type, 'argmax', self.argmax.type))
        if target.dtype.startswith('int'):
            return theano.tensor.neq(self.argmax, target)
        else:
            raise NotImplementedError()

def evaluate_lenet5(batch_size=30, n_iter=1000):
    rng = numpy.random.RandomState(23455)

    mnist = pylearn.datasets.MNIST.train_valid_test()

    ishape = (28, 28)  # this is the size of MNIST images

    # allocate symbolic variables for the data
    x = tensor.fmatrix()  # the data is presented as rasterized images
    y = tensor.lvector()  # the labels are presented as 1D vector of [long int] labels

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPool(rng, input=x.reshape((batch_size, 1, 28, 28)), n_examples=batch_size,
            n_imgs=1, img_shape=ishape,
            n_filters=6, filter_shape=(5,5),
            poolsize=(2,2))
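    # shape bookkeeping: 28x28 input -> 24x24 after the 5x5 'valid' convolution
    # -> 12x12 after 2x2 max-pooling, so layer0.output is (batch_size, 6, 12, 12)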

    # construct the second convolutional pooling layer
    layer1 = LeNetConvPool(rng, input=layer0.output, n_examples=batch_size,
            n_imgs=6, img_shape=(12,12),
            n_filters=16, filter_shape=(5,5),
            poolsize=(2,2))
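    # 12x12 -> 8x8 after the 5x5 convolution -> 4x4 after pooling,
    # so layer1.output is (batch_size, 16, 4, 4)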

    # construct a fully-connected sigmoidal layer
    layer2 = SigmoidalLayer(rng, input=layer1.output.flatten(2),
            n_in=16*4*4,   # 16 feature maps of size 4x4 coming out of layer1
            n_out=128)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=128, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.nll(y).mean()

    # create a function to compute the mistakes that are made by the model
    test_model = pfunc([x, y], layer3.errors(y))

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    learning_rate = numpy.asarray(0.01, dtype='float32')

    # train_model is a function that updates the model parameters by SGD
    train_model = pfunc([x, y], cost,
            updates=[(p, p - learning_rate * gp) for p, gp in zip(params, tensor.grad(cost, params))])
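    # each call to train_model performs one SGD step: every parameter p is replaced by
    # p - learning_rate * d(cost)/dp, with the gradients computed symbolically by tensor.grad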

    # IS IT MORE SIMPLE TO USE A MINIMIZER OR THE DIRECT CODE?

    best_valid_score = float('inf')
    for i in xrange(n_iter):
        for j in xrange(len(mnist.train.x) / batch_size):
            cost_ij = train_model(
                    mnist.train.x[j*batch_size:(j+1)*batch_size],
                    mnist.train.y[j*batch_size:(j+1)*batch_size])
            #if 0 == j % 100:
            #    print('epoch %i:%i, training error %f' % (i, j*batch_size, cost_ij))
        valid_score = numpy.mean([test_model(
                mnist.valid.x[j*batch_size:(j+1)*batch_size],
                mnist.valid.y[j*batch_size:(j+1)*batch_size])
                for j in xrange(len(mnist.valid.x) / batch_size)])
        print('epoch %i, validation error %f' % (i, valid_score))
        if valid_score < best_valid_score:
            best_valid_score = valid_score
            test_score = numpy.mean([test_model(
                    mnist.test.x[j*batch_size:(j+1)*batch_size],
                    mnist.test.y[j*batch_size:(j+1)*batch_size])
                    for j in xrange(len(mnist.test.x) / batch_size)])
            print('epoch %i, test error of best model %f' % (i, test_score))

if __name__ == '__main__':
    evaluate_lenet5()