ift6266: comparison of code_tutoriel/DBN.py @ 165:4bc5eeec6394 (parent 164:e3de934a98b6)

Updating the tutorial code to the latest revisions.

author: Dumitru Erhan <dumitru.erhan@gmail.com>
date:   Fri, 26 Feb 2010 13:55:27 -0500
1 """ | |
2 """ | |
3 import os | |
4 | |
5 import numpy, time, cPickle, gzip | |
6 | |
7 import theano | |
8 import theano.tensor as T | |
9 from theano.tensor.shared_randomstreams import RandomStreams | |
10 | |
11 from logistic_sgd import LogisticRegression, load_data | |
12 from mlp import HiddenLayer | |
13 from rbm import RBM | |
14 | |


class DBN(object):
    """Deep Belief Network: a stack of sigmoid layers whose weights are
    shared with RBMs for greedy layer-wise pre-training, topped by a
    logistic regression layer for supervised fine-tuning.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10):
        """This class supports a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                          weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: sizes of the intermediate layers; must
                                    contain at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector
                                 # of [int] labels

        # The DBN is an MLP for which the weights of each intermediate layer
        # are shared with a different RBM. We first construct the DBN as a
        # deep multilayer perceptron, and when constructing each sigmoidal
        # layer we also construct an RBM that shares weights with that layer.
        # During pre-training we train these RBMs (which also changes the
        # weights of the MLP). During fine-tuning we finish training the DBN
        # by doing stochastic gradient descent on the MLP.
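
        # Illustrative shapes (assuming the defaults n_ins=784 and
        # hidden_layers_sizes=[500, 500]): layer 0 and its RBM share a
        # 784x500 weight matrix W and a 500-dim hidden bias; layer 1 and its
        # RBM share a 500x500 W. Only the RBM visible biases are not reused
        # by the MLP.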

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the DBN if we are on the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it is arguably a philosophical question, but we only declare
            # the parameters of the sigmoid layers to be parameters of the
            # DBN. The visible biases in the RBMs are parameters of those
            # RBMs, but not of the DBN.
            self.params.extend(sigmoid_layer.params)

            # construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)


        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for the second phase of training (fine-tuning),
        # defined as the negative log likelihood of the logistic layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size):
        '''Generates a list of functions, each performing one step of
        gradient descent for a given layer. To pre-train an RBM you simply
        iterate over all minibatch indexes, calling the corresponding
        function on each (see the usage sketch after this method).

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: shared variable that contains all datapoints used
                            for training the RBMs
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''

        index = T.lscalar('index')      # index to a [mini]batch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.value.shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch, given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.cd(learning_rate, persistent=None)

            # compile the theano function
            fn = theano.function(inputs=[index,
                                         theano.Param(learning_rate, default=0.1)],
                                 outputs=cost,
                                 updates=updates,
                                 givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
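
    # Usage sketch for `pretraining_functions` (illustrative values; the real
    # driver loop is in test_DBN below):
    #
    #     pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x,
    #                                              batch_size=20)
    #     # one CD step on layer 0, minibatch 5, learning rate 0.1
    #     cost = pretrain_fns[0](index=5, lr=0.1)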


    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        fine-tuning, a function `validate` that computes the error on a batch
        from the validation set, and a function `test` that computes the
        error on a batch from the testing set (see the usage sketch after
        this method).

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: a list containing all the datasets; it has to contain
                         three pairs, `train`, `valid`, `test`, in this order,
                         where each pair is formed of two Theano variables,
                         one for the datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during the fine-tuning stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for validation and testing
        n_valid_batches = valid_set_x.value.shape[0] / batch_size
        n_test_batches = test_set_x.value.shape[0] / batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = {}
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * learning_rate

        train_fn = theano.function(inputs=[index],
                outputs=self.finetune_cost,
                updates=updates,
                givens={
                    self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    self.y: train_set_y[index * batch_size:(index + 1) * batch_size]})

        test_score_i = theano.function([index], self.errors,
                givens={
                    self.x: test_set_x[index * batch_size:(index + 1) * batch_size],
                    self.y: test_set_y[index * batch_size:(index + 1) * batch_size]})

        valid_score_i = theano.function([index], self.errors,
                givens={
                    self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                    self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

        # create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
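
    # Usage sketch for `build_finetune_functions` (illustrative values; the
    # real driver loop is in test_DBN below):
    #
    #     train_fn, validate_model, test_model = dbn.build_finetune_functions(
    #         datasets=datasets, batch_size=20, learning_rate=0.1)
    #     cost = train_fn(0)                  # one SGD step on minibatch 0
    #     err = numpy.mean(validate_model())  # mean error over the validation set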


def test_DBN(finetune_lr=0.1, pretraining_epochs=10,
             pretrain_lr=0.1, training_epochs=1000,
             dataset='mnist.pkl.gz'):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the fine-tuning stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pre-training
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    """

    print 'finetune_lr = ', finetune_lr
    print 'pretrain_lr = ', pretrain_lr

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    batch_size = 20  # size of the minibatch

    # compute number of minibatches for training
    n_train_batches = train_set_x.value.shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    # pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), numpy.mean(c)

    end_time = time.clock()

    print ('Pretraining took %f minutes' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing functions for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch
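
    # Worked example of the early-stopping setup above (assuming the standard
    # MNIST split with 50,000 training examples and batch_size = 20):
    # n_train_batches = 2500, so validation_frequency = min(2500, 5000) = 2500,
    # i.e. the validation set is evaluated once per training epoch.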


    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we obtained the best validation score so far
                if this_validation_loss < best_validation_loss:

                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print ('The code ran for %f minutes' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    pretrain_lr = numpy.float(os.sys.argv[1])
    finetune_lr = numpy.float(os.sys.argv[2])
    test_DBN(pretrain_lr=pretrain_lr, finetune_lr=finetune_lr)
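
# Example invocation (illustrative learning rates; assumes Python 2 and the
# Theano version this tutorial targets). The two positional arguments are
# pretrain_lr and finetune_lr:
#
#     python DBN.py 0.01 0.1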