code_tutoriel/DBN.py @ 165:4bc5eeec6394

Updating the tutorial code to the latest revisions.
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Fri, 26 Feb 2010 13:55:27 -0500
"""Deep Belief Network (DBN) tutorial code: greedy layer-wise pretraining of
stacked RBMs, followed by supervised finetuning of the resulting MLP on MNIST.
"""
import os

import numpy, time, cPickle, gzip

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from rbm import RBM


class DBN(object):
    """Deep Belief Network.

    A DBN is obtained by stacking several RBMs on top of one another: the
    hidden layer of the RBM at layer `i` becomes the input of the RBM at
    layer `i+1`. After greedy layer-wise pretraining, the stack plus a
    logistic regression output layer is treated as an ordinary MLP and
    finetuned with stochastic gradient descent.
    """

    def __init__(self, numpy_rng, theano_rng = None, n_ins = 784,
                 hidden_layers_sizes = [500,500], n_outs = 10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                          weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: sizes of the intermediate layers; must
                                    contain at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y') # the labels are presented as 1D vector of
                                # [int] labels

        # The DBN is an MLP whose intermediate layers each share their weights
        # with a different RBM. We first construct the DBN as a deep multilayer
        # perceptron, and while constructing each sigmoidal layer we also
        # construct an RBM that shares weights with that layer. During
        # pretraining we train these RBMs (which also changes the weights of
        # the MLP); during finetuning we finish training the DBN by doing
        # stochastic gradient descent on the MLP. (A small sketch illustrating
        # this weight sharing follows the class definition.)

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i-1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the DBN if you are on the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng = numpy_rng,
                                        input = layer_input,
                                        n_in = input_size,
                                        n_out = hidden_layers_sizes[i],
                                        activation = T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question... but we are going to
            # only declare that the parameters of the sigmoid_layers are
            # parameters of the DBN. The visible biases in the RBM are
            # parameters of those RBMs, but not of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng = numpy_rng, theano_rng = theano_rng,
                            input = layer_input,
                            n_visible = input_size,
                            n_hidden = hidden_layers_sizes[i],
                            W = sigmoid_layer.W,
                            hbias = sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input = self.sigmoid_layers[-1].output,
            n_in = hidden_layers_sizes[-1], n_out = n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for the second phase of training (finetuning),
        # defined as the negative log likelihood of the logistic layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size):
        '''Generates a list of functions, one per layer, each performing one
        step of gradient descent at that layer. Every function takes a
        minibatch index as input, so to pretrain an RBM you simply iterate
        over all minibatch indexes, calling the corresponding function.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used for
                            training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''

        # index to a [mini]batch
        index = T.lscalar('index')     # index to a minibatch
        learning_rate = T.scalar('lr') # learning rate to use

        # number of batches
        n_batches = train_set_x.value.shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.cd(learning_rate, persistent=None)

            # compile the theano function
            fn = theano.function(inputs = [index,
                                     theano.Param(learning_rate, default = 0.1)],
                                 outputs = cost,
                                 updates = updates,
                                 givens = {self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error over the
        whole validation set, and a function `test` that computes the error
        over the whole testing set.

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: a list that contains all the datasets; it has to
                         contain three pairs, `train`, `valid`, `test` in this
                         order, where each pair is formed of two Theano
                         variables, one for the datapoints, the other for the
                         labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during the finetune stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x , test_set_y ) = datasets[2]

        # compute number of minibatches for validation and testing
        n_valid_batches = valid_set_x.value.shape[0] / batch_size
        n_test_batches  = test_set_x.value.shape[0]  / batch_size

        index = T.lscalar('index') # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = {}
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam*learning_rate

        train_fn = theano.function(inputs = [index],
              outputs = self.finetune_cost,
              updates = updates,
              givens = {
                self.x : train_set_x[index*batch_size:(index+1)*batch_size],
                self.y : train_set_y[index*batch_size:(index+1)*batch_size]})

        test_score_i = theano.function([index], self.errors,
              givens = {
                self.x: test_set_x[index*batch_size:(index+1)*batch_size],
                self.y: test_set_y[index*batch_size:(index+1)*batch_size]})

        valid_score_i = theano.function([index], self.errors,
              givens = {
                self.x: valid_set_x[index*batch_size:(index+1)*batch_size],
                self.y: valid_set_y[index*batch_size:(index+1)*batch_size]})

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score

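
# The constructor ties each RBM to the sigmoid layer above it by passing the
# layer's W and b as the RBM's weights and hidden biases, so the CD updates
# made during pretraining modify the MLP parameters in place. The helper
# below is an illustrative sketch (not part of the original tutorial; the
# name `_check_weight_sharing` is ours) that builds a tiny DBN and checks
# that the shared variables really are the same objects.
def _check_weight_sharing():
    rng = numpy.random.RandomState(0)
    tiny = DBN(numpy_rng = rng, n_ins = 20,
               hidden_layers_sizes = [15, 10], n_outs = 2)
    for sigmoid_layer, rbm in zip(tiny.sigmoid_layers, tiny.rbm_layers):
        # the RBM reuses the very same shared variables as the sigmoid layer
        assert rbm.W is sigmoid_layer.W
        assert rbm.hbias is sigmoid_layer.b
    print 'weight sharing verified for %i layers' % tiny.n_layers

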
def test_DBN( finetune_lr = 0.1, pretraining_epochs = 10, \
              pretrain_lr = 0.1, training_epochs = 1000, \
              dataset='mnist.pkl.gz'):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type training_epochs: int
    :param training_epochs: maximal number of epochs to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    """

    print 'finetune_lr = ', finetune_lr
    print 'pretrain_lr = ', pretrain_lr

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x , test_set_y  = datasets[2]


    batch_size = 20 # size of the minibatch

    # compute number of minibatches for training
    n_train_batches = train_set_x.value.shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng = numpy_rng, n_ins = 28*28,
              hidden_layers_sizes = [1000,1000,1000],
              n_outs = 10)


    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(
            train_set_x = train_set_x,
            batch_size  = batch_size )

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index = batch_index,
                                            lr = pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost '%(i,epoch), numpy.mean(c)

    end_time = time.clock()

    print ('Pretraining took %f minutes' % ((end_time - start_time)/60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
                datasets = datasets, batch_size = batch_size,
                learning_rate = finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2.         # wait this much longer when a new best is
                                   # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience/2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch


    best_params = None
    best_validation_loss = float('inf')
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter+1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index+1, n_train_batches, \
                       this_validation_loss*100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                                              improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index+1, n_train_batches,
                           test_score*100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score*100.))
    print ('The fine-tuning code ran for %f minutes' % ((end_time - start_time)/60.))

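
# The finetuning loop above uses patience-based early stopping: `patience` is
# the minimum number of minibatch updates to run, and it is extended whenever
# the validation error improves by more than `improvement_threshold`. The
# function below is an illustrative, stripped-down sketch of just that rule
# (the name and the stand-alone loop are ours, not part of the tutorial); it
# runs on a plain list of validation losses instead of a real training loop.
def _early_stopping_sketch(validation_losses, patience = 8,
                           patience_increase = 2., improvement_threshold = 0.995):
    best = float('inf')
    for it, loss in enumerate(validation_losses):
        if loss < best:
            if loss < best * improvement_threshold:
                # significant improvement: allow the run to go on longer
                patience = max(patience, it * patience_increase)
            best = loss
        if patience <= it:
            break
    return best, it

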
if __name__ == '__main__':
    pretrain_lr = numpy.float(os.sys.argv[1])
    finetune_lr = numpy.float(os.sys.argv[2])
    test_DBN(pretrain_lr=pretrain_lr, finetune_lr=finetune_lr)
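
# Example invocation (illustrative values; the script expects the pre-training
# learning rate first and the finetuning learning rate second):
#
#     python DBN.py 0.01 0.1
#
# From an interactive session, test_DBN can also be called directly, e.g. with
# far fewer epochs for a quick smoke test (not a meaningful benchmark):
#
#     test_DBN(pretrain_lr = 0.01, finetune_lr = 0.1,
#              pretraining_epochs = 1, training_epochs = 10)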