ift6266: comparison of code_tutoriel/mlp.py @ 2:bcc87d3e33a3
adding latest tutorial code
author    Dumitru Erhan <dumitru.erhan@gmail.com>
date      Sun, 24 Jan 2010 22:34:29 -0500
parents   fda5f787baa6
children  827de2cc34f8
1:0fda55a7de99 | 2:bcc87d3e33a3
69 # point towards; this is where pre-training helps, giving a good | 69 # point towards; this is where pre-training helps, giving a good |
70 # starting point for backpropagation, but more about this in the | 70 # starting point for backpropagation, but more about this in the |
71 # other tutorials | 71 # other tutorials |
72 | 72 |
73 # `W1` is initialized with `W1_values` which is uniformly sampled | 73 # `W1` is initialized with `W1_values` which is uniformly sampled |
74 # from -1/sqrt(n_in) and 1/sqrt(n_in) | 74 # between -sqrt(6./(n_in+n_hidden)) and sqrt(6./(n_in+n_hidden)) |
75 # the output of uniform is converted using asarray to dtype | 75 # the output of uniform is converted using asarray to dtype |
76 # theano.config.floatX so that the code is runnable on GPU | 76 # theano.config.floatX so that the code is runnable on GPU |
77 W1_values = numpy.asarray( numpy.random.uniform( \ | 77 W1_values = numpy.asarray( numpy.random.uniform( \ |
78 low = -numpy.sqrt(6./(n_in+n_hidden)), high = numpy.sqrt(6./(n_in+n_hidden)), \ | 78 low = -numpy.sqrt(6./(n_in+n_hidden)), \ |
 | 79 high = numpy.sqrt(6./(n_in+n_hidden)), \ |
79 size = (n_in, n_hidden)), dtype = theano.config.floatX) | 80 size = (n_in, n_hidden)), dtype = theano.config.floatX) |
80 # `W2` is initialized with `W2_values` which is uniformly sampled | 81 # `W2` is initialized with `W2_values` which is uniformly sampled |
81 # from -1/sqrt(n_hidden) and 1/sqrt(n_hidden) | 82 # between -sqrt(6./(n_hidden+n_out)) and sqrt(6./(n_hidden+n_out)) |
82 # the output of uniform is converted using asarray to dtype | 83 # the output of uniform is converted using asarray to dtype |
83 # theano.config.floatX so that the code is runnable on GPU | 84 # theano.config.floatX so that the code is runnable on GPU |
84 W2_values = numpy.asarray( numpy.random.uniform( | 85 W2_values = numpy.asarray( numpy.random.uniform( |
85 low = -numpy.sqrt(6./(n_hidden+n_out)), high = numpy.sqrt(6./(n_hidden+n_out)), \ | 86 low = -numpy.sqrt(6./(n_hidden+n_out)), \ |
 | 87 high = numpy.sqrt(6./(n_hidden+n_out)), \ |
86 size= (n_hidden, n_out)), dtype = theano.config.floatX) | 88 size= (n_hidden, n_out)), dtype = theano.config.floatX) |
87 | 89 |
88 self.W1 = theano.shared( value = W1_values ) | 90 self.W1 = theano.shared( value = W1_values ) |
89 self.b1 = theano.shared( value = numpy.zeros((n_hidden,), | 91 self.b1 = theano.shared( value = numpy.zeros((n_hidden,), |
90 dtype= theano.config.floatX)) | 92 dtype= theano.config.floatX)) |
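
The change above brings the comments in line with the code: both weight matrices are drawn uniformly from +/- sqrt(6/(fan_in + fan_out)), the scaling appropriate for tanh units. Below is a minimal NumPy sketch of that scheme with illustrative layer sizes for a 784-500-10 MNIST network; the helper name init_uniform is ours, not the tutorial's.

    import numpy

    def init_uniform(rng, fan_in, fan_out):
        # uniform in [-sqrt(6/(fan_in+fan_out)), +sqrt(6/(fan_in+fan_out))],
        # the same bound used for W1 and W2 in the diff above
        bound = numpy.sqrt(6. / (fan_in + fan_out))
        return numpy.asarray(rng.uniform(low=-bound, high=bound,
                                         size=(fan_in, fan_out)),
                             dtype='float32')

    rng = numpy.random.RandomState(1234)
    W1 = init_uniform(rng, 784, 500)    # n_in x n_hidden
    W2 = init_uniform(rng, 500, 10)     # n_hidden x n_out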
159 This is demonstrated on MNIST. | 161 This is demonstrated on MNIST. |
160 | 162 |
161 :param learning_rate: learning rate used (factor for the stochastic | 163 :param learning_rate: learning rate used (factor for the stochastic |
162 gradient) | 164 gradient) |
163 | 165 |
164 :param n_iter: number of iterations to run the optimizer | |
165 | |
166 :param L1_reg: L1-norm's weight when added to the cost (see | 166 :param L1_reg: L1-norm's weight when added to the cost (see |
167 regularization) | 167 regularization) |
168 | 168 |
169 :param L2_reg: L2-norm's weight when added to the cost (see | 169 :param L2_reg: L2-norm's weight when added to the cost (see |
170 regularization) | 170 regularization) |
171 """ | 171 |
 | 172 :param n_iter: maximal number of iterations to run the optimizer |
 | 173 |
 | 174 """ |
172 | 175 |
173 # Load the dataset | 176 # Load the dataset |
174 f = gzip.open('mnist.pkl.gz','rb') | 177 f = gzip.open('mnist.pkl.gz','rb') |
175 train_set, valid_set, test_set = cPickle.load(f) | 178 train_set, valid_set, test_set = cPickle.load(f) |
176 f.close() | 179 f.close() |
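
For reference, a Python 3 equivalent of the cPickle load above: encoding='latin1' is needed because mnist.pkl.gz was pickled under Python 2, and each of the three entries is an (inputs, labels) pair.

    import gzip
    import pickle

    with gzip.open('mnist.pkl.gz', 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')

    train_x, train_y = train_set    # e.g. train_x.shape == (50000, 784)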
262 # check every epoch | 265 # check every epoch |
263 | 266 |
264 | 267 |
265 best_params = None | 268 best_params = None |
266 best_validation_loss = float('inf') | 269 best_validation_loss = float('inf') |
 | 270 best_iter = 0 |
267 test_score = 0. | 271 test_score = 0. |
268 start_time = time.clock() | 272 start_time = time.clock() |
269 # have a maximum of `n_iter` iterations through the entire dataset | 273 # have a maximum of `n_iter` iterations through the entire dataset |
270 for iter in xrange(n_iter* n_minibatches): | 274 for iter in xrange(n_iter* n_minibatches): |
271 | 275 |
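
One detail worth noting about the loop header: a single counter runs over epochs and minibatches together, and the epoch and minibatch position are recovered by integer division and modulus. A quick illustration, where the value of n_minibatches is only an example:

    n_minibatches = 2500    # e.g. 50000 training examples / batch size 20
    n_iter = 3
    for iter in range(n_iter * n_minibatches):
        epoch = iter // n_minibatches             # full passes completed
        minibatch_index = iter % n_minibatches    # position within the pass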
298 #improve patience if loss improvement is good enough | 302 #improve patience if loss improvement is good enough |
299 if this_validation_loss < best_validation_loss * \ | 303 if this_validation_loss < best_validation_loss * \ |
300 improvement_threshold : | 304 improvement_threshold : |
301 patience = max(patience, iter * patience_increase) | 305 patience = max(patience, iter * patience_increase) |
302 | 306 |
 | 307 # save best validation score and iteration number |
303 best_validation_loss = this_validation_loss | 308 best_validation_loss = this_validation_loss |
 | 309 best_iter = iter |
 | 310 |
304 # test it on the test set | 311 # test it on the test set |
305 | |
306 test_score = 0. | 312 test_score = 0. |
307 for x,y in test_batches: | 313 for x,y in test_batches: |
308 test_score += test_model(x,y) | 314 test_score += test_model(x,y) |
309 test_score /= len(test_batches) | 315 test_score /= len(test_batches) |
310 print((' epoch %i, minibatch %i/%i, test error of best ' | 316 print((' epoch %i, minibatch %i/%i, test error of best ' |
311 'model %f %%') % | 317 'model %f %%') % |
312 (epoch, minibatch_index+1, n_minibatches, | 318 (epoch, minibatch_index+1, n_minibatches, |
313 test_score*100.)) | 319 test_score*100.)) |
314 | 320 |
315 if patience <= iter : | 321 if patience <= iter : |
316 break | 322 break |
317 | 323 |
318 end_time = time.clock() | 324 end_time = time.clock() |
319 print(('Optimization complete with best validation score of %f %%,' | 325 print(('Optimization complete. Best validation score of %f %% ' |
320 'with test performance %f %%') % | 326 'obtained at iteration %i, with test performance %f %%') % |
321 (best_validation_loss * 100., test_score*100.)) | 327 (best_validation_loss * 100., best_iter, test_score*100.)) |
322 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) | 328 print ('The code ran for %f minutes' % ((end_time-start_time)/60.)) |
323 | |
324 | |
325 | |
326 | |
327 | 329 |
328 | 330 |
329 if __name__ == '__main__': | 331 if __name__ == '__main__': |
330 sgd_optimization_mnist() | 332 sgd_optimization_mnist() |
331 | 333 |
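
Taken together, the hunk above introduces best_iter so the final report can say at which iteration the best validation score occurred. Below is a self-contained sketch of the patience-driven early stopping, using the hyper-parameter values typical of this tutorial; validate() and test() are dummy stand-ins for the compiled Theano functions, included only so the snippet runs.

    import random

    def validate(): return random.random()   # stand-in for the validation loss
    def test(): return random.random()       # stand-in for the test error

    n_iter, n_minibatches, validation_frequency = 50, 2500, 2500
    patience, patience_increase, improvement_threshold = 10000, 2, 0.995
    best_validation_loss, best_iter, test_score = float('inf'), 0, 0.

    for iter in range(n_iter * n_minibatches):
        # ... one SGD step on the current minibatch would happen here ...
        if (iter + 1) % validation_frequency == 0:
            this_validation_loss = validate()
            if this_validation_loss < best_validation_loss:
                # stretch patience when the improvement is significant
                if this_validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                # save best validation score and iteration number
                best_validation_loss = this_validation_loss
                best_iter = iter
                test_score = test()
        if patience <= iter:    # patience exhausted: stop early
            break

    print('Best validation score %f %% at iteration %i, test %f %%' %
          (best_validation_loss * 100., best_iter, test_score * 100.))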