comparison code_tutoriel/mlp.py @ 2:bcc87d3e33a3

adding latest tutorial code
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Sun, 24 Jan 2010 22:34:29 -0500
parents fda5f787baa6
children 827de2cc34f8
comparing 1:0fda55a7de99 with 2:bcc87d3e33a3
@@ -69,22 +69,24 @@
         # point towards; this is where pre-training helps, giving a good
         # starting point for backpropagation, but more about this in the
         # other tutorials

         # `W1` is initialized with `W1_values`, which is uniformly sampled
-        # between -1/sqrt(n_in) and 1/sqrt(n_in);
+        # between -6./sqrt(n_in+n_hidden) and 6./sqrt(n_in+n_hidden);
         # the output of uniform is converted using asarray to dtype
         # theano.config.floatX so that the code is runnable on GPU
         W1_values = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(n_in+n_hidden)), high = numpy.sqrt(6./(n_in+n_hidden)), \
+              low = -numpy.sqrt(6./(n_in+n_hidden)), \
+              high = numpy.sqrt(6./(n_in+n_hidden)), \
               size = (n_in, n_hidden)), dtype = theano.config.floatX)
         # `W2` is initialized with `W2_values`, which is uniformly sampled
-        # between -1/sqrt(n_hidden) and 1/sqrt(n_hidden);
+        # between -6./sqrt(n_hidden+n_out) and 6./sqrt(n_hidden+n_out);
         # the output of uniform is converted using asarray to dtype
         # theano.config.floatX so that the code is runnable on GPU
         W2_values = numpy.asarray( numpy.random.uniform(
-              low = numpy.sqrt(6./(n_hidden+n_out)), high= numpy.sqrt(6./(n_hidden+n_out)),\
+              low = numpy.sqrt(6./(n_hidden+n_out)), \
+              high= numpy.sqrt(6./(n_hidden+n_out)),\
               size= (n_hidden, n_out)), dtype = theano.config.floatX)

         self.W1 = theano.shared( value = W1_values )
         self.b1 = theano.shared( value = numpy.zeros((n_hidden,),
                                  dtype= theano.config.floatX))
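Note: the comment change in this hunk brings the documentation in line with the sqrt(6/(fan_in+fan_out)) bounds the code actually uses. The `W2` call, however, still passes a positive `low` bound (no leading minus), so `W2` is not sampled symmetrically around zero. A minimal NumPy sketch of the intended symmetric initialization; the helper name and layer sizes are illustrative, and 'float32' stands in for theano.config.floatX:

    import numpy

    def init_weights(fan_in, fan_out, rng=numpy.random):
        # sample uniformly from the symmetric interval
        # [-sqrt(6/(fan_in+fan_out)), +sqrt(6/(fan_in+fan_out))]
        bound = numpy.sqrt(6. / (fan_in + fan_out))
        return numpy.asarray(rng.uniform(low=-bound, high=bound,
                                         size=(fan_in, fan_out)),
                             dtype='float32')

    n_in, n_hidden, n_out = 784, 500, 10       # illustrative MNIST-sized layers
    W1_values = init_weights(n_in, n_hidden)
    W2_values = init_weights(n_hidden, n_out)  # low bound negated, unlike above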
@@ -159,18 +161,19 @@
     This is demonstrated on MNIST.

     :param learning_rate: learning rate used (factor for the stochastic
     gradient)

-    :param n_iter: number of iterations to run the optimizer
-
     :param L1_reg: L1-norm's weight when added to the cost (see
     regularization)

     :param L2_reg: L2-norm's weight when added to the cost (see
     regularization)
-    """
+
+    :param n_iter: maximal number of iterations to run the optimizer
+
+    """

     # Load the dataset
     f = gzip.open('mnist.pkl.gz','rb')
     train_set, valid_set, test_set = cPickle.load(f)
     f.close()
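Note: the loading code above expects `mnist.pkl.gz` in the working directory. By the convention of this tutorial series, each of the three pickled sets is an (inputs, labels) pair; a short sketch of unpacking it (Python 2, matching the file; assumes the pickle has that structure):

    import cPickle, gzip

    f = gzip.open('mnist.pkl.gz', 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    # each set is a pair: a matrix of flattened images and a vector of labels
    train_x, train_y = train_set
    print 'training examples: %i, input dimension: %i' % train_x.shape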
@@ -262,10 +265,11 @@
     # check every epoch


     best_params = None
     best_validation_loss = float('inf')
+    best_iter = 0
     test_score = 0.
     start_time = time.clock()
     # have a maximum of `n_iter` iterations through the entire dataset
     for iter in xrange(n_iter * n_minibatches):

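Note: the loop runs over a single flat counter instead of nested epoch/minibatch loops. The `epoch` and `minibatch_index` used in the reporting code of the next hunk are presumably recovered from `iter` along these lines (a sketch, not the file's actual code; the sizes are illustrative):

    n_minibatches = 2500   # e.g. 50000 training examples / batch size 20
    n_iter = 100

    for iter in xrange(n_iter * n_minibatches):
        # position within the run, recovered from the flat counter
        epoch = iter / n_minibatches            # integer division in Python 2
        minibatch_index = iter % n_minibatches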
@@ -298,34 +302,32 @@
             # improve patience if loss improvement is good enough
             if this_validation_loss < best_validation_loss * \
                     improvement_threshold:
                 patience = max(patience, iter * patience_increase)

+            # save best validation score and iteration number
             best_validation_loss = this_validation_loss
+            best_iter = iter
+
             # test it on the test set
-
             test_score = 0.
             for x, y in test_batches:
                 test_score += test_model(x, y)
             test_score /= len(test_batches)
             print((' epoch %i, minibatch %i/%i, test error of best '
                    'model %f %%') %
                   (epoch, minibatch_index + 1, n_minibatches,
                    test_score * 100.))

         if patience <= iter:
             break

     end_time = time.clock()
-    print(('Optimization complete with best validation score of %f %%,'
-           'with test performance %f %%') %
-          (best_validation_loss * 100., test_score * 100.))
+    print(('Optimization complete. Best validation score of %f %% '
+           'obtained at iteration %i, with test performance %f %%') %
+          (best_validation_loss * 100., best_iter, test_score * 100.))
     print('The code ran for %f minutes' % ((end_time - start_time) / 60.))
-
-
-
-


 if __name__ == '__main__':
     sgd_optimization_mnist()

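Note: taken together, the additions in this changeset (tracking `best_iter` alongside `best_validation_loss`) fit the patience-based early stopping used throughout these tutorials. A self-contained sketch of the pattern; the parameter values and simulated losses are made up, and the enclosing "did validation improve?" test, elided from the hunk above, is reconstructed as it usually appears in this series:

    patience = 10                  # minimum number of evaluations to run
    patience_increase = 2          # how much longer to run after an improvement
    improvement_threshold = 0.995  # relative improvement considered significant

    best_validation_loss = float('inf')
    best_iter = 0
    losses = [0.20, 0.15, 0.12, 0.11, 0.11, 0.10,
              0.10, 0.10, 0.10, 0.10, 0.10, 0.10]

    for iter, this_validation_loss in enumerate(losses):
        if this_validation_loss < best_validation_loss:
            # improve patience if the loss improvement is good enough
            if this_validation_loss < best_validation_loss * \
                    improvement_threshold:
                patience = max(patience, iter * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            best_iter = iter
        if patience <= iter:
            break

    print 'best validation loss %f at iteration %i' % (best_validation_loss,
                                                       best_iter)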