ift6266: diff code_tutoriel/mlp.py @ 2:bcc87d3e33a3
adding latest tutorial code
author      Dumitru Erhan <dumitru.erhan@gmail.com>
date        Sun, 24 Jan 2010 22:34:29 -0500
parents     fda5f787baa6
children    827de2cc34f8
--- a/code_tutoriel/mlp.py  Sun Jan 24 22:33:33 2010 -0500
+++ b/code_tutoriel/mlp.py  Sun Jan 24 22:34:29 2010 -0500
@@ -71,18 +71,20 @@
         # other tutorials
 
         # `W1` is initialized with `W1_values` which is uniformely sampled
-        # from -1/sqrt(n_in) and 1/sqrt(n_in)
+        # from -6./sqrt(n_in+n_hidden) and 6./sqrt(n_in+n_hidden)
         # the output of uniform if converted using asarray to dtype
         # theano.config.floatX so that the code is runable on GPU
         W1_values = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(n_in+n_hidden)), high = numpy.sqrt(6./(n_in+n_hidden)), \
+              low = -numpy.sqrt(6./(n_in+n_hidden)), \
+              high = numpy.sqrt(6./(n_in+n_hidden)), \
               size = (n_in, n_hidden)), dtype = theano.config.floatX)
         # `W2` is initialized with `W2_values` which is uniformely sampled
-        # from -1/sqrt(n_hidden) and 1/sqrt(n_hidden)
+        # from -6./sqrt(n_hidden+n_out) and 6./sqrt(n_hidden+n_out)
         # the output of uniform if converted using asarray to dtype
         # theano.config.floatX so that the code is runable on GPU
         W2_values = numpy.asarray( numpy.random.uniform(
-              low = numpy.sqrt(6./(n_hidden+n_out)), high= numpy.sqrt(6./(n_hidden+n_out)),\
+              low = numpy.sqrt(6./(n_hidden+n_out)), \
+              high= numpy.sqrt(6./(n_hidden+n_out)),\
              size= (n_hidden, n_out)), dtype = theano.config.floatX)
 
         self.W1 = theano.shared( value = W1_values )
@@ -161,14 +163,15 @@
     :param learning_rate: learning rate used (factor for the stochastic
     gradient
 
-    :param n_iter: number of iterations ot run the optimizer
-
     :param L1_reg: L1-norm's weight when added to the cost (see
     regularization)
 
     :param L2_reg: L2-norm's weight when added to the cost (see
     regularization)
-    """
+
+    :param n_iter: maximal number of iterations ot run the optimizer
+
+    """
 
     # Load the dataset
     f = gzip.open('mnist.pkl.gz','rb')
@@ -264,6 +267,7 @@
 
     best_params = None
     best_validation_loss = float('inf')
+    best_iter = 0
     test_score = 0.
     start_time = time.clock()
     # have a maximum of `n_iter` iterations through the entire dataset
@@ -300,9 +304,11 @@
                        improvement_threshold :
                     patience = max(patience, iter * patience_increase)
 
+                # save best validation score and iteration number
                 best_validation_loss = this_validation_loss
+                best_iter = iter
+
                 # test it on the test set
-                test_score = 0.
                 for x,y in test_batches:
                     test_score += test_model(x,y)
 
@@ -313,19 +319,15 @@
                                   test_score*100.))
 
             if patience <= iter :
-                break
+                break
 
     end_time = time.clock()
-    print(('Optimization complete with best validation score of %f %%,'
-           'with test performance %f %%') %
-                 (best_validation_loss * 100., test_score*100.))
+    print(('Optimization complete. Best validation score of %f %% '
+           'obtained at iteration %i, with test performance %f %%') %
+                 (best_validation_loss * 100., best_iter, test_score*100.))
     print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
 
 
-
-
-
 
 if __name__ == '__main__':
     sgd_optimization_mnist()
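The updated comments describe the scaled uniform initialization the code actually uses: each weight matrix is drawn uniformly from [-sqrt(6/(fan_in+fan_out)), sqrt(6/(fan_in+fan_out))]. A minimal NumPy sketch of that scheme, separate from the tutorial's Theano code (the function name and layer sizes below are illustrative, not taken from the commit):

    import numpy

    def uniform_init(fan_in, fan_out, rng=numpy.random):
        # bound = sqrt(6 / (fan_in + fan_out)), the range named in the updated comments
        bound = numpy.sqrt(6. / (fan_in + fan_out))
        # sample uniformly in [-bound, bound]; note the explicit minus sign on `low`
        return numpy.asarray(rng.uniform(low=-bound, high=bound,
                                         size=(fan_in, fan_out)),
                             dtype='float32')

    W1 = uniform_init(784, 500)   # e.g. input -> hidden
    W2 = uniform_init(500, 10)    # e.g. hidden -> output

With fan_in=784 and fan_out=500 the bound is roughly 0.07, so the initial weights are small and the hidden units (tanh in this tutorial) start out near their linear regime.

The second part of the change records the iteration at which the best validation score was reached (best_iter) so it can be reported at the end. A schematic version of that patience-based early-stopping loop, where validate and evaluate_test are caller-supplied stand-ins for the tutorial's compiled Theano functions, not part of the commit:

    def early_stopping_loop(validate, evaluate_test, n_iter=100,
                            patience=10, patience_increase=2,
                            improvement_threshold=0.995):
        # validate() returns the current validation loss, evaluate_test() the
        # test error; this mirrors the loop in the diff but is not its code
        best_validation_loss = float('inf')
        best_iter = 0
        test_score = 0.
        for iter in range(n_iter):
            this_validation_loss = validate()
            if this_validation_loss < best_validation_loss:
                # improve patience if loss improvement is good enough
                if this_validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                # save best validation score and iteration number
                best_validation_loss = this_validation_loss
                best_iter = iter
                test_score = evaluate_test()
            if patience <= iter:
                break
        return best_validation_loss, best_iter, test_score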