diff code_tutoriel/mlp.py @ 2:bcc87d3e33a3

adding latest tutorial code
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Sun, 24 Jan 2010 22:34:29 -0500
parents fda5f787baa6
children 827de2cc34f8
--- a/code_tutoriel/mlp.py	Sun Jan 24 22:33:33 2010 -0500
+++ b/code_tutoriel/mlp.py	Sun Jan 24 22:34:29 2010 -0500
@@ -71,18 +71,20 @@
         # other tutorials
         
         # `W1` is initialized with `W1_values` which is uniformly sampled
-        # from -1/sqrt(n_in) and 1/sqrt(n_in)
+        # from -sqrt(6./(n_in+n_hidden)) to sqrt(6./(n_in+n_hidden))
         # the output of uniform is converted using asarray to dtype
         # theano.config.floatX so that the code is runnable on GPU
         W1_values = numpy.asarray( numpy.random.uniform( \
-              low = -numpy.sqrt(6./(n_in+n_hidden)), high = numpy.sqrt(6./(n_in+n_hidden)), \
+              low = -numpy.sqrt(6./(n_in+n_hidden)), \
+              high = numpy.sqrt(6./(n_in+n_hidden)), \
               size = (n_in, n_hidden)), dtype = theano.config.floatX)
         # `W2` is initialized with `W2_values` which is uniformly sampled
-        # from -1/sqrt(n_hidden) and 1/sqrt(n_hidden)
+        # from -sqrt(6./(n_hidden+n_out)) to sqrt(6./(n_hidden+n_out))
         # the output of uniform is converted using asarray to dtype
         # theano.config.floatX so that the code is runnable on GPU
         W2_values = numpy.asarray( numpy.random.uniform( 
-              low = numpy.sqrt(6./(n_hidden+n_out)), high= numpy.sqrt(6./(n_hidden+n_out)),\
+              low = -numpy.sqrt(6./(n_hidden+n_out)), \
+              high= numpy.sqrt(6./(n_hidden+n_out)),\
               size= (n_hidden, n_out)), dtype = theano.config.floatX)
 
         self.W1 = theano.shared( value = W1_values )
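
As a side note on the corrected comments above: both weight matrices are drawn uniformly from a symmetric interval whose half-width is sqrt(6/(fan_in + fan_out)), so the low bound is the negative of the high bound. A minimal numpy-only sketch of that initialization, with layer sizes picked here purely for illustration (they are not taken from this changeset), might look like:

    import numpy

    # hypothetical layer sizes, for illustration only
    n_in, n_hidden, n_out = 784, 500, 10

    # W1 ~ U[-sqrt(6/(n_in+n_hidden)), +sqrt(6/(n_in+n_hidden))]
    bound1 = numpy.sqrt(6. / (n_in + n_hidden))
    W1_values = numpy.asarray(
        numpy.random.uniform(low=-bound1, high=bound1, size=(n_in, n_hidden)),
        dtype='float32')

    # W2 ~ U[-sqrt(6/(n_hidden+n_out)), +sqrt(6/(n_hidden+n_out))]
    bound2 = numpy.sqrt(6. / (n_hidden + n_out))
    W2_values = numpy.asarray(
        numpy.random.uniform(low=-bound2, high=bound2, size=(n_hidden, n_out)),
        dtype='float32')
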
@@ -161,14 +163,15 @@
     :param learning_rate: learning rate used (factor for the stochastic 
     gradient)
 
-    :param n_iter: number of iterations ot run the optimizer 
-
     :param L1_reg: L1-norm's weight when added to the cost (see 
     regularization)
 
     :param L2_reg: L2-norm's weight when added to the cost (see 
     regularization)
-    """
+ 
+    :param n_iter: maximum number of iterations to run the optimizer
+
+    """
 
     # Load the dataset 
     f = gzip.open('mnist.pkl.gz','rb')
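
The L1_reg and L2_reg parameters documented in the hunk above are the coefficients of penalty terms added to the training cost. A rough numpy-only illustration of that kind of regularized cost, with made-up weights and a made-up base cost (a sketch of the idea, not this file's Theano graph), could be:

    import numpy

    # hypothetical weights and base cost, for illustration only
    W1 = numpy.random.randn(4, 3)
    W2 = numpy.random.randn(3, 2)
    base_cost = 1.234
    L1_reg, L2_reg = 0.00, 0.0001

    L1     = abs(W1).sum() + abs(W2).sum()          # L1 norm of the weights
    L2_sqr = (W1 ** 2).sum() + (W2 ** 2).sum()      # squared L2 norm
    cost   = base_cost + L1_reg * L1 + L2_reg * L2_sqr
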
@@ -264,6 +267,7 @@
 
     best_params          = None
     best_validation_loss = float('inf')
+    best_iter            = 0
     test_score           = 0.
     start_time = time.clock()
     # have a maximum of `n_iter` iterations through the entire dataset
@@ -300,9 +304,11 @@
                        improvement_threshold :
                     patience = max(patience, iter * patience_increase)
 
+                # save best validation score and iteration number
                 best_validation_loss = this_validation_loss
+                best_iter = iter
+
                 # test it on the test set
-            
                 test_score = 0.
                 for x,y in test_batches:
                     test_score += test_model(x,y)
@@ -313,19 +319,15 @@
                               test_score*100.))
 
         if patience <= iter :
-                break
+            break
 
     end_time = time.clock()
-    print(('Optimization complete with best validation score of %f %%,'
-           'with test performance %f %%') %  
-                 (best_validation_loss * 100., test_score*100.))
+    print(('Optimization complete. Best validation score of %f %% '
+           'obtained at iteration %i, with test performance %f %%') %  
+                 (best_validation_loss * 100., best_iter, test_score*100.))
     print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
 
 
-
-
-
-
 if __name__ == '__main__':
     sgd_optimization_mnist()
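
For context, the best_iter bookkeeping and the patience updates touched in the last two hunks implement patience-based early stopping. A self-contained sketch of that control flow, using dummy validation losses and arbitrary thresholds rather than the tutorial's actual training loop, could look like:

    patience = 4                     # run at least this many validation checks
    patience_increase = 2            # extend patience when a clear new best appears
    improvement_threshold = 0.995    # relative improvement considered significant

    best_validation_loss = float('inf')
    best_iter = 0

    validation_losses = [0.12, 0.10, 0.09, 0.091, 0.090, 0.089]  # dummy data
    for iter, this_validation_loss in enumerate(validation_losses):
        if this_validation_loss < best_validation_loss:
            # significant improvement: allow the loop to run longer
            if this_validation_loss < best_validation_loss * improvement_threshold:
                patience = max(patience, iter * patience_increase)
            best_validation_loss = this_validation_loss
            best_iter = iter
        if patience <= iter:
            break

    print('best loss %f at iteration %i' % (best_validation_loss, best_iter))
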