diff baseline/mlp/mlp_nist.py @ 377:0b7e64e8e93f

branch merge
author Arnaud Bergeron <abergeron@gmail.com>
date Sun, 25 Apr 2010 17:12:03 -0400
parents 76b7182dd32e
children 60a4432b8071
line wrap: on
line diff
--- a/baseline/mlp/mlp_nist.py	Sun Apr 25 17:10:09 2010 -0400
+++ b/baseline/mlp/mlp_nist.py	Sun Apr 25 17:12:03 2010 -0400
@@ -23,6 +23,7 @@
 """
 __docformat__ = 'restructedtext en'
 
+import sys
 import pdb
 import numpy
 import pylab
@@ -163,6 +164,75 @@
         else:
             raise NotImplementedError()
 
+def mlp_get_nist_error(model_name='/u/mullerx/ift6266h10_sandbox_db/xvm_final_lr1_p073/8/best_model.npy.npz',
+                  data_set=0):
+    
+
+
+    # allocate symbolic variables for the data
+    x = T.fmatrix()  # the data is presented as rasterized images
+    y = T.lvector()  # the labels are presented as 1D vector of 
+                          # [long int] labels
+
+    # load the saved model; the mlp created below takes its dimensions from the model's parameter shapes
+    model=numpy.load(model_name)
+    W1=model['W1']
+    W2=model['W2']
+    b1=model['b1']
+    b2=model['b2']
+    nb_hidden=b1.shape[0]
+    input_dim=W1.shape[0]
+    nb_targets=b2.shape[0]
+    learning_rate=0.1
+
+
+    if data_set==0:
+        dataset=datasets.nist_all()
+    elif data_set==1:
+        dataset=datasets.nist_P07()
+
+
+    classifier = MLP( input=x,\
+                        n_in=input_dim,\
+                        n_hidden=nb_hidden,\
+                        n_out=nb_targets,
+                        learning_rate=learning_rate)
+
+
+    #overwrite the classifier's weights with the weights loaded from the model
+    classifier.W1.value=W1
+    classifier.W2.value=W2
+    classifier.b1.value=b1
+    classifier.b2.value=b2
+
+
+    cost = classifier.negative_log_likelihood(y) \
+         + 0.0 * classifier.L1 \
+         + 0.0 * classifier.L2_sqr 
+
+    # compiling a theano function that computes the mistakes that are made by 
+    # the model on a minibatch
+    test_model = theano.function([x,y], classifier.errors(y))
+
+
+
+    #get the test error
+    #NOTE: the loop below requests minibatches of 20 (not 1) and averages the
+    #per-minibatch results; sub-class error is not computed yet (will be upgraded later)
+    test_score=0
+    temp=0
+    for xt,yt in dataset.test(20):
+        test_score += test_model(xt,yt)
+        temp = temp+1
+    test_score /= temp
+
+
+    return test_score*100
+    
+
+
+
+
 
 def mlp_full_nist(      verbose = 1,\
                         adaptive_lr = 0,\
@@ -174,15 +244,19 @@
                         batch_size=20,\
                         nb_hidden = 30,\
                         nb_targets = 62,
-			tau=1e6,\
-			lr_t2_factor=0.5):
+                        tau=1e6,\
+                        lr_t2_factor=0.5,\
+                        init_model=0,\
+                        channel=0):
    
     
+    if channel!=0:
+        channel.save()
     configuration = [learning_rate,nb_max_exemples,nb_hidden,adaptive_lr]
     
     #save initial learning rate if classical adaptive lr is used
     initial_lr=learning_rate
-    max_div_count=3
+    max_div_count=1000
     
     
     total_validation_error_list = []
@@ -195,6 +269,8 @@
     	dataset=datasets.nist_all()
     elif data_set==1:
         dataset=datasets.nist_P07()
+    elif data_set==2:
+        dataset=datasets.PNIST07()
     
     
     
@@ -215,6 +291,14 @@
                         learning_rate=learning_rate)
                         
                         
+    # check if we want to initialise the weights with a previously calculated model
+    # NOTE: the old model's dimensions (nb_hidden and nb_targets) must be consistent with the current configuration
+    if init_model!=0:
+        old_model=numpy.load(init_model)
+        classifier.W1.value=old_model['W1']
+        classifier.W2.value=old_model['W2']
+        classifier.b1.value=old_model['b1']
+        classifier.b2.value=old_model['b2']
    
 
     # the cost we minimize during training is the negative log likelihood of 
@@ -289,8 +373,9 @@
     
     
     
-    if verbose == 1:
-        print 'starting training'
+    
+    print 'starting training'
+    sys.stdout.flush()
     while(minibatch_index*batch_size<nb_max_exemples):
         
         for x, y in dataset.train(batch_size):
@@ -303,10 +388,12 @@
             #train model
             cost_ij = train_model(x,y)
     
-            if (minibatch_index+1) % validation_frequency == 0: 
+            if (minibatch_index) % validation_frequency == 0: 
                 #save the current learning rate
                 learning_rate_list.append(classifier.lr.value)
                 divergence_flag_list.append(divergence_flag)
+
+                
                 
                 # compute the validation error
                 this_validation_loss = 0.
@@ -319,10 +406,15 @@
                 this_validation_loss /= temp
                 #save the validation loss
                 total_validation_error_list.append(this_validation_loss)
-                if verbose == 1:
-                    print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % 
-                                (epoch, minibatch_index+1,classifier.lr.value,
-                                this_validation_loss*100.))
+                
+		print(('epoch %i, minibatch %i, learning rate %f current validation error %f ') % 
+			(epoch, minibatch_index+1,classifier.lr.value,
+			this_validation_loss*100.))
+		sys.stdout.flush()
+				
+		#save temp results to check during training
+                numpy.savez('temp_results.npy',config=configuration,total_validation_error_list=total_validation_error_list,\
+                learning_rate_list=learning_rate_list, divergence_flag_list=divergence_flag_list)
     
                 # if we got the best validation score until now
                 if this_validation_loss < best_validation_loss:
@@ -344,11 +436,12 @@
                         test_score += test_model(xt,yt)
                         temp = temp+1
                     test_score /= temp
-                    if verbose == 1:
-                        print(('epoch %i, minibatch %i, test error of best '
-                            'model %f %%') % 
-                                    (epoch, minibatch_index+1,
-                                    test_score*100.))
+                    
+		    print(('epoch %i, minibatch %i, test error of best '
+			'model %f %%') % 
+				(epoch, minibatch_index+1,
+				test_score*100.))
+                    sys.stdout.flush()
                                     
                 # if the validation error is going up, we are overfitting (or oscillating)
                 # check if we are allowed to continue and if we will adjust the learning rate
@@ -374,12 +467,13 @@
                         test_score += test_model(xt,yt)
                         temp=temp+1
                     test_score /= temp
-                    if verbose == 1:
-                        print ' validation error is going up, possibly stopping soon'
-                        print(('     epoch %i, minibatch %i, test error of best '
-                            'model %f %%') % 
-                                    (epoch, minibatch_index+1,
-                                    test_score*100.))
+                    
+                    print ' validation error is going up, possibly stopping soon'
+                    print(('     epoch %i, minibatch %i, test error of best '
+                        'model %f %%') % 
+                                (epoch, minibatch_index+1,
+                                test_score*100.))
+                    sys.stdout.flush()
                                     
                     
     
@@ -393,6 +487,9 @@
             #force one epoch at least
             if epoch>0 and minibatch_index*batch_size>nb_max_exemples:
                 break
+
+
+                       
     
     
             time_n= time_n + batch_size
@@ -401,12 +498,13 @@
         # we have finished looping through the training set
         epoch = epoch+1
     end_time = time.clock()
-    if verbose == 1:
-        print(('Optimization complete. Best validation score of %f %% '
-            'obtained at iteration %i, with test performance %f %%') %  
-                    (best_validation_loss * 100., best_iter, test_score*100.))
-        print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
-        print minibatch_index
+   
+    print(('Optimization complete. Best validation score of %f %% '
+        'obtained at iteration %i, with test performance %f %%') %  
+                (best_validation_loss * 100., best_iter, test_score*100.))
+    print ('The code ran for %f minutes' % ((end_time-start_time)/60.))
+    print minibatch_index
+    sys.stdout.flush()
         
     #save the model and the weights
     numpy.savez('model.npy', config=configuration, W1=classifier.W1.value,W2=classifier.W2.value, b1=classifier.b1.value,b2=classifier.b2.value)
@@ -427,7 +525,8 @@
 										tau=state.tau,\
 										verbose = state.verbose,\
 										lr_t2_factor=state.lr_t2_factor,
-                                                                                data_set=state.data_set)
+                                        data_set=state.data_set,
+                                        channel=channel)
     state.train_error=train_error
     state.validation_error=validation_error
     state.test_error=test_error