changeset 421:0ffef3667865

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Fri, 30 Apr 2010 16:23:45 -0400
parents a3a4a9c6476d (current diff) c91d7b67fa41 (diff)
children e4eb3ee7a0cf c06a3d9b5664
files
diffstat 2 files changed, 241 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deep/stacked_dae/v_sylvain/nist_apriori_error.py	Fri Apr 30 16:23:45 2010 -0400
@@ -0,0 +1,239 @@
+__docformat__ = 'restructedtext en'
+
+import pdb
+import numpy 
+from numpy import array
+import time
+import datetime
+import pylearn
+import copy
+import sys
+import os
+import os.path
+from pylearn.io import filetensor as ft
+from jobman import DD
+from ift6266 import datasets
+import cPickle
+from copy import copy
+import math
+
+from config import *
+
+data_path = '/data/lisa/data/nist/by_class/'
+test_data = 'all/all_train_data.ft' 
+test_labels = 'all/all_train_labels.ft'
+state = DD(DEFAULT_HP_NIST)
+
+#sda_model -> path for the parameters file
+#dataset -> the dataset we use for the test
+#part -> 0=train, 1=valid, 2=test
+#type -> non-linearity type 0=sigmoid, 1=tanh
+def test_data(sda_model,dataset,part=2,type=0):
+    
+    
+    f = open(sda_model)
+    parameters_pre=cPickle.load(f)
+    f.close()
+    
+    W1 = array(copy(parameters_pre[0]))
+    #print 'W1: ' + str(W1.shape)
+    b1 = array(copy(parameters_pre[1]))
+    #print 'b1: ' + str(b1.shape)
+    W2 = array(copy(parameters_pre[2]))
+    #print 'W2: ' + str(W2.shape)
+    b2 = array(copy(parameters_pre[3]))
+    #print 'b2: ' + str(b2.shape)
+    W3 = array(copy(parameters_pre[4]))
+    #print 'W3: ' + str(W3.shape)
+    b3 = array(copy(parameters_pre[5]))
+    #print 'b3: ' + str(b3.shape)
+    if state['num_hidden_layers'] == 4:
+        W4 = array(copy(parameters_pre[6]))
+        b4 = array(copy(parameters_pre[7]))
+        Wo = array(copy(parameters_pre[8]))
+        bo = array(copy(parameters_pre[9]))
+    elif state['num_hidden_layers'] == 3:
+        Wo = array(copy(parameters_pre[6]))
+        #print 'Wo: ' + str(Wo.shape)
+        bo = array(copy(parameters_pre[7]))
+        #print 'bo: ' + str(bo.shape)
+        W4=None
+        b4=None
+    else:
+        print('Number of layers not implemented yet, please do it')
+  
+    
+    total_error_count=0
+    total_exemple_count=0
+    if part == 0:
+        iter = dataset.train(1)
+    if part == 1:
+        iter = dataset.valid(1)
+    if part == 2:
+        iter = dataset.test(1)
+    for x,y in iter:
+        total_exemple_count = total_exemple_count +1
+        if type == 1:
+            #get output for layer 1
+            out1=(numpy.tanh(numpy.dot(x,W1) + b1)+1.0)/2.0
+            #get output for layer 2
+            out2=(numpy.tanh(numpy.dot(out1,W2) + b2)+1.0)/2.0
+            #get output for layer 3
+            out3=(numpy.tanh(numpy.dot(out2,W3) + b3)+1.0)/2.0
+            #if there is a fourth layer
+            if state['num_hidden_layers'] == 4:
+                outf = (numpy.tanh(numpy.dot(out3,W4) + b4)+1.0)/2.0
+            else:
+                outf = array(out3)
+        else:
+            #get output for layer 1
+            out1=1.0/(1.0+numpy.exp(-(numpy.dot(x,W1)+b1)))
+            #get output for layer 2
+            out2 = 1.0/(1.0+numpy.exp(-(numpy.dot(out1,W2)+b2)))
+            #get output for layer 3
+            out3 = 1.0/(1.0+numpy.exp(-(numpy.dot(out2,W3)+b3)))
+            #if there is a fourth layer
+            if state['num_hidden_layers'] == 4:
+                outf = 1.0/(1.0+numpy.exp(-(numpy.dot(out3,W4)+b4)))
+            else:
+                outf = out3
+        
+        out_act = numpy.dot(outf,Wo)+bo
+        
+        #add non linear function for output activation (softmax)
+        #We can also use sigmoid and results will be the same
+        out = numpy.zeros(len(out_act[0]),float)
+        a1_exp = numpy.exp(out_act)
+        sum_a1=numpy.sum(a1_exp)
+        out=a1_exp/sum_a1
+##        for i in xrange(len(out_act[0])):
+##            out[i]=sigmoid(array(out_act[0,i]))
+
+        #get grouped based error
+        #with a priori
+        if(y>9 and y<35):
+            predicted_class=numpy.argmax(out[0,10:35])+10
+            if(predicted_class!=y):
+                total_error_count+=1
+                
+        if(y<10):
+            predicted_class=numpy.argmax(out[0,0:10])
+            if(predicted_class!=y):
+                total_error_count+=1
+        if(y>34):
+            predicted_class=numpy.argmax(out[0,35:])+35
+            if(predicted_class!=y):
+                total_error_count+=1
+                
+    print '\t total exemples count: '+str(total_exemple_count)
+    print '\t total error count: '+str(total_error_count)
+    print '\t percentage of error: '+str(total_error_count*100.0/total_exemple_count*1.0)+' %'
+    
+
+def sigmoid(value):
+##    if len(value) > 1:
+##        retour = numpy.zeros(len(value),float)
+##        for i in xrange(len(value)):
+##            retour[i] = (1.0/(1.0+math.exp(-float(value[i]))))
+##        return retour
+##    else:
+##        print len(value)
+        return (1.0/(1.0+math.exp(-value)))
+
+if __name__ == '__main__':
+    
+    args = sys.argv[1:]
+    
+    if len(args) > 0 and args[0] == 'sigmoid':
+        type = 0
+    elif len(args) > 0 and args[0] == 'tanh':
+        type = 1
+    
+    part = 2    #0=train, 1=valid, 2=test
+    
+    PATH = ''   #Can be changed too if model is not in the current drectory
+    
+    if os.path.exists(PATH+'params_finetune_NIST.txt'):
+        start_time = time.clock()  
+        print ('\n finetune = NIST ')
+        print "NIST DIGITS"
+        test_data(PATH+'params_finetune_NIST.txt',datasets.nist_digits(),part=part,type=type)
+        print "NIST LOWER CASE"
+        test_data(PATH+'params_finetune_NIST.txt',datasets.nist_lower(),part=part,type=type)
+        print "NIST UPPER CASE"
+        test_data(PATH+'params_finetune_NIST.txt',datasets.nist_upper(),part=part,type=type)
+        end_time = time.clock()
+        print ('It took %f minutes' %((end_time-start_time)/60.))
+        
+    
+    if os.path.exists(PATH+'params_finetune_P07.txt'):
+        start_time = time.clock()  
+        print ('\n finetune = P07 ')
+        print "NIST DIGITS"
+        test_data(PATH+'params_finetune_P07.txt',datasets.nist_digits(),part=part,type=type)
+        print "NIST LOWER CASE"
+        test_data(PATH+'params_finetune_P07.txt',datasets.nist_lower(),part=part,type=type)
+        print "NIST UPPER CASE"
+        test_data(PATH+'params_finetune_P07.txt',datasets.nist_upper(),part=part,type=type)
+        end_time = time.clock()
+        print ('It took %f minutes' %((end_time-start_time)/60.))
+
+    
+    if os.path.exists(PATH+'params_finetune_NIST_then_P07.txt'):
+        start_time = time.clock()  
+        print ('\n finetune = NIST then P07')
+        print "NIST DIGITS"
+        test_data(PATH+'params_finetune_NIST_then_P07.txt',datasets.nist_digits(),part=part,type=type)
+        print "NIST LOWER CASE"
+        test_data(PATH+'params_finetune_NIST_then_P07.txt',datasets.nist_lower(),part=part,type=type)
+        print "NIST UPPER CASE"
+        test_data(PATH+'params_finetune_NIST_then_P07.txt',datasets.nist_upper(),part=part,type=type)
+        end_time = time.clock()
+        print ('It took %f minutes' %((end_time-start_time)/60.))
+    
+    if os.path.exists(PATH+'params_finetune_P07_then_NIST.txt'):
+        start_time = time.clock()  
+        print ('\n finetune = P07 then NIST')
+        print "NIST DIGITS"
+        test_data(PATH+'params_finetune_P07_then_NIST.txt',datasets.nist_digits(),part=part,type=type)
+        print "NIST LOWER CASE"
+        test_data(PATH+'params_finetune_P07_then_NIST.txt',datasets.nist_lower(),part=part,type=type)
+        print "NIST UPPER CASE"
+        test_data(PATH+'params_finetune_P07_then_NIST.txt',datasets.nist_upper(),part=part,type=type)
+        end_time = time.clock()
+        print ('It took %f minutes' %((end_time-start_time)/60.))
+    
+    if os.path.exists(PATH+'params_finetune_PNIST07.txt'):
+        start_time = time.clock()  
+        print ('\n finetune = PNIST07')
+        print "NIST DIGITS"
+        test_data(PATH+'params_finetune_PNIST07.txt',datasets.nist_digits(),part=part,type=type)
+        print "NIST LOWER CASE"
+        test_data(PATH+'params_finetune_PNIST07.txt',datasets.nist_lower(),part=part,type=type)
+        print "NIST UPPER CASE"
+        test_data(PATH+'params_finetune_PNIST07.txt',datasets.nist_upper(),part=part,type=type)
+        end_time = time.clock()
+        print ('It took %f minutes' %((end_time-start_time)/60.))
+        
+    if os.path.exists(PATH+'params_finetune_PNIST07_then_NIST.txt'):
+        start_time = time.clock()  
+        print ('\n finetune = PNIST07 then NIST')
+        print "NIST DIGITS"
+        test_data(PATH+'params_finetune_PNIST07_then_NIST.txt',datasets.nist_digits(),part=part,type=type)
+        print "NIST LOWER CASE"
+        test_data(PATH+'params_finetune_PNIST07_then_NIST.txt',datasets.nist_lower(),part=part,type=type)
+        print "NIST UPPER CASE"
+        test_data(PATH+'params_finetune_PNIST07_then_NIST.txt',datasets.nist_upper(),part=part,type=type)
+        end_time = time.clock()
+        print ('It took %f minutes' %((end_time-start_time)/60.))
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+ 
\ No newline at end of file
--- a/deep/stacked_dae/v_sylvain/nist_sda.py	Fri Apr 30 16:23:15 2010 -0400
+++ b/deep/stacked_dae/v_sylvain/nist_sda.py	Fri Apr 30 16:23:45 2010 -0400
@@ -158,7 +158,7 @@
         optimizer.finetune(datasets.nist_all(),datasets.nist_P07(),max_finetune_epoch_NIST,ind_test=1,special=1,decrease=decrease_lr)
     if finetune_choice == 4:
         print ('\n\n\tFinetune with PNIST07 then NIST\n\n')
-        optimizer.reload_parameters('params)pretrain.txt')
+        optimizer.reload_parameters('params_pretrain.txt')
         optimizer.finetune(datasets.PNIST07(),datasets.nist_all(),max_finetune_epoch_NIST,ind_test=30,decrease=decrease_lr,dataset_test2=datasets.nist_P07())    
         optimizer.finetune(datasets.nist_all(),datasets.PNIST07(),max_finetune_epoch_NIST,ind_test=31,decrease=decrease_lr,dataset_test2=datasets.nist_P07())    
         
@@ -187,7 +187,7 @@
         channel.save()
         print ('\n\n\tFinetune with PNIST07\n\n')
         sys.stdout.flush()
-        optimizer.reload_parameters('params)pretrain.txt')
+        optimizer.reload_parameters('params_pretrain.txt')
         optimizer.finetune(datasets.PNIST07(),datasets.nist_all(),max_finetune_epoch_NIST,ind_test=2,decrease=decrease_lr,dataset_test2=datasets.nist_P07())    
         channel.save()
         sys.stdout.flush()