diff scripts/CalcPropNist.py @ 93:7054d7afb948

Ajout d'un graphique (histogramme) comparatif pour l'occurrence de chaque caractere
author SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date Thu, 11 Feb 2010 11:23:35 -0500
parents 708ad2abebdd
children 2d671ab3b730
line wrap: on
line diff
--- a/scripts/CalcPropNist.py	Thu Feb 11 10:43:22 2010 -0500
+++ b/scripts/CalcPropNist.py	Thu Feb 11 11:23:35 2010 -0500
@@ -10,6 +10,8 @@
 '''
 
 from pylearn.io import filetensor as ft
+import matplotlib.pyplot as plt
+
 
 #f1 = open('/home/sylvain/Dropbox/Msc/IFT6266/donnees/all_train_labels.ft')
 f1 = open('/data/lisa/data/nist/by_class/all/all_train_labels.ft')
@@ -59,3 +61,29 @@
     
 if train_maj+train_min+train_c != len(train):
     print "probleme avec le train, des donnees ne sont pas etiquetees"
+
+
+# Train set (top panel): histogram of how often each class label occurs.
+plt.subplot(211)
+plt.hist(train, bins=range(63))  # integer bin edges 0..62, one bin per class; assumes labels are ints in 0..61 -- TODO confirm
+plt.axis([0, 62, 0, 40000])
+plt.axvline(x=10, linewidth=2, color='r')  # ymin/ymax are axes fractions in [0, 1]; the defaults already span the full height
+plt.axvline(x=36, linewidth=2, color='r')  # the two red lines separate the three labelled regions below
+plt.text(3, 36000, 'chiffres')  # French labels: digits / uppercase / lowercase
+plt.text(18, 36000, 'majuscules')
+plt.text(40, 36000, 'minuscules')
+plt.title('Train set')
+
+# Test set (bottom panel): same layout as above with a smaller y range.
+plt.subplot(212)
+plt.hist(test, bins=range(63))  # same integer bin edges as the train panel so both panels line up
+plt.axis([0, 62, 0, 7000])
+plt.axvline(x=10, linewidth=2, color='r')
+plt.axvline(x=36, linewidth=2, color='r')
+plt.text(3, 6400, 'chiffres')
+plt.text(18, 6400, 'majuscules')
+plt.text(45, 6400, 'minuscules')
+plt.title('Test set')
+
+# Display the figure (blocks until the window is closed when run interactively).
+plt.show()
\ No newline at end of file