diff scripts/stat_graph.py @ 451:227ebc0be7ae

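Add a graph for the NIST training set and normalize the item frequencies of every dataset to proportions (each divided by its sum), rescaling the plot axis to match.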
author Arnaud Bergeron <abergeron@gmail.com>
date Mon, 10 May 2010 13:44:11 -0400
parents c1df23c98eb6
children
line wrap: on
line diff
--- a/scripts/stat_graph.py	Mon May 10 11:41:02 2010 -0400
+++ b/scripts/stat_graph.py	Mon May 10 13:44:11 2010 -0400
@@ -7,11 +7,13 @@
 from ift6266 import datasets
 
 nistp_valid = stats.itemfreq(datasets.PNIST07().valid(10000000).next()[1])
+nistp_valid[:,1] /= sum(nistp_valid[:,1])
 nist_valid = stats.itemfreq(datasets.nist_all().valid(10000000).next()[1])
+nist_valid[:,1] /= sum(nist_valid[:,1])
 nist_test = stats.itemfreq(datasets.nist_all().test(10000000).next()[1])
-print 'nistp_valid', sum(nistp_valid[:,1])
-print 'nist_valid', sum(nist_valid[:,1])
-print 'nist_test', sum(nist_test[:,1])
+nist_test[:,1] /= sum(nist_test[:,1])
+nist_train = stats.itemfreq(datasets.nist_all().train(100000000).next()[1])
+nist_train[:,1] /= sum(nist_train[:,1])
 
 xloc = numpy.arange(62)+0.5
 
@@ -23,10 +25,10 @@
     bar(xloc, data, width=width)
     xticks([])
     for x, l in zip(xloc, labels):
-        text(x+width/2, -250, l, horizontalalignment='center', verticalalignment='baseline')
+        text(x+width/2, -0.004, l, horizontalalignment='center', verticalalignment='baseline')
 #    xticks(xloc+width/2, labels, verticalalignment='bottom')
     xlim(0, xloc[-1]+width*2)
-    ylim(0, 7000)
+    ylim(0, 0.1)
 
     savefig(fname)
 
@@ -34,3 +36,4 @@
 makegraph(nistp_valid[:,1], 'nistpvalidstats.png')
 makegraph(nist_valid[:,1], 'nistvalidstats.png')
 makegraph(nist_test[:,1], 'nistteststats.png')
+makegraph(nist_train[:,1], 'nisttrainstats.png')