diff scripts/stat_graph.py @ 450:c1df23c98eb6

New script to create histograms for datasets (for the report)
author Arnaud Bergeron <abergeron@gmail.com>
date Mon, 10 May 2010 11:41:02 -0400
parents
children 227ebc0be7ae
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/stat_graph.py	Mon May 10 11:41:02 2010 -0400
@@ -0,0 +1,36 @@
+import matplotlib
+matplotlib.use('Agg')
+
+from pylab import *
+from scipy import stats
+import numpy
+from ift6266 import datasets
+
+# Count how often each value occurs in element [1] of the first item each split yields (presumably the labels of one huge batch -- TODO confirm).
+nistp_valid = stats.itemfreq(datasets.PNIST07().valid(10000000).next()[1])
+nist_valid = stats.itemfreq(datasets.nist_all().valid(10000000).next()[1])
+nist_test = stats.itemfreq(datasets.nist_all().test(10000000).next()[1])
+for split_name, freq in (('nistp_valid', nistp_valid), ('nist_valid', nist_valid), ('nist_test', nist_test)):
+    print split_name, sum(freq[:,1])  # column 1 of an itemfreq table holds the counts; their sum is the number of items tallied
+
+# 62 bar positions and their captions: the digits 0-9, then A-Z (chr 65..90), then a-z (chr 97..122).
+xloc = 0.5 + numpy.arange(62)
+labels = [str(d) for d in range(10)] + [chr(c) for c in range(65,91)] + [chr(c) for c in range(97,123)]
+
+def makegraph(data, fname, labels=labels, xloc=xloc, width=0.5):
+    """Save a bar chart to `fname`: bar i has height data[i], spans xloc[i]..xloc[i]+width and is captioned labels[i]."""
+    figure(figsize=(8,6))
+    # align='edge' pins each bar's left side to xloc (matplotlib >= 2.0 centres bars by default), so x+width/2.0 below is the bar centre.
+    bar(xloc, data, width=width, align='edge')
+    xticks([])  # no real ticks: the captions are drawn by hand with text() at y=-250, i.e. just under the axis
+    # width/2.0 rather than width/2 so an integer width is not truncated by Python 2 integer division.
+    for x, l in zip(xloc, labels):
+        text(x+width/2.0, -250, l, horizontalalignment='center', verticalalignment='baseline')
+    xlim(0, xloc[-1]+width*2)
+    ylim(0, 7000)  # fixed y range, presumably so every saved chart shares one scale -- TODO confirm
+    savefig(fname)
+
+
+# One histogram image per split; column 1 of each stats.itemfreq table (the counts) is what gets plotted.
+for freq, fname in ((nistp_valid, 'nistpvalidstats.png'), (nist_valid, 'nistvalidstats.png'), (nist_test, 'nistteststats.png')):
+    makegraph(freq[:,1], fname)