view scripts/stat_graph.py @ 450:c1df23c98eb6

New script to create histograms for datasets (for the report)
author Arnaud Bergeron <abergeron@gmail.com>
date Mon, 10 May 2010 11:41:02 -0400
parents
children 227ebc0be7ae
line wrap: on
line source

import matplotlib
matplotlib.use('Agg')

from pylab import *
from scipy import stats
import numpy
from ift6266 import datasets

nistp_valid = stats.itemfreq(datasets.PNIST07().valid(10000000).next()[1])
nist_valid = stats.itemfreq(datasets.nist_all().valid(10000000).next()[1])
nist_test = stats.itemfreq(datasets.nist_all().test(10000000).next()[1])
print 'nistp_valid', sum(nistp_valid[:,1])
print 'nist_valid', sum(nist_valid[:,1])
print 'nist_test', sum(nist_test[:,1])

# Text drawn under each bar: the ten digits, then 'A'-'Z', then 'a'-'z'
# (62 entries).  Bar i is paired with labels[i] when plotting, so this order
# presumably mirrors the dataset's class indices -- TODO confirm.
# Each piece is wrapped in list() so the concatenation also works where
# map() returns an iterator rather than a list.
labels = (list(map(str, range(10)))
          + list(map(chr, range(ord('A'), ord('Z') + 1)))
          + list(map(chr, range(ord('a'), ord('z') + 1))))

# Left edge of each bar: 0.5, 1.5, ...  Derived from the label list so the
# number of bar positions cannot drift away from the number of labels.
xloc = numpy.arange(len(labels)) + 0.5

def makegraph(data, fname, labels=labels, xloc=xloc, width=0.5,
              ymax=7000, label_y=-250):
    """Draw `data` as a bar chart and save it to `fname`.

    Parameters:
      data    -- bar heights, one per entry of `xloc`.
      fname   -- output path handed to savefig().
      labels  -- text drawn under each bar, paired with `xloc` by position.
      xloc    -- x coordinate of the left edge of each bar.
      width   -- bar width, in x data units.
      ymax    -- upper limit of the y axis (the lower limit is 0).
      label_y -- y data coordinate of the label baseline; the default is
                 negative, i.e. below the y=0 bottom of the axes.

    Returns None.  The figure is closed once it has been written (or if
    drawing fails), so repeated calls do not accumulate open figures.
    """
    fig = figure(figsize=(8,6))
    try:
        # Pin xloc to the left edge of each bar: the label positions below
        # are computed as "left edge + half a width", which is only the bar
        # centre under edge alignment.
        bar(xloc, data, width=width, align='edge')

        # Hide the regular ticks; the class labels are placed by hand as
        # text centred under each bar.
        xticks([])
        half_width = width / 2.0  # float division even for an int width
        for left, label in zip(xloc, labels):
            text(left + half_width, label_y, label,
                 horizontalalignment='center', verticalalignment='baseline')

        xlim(0, xloc[-1] + width * 2)
        ylim(0, ymax)

        savefig(fname)
    finally:
        close(fig)


# Render one histogram image per frequency table; column 1 of each table
# supplies the bar heights.
for _heights, _png in ((nistp_valid[:,1], 'nistpvalidstats.png'),
                       (nist_valid[:,1], 'nistvalidstats.png'),
                       (nist_test[:,1], 'nistteststats.png')):
    makegraph(_heights, _png)