annotate scripts/stat_graph.py @ 481:3e4290448eeb

background.png and Background.PNG produce a case collision on Mac and Windows, renamed one of them
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Sun, 30 May 2010 19:43:13 -0400
parents 227ebc0be7ae
children
rev   line source
450
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
1 import matplotlib
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
2 matplotlib.use('Agg')
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
3
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
4 from pylab import *
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
5 from scipy import stats
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
6 import numpy
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
7 from ift6266 import datasets
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
8
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
def _label_frequencies(batches):
    """Return a relative-frequency table for the first batch of *batches*.

    *batches* is an iterator; element 1 of the first item it yields is
    handed to ``stats.itemfreq`` (presumably the label vector of an
    (inputs, labels) pair -- confirm against ``ift6266.datasets``).

    The result is the 2-column table built by ``stats.itemfreq`` with
    column 1 rescaled in place so that it sums to 1.
    """
    # The builtin next() works on both Python 2.6+ and Python 3, unlike
    # the Python-2-only ``.next()`` method call.
    first_batch = next(batches)
    # Force a float table: the in-place division below would truncate (or
    # be rejected outright) if itemfreq handed back an integer-typed table.
    table = numpy.asarray(stats.itemfreq(first_batch[1]), dtype=float)
    table[:, 1] /= table[:, 1].sum()
    return table


# One frequency table per dataset split.  The very large argument is
# presumably a batch size chosen so the first batch covers the whole
# split -- TODO confirm against ift6266.datasets.
nistp_valid = _label_frequencies(datasets.PNIST07().valid(10000000))
nist_valid = _label_frequencies(datasets.nist_all().valid(10000000))
nist_test = _label_frequencies(datasets.nist_all().test(10000000))
nist_train = _label_frequencies(datasets.nist_all().train(100000000))

# Left edge of each of the 62 bars, half a unit in from the axis origin.
xloc = numpy.arange(62) + 0.5

# Tick labels: '0'-'9', then 'A'-'Z', then 'a'-'z' (62 in total).
# list(map(...)) keeps the concatenation valid where map() is lazy.
labels = (list(map(str, range(10)))
          + list(map(chr, range(65, 91)))
          + list(map(chr, range(97, 123))))
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
21
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
def makegraph(data, fname, labels=labels, xloc=xloc, width=0.5):
    """Draw *data* as a bar chart and save it to *fname*.

    Parameters:
        data   -- bar heights, one per entry of *xloc*.
        fname  -- output path handed to ``savefig``.
        labels -- text drawn under each bar; defaults to the module-level
                  ``labels`` as it was when this function was defined.
        xloc   -- left edge of each bar; defaults to the module-level
                  ``xloc`` as it was when this function was defined.
        width  -- width of every bar.

    The y axis is fixed to the range [0, 0.1].  Returns None.
    """
    fig = figure(figsize=(8, 6))
    try:
        # Pin the bars to start at xloc: the label positions below assume
        # each bar spans [x, x + width], which is not the default
        # alignment on every matplotlib version.
        bar(xloc, data, width=width, align='edge')
        # Suppress the automatic ticks; labels are placed by hand just
        # below the axis, centred under their bar.
        xticks([])
        for x, l in zip(xloc, labels):
            # 2.0 keeps this a true division even for an integer width.
            text(x + width / 2.0, -0.004, l,
                 horizontalalignment='center', verticalalignment='baseline')
        xlim(0, xloc[-1] + width * 2)
        ylim(0, 0.1)

        savefig(fname)
    finally:
        # Each call opens a new figure; release it so repeated calls do
        # not accumulate open figures in pyplot.
        close(fig)
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
34
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
35
c1df23c98eb6 New script to create histograms for datasets (for the report)
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
# Render one histogram per dataset split, plotting column 1 of each
# frequency table, in the same order as before.
for freq_table, out_name in (
        (nistp_valid, 'nistpvalidstats.png'),
        (nist_valid, 'nistvalidstats.png'),
        (nist_test, 'nistteststats.png'),
        (nist_train, 'nisttrainstats.png'),
):
    makegraph(freq_table[:, 1], out_name)