Mercurial > ift6266
comparison scripts/CalcPropNist.py @ 92:708ad2abebdd
Petit script primitif utilisé pour calculer la proportion des trois classes (chiffres, majuscules, minuscules) dans NIST train et NIST test.
author | SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca> |
---|---|
date | Thu, 11 Feb 2010 10:43:22 -0500 |
parents | |
children | 7054d7afb948 |
comparison
equal
deleted
inserted
replaced
91:6696391273ab | 92:708ad2abebdd |
---|---|
#!/usr/bin/python
# coding: utf-8

'''
Compute the proportion of digits, uppercase letters and lowercase letters
in the NIST train and NIST test label files.

Sylvain Pannetier Lebeuf, for IFT6266, winter 2010

'''

# Locations of the label files read by this script.
TRAIN_LABELS_PATH = '/data/lisa/data/nist/by_class/all/all_train_labels.ft'
TEST_LABELS_PATH = '/data/lisa/data/nist/by_class/all/all_test_labels.ft'

# Class-index layout used by the counting below:
#   [0, 10)  -> digits, [10, 36) -> uppercase, [36, 62) -> lowercase.
DIGITS_END = 10
UPPERCASE_END = 36
LOWERCASE_END = 62


def count_classes(labels):
    """Count how many labels are digits, uppercase and lowercase letters.

    labels: iterable of integer class indices.

    Returns a ``(digits, uppercase, lowercase)`` tuple. Labels outside
    ``[0, 62)`` are not counted in any bucket, so the three counts sum to
    less than the number of labels when some labels are invalid.
    """
    digits = uppercase = lowercase = 0
    for label in labels:
        # Explicit lower bounds: a negative label is not a digit.
        if 0 <= label < DIGITS_END:
            digits += 1
        elif DIGITS_END <= label < UPPERCASE_END:
            uppercase += 1
        elif UPPERCASE_END <= label < LOWERCASE_END:
            lowercase += 1
    return digits, uppercase, lowercase


def proportion(count, total):
    """Return ``count / total`` as a float, or 0.0 when ``total`` is zero."""
    if not total:
        # An empty label set would otherwise raise ZeroDivisionError.
        return 0.0
    return float(count) / total


def format_report(title, counts, total):
    """Build the report text for one data set.

    title:  heading printed before the number of examples.
    counts: ``(digits, uppercase, lowercase)`` as returned by count_classes.
    total:  number of labels in the data set.

    The spacing (a blank before each newline/tab) reproduces what the
    Python 2 ``print a, b, c`` statement emitted in the original script.
    """
    digits, uppercase, lowercase = counts
    return "%s %s \nchiffres: %s \tmajuscules: %s \tminuscules: %s" % (
        title,
        total,
        proportion(digits, total),
        proportion(uppercase, total),
        proportion(lowercase, total),
    )


def read_labels(path):
    """Read a label vector from the filetensor file at ``path``."""
    # Imported here so the pure helpers above stay usable without pylearn.
    from pylearn.io import filetensor as ft

    # Binary mode: .ft is a binary format. The with-block also guarantees the
    # handle is closed if ft.read raises.
    with open(path, 'rb') as label_file:
        return ft.read(label_file)


def main():
    """Print the class proportions of the NIST train and test sets."""
    train = read_labels(TRAIN_LABELS_PATH)
    test = read_labels(TEST_LABELS_PATH)

    train_counts = count_classes(train)
    test_counts = count_classes(test)

    print(format_report("Train set:", train_counts, len(train)))
    print(format_report("\nTest set:", test_counts, len(test)))

    # Sanity check: every example must have fallen into one of the 3 buckets.
    if sum(test_counts) != len(test):
        print("probleme avec le test, des donnees ne sont pas etiquetees")

    if sum(train_counts) != len(train):
        print("probleme avec le train, des donnees ne sont pas etiquetees")


if __name__ == '__main__':
    main()