comparison scripts/nist_divide.py @ 182:2b6a28e4cadc

J'ai reséparé NIST/OCR purs pour avoir des ensembles de test et de validation de 80000 plutôt que 20000, comme on a discuté au cours
author boulanni <nicolas_boulanger@hotmail.com>
date Sat, 27 Feb 2010 18:28:48 -0500
parents d508f5a8acd0
children
comparison
equal deleted inserted replaced
181:f0f47b045cbf 182:2b6a28e4cadc
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 ''' 3 '''
4 creation des ensembles train, valid et test NIST pur 4 creation des ensembles train, valid et test NIST pur
5 ensemble test est pris tel quel 5 ensemble test est pris tel quel
6 ensemble valid est trainorig[:20000] 6 ensemble valid est trainorig[:80000]
7 ensemble train est trainorig[20000:] 7 ensemble train est trainorig[80000:]
8 trainorig est deja shuffled 8 trainorig est deja shuffled
9 ''' 9 '''
10 10
11 from pylearn.io import filetensor as ft 11 from pylearn.io import filetensor as ft
12 import numpy, os 12 import numpy, os
18 os.system("cp %s %s" % (dir1 + "all_test_labels.ft", dir2 + "test_labels.ft")) 18 os.system("cp %s %s" % (dir1 + "all_test_labels.ft", dir2 + "test_labels.ft"))
19 19
20 f = open(dir1 + "/all_train_data.ft") 20 f = open(dir1 + "/all_train_data.ft")
21 d = ft.read(f) 21 d = ft.read(f)
22 f = open(dir2 + "valid_data.ft", 'wb') 22 f = open(dir2 + "valid_data.ft", 'wb')
23 ft.write(f, d[:20000]) 23 ft.write(f, d[:80000])
24 f = open(dir2 + "train_data.ft", 'wb') 24 f = open(dir2 + "train_data.ft", 'wb')
25 ft.write(f, d[20000:]) 25 ft.write(f, d[80000:])
26 26
27 f = open(dir1 + "/all_train_labels.ft") 27 f = open(dir1 + "/all_train_labels.ft")
28 d = ft.read(f) 28 d = ft.read(f)
29 f = open(dir2 + "valid_labels.ft", 'wb') 29 f = open(dir2 + "valid_labels.ft", 'wb')
30 ft.write(f, d[:20000]) 30 ft.write(f, d[:80000])
31 f = open(dir2 + "train_labels.ft", 'wb') 31 f = open(dir2 + "train_labels.ft", 'wb')
32 ft.write(f, d[20000:]) 32 ft.write(f, d[80000:])
33 33
34 for i in ["train", "valid", "test"]: 34 for i in ["train", "valid", "test"]:
35 os.chmod(dir2 + i + "_data.ft", 0744) 35 os.chmod(dir2 + i + "_data.ft", 0744)
36 os.chmod(dir2 + i + "_labels.ft", 0744) 36 os.chmod(dir2 + i + "_labels.ft", 0744)
37 37