Mercurial > ift6266
comparison scripts/nist_divide.py @ 182:2b6a28e4cadc
J'ai reséparé NIST/OCR purs pour avoir des ensembles de test et de validation de 80000 plutôt que 20000, comme on a discuté au cours
author | boulanni <nicolas_boulanger@hotmail.com> |
---|---|
date | Sat, 27 Feb 2010 18:28:48 -0500 |
parents | d508f5a8acd0 |
children |
comparison
equal
deleted
inserted
replaced
181:f0f47b045cbf | 182:2b6a28e4cadc |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 ''' | 3 ''' |
4 creation des ensembles train, valid et test NIST pur | 4 creation des ensembles train, valid et test NIST pur |
5 ensemble test est pris tel quel | 5 ensemble test est pris tel quel |
6 ensemble valid est trainorig[:20000] | 6 ensemble valid est trainorig[:80000] |
7 ensemble train est trainorig[20000:] | 7 ensemble train est trainorig[80000:] |
8 trainorig est deja shuffled | 8 trainorig est deja shuffled |
9 ''' | 9 ''' |
10 | 10 |
11 from pylearn.io import filetensor as ft | 11 from pylearn.io import filetensor as ft |
12 import numpy, os | 12 import numpy, os |
18 os.system("cp %s %s" % (dir1 + "all_test_labels.ft", dir2 + "test_labels.ft")) | 18 os.system("cp %s %s" % (dir1 + "all_test_labels.ft", dir2 + "test_labels.ft")) |
19 | 19 |
20 f = open(dir1 + "/all_train_data.ft") | 20 f = open(dir1 + "/all_train_data.ft") |
21 d = ft.read(f) | 21 d = ft.read(f) |
22 f = open(dir2 + "valid_data.ft", 'wb') | 22 f = open(dir2 + "valid_data.ft", 'wb') |
23 ft.write(f, d[:20000]) | 23 ft.write(f, d[:80000]) |
24 f = open(dir2 + "train_data.ft", 'wb') | 24 f = open(dir2 + "train_data.ft", 'wb') |
25 ft.write(f, d[20000:]) | 25 ft.write(f, d[80000:]) |
26 | 26 |
27 f = open(dir1 + "/all_train_labels.ft") | 27 f = open(dir1 + "/all_train_labels.ft") |
28 d = ft.read(f) | 28 d = ft.read(f) |
29 f = open(dir2 + "valid_labels.ft", 'wb') | 29 f = open(dir2 + "valid_labels.ft", 'wb') |
30 ft.write(f, d[:20000]) | 30 ft.write(f, d[:80000]) |
31 f = open(dir2 + "train_labels.ft", 'wb') | 31 f = open(dir2 + "train_labels.ft", 'wb') |
32 ft.write(f, d[20000:]) | 32 ft.write(f, d[80000:]) |
33 | 33 |
34 for i in ["train", "valid", "test"]: | 34 for i in ["train", "valid", "test"]: |
35 os.chmod(dir2 + i + "_data.ft", 0744) | 35 os.chmod(dir2 + i + "_data.ft", 0744) |
36 os.chmod(dir2 + i + "_labels.ft", 0744) | 36 os.chmod(dir2 + i + "_labels.ft", 0744) |
37 | 37 |