Mercurial > ift6266
changeset 182:2b6a28e4cadc
J'ai reséparé NIST/OCR purs pour avoir des ensembles de test et de validation de 80000 plutôt que 20000, comme on a discuté au cours
author | boulanni <nicolas_boulanger@hotmail.com> |
---|---|
date | Sat, 27 Feb 2010 18:28:48 -0500 |
parents | f0f47b045cbf |
children | 992ca8035a4d |
files | scripts/nist_divide.py scripts/ocr_divide.py |
diffstat | 2 files changed, 15 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/scripts/nist_divide.py Sat Feb 27 17:10:37 2010 -0500
+++ b/scripts/nist_divide.py Sat Feb 27 18:28:48 2010 -0500
@@ -3,8 +3,8 @@
 '''
 creation des ensembles train, valid et test NIST pur
 ensemble test est pris tel quel
-ensemble valid est trainorig[:20000]
-ensemble train est trainorig[20000:]
+ensemble valid est trainorig[:80000]
+ensemble train est trainorig[80000:]
 trainorig est deja shuffled
 '''
 
@@ -20,16 +20,16 @@
 f = open(dir1 + "/all_train_data.ft")
 d = ft.read(f)
 f = open(dir2 + "valid_data.ft", 'wb')
-ft.write(f, d[:20000])
+ft.write(f, d[:80000])
 f = open(dir2 + "train_data.ft", 'wb')
-ft.write(f, d[20000:])
+ft.write(f, d[80000:])
 
 f = open(dir1 + "/all_train_labels.ft")
 d = ft.read(f)
 f = open(dir2 + "valid_labels.ft", 'wb')
-ft.write(f, d[:20000])
+ft.write(f, d[:80000])
 f = open(dir2 + "train_labels.ft", 'wb')
-ft.write(f, d[20000:])
+ft.write(f, d[80000:])
 
 for i in ["train", "valid", "test"]:
     os.chmod(dir2 + i + "_data.ft", 0744)
```
```diff
--- a/scripts/ocr_divide.py Sat Feb 27 17:10:37 2010 -0500
+++ b/scripts/ocr_divide.py Sat Feb 27 18:28:48 2010 -0500
@@ -2,9 +2,9 @@
 
 '''
 creation des ensembles train, valid et test OCR
-ensemble valid est trainorig[:20000]
-ensemble test est trainorig[20000:40000]
-ensemble train est trainorig[40000:]
+ensemble valid est trainorig[:80000]
+ensemble test est trainorig[80000:160000]
+ensemble train est trainorig[160000:]
 trainorig est deja shuffled
 '''
 
@@ -17,20 +17,20 @@
 f = open(dir1 + 'unlv-corrected-2010-02-01-shuffled.ft')
 d = ft.read(f)
 f = open(dir2 + "ocr_valid_data.ft", 'wb')
-ft.write(f, d[:20000])
+ft.write(f, d[:80000])
 f = open(dir2 + "ocr_test_data.ft", 'wb')
-ft.write(f, d[20000:40000])
+ft.write(f, d[80000:160000])
 f = open(dir2 + "ocr_train_data.ft", 'wb')
-ft.write(f, d[40000:])
+ft.write(f, d[160000:])
 
 f = open(dir1 + 'unlv-corrected-2010-02-01-labels-shuffled.ft')
 d = ft.read(f)
 f = open(dir2 + "ocr_valid_labels.ft", 'wb')
-ft.write(f, d[:20000])
+ft.write(f, d[:80000])
 f = open(dir2 + "ocr_test_labels.ft", 'wb')
-ft.write(f, d[20000:40000])
+ft.write(f, d[80000:160000])
 f = open(dir2 + "ocr_train_labels.ft", 'wb')
-ft.write(f, d[40000:])
+ft.write(f, d[160000:])
 
 for i in ["train", "valid", "test"]:
     os.chmod(dir2 + "ocr_" + i + "_data.ft", 0744)
```