Mercurial > ift6266
comparison scripts/nist_divide.py @ 60:d508f5a8acd0
Script pour creer ensembles train, valid et test NIST pur
author | boulanni <nicolas_boulanger@hotmail.com> |
---|---|
date | Mon, 08 Feb 2010 14:55:22 -0500 |
parents | |
children | 2b6a28e4cadc |
comparison
equal
deleted
inserted
replaced
59:8b9fca879543 | 60:d508f5a8acd0 |
---|---|
#!/usr/bin/env python

"""
Create the pure-NIST train, valid and test sets.

- the test set is taken as is
- the valid set is trainorig[:20000]
- the train set is trainorig[20000:]

trainorig is already shuffled, so the split is a plain slice.
"""

import os
import shutil

import numpy
from pylearn.io import filetensor as ft

dir1 = "/data/lisa/data/nist/by_class/all/"
dir2 = "/data/lisa/data/ift6266h10/"

# Number of leading rows of the original training set held out as the
# validation set; the remaining rows become the new training set.
N_VALID = 20000

# Both the example data and the labels are stored as a pair of files that
# differ only by this suffix; they must be split at the same index so that
# rows stay aligned.
KINDS = ("data", "labels")

# The test set is copied unchanged.  shutil.copy raises on failure, whereas
# the previous os.system("cp ...") silently ignored a non-zero exit status.
for kind in KINDS:
    shutil.copy(os.path.join(dir1, "all_test_%s.ft" % kind),
                os.path.join(dir2, "test_%s.ft" % kind))

# Split the original training set into valid / train.
for kind in KINDS:
    # .ft files are binary: open in "rb" and close the handle promptly.
    with open(os.path.join(dir1, "all_train_%s.ft" % kind), "rb") as src:
        d = ft.read(src)

    # "with" guarantees each output is flushed and closed before the
    # permission pass below touches it.
    with open(os.path.join(dir2, "valid_%s.ft" % kind), "wb") as dst:
        ft.write(dst, d[:N_VALID])
    with open(os.path.join(dir2, "train_%s.ft" % kind), "wb") as dst:
        ft.write(dst, d[N_VALID:])

# Set the same permission bits as before on every produced file.
# 0o744 is the octal spelling accepted by both Python 2.6+ and Python 3
# (the bare 0744 form is a syntax error on Python 3).
for subset in ("train", "valid", "test"):
    for kind in KINDS:
        os.chmod(os.path.join(dir2, "%s_%s.ft" % (subset, kind)), 0o744)