view scripts/nist_divide.py @ 166:17ae5a1a4dd1

Moving the convolutional MLP code into baseline
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Fri, 26 Feb 2010 14:03:24 -0500
parents d508f5a8acd0
children 2b6a28e4cadc
line wrap: on
line source

#!/usr/bin/env python

'''
Create the pure NIST train, valid and test sets.
The test set is taken as is.
The valid set is trainorig[:20000].
The train set is trainorig[20000:].
trainorig is already shuffled.
'''

import os
import shutil

import numpy
from pylearn.io import filetensor as ft

# Directory holding the original NIST filetensor files (read only here).
dir1 = "/data/lisa/data/nist/by_class/all/"
# Directory receiving the train/valid/test files produced by this script.
dir2 = "/data/lisa/data/ift6266h10/"

# The first VALID_SIZE examples of the original training set become the
# validation set; everything after them becomes the new training set.
VALID_SIZE = 20000

# Permission bits applied to every produced file (rwxr--r--).
# Written as 0o744 (valid on Python 2.6+ and 3) instead of the
# Python-2-only literal 0744.
FILE_MODE = 0o744


def _split_train_valid(kind):
    """Split the original training file of the given kind into valid/train.

    Reads ``all_train_<kind>.ft`` from ``dir1`` and writes its first
    ``VALID_SIZE`` entries to ``valid_<kind>.ft`` and the remaining entries
    to ``train_<kind>.ft``, both in ``dir2``.

    kind -- "data" or "labels".
    """
    # Filetensor files are binary, so open with 'rb'/'wb'. The ``with``
    # blocks guarantee each file is flushed and closed before the
    # permissions are changed further down.
    with open(os.path.join(dir1, "all_train_%s.ft" % kind), 'rb') as src:
        full = ft.read(src)
    with open(os.path.join(dir2, "valid_%s.ft" % kind), 'wb') as dst:
        ft.write(dst, full[:VALID_SIZE])
    with open(os.path.join(dir2, "train_%s.ft" % kind), 'wb') as dst:
        ft.write(dst, full[VALID_SIZE:])


# The test set is taken unchanged. shutil.copyfile raises on failure,
# whereas the exit status of a shelled-out ``cp`` was silently ignored.
for kind in ("data", "labels"):
    shutil.copyfile(os.path.join(dir1, "all_test_%s.ft" % kind),
                    os.path.join(dir2, "test_%s.ft" % kind))

# Same split for the examples and for their labels, so they stay aligned.
for kind in ("data", "labels"):
    _split_train_valid(kind)

# Give every produced file the same permissions.
for split in ("train", "valid", "test"):
    for kind in ("data", "labels"):
        os.chmod(os.path.join(dir2, "%s_%s.ft" % (split, kind)), FILE_MODE)