#!/usr/bin/env python
# HG changeset patch
# User boulanni
# Date 1265658922 18000
# Node ID d508f5a8acd028b47d897d1fdc7e52f72f3592f5
# Parent 8b9fca8795437424e04863a94f9668b498bf6032
# Script to create the pure NIST train, valid and test sets
# (added by this changeset as scripts/nist_divide.py)

"""Create the pure NIST train, valid and test sets.

- The test set is taken as is (plain file copy).
- The valid set is trainorig[:20000].
- The train set is trainorig[20000:].

trainorig is already shuffled, so the split is a plain slice.
"""

import os
import shutil

import numpy  # not used directly here; kept from the original script

# Directory holding the original NIST filetensor (.ft) files.
SRC_DIR = "/data/lisa/data/nist/by_class/all/"
# Directory receiving the train/valid/test files.
DST_DIR = "/data/lisa/data/ift6266h10/"
# Number of leading examples of the original training set used for validation.
N_VALID = 20000
# Permissions applied to every produced file (rwxr--r--).  Written as 0o744
# because the bare ``0744`` form is a syntax error on Python 3.
FILE_MODE = 0o744

KINDS = ("data", "labels")
SUBSETS = ("train", "valid", "test")


def split_valid_train(data, n_valid=N_VALID):
    """Split *data* into its validation and training parts.

    Args:
        data: any sliceable sequence (the script passes what ``ft.read``
            returns).
        n_valid: number of leading items that go to the validation part.

    Returns:
        A ``(valid, train)`` pair equal to ``(data[:n_valid], data[n_valid:])``.
    """
    return data[:n_valid], data[n_valid:]


def divide_file(src_path, valid_path, train_path, n_valid=N_VALID):
    """Read one filetensor file and write its valid/train slices.

    Args:
        src_path: path of the original training file to read.
        valid_path: destination of the first ``n_valid`` items.
        train_path: destination of the remaining items.
        n_valid: size of the validation slice.
    """
    # Imported here so the module (and split_valid_train) stays importable
    # on machines where pylearn is not installed.
    from pylearn.io import filetensor as ft

    # .ft files are binary: open in 'rb' and close the handle deterministically.
    with open(src_path, "rb") as src:
        data = ft.read(src)

    valid, train = split_valid_train(data, n_valid)
    for path, part in ((valid_path, valid), (train_path, train)):
        with open(path, "wb") as out:
            ft.write(out, part)


def main(src_dir=SRC_DIR, dst_dir=DST_DIR, n_valid=N_VALID):
    """Build the test, valid and train files in *dst_dir* from *src_dir*.

    Args:
        src_dir: directory containing the ``all_{test,train}_{data,labels}.ft``
            files.
        dst_dir: directory where ``{train,valid,test}_{data,labels}.ft`` are
            written.
        n_valid: number of leading training examples used for validation.

    Raises:
        IOError/OSError: if a file cannot be copied, read, written or chmod'ed
            (the original ``os.system("cp ...")`` ignored copy failures).
    """
    # The test set is copied unchanged.
    for kind in KINDS:
        shutil.copy(os.path.join(src_dir, "all_test_%s.ft" % kind),
                    os.path.join(dst_dir, "test_%s.ft" % kind))

    # Data and labels are split at the same index so they stay aligned.
    for kind in KINDS:
        divide_file(os.path.join(src_dir, "all_train_%s.ft" % kind),
                    os.path.join(dst_dir, "valid_%s.ft" % kind),
                    os.path.join(dst_dir, "train_%s.ft" % kind),
                    n_valid)

    for subset in SUBSETS:
        for kind in KINDS:
            os.chmod(os.path.join(dst_dir, "%s_%s.ft" % (subset, kind)),
                     FILE_MODE)


if __name__ == "__main__":
    main()