diff scripts/nist_divide.py @ 60:d508f5a8acd0

Script pour creer ensembles train, valid et test NIST pur
author boulanni <nicolas_boulanger@hotmail.com>
date Mon, 08 Feb 2010 14:55:22 -0500
parents
children 2b6a28e4cadc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/nist_divide.py	Mon Feb 08 14:55:22 2010 -0500
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+'''
+Build the pure NIST train, valid and test sets.
+
+The test set is taken as is.
+The valid set is trainorig[:20000].
+The train set is trainorig[20000:].
+trainorig is already shuffled.
+'''
+
+# Lets the "with" blocks below parse on Python 2.5; a no-op on later versions.
+from __future__ import with_statement
+
+import os
+import shutil
+import stat
+
+import numpy
+from pylearn.io import filetensor as ft
+
+dir1 = "/data/lisa/data/nist/by_class/all/"
+dir2 = "/data/lisa/data/ift6266h10/"
+
+# Number of leading entries of the original train files that form the valid set.
+N_VALID = 20000
+
+# Same permission bits as octal 0744 (rwxr--r--), spelled with stat constants
+# because the bare 0744 literal is a syntax error on Python 3.
+FILE_MODE = stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH
+
+
+def _split_train_file(kind):
+    '''Split dir1/all_train_<kind>.ft into valid and train files in dir2.
+
+    kind is the file-name suffix, "data" or "labels". The slice [:N_VALID]
+    is written to valid_<kind>.ft and the slice [N_VALID:] to train_<kind>.ft.
+    '''
+    # 'rb' to match the 'wb' used for the outputs; each "with" closes its
+    # handle deterministically instead of leaving that to garbage collection.
+    with open(os.path.join(dir1, "all_train_%s.ft" % kind), 'rb') as f:
+        d = ft.read(f)
+    with open(os.path.join(dir2, "valid_%s.ft" % kind), 'wb') as f:
+        ft.write(f, d[:N_VALID])
+    with open(os.path.join(dir2, "train_%s.ft" % kind), 'wb') as f:
+        ft.write(f, d[N_VALID:])
+
+
+# The test files are copied unchanged. shutil.copy raises if a copy fails,
+# so a missing source file stops the script instead of going unnoticed.
+for kind in ["data", "labels"]:
+    shutil.copy(os.path.join(dir1, "all_test_%s.ft" % kind),
+                os.path.join(dir2, "test_%s.ft" % kind))
+
+for kind in ["data", "labels"]:
+    _split_train_file(kind)
+
+for i in ["train", "valid", "test"]:
+    os.chmod(os.path.join(dir2, i + "_data.ft"), FILE_MODE)
+    os.chmod(os.path.join(dir2, i + "_labels.ft"), FILE_MODE)