changeset 175:224321bf043a

Define the ocr dataset and use the existing split for nist.
author Arnaud Bergeron <abergeron@gmail.com>
date Sat, 27 Feb 2010 13:56:14 -0500
parents ff26436d42d6
children d6672a7daea5
files datasets/__init__.py datasets/defs.py datasets/nist.py
diffstat 3 files changed, 37 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/datasets/__init__.py	Sat Feb 27 12:18:26 2010 -0500
+++ b/datasets/__init__.py	Sat Feb 27 13:56:14 2010 -0500
@@ -1,1 +1,2 @@
-from nist import *
+from defs import *
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datasets/defs.py	Sat Feb 27 13:56:14 2010 -0500
@@ -0,0 +1,35 @@
+__all__ = ['nist_digits', 'nist_lower', 'nist_upper', 'nist_all', 'ocr']
+
+from ftfile import FTDataSet
+
+NIST_PATH = '/data/lisa/data/nist/by_class/'
+DATA_PATH = '/data/lisa/data/ift6266h10/'
+
+nist_digits = FTDataSet(train_data = [NIST_PATH+'digits/digits_train_data.ft'],
+                        train_lbl = [NIST_PATH+'digits/digits_train_labels.ft'],
+                        test_data = [NIST_PATH+'digits/digits_test_data.ft'],
+                        test_lbl = [NIST_PATH+'digits/digits_test_labels.ft'])
+nist_lower = FTDataSet(train_data = [NIST_PATH+'lower/lower_train_data.ft'],
+                       train_lbl = [NIST_PATH+'lower/lower_train_labels.ft'],
+                       test_data = [NIST_PATH+'lower/lower_test_data.ft'],
+                       test_lbl = [NIST_PATH+'lower/lower_test_labels.ft'])
+nist_upper = FTDataSet(train_data = [NIST_PATH+'upper/upper_train_data.ft'],
+                       train_lbl = [NIST_PATH+'upper/upper_train_labels.ft'],
+                       test_data = [NIST_PATH+'upper/upper_test_data.ft'],
+                       test_lbl = [NIST_PATH+'upper/upper_test_labels.ft'])
+
+# The datasets below are read from DATA_PATH and, unlike the by_class sets
+# above, are also given valid_data/valid_lbl files in addition to train/test.
+nist_all = FTDataSet(train_data = [DATA_PATH+'train_data.ft'],
+                     train_lbl = [DATA_PATH+'train_labels.ft'],
+                     test_data = [DATA_PATH+'test_data.ft'],
+                     test_lbl = [DATA_PATH+'test_labels.ft'],
+                     valid_data = [DATA_PATH+'valid_data.ft'],
+                     valid_lbl = [DATA_PATH+'valid_labels.ft'])
+
+ocr = FTDataSet(train_data = [DATA_PATH+'ocr_train_data.ft'],
+                train_lbl = [DATA_PATH+'ocr_train_labels.ft'],
+                test_data = [DATA_PATH+'ocr_test_data.ft'],
+                test_lbl = [DATA_PATH+'ocr_test_labels.ft'],
+                valid_data = [DATA_PATH+'ocr_valid_data.ft'],
+                valid_lbl = [DATA_PATH+'ocr_valid_labels.ft'])
--- a/datasets/nist.py	Sat Feb 27 12:18:26 2010 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-__all__ = ['nist_digits', 'nist_lower', 'nist_upper', 'nist_all']
-
-from ftfile import FTDataSet
-
-PATH = '/data/lisa/data/nist/by_class/'
-
-nist_digits = FTDataSet(train_data = [PATH+'digits/digits_train_data.ft'],
-                        train_lbl = [PATH+'digits/digits_train_labels.ft'],
-                        test_data = [PATH+'digits/digits_test_data.ft'],
-                        test_lbl = [PATH+'digits/digits_test_labels.ft'])
-nist_lower = FTDataSet(train_data = [PATH+'lower/lower_train_data.ft'],
-                        train_lbl = [PATH+'lower/lower_train_labels.ft'],
-                        test_data = [PATH+'lower/lower_test_data.ft'],
-                        test_lbl = [PATH+'lower/lower_test_labels.ft'])
-nist_upper = FTDataSet(train_data = [PATH+'upper/upper_train_data.ft'],
-                        train_lbl = [PATH+'upper/upper_train_labels.ft'],
-                        test_data = [PATH+'upper/upper_test_data.ft'],
-                        test_lbl = [PATH+'upper/upper_test_labels.ft'])
-nist_all = FTDataSet(train_data = [PATH+'all/all_train_data.ft'],
-                        train_lbl = [PATH+'all/all_train_labels.ft'],
-                        test_data = [PATH+'all/all_test_data.ft'],
-                        test_lbl = [PATH+'all/all_test_labels.ft'])
-