annotate datasets/defs.py @ 223:8547b0cbe4ff

Branch merge
author Arnaud Bergeron <abergeron@gmail.com>
date Thu, 11 Mar 2010 14:42:54 -0500
parents 4cfd0eb438af
children 6f4e3719a3cc
rev   line source
211
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
1 __all__ = ['nist_digits', 'nist_lower', 'nist_upper', 'nist_all', 'ocr',
222
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents: 211
diff changeset
2 'nist_P07', 'mnist']
163
4b28d7382dbf Add initial implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
3
4b28d7382dbf Add initial implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
4 from ftfile import FTDataSet
222
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents: 211
diff changeset
5 from gzpklfile import GzpklDataSet
180
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
6 import theano
163
4b28d7382dbf Add initial implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
7
175
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
8 NIST_PATH = '/data/lisa/data/nist/by_class/'
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
9 DATA_PATH = '/data/lisa/data/ift6266h10/'
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
10
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
11 nist_digits = FTDataSet(train_data = [NIST_PATH+'digits/digits_train_data.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
12 train_lbl = [NIST_PATH+'digits/digits_train_labels.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
13 test_data = [NIST_PATH+'digits/digits_test_data.ft'],
180
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
14 test_lbl = [NIST_PATH+'digits/digits_test_labels.ft'],
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
15 indtype=theano.config.floatX, inscale=255.)
175
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
16 nist_lower = FTDataSet(train_data = [NIST_PATH+'lower/lower_train_data.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
17 train_lbl = [NIST_PATH+'lower/lower_train_labels.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
18 test_data = [NIST_PATH+'lower/lower_test_data.ft'],
180
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
19 test_lbl = [NIST_PATH+'lower/lower_test_labels.ft'],
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
20 indtype=theano.config.floatX, inscale=255.)
175
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
21 nist_upper = FTDataSet(train_data = [NIST_PATH+'upper/upper_train_data.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
22 train_lbl = [NIST_PATH+'upper/upper_train_labels.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
23 test_data = [NIST_PATH+'upper/upper_test_data.ft'],
180
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
24 test_lbl = [NIST_PATH+'upper/upper_test_labels.ft'],
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
25 indtype=theano.config.floatX, inscale=255.)
163
4b28d7382dbf Add initial implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff changeset
26
175
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
27 nist_all = FTDataSet(train_data = [DATA_PATH+'train_data.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
28 train_lbl = [DATA_PATH+'train_labels.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
29 test_data = [DATA_PATH+'test_data.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
30 test_lbl = [DATA_PATH+'test_labels.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
31 valid_data = [DATA_PATH+'valid_data.ft'],
180
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
32 valid_lbl = [DATA_PATH+'valid_labels.ft'],
76bc047df5ee Add dtype conversion and rescaling to the read path.
Arnaud Bergeron <abergeron@gmail.com>
parents: 175
diff changeset
33 indtype=theano.config.floatX, inscale=255.)
175
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
34
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
35 ocr = FTDataSet(train_data = [DATA_PATH+'ocr_train_data.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
36 train_lbl = [DATA_PATH+'ocr_train_labels.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
37 test_data = [DATA_PATH+'ocr_test_data.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
38 test_lbl = [DATA_PATH+'ocr_test_labels.ft'],
224321bf043a Define the ocr dataset and use the existing split for nist.
Arnaud Bergeron <abergeron@gmail.com>
parents: 164
diff changeset
39 valid_data = [DATA_PATH+'ocr_valid_data.ft'],
211
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
40 valid_lbl = [DATA_PATH+'ocr_valid_labels.ft'],
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
41 indtype=theano.config.floatX, inscale=255.)
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
42
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
43 nist_P07 = FTDataSet(train_data = [DATA_PATH+'data/P07_train'+str(i)+'_data.ft' for i in range(100)],
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
44 train_lbl = [DATA_PATH+'data/P07_train'+str(i)+'_labels.ft' for i in range(100)],
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
45 test_data = [DATA_PATH+'data/P07_test_data.ft'],
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
46 test_lbl = [DATA_PATH+'data/P07_test_labels.ft'],
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
47 valid_data = [DATA_PATH+'data/P07_valid_data.ft'],
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
48 valid_lbl = [DATA_PATH+'data/P07_valid_labels.ft'],
476da2ba6a12 Add nist_P07 datasets to the predefs.
Arnaud Bergeron <abergeron@gmail.com>
parents: 181
diff changeset
49 indtype=theano.config.floatX, inscale=255.)
222
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents: 211
diff changeset
50
4cfd0eb438af Add mnist to datasets (and supporting code).
Arnaud Bergeron <abergeron@gmail.com>
parents: 211
diff changeset
51 mnist = GzpklDataSet(DATA_PATH+'mnist.pkl.gz')