diff datasets/defs.py @ 257:966272e7f14b

Make the datasets lazy-loading (each definition is now a factory that builds the dataset only when called) and add a maxsize parameter.
author Arnaud Bergeron <abergeron@gmail.com>
date Tue, 16 Mar 2010 18:51:27 -0400
parents 6f4e3719a3cc
children 4533350d7361
line wrap: on
line diff
--- a/datasets/defs.py	Tue Mar 16 14:46:25 2010 -0400
+++ b/datasets/defs.py	Tue Mar 16 18:51:27 2010 -0400
@@ -11,44 +11,45 @@
 NIST_PATH = os.getenv('NIST_PATH','/data/lisa/data/nist/by_class/')
 DATA_PATH = os.getenv('DATA_PATH','/data/lisa/data/ift6266h10/')
 
-nist_digits = FTDataSet(train_data = [os.path.join(NIST_PATH,'digits/digits_train_data.ft')],
+nist_digits = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'digits/digits_train_data.ft')],
                         train_lbl = [os.path.join(NIST_PATH,'digits/digits_train_labels.ft')],
                         test_data = [os.path.join(NIST_PATH,'digits/digits_test_data.ft')],
                         test_lbl = [os.path.join(NIST_PATH,'digits/digits_test_labels.ft')],
-                        indtype=theano.config.floatX, inscale=255.)
-nist_lower = FTDataSet(train_data = [os.path.join(NIST_PATH,'lower/lower_train_data.ft')],
+                        indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
+nist_lower = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'lower/lower_train_data.ft')],
                         train_lbl = [os.path.join(NIST_PATH,'lower/lower_train_labels.ft')],
                         test_data = [os.path.join(NIST_PATH,'lower/lower_test_data.ft')],
                         test_lbl = [os.path.join(NIST_PATH,'lower/lower_test_labels.ft')],
-                        indtype=theano.config.floatX, inscale=255.)
-nist_upper = FTDataSet(train_data = [os.path.join(NIST_PATH,'upper/upper_train_data.ft')],
+                        indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
+nist_upper = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'upper/upper_train_data.ft')],
                         train_lbl = [os.path.join(NIST_PATH,'upper/upper_train_labels.ft')],
                         test_data = [os.path.join(NIST_PATH,'upper/upper_test_data.ft')],
                         test_lbl = [os.path.join(NIST_PATH,'upper/upper_test_labels.ft')],
-                        indtype=theano.config.floatX, inscale=255.)
+                        indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
 
-nist_all = FTDataSet(train_data = [os.path.join(DATA_PATH,'train_data.ft')],
+nist_all = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'train_data.ft')],
                      train_lbl = [os.path.join(DATA_PATH,'train_labels.ft')],
                      test_data = [os.path.join(DATA_PATH,'test_data.ft')],
                      test_lbl = [os.path.join(DATA_PATH,'test_labels.ft')],
                      valid_data = [os.path.join(DATA_PATH,'valid_data.ft')],
                      valid_lbl = [os.path.join(DATA_PATH,'valid_labels.ft')],
-                     indtype=theano.config.floatX, inscale=255.)
+                     indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
 
-ocr = FTDataSet(train_data = [os.path.join(DATA_PATH,'ocr_train_data.ft')],
+ocr = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'ocr_train_data.ft')],
                 train_lbl = [os.path.join(DATA_PATH,'ocr_train_labels.ft')],
                 test_data = [os.path.join(DATA_PATH,'ocr_test_data.ft')],
                 test_lbl = [os.path.join(DATA_PATH,'ocr_test_labels.ft')],
                 valid_data = [os.path.join(DATA_PATH,'ocr_valid_data.ft')],
                 valid_lbl = [os.path.join(DATA_PATH,'ocr_valid_labels.ft')],
-                indtype=theano.config.floatX, inscale=255.)
+                indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
 
-nist_P07 = FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(100)],
+nist_P07 = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(100)],
                      train_lbl = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_labels.ft') for i in range(100)],
                      test_data = [os.path.join(DATA_PATH,'data/P07_test_data.ft')],
                      test_lbl = [os.path.join(DATA_PATH,'data/P07_test_labels.ft')],
                      valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')],
                      valid_lbl = [os.path.join(DATA_PATH,'data/P07_valid_labels.ft')],
-                     indtype=theano.config.floatX, inscale=255.)
+                     indtype=theano.config.floatX, inscale=255., maxsize=maxsize)
 
-mnist = GzpklDataSet(os.path.join(DATA_PATH,'mnist.pkl.gz'))
+mnist = lambda maxsize=None: GzpklDataSet(os.path.join(DATA_PATH,'mnist.pkl.gz'),
+                                          maxsize=maxsize)