Mercurial > ift6266
diff datasets/defs.py @ 257:966272e7f14b
Make the datasets lazy-loading and add a maxsize parameter.
author | Arnaud Bergeron <abergeron@gmail.com> |
---|---|
date | Tue, 16 Mar 2010 18:51:27 -0400 |
parents | 6f4e3719a3cc |
children | 4533350d7361 |
line wrap: on
line diff
--- a/datasets/defs.py Tue Mar 16 14:46:25 2010 -0400 +++ b/datasets/defs.py Tue Mar 16 18:51:27 2010 -0400 @@ -11,44 +11,45 @@ NIST_PATH = os.getenv('NIST_PATH','/data/lisa/data/nist/by_class/') DATA_PATH = os.getenv('DATA_PATH','/data/lisa/data/ift6266h10/') -nist_digits = FTDataSet(train_data = [os.path.join(NIST_PATH,'digits/digits_train_data.ft')], +nist_digits = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'digits/digits_train_data.ft')], train_lbl = [os.path.join(NIST_PATH,'digits/digits_train_labels.ft')], test_data = [os.path.join(NIST_PATH,'digits/digits_test_data.ft')], test_lbl = [os.path.join(NIST_PATH,'digits/digits_test_labels.ft')], - indtype=theano.config.floatX, inscale=255.) -nist_lower = FTDataSet(train_data = [os.path.join(NIST_PATH,'lower/lower_train_data.ft')], + indtype=theano.config.floatX, inscale=255., maxsize=maxsize) +nist_lower = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'lower/lower_train_data.ft')], train_lbl = [os.path.join(NIST_PATH,'lower/lower_train_labels.ft')], test_data = [os.path.join(NIST_PATH,'lower/lower_test_data.ft')], test_lbl = [os.path.join(NIST_PATH,'lower/lower_test_labels.ft')], - indtype=theano.config.floatX, inscale=255.) -nist_upper = FTDataSet(train_data = [os.path.join(NIST_PATH,'upper/upper_train_data.ft')], + indtype=theano.config.floatX, inscale=255., maxsize=maxsize) +nist_upper = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'upper/upper_train_data.ft')], train_lbl = [os.path.join(NIST_PATH,'upper/upper_train_labels.ft')], test_data = [os.path.join(NIST_PATH,'upper/upper_test_data.ft')], test_lbl = [os.path.join(NIST_PATH,'upper/upper_test_labels.ft')], - indtype=theano.config.floatX, inscale=255.) + indtype=theano.config.floatX, inscale=255., maxsize=maxsize) -nist_all = FTDataSet(train_data = [os.path.join(DATA_PATH,'train_data.ft')], +nist_all = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'train_data.ft')], train_lbl = [os.path.join(DATA_PATH,'train_labels.ft')], test_data = [os.path.join(DATA_PATH,'test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'valid_data.ft')], valid_lbl = [os.path.join(DATA_PATH,'valid_labels.ft')], - indtype=theano.config.floatX, inscale=255.) + indtype=theano.config.floatX, inscale=255., maxsize=maxsize) -ocr = FTDataSet(train_data = [os.path.join(DATA_PATH,'ocr_train_data.ft')], +ocr = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'ocr_train_data.ft')], train_lbl = [os.path.join(DATA_PATH,'ocr_train_labels.ft')], test_data = [os.path.join(DATA_PATH,'ocr_test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'ocr_test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'ocr_valid_data.ft')], valid_lbl = [os.path.join(DATA_PATH,'ocr_valid_labels.ft')], - indtype=theano.config.floatX, inscale=255.) + indtype=theano.config.floatX, inscale=255., maxsize=maxsize) -nist_P07 = FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(100)], +nist_P07 = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(100)], train_lbl = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_labels.ft') for i in range(100)], test_data = [os.path.join(DATA_PATH,'data/P07_test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'data/P07_test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')], valid_lbl = [os.path.join(DATA_PATH,'data/P07_valid_labels.ft')], - indtype=theano.config.floatX, inscale=255.) + indtype=theano.config.floatX, inscale=255., maxsize=maxsize) -mnist = GzpklDataSet(os.path.join(DATA_PATH,'mnist.pkl.gz')) +mnist = lambda maxsize=None: GzpklDataSet(os.path.join(DATA_PATH,'mnist.pkl.gz'), + maxsize=maxsize)