Mercurial > ift6266
view datasets/defs.py @ 613:5e481b224117
fix the reading of PNIST dataset following Dumi compression of the data.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Thu, 06 Jan 2011 13:57:05 -0500 |
parents | 22efb4968054 |
children |
line wrap: on
line source
__all__ = ['nist_digits', 'nist_lower', 'nist_upper', 'nist_all', 'ocr', 'nist_P07', 'PNIST07', 'mnist'] from ftfile import FTDataSet from gzpklfile import GzpklDataSet import theano import os # if the environmental variables exist, get the path from them, # otherwise fall back on the default NIST_PATH = os.getenv('NIST_PATH','/data/lisa/data/nist/by_class/') DATA_PATH = os.getenv('DATA_PATH','/data/lisa/data/ift6266h10/') nist_digits = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'digits/digits_train_data.ft')], train_lbl = [os.path.join(NIST_PATH,'digits/digits_train_labels.ft')], test_data = [os.path.join(NIST_PATH,'digits/digits_test_data.ft')], test_lbl = [os.path.join(NIST_PATH,'digits/digits_test_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) nist_lower = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'lower/lower_train_data.ft')], train_lbl = [os.path.join(NIST_PATH,'lower/lower_train_labels.ft')], test_data = [os.path.join(NIST_PATH,'lower/lower_test_data.ft')], test_lbl = [os.path.join(NIST_PATH,'lower/lower_test_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) nist_upper = lambda maxsize=None: FTDataSet(train_data = [os.path.join(NIST_PATH,'upper/upper_train_data.ft')], train_lbl = [os.path.join(NIST_PATH,'upper/upper_train_labels.ft')], test_data = [os.path.join(NIST_PATH,'upper/upper_test_data.ft')], test_lbl = [os.path.join(NIST_PATH,'upper/upper_test_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) nist_all = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'train_data.ft')], train_lbl = [os.path.join(DATA_PATH,'train_labels.ft')], test_data = [os.path.join(DATA_PATH,'test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'valid_data.ft')], valid_lbl = [os.path.join(DATA_PATH,'valid_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) ocr = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'ocr_train_data.ft')], train_lbl = [os.path.join(DATA_PATH,'ocr_train_labels.ft')], test_data = [os.path.join(DATA_PATH,'ocr_test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'ocr_test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'ocr_valid_data.ft')], valid_lbl = [os.path.join(DATA_PATH,'ocr_valid_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) #There is 2 more arguments here to can choose smaller datasets based on the file number. #This is usefull to get different data for pre-training and finetuning nist_P07 = lambda maxsize=None, min_file=0, max_file=100: FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(min_file, max_file)], train_lbl = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_labels.ft') for i in range(min_file, max_file)], test_data = [os.path.join(DATA_PATH,'data/P07_test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'data/P07_test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')], valid_lbl = [os.path.join(DATA_PATH,'data/P07_valid_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) #Added PNIST07 PNIST07 = lambda maxsize=None, min_file=0, max_file=100: FTDataSet(train_data = [os.path.join(DATA_PATH,'data/PNIST07_train'+str(i)+'_data.ft') for i in range(min_file, max_file)], train_lbl = [os.path.join(DATA_PATH,'data/PNIST07_train'+str(i)+'_labels.ft') for i in range(min_file, max_file)], test_data = [os.path.join(DATA_PATH,'data/PNIST07_test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'data/PNIST07_test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'data/PNIST07_valid_data.ft')], valid_lbl = [os.path.join(DATA_PATH,'data/PNIST07_valid_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) mnist = lambda maxsize=None: GzpklDataSet(os.path.join(DATA_PATH,'mnist.pkl.gz'), maxsize=maxsize)