# HG changeset patch # User SylvainPL # Date 1269094751 14400 # Node ID 4533350d7361030059592a680a78184dc2fdb440 # Parent b077d9e97a3beda320bb0a9d4b8f6a7a17a9c484 Ajout d'une fonctionnalite pour pouvoir definir un range de fichiers d'entrainement de P07 utilises. Utile pour pre-entrainer et fine-tuner avec des donnees differentes diff -r b077d9e97a3b -r 4533350d7361 datasets/defs.py --- a/datasets/defs.py Fri Mar 19 11:31:57 2010 -0400 +++ b/datasets/defs.py Sat Mar 20 10:19:11 2010 -0400 @@ -43,8 +43,10 @@ valid_lbl = [os.path.join(DATA_PATH,'ocr_valid_labels.ft')], indtype=theano.config.floatX, inscale=255., maxsize=maxsize) -nist_P07 = lambda maxsize=None: FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(100)], - train_lbl = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_labels.ft') for i in range(100)], +#There are 2 more arguments here to choose smaller datasets based on the file number. +#This is useful to get different data for pre-training and finetuning +nist_P07 = lambda maxsize=None, min_file=0, max_file=100: FTDataSet(train_data = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_data.ft') for i in range(min_file, max_file)], + train_lbl = [os.path.join(DATA_PATH,'data/P07_train'+str(i)+'_labels.ft') for i in range(min_file, max_file)], test_data = [os.path.join(DATA_PATH,'data/P07_test_data.ft')], test_lbl = [os.path.join(DATA_PATH,'data/P07_test_labels.ft')], valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')],