Mercurial > ift6266
diff scripts/stacked_dae/nist_sda.py @ 144:c958941c1b9d
merge
author | XavierMuller |
---|---|
date | Tue, 23 Feb 2010 18:16:55 -0500 |
parents | 3346fcd3818b |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/stacked_dae/nist_sda.py Tue Feb 23 18:16:55 2010 -0500 @@ -0,0 +1,264 @@ +#!/usr/bin/python +# coding: utf-8 + +import numpy +import theano +import time +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams +import copy + +import sys +import os.path + +from sgd_optimization import SdaSgdOptimizer + +from jobman import DD +import jobman, jobman.sql +from pylearn.io import filetensor + +from utils import produit_croise_jobs + +TEST_CONFIG = False + +NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' + +JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/' +REDUCE_TRAIN_TO = None +MAX_FINETUNING_EPOCHS = 1000 +if TEST_CONFIG: + JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/' + REDUCE_TRAIN_TO = 1000 + MAX_FINETUNING_EPOCHS = 2 + +JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs' +JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results' +EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint" + +# There used to be +# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1] +# and +# 'num_hidden_layers':[1,2,3] +# but this is now handled by a special mechanism in SgdOptimizer +# to reuse intermediate results (for the same training of lower layers, +# we can test many finetuning_lr) +JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001], + 'pretraining_epochs_per_layer': [10,20], + 'hidden_layers_sizes': [300,800], + 'corruption_levels': [0.1,0.2], + 'minibatch_size': [20], + 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]} +FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001] +NUM_HIDDEN_LAYERS_VALS = [1,2,3] + +# Just useful for tests... minimal number of epochs +DEFAULT_HP_NIST = DD({'finetuning_lr':0.01, + 'pretraining_lr':0.01, + 'pretraining_epochs_per_layer':1, + 'max_finetuning_epochs':1, + 'hidden_layers_sizes':[1000], + 'corruption_levels':[0.2], + 'minibatch_size':20}) + +def jobman_entrypoint(state, channel): + state = copy.copy(state) + + print "Will load NIST" + nist = NIST(20) + print "NIST loaded" + + rtt = None + if state.has_key('reduce_train_to'): + rtt = state['reduce_train_to'] + elif REDUCE_TRAIN_TO: + rtt = REDUCE_TRAIN_TO + + if rtt: + print "Reducing training set to ", rtt, " examples" + nist.reduce_train_set(rtt) + + train,valid,test = nist.get_tvt() + dataset = (train,valid,test) + + n_ins = 32*32 + n_outs = 62 # 10 digits, 26*2 (lower, capitals) + + db = jobman.sql.db(JOBDB_RESULTS) + optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\ + input_divider=255.0, job_tree=True, results_db=db, \ + experiment=EXPERIMENT_PATH, \ + finetuning_lr_to_try=FINETUNING_LR_VALS, \ + num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS) + optimizer.train() + + return channel.COMPLETE + +def estimate_pretraining_time(job): + job = DD(job) + # time spent on pretraining estimated as O(n^2) where n=num hidens + # no need to multiply by num_hidden_layers, as results from num=1 + # is reused for num=2, or 3, so in the end we get the same time + # as if we were training 3 times a single layer + # constants: + # - 20 mins to pretrain a layer with 1000 units (per 1 epoch) + # - 12 mins to finetune (per 1 epoch) + # basically the job_tree trick gives us a 5 times speedup on the + # pretraining time due to reusing for finetuning_lr + # and gives us a second x2 speedup for reusing previous layers + # to explore num_hidden_layers + return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \ + * job.hidden_layer_sizes * job.hidden_layer_sizes) + +def estimate_total_time(): + jobs = produit_croise_jobs(JOB_VALS) + sumtime = 0.0 + sum_without = 0.0 + for job in jobs: + sumtime += estimate_pretraining_time(job) + # 12 mins per epoch * 30 epochs + # 5 finetuning_lr per pretraining combination + sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS) + sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20 + print "num jobs=", len(jobs) + print "estimate", sumtime/60, " hours" + print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without + +def jobman_insert_nist(): + jobs = produit_croise_jobs(JOB_VALS) + + db = jobman.sql.db(JOBDB_JOBS) + for job in jobs: + job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) + jobman.sql.insert_dict(job, db) + + print "inserted" + +class NIST: + def __init__(self, minibatch_size, basepath=None, reduce_train_to=None): + global NIST_ALL_LOCATION + + self.minibatch_size = minibatch_size + self.basepath = basepath and basepath or NIST_ALL_LOCATION + + self.set_filenames() + + # arrays of 2 elements: .x, .y + self.train = [None, None] + self.test = [None, None] + + self.load_train_test() + + self.valid = [[], []] + self.split_train_valid() + if reduce_train_to: + self.reduce_train_set(reduce_train_to) + + def get_tvt(self): + return self.train, self.valid, self.test + + def set_filenames(self): + self.train_files = ['all_train_data.ft', + 'all_train_labels.ft'] + + self.test_files = ['all_test_data.ft', + 'all_test_labels.ft'] + + def load_train_test(self): + self.load_data_labels(self.train_files, self.train) + self.load_data_labels(self.test_files, self.test) + + def load_data_labels(self, filenames, pair): + for i, fn in enumerate(filenames): + f = open(os.path.join(self.basepath, fn)) + pair[i] = filetensor.read(f) + f.close() + + def reduce_train_set(self, max): + self.train[0] = self.train[0][:max] + self.train[1] = self.train[1][:max] + + if max < len(self.test[0]): + for ar in (self.test, self.valid): + ar[0] = ar[0][:max] + ar[1] = ar[1][:max] + + def split_train_valid(self): + test_len = len(self.test[0]) + + new_train_x = self.train[0][:-test_len] + new_train_y = self.train[1][:-test_len] + + self.valid[0] = self.train[0][-test_len:] + self.valid[1] = self.train[1][-test_len:] + + self.train[0] = new_train_x + self.train[1] = new_train_y + +def test_load_nist(): + print "Will load NIST" + + import time + t1 = time.time() + nist = NIST(20) + t2 = time.time() + + print "NIST loaded. time delta = ", t2-t1 + + tr,v,te = nist.get_tvt() + + print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0]) + + raw_input("Press any key") + +# hp for hyperparameters +def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'): + global DEFAULT_HP_NIST + hp = hp and hp or DEFAULT_HP_NIST + + print "Will load NIST" + + import time + t1 = time.time() + nist = NIST(20, reduce_train_to=100) + t2 = time.time() + + print "NIST loaded. time delta = ", t2-t1 + + train,valid,test = nist.get_tvt() + dataset = (train,valid,test) + + print train[0][15] + print type(train[0][1]) + + + print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0]) + + n_ins = 32*32 + n_outs = 62 # 10 digits, 26*2 (lower, capitals) + + optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0) + optimizer.train() + +if __name__ == '__main__': + + import sys + + args = sys.argv[1:] + + if len(args) > 0 and args[0] == 'load_nist': + test_load_nist() + + elif len(args) > 0 and args[0] == 'jobman_insert': + jobman_insert_nist() + elif len(args) > 0 and args[0] == 'test_job_tree': + # dont forget to comment out sql.inserts and make reduce_train_to=100 + print "TESTING JOB TREE" + chanmock = {'COMPLETE':0} + hp = copy.copy(DEFAULT_HP_NIST) + hp.update({'reduce_train_to':100}) + jobman_entrypoint(hp, chanmock) + elif len(args) > 0 and args[0] == 'estimate': + estimate_total_time() + else: + sgd_optimization_nist() +