Mercurial > ift6266
view deep/stacked_dae/nist_sda.py @ 176:d6672a7daea5
Update comments in the dataset definition (you can't pass 0 as minibatch size).
author | Arnaud Bergeron <abergeron@gmail.com> |
---|---|
date | Sat, 27 Feb 2010 14:10:14 -0500 |
parents | 1f5937e9e530 |
children | b9ea8e2d071a |
line wrap: on
line source
#!/usr/bin/python
# coding: utf-8
"""Driver script for stacked denoising auto-encoder (SDA) runs on NIST.

Provides the jobman entry point, a helper that inserts the hyperparameter
grid into the jobs database, rough running-time estimates, and a small
loader class for the NIST train/valid/test sets.
"""

import numpy
import theano
import time
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import copy

import sys
import os.path

from sgd_optimization import SdaSgdOptimizer

from jobman import DD
import jobman
import jobman.sql
from pylearn.io import filetensor

from utils import produit_croise_jobs

# Set to True to use the sandbox database, a reduced training set and very
# few finetuning epochs.
TEST_CONFIG = False

NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'

JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
REDUCE_TRAIN_TO = None
MAX_FINETUNING_EPOCHS = 1000
if TEST_CONFIG:
    JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
    REDUCE_TRAIN_TO = 1000
    MAX_FINETUNING_EPOCHS = 2

JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"

# There used to be
# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
# and
# 'num_hidden_layers':[1,2,3]
# but this is now handled by a special mechanism in SgdOptimizer
# to reuse intermediate results (for the same training of lower layers,
# we can test many finetuning_lr)
JOB_VALS = {
    'pretraining_lr': [0.1, 0.01, 0.001],  # 0.0001 currently disabled
    'pretraining_epochs_per_layer': [10, 20],
    'hidden_layers_sizes': [300, 800],
    'corruption_levels': [0.1, 0.2],
    'minibatch_size': [20],
    'max_finetuning_epochs': [MAX_FINETUNING_EPOCHS],
}
FINETUNING_LR_VALS = [0.1, 0.01, 0.001]  # 0.0001 currently disabled
NUM_HIDDEN_LAYERS_VALS = [1, 2, 3]

# Just useful for tests... minimal number of epochs
DEFAULT_HP_NIST = DD({
    'finetuning_lr': 0.01,
    'pretraining_lr': 0.01,
    'pretraining_epochs_per_layer': 1,
    'max_finetuning_epochs': 1,
    'hidden_layers_sizes': [1000],
    'corruption_levels': [0.2],
    'minibatch_size': 20,
})


def jobman_entrypoint(state, channel):
    """Run one experiment described by ``state``.

    ``state`` is the dict-like hyperparameter set; an optional
    ``'reduce_train_to'`` entry takes precedence over the module-level
    ``REDUCE_TRAIN_TO`` when choosing how many examples to keep.
    ``channel`` only needs a ``COMPLETE`` attribute, which is returned once
    training has finished.
    """
    state = copy.copy(state)

    print("Will load NIST")
    nist = NIST(20)
    print("NIST loaded")

    rtt = None
    if 'reduce_train_to' in state:
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if rtt:
        print("Reducing training set to  %s  examples" % (rtt,))
        nist.reduce_train_set(rtt)

    train, valid, test = nist.get_tvt()
    dataset = (train, valid, test)

    n_ins = 32 * 32
    n_outs = 62  # 10 digits, 26*2 (lower, capitals)

    db = jobman.sql.db(JOBDB_RESULTS)
    optimizer = SdaSgdOptimizer(
        dataset, state, n_ins, n_outs,
        input_divider=255.0, job_tree=True, results_db=db,
        experiment=EXPERIMENT_PATH,
        finetuning_lr_to_try=FINETUNING_LR_VALS,
        num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
    optimizer.train()

    return channel.COMPLETE


def estimate_pretraining_time(job):
    """Return the estimated pretraining time, in minutes, for one job.

    ``job`` is a dict with the keys used in ``JOB_VALS``; only
    ``pretraining_epochs_per_layer`` and ``hidden_layers_sizes`` are read.
    """
    job = DD(job)
    # time spent on pretraining estimated as O(n^2) where n=num hiddens
    # no need to multiply by num_hidden_layers, as results from num=1
    # is reused for num=2, or 3, so in the end we get the same time
    # as if we were training 3 times a single layer
    # constants:
    # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
    # - 12 mins to finetune (per 1 epoch)
    # basically the job_tree trick gives us a 5 times speedup on the
    # pretraining time due to reusing for finetuning_lr
    # and gives us a second x2 speedup for reusing previous layers
    # to explore num_hidden_layers
    # NOTE: the key is 'hidden_layers_sizes' (as in JOB_VALS); the former
    # spelling 'hidden_layer_sizes' does not exist in the job dicts.
    n_hidden = job.hidden_layers_sizes
    return (job.pretraining_epochs_per_layer * 20 / (1000.0 * 1000)
            * n_hidden * n_hidden)


def estimate_total_time():
    """Print the estimated running time of the whole ``JOB_VALS`` grid.

    Two figures are printed (both converted from minutes to hours): the
    estimate with the job-tree reuse, and the estimate without it, followed
    by their ratio.
    """
    jobs = produit_croise_jobs(JOB_VALS)
    sumtime = 0.0
    for job in jobs:
        sumtime += estimate_pretraining_time(job)

    # Finetuning is counted as 12 mins per epoch * 20 epochs, once per
    # finetuning learning rate and per job.
    # Without the tree, pretraining (counted twice, for the lost layer
    # reuse) would be redone for every finetuning learning rate.
    sum_without = (12 * 20 * len(jobs) + sumtime * 2) * len(FINETUNING_LR_VALS)
    sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20

    print("num jobs= %s" % (len(jobs),))
    print("estimate %s  hours" % (sumtime / 60,))
    print("estimate without tree optimization %s ratio %s"
          % (sum_without / 60, sumtime / sum_without))


def jobman_insert_nist():
    """Insert every combination of ``JOB_VALS`` into the jobs database."""
    jobs = produit_croise_jobs(JOB_VALS)

    db = jobman.sql.db(JOBDB_JOBS)
    for job in jobs:
        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
        jobman.sql.insert_dict(job, db)

    print("inserted")


class NIST:
    """Loader for the NIST train/valid/test sets stored as filetensor files.

    ``train``, ``valid`` and ``test`` are each a 2-element list
    ``[data, labels]``. The validation set is carved out of the tail of the
    training set and has the same number of examples as the test set.
    """

    def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
        """Load the files found under ``basepath`` and split off validation.

        ``basepath`` defaults to ``NIST_ALL_LOCATION``. When
        ``reduce_train_to`` is truthy the sets are truncated through
        ``reduce_train_set``. ``minibatch_size`` is only stored.
        """
        self.minibatch_size = minibatch_size
        self.basepath = basepath or NIST_ALL_LOCATION

        self.set_filenames()

        # arrays of 2 elements: .x, .y
        self.train = [None, None]
        self.test = [None, None]

        self.load_train_test()

        self.valid = [[], []]
        self.split_train_valid()
        if reduce_train_to:
            self.reduce_train_set(reduce_train_to)

    def get_tvt(self):
        """Return the ``(train, valid, test)`` triple."""
        return self.train, self.valid, self.test

    def set_filenames(self):
        """Set the ``[data, labels]`` file names for train and test."""
        self.train_files = ['all_train_data.ft',
                            'all_train_labels.ft']

        self.test_files = ['all_test_data.ft',
                           'all_test_labels.ft']

    def load_train_test(self):
        """Read the train and test files into ``self.train``/``self.test``."""
        self.load_data_labels(self.train_files, self.train)
        self.load_data_labels(self.test_files, self.test)

    def load_data_labels(self, filenames, pair):
        """Read each of ``filenames`` (relative to ``basepath``) into ``pair``.

        ``pair[i]`` receives the content of ``filenames[i]``.
        """
        for i, fn in enumerate(filenames):
            # Binary mode: .ft files are not text. try/finally guarantees
            # the handle is released even when the read fails.
            f = open(os.path.join(self.basepath, fn), 'rb')
            try:
                pair[i] = filetensor.read(f)
            finally:
                f.close()

    def reduce_train_set(self, max):
        """Keep at most ``max`` training examples.

        Test and validation sets are truncated to ``max`` as well, but only
        when ``max`` is smaller than the test set.
        """
        self.train[0] = self.train[0][:max]
        self.train[1] = self.train[1][:max]

        if max < len(self.test[0]):
            for ar in (self.test, self.valid):
                ar[0] = ar[0][:max]
                ar[1] = ar[1][:max]

    def split_train_valid(self):
        """Move the last ``len(test)`` training examples into ``valid``."""
        test_len = len(self.test[0])

        for i in (0, 1):
            whole = self.train[i]
            # An explicit split index is used instead of [:-test_len]:
            # with an empty test set, [:-0] would wipe out the training set.
            split_at = len(whole) - test_len
            if split_at < 0:
                split_at = 0
            self.valid[i] = whole[split_at:]
            self.train[i] = whole[:split_at]


def test_load_nist():
    """Load NIST, print the loading time and the set sizes, then wait."""
    print("Will load NIST")

    t1 = time.time()
    nist = NIST(20)
    t2 = time.time()

    print("NIST loaded. time delta =  %s" % (t2 - t1,))

    tr, v, te = nist.get_tvt()

    print("Lenghts:  %s %s %s" % (len(tr[0]), len(v[0]), len(te[0])))

    raw_input("Press any key")


# hp for hyperparameters
def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
    """Train on a 100-example NIST subset without the job tree.

    ``hp`` defaults to ``DEFAULT_HP_NIST``. ``dataset_dir`` is accepted but
    not used by this function.
    """
    hp = hp or DEFAULT_HP_NIST

    print("Will load NIST")

    t1 = time.time()
    nist = NIST(20, reduce_train_to=100)
    t2 = time.time()

    print("NIST loaded. time delta =  %s" % (t2 - t1,))

    train, valid, test = nist.get_tvt()
    dataset = (train, valid, test)

    print(train[0][15])
    print(type(train[0][1]))

    print("Lengths train, valid, test:  %s %s %s"
          % (len(train[0]), len(valid[0]), len(test[0])))

    n_ins = 32 * 32
    n_outs = 62  # 10 digits, 26*2 (lower, capitals)

    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
    optimizer.train()


if __name__ == '__main__':

    args = sys.argv[1:]
    command = None
    if args:
        command = args[0]

    if command == 'load_nist':
        test_load_nist()

    elif command == 'jobman_insert':
        jobman_insert_nist()

    elif command == 'test_job_tree':
        # dont forget to comment out sql.inserts and make reduce_train_to=100
        print("TESTING JOB TREE")
        # jobman_entrypoint reads channel.COMPLETE, so the mock must expose
        # COMPLETE as an attribute; a plain dict would raise AttributeError.
        chanmock = DD({'COMPLETE': 0})
        hp = copy.copy(DEFAULT_HP_NIST)
        hp.update({'reduce_train_to': 100})
        jobman_entrypoint(hp, chanmock)

    elif command == 'estimate':
        estimate_total_time()

    else:
        sgd_optimization_nist()