Mercurial > ift6266
diff deep/stacked_dae/nist_sda.py @ 192:e656edaedb48
Commented a few things, renamed the produit_croise_jobs function, replaced the cost function (NOT TESTED YET).
author | fsavard |
---|---|
date | Wed, 03 Mar 2010 12:51:40 -0500 |
parents | 3632e6258642 |
children | 6ea5dcf0541e e1f5f66dd7dd |
line wrap: on
line diff
--- a/deep/stacked_dae/nist_sda.py Tue Mar 02 14:47:18 2010 -0500 +++ b/deep/stacked_dae/nist_sda.py Wed Mar 03 12:51:40 2010 -0500 @@ -21,7 +21,7 @@ import jobman, jobman.sql from pylearn.io import filetensor -from utils import produit_croise_jobs +from utils import produit_cartesien_jobs from sgd_optimization import SdaSgdOptimizer @@ -31,7 +31,7 @@ NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' -JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/fsavard_sda2' +JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4' REDUCE_TRAIN_TO = None MAX_FINETUNING_EPOCHS = 1000 @@ -43,6 +43,10 @@ EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint" +# Possible values the hyperparameters can take. These are then +# combined with produit_cartesien_jobs so we get a list of all +# possible combinations, each one resulting in a job inserted +# in the jobman DB. JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001], 'pretraining_epochs_per_layer': [10,20], 'hidden_layers_sizes': [300,800], @@ -63,7 +67,11 @@ #'reduce_train_to':300, 'num_hidden_layers':2}) +# Function called by jobman upon launching each job +# Its path is the one given when inserting jobs: +# ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint def jobman_entrypoint(state, channel): + # record mercurial versions of each package pylearn.version.record_versions(state,[theano,ift6266,pylearn]) channel.save() @@ -71,10 +79,12 @@ print "Will load NIST" - nist = NIST(20) + nist = NIST(minibatch_size=20) print "NIST loaded" + # For test runs, we don't want to use the whole dataset so + # reduce it to fewer elements if asked to. rtt = None if state.has_key('reduce_train_to'): rtt = state['reduce_train_to'] @@ -82,7 +92,7 @@ rtt = REDUCE_TRAIN_TO if rtt: - print "Reducing training set to "+str( rtt)+ " examples" + print "Reducing training set to "+str(rtt)+ " examples" nist.reduce_train_set(rtt) train,valid,test = nist.get_tvt() @@ -91,14 +101,9 @@ n_ins = 32*32 n_outs = 62 # 10 digits, 26*2 (lower, capitals) - hls = state.hidden_layers_sizes - cl = state.corruption_levels - nhl = state.num_hidden_layers - state.hidden_layers_sizes = [hls] * nhl - state.corruption_levels = [cl] * nhl - - # b,b',W for each hidden layer + b,W of last layer (logreg) - numparams = nhl * 3 + 2 + # b,b',W for each hidden layer + # + b,W of last layer (logreg) + numparams = state.num_hidden_layers * 3 + 2 series_mux = None series_mux = create_series(workingdir, numparams) @@ -114,11 +119,10 @@ optimizer.finetune() channel.save() - pylearn.version.record_versions(state,[theano,ift6266,pylearn]) - channel.save() - return channel.COMPLETE +# These Series objects are used to save various statistics +# during the training. def create_series(basedir, numparams): mux = SeriesMultiplexer() @@ -140,8 +144,11 @@ return mux +# Perform insertion into the Postgre DB based on combination +# of hyperparameter values above +# (see comment for produit_cartesien_jobs() to know how it works) def jobman_insert_nist(): - jobs = produit_croise_jobs(JOB_VALS) + jobs = produit_cartesien_jobs(JOB_VALS) db = jobman.sql.db(JOBDB) for job in jobs: @@ -227,35 +234,6 @@ raw_input("Press any key") -# hp for hyperparameters -def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'): - global DEFAULT_HP_NIST - hp = hp and hp or DEFAULT_HP_NIST - - print "Will load NIST" - - import time - t1 = time.time() - nist = NIST(20, reduce_train_to=100) - t2 = time.time() - - print "NIST loaded. time delta = ", t2-t1 - - train,valid,test = nist.get_tvt() - dataset = (train,valid,test) - - print train[0][15] - print type(train[0][1]) - - - print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0]) - - n_ins = 32*32 - n_outs = 62 # 10 digits, 26*2 (lower, capitals) - - optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0) - optimizer.train() - if __name__ == '__main__': import sys @@ -277,5 +255,4 @@ elif len(args) > 0 and args[0] == 'estimate': estimate_total_time() else: - sgd_optimization_nist() - + print "Bad arguments"