diff deep/stacked_dae/nist_sda.py @ 185:b9ea8e2d071a
Removed the code for reusing pretraining results (too complicated for too little benefit: the finetuning is what really takes long).
| author | fsavard |
|---|---|
| date | Fri, 26 Feb 2010 17:45:52 -0500 |
| parents | 1f5937e9e530 |
| children | d364a130b221 |
line diff
```diff
--- a/deep/stacked_dae/nist_sda.py	Fri Feb 26 15:25:44 2010 -0500
+++ b/deep/stacked_dae/nist_sda.py	Fri Feb 26 17:45:52 2010 -0500
@@ -1,71 +1,86 @@
 #!/usr/bin/python
 # coding: utf-8
 
+import ift6266
+import pylearn
+
 import numpy
 import theano
 import time
+
+import pylearn.version
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
+
 import copy
-
 import sys
+import os
 import os.path
 
-from sgd_optimization import SdaSgdOptimizer
-
 from jobman import DD
 import jobman, jobman.sql
 from pylearn.io import filetensor
 
 from utils import produit_croise_jobs
 
-TEST_CONFIG = False
+from sgd_optimization import SdaSgdOptimizer
+
+SERIES_AVAILABLE = False
+try:
+    from scalar_series import *
+    SERIES_AVAILABLE = True
+except ImportError:
+    print "Could not import Series"
+
+TEST_CONFIG = True
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
 
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda2'
+
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
+REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
 if TEST_CONFIG:
-    JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
     REDUCE_TRAIN_TO = 1000
     MAX_FINETUNING_EPOCHS = 2
+    REDUCE_EVERY = 10
 
-JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
-JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
 EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"
 
-# There used to be
-# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
-# and
-# 'num_hidden_layers':[1,2,3]
-# but this is now handled by a special mechanism in SgdOptimizer
-# to reuse intermediate results (for the same training of lower layers,
-# we can test many finetuning_lr)
 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
         'pretraining_epochs_per_layer': [10,20],
         'hidden_layers_sizes': [300,800],
-        'corruption_levels': [0.1,0.2],
+        'corruption_levels': [0.1,0.2,0.3],
         'minibatch_size': [20],
-        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
-FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
-NUM_HIDDEN_LAYERS_VALS = [1,2,3]
+        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
+        'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out
+        'num_hidden_layers':[2,3]}
 
 # Just useful for tests... minimal number of epochs
 DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
                        'pretraining_lr':0.01,
                        'pretraining_epochs_per_layer':1,
                        'max_finetuning_epochs':1,
-                       'hidden_layers_sizes':[1000],
-                       'corruption_levels':[0.2],
-                       'minibatch_size':20})
+                       'hidden_layers_sizes':1000,
+                       'corruption_levels':0.2,
+                       'minibatch_size':20,
+                       'reduce_train_to':1000,
+                       'num_hidden_layers':1})
 
 def jobman_entrypoint(state, channel):
-    state = copy.copy(state)
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    channel.save()
+
+    workingdir = os.getcwd()
 
     print "Will load NIST"
+    sys.stdout.flush()
+
     nist = NIST(20)
+
     print "NIST loaded"
+    sys.stdout.flush()
 
     rtt = None
     if state.has_key('reduce_train_to'):
@@ -83,50 +98,58 @@
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
 
-    db = jobman.sql.db(JOBDB_RESULTS)
-    optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\
-                    input_divider=255.0, job_tree=True, results_db=db, \
-                    experiment=EXPERIMENT_PATH, \
-                    finetuning_lr_to_try=FINETUNING_LR_VALS, \
-                    num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
-    optimizer.train()
+    hls = state.hidden_layers_sizes
+    cl = state.corruption_levels
+    nhl = state.num_hidden_layers
+    state.hidden_layers_sizes = [hls] * nhl
+    state.corruption_levels = [cl] * nhl
+
+    # b,b',W for each hidden layer + b,W of last layer (logreg)
+    numparams = nhl * 3 + 2
+    series_mux = None
+    if SERIES_AVAILABLE:
+        series_mux = create_series(workingdir, numparams)
+
+    optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    input_divider=255.0, series_mux=series_mux)
+
+    optimizer.pretrain()
+    channel.save()
+
+    optimizer.finetune()
+    channel.save()
+
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    channel.save()
 
     return channel.COMPLETE
 
-def estimate_pretraining_time(job):
-    job = DD(job)
-    # time spent on pretraining estimated as O(n^2) where n=num hidens
-    # no need to multiply by num_hidden_layers, as results from num=1
-    # is reused for num=2, or 3, so in the end we get the same time
-    # as if we were training 3 times a single layer
-    # constants:
-    # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
-    # - 12 mins to finetune (per 1 epoch)
-    # basically the job_tree trick gives us a 5 times speedup on the
-    # pretraining time due to reusing for finetuning_lr
-    # and gives us a second x2 speedup for reusing previous layers
-    # to explore num_hidden_layers
-    return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \
-            * job.hidden_layer_sizes * job.hidden_layer_sizes)
+def create_series(basedir, numparams):
+    mux = SeriesMultiplexer()
+
+    # comment out series we don't want to save
+    mux.add_series(AccumulatorSeries(name="reconstruction_error",
+                    reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
+                    mean=True,
+                    directory=basedir, flush_every=1))
 
-def estimate_total_time():
-    jobs = produit_croise_jobs(JOB_VALS)
-    sumtime = 0.0
-    sum_without = 0.0
-    for job in jobs:
-        sumtime += estimate_pretraining_time(job)
-    # 12 mins per epoch * 30 epochs
-    # 5 finetuning_lr per pretraining combination
-    sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
-    sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
-    print "num jobs=", len(jobs)
-    print "estimate", sumtime/60, " hours"
-    print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without
+    mux.add_series(AccumulatorSeries(name="training_error",
+                    reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
+                    mean=True,
+                    directory=basedir, flush_every=1))
+
+    mux.add_series(BaseSeries(name="validation_error", directory=basedir, flush_every=1))
+    mux.add_series(BaseSeries(name="test_error", directory=basedir, flush_every=1))
+
+    mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir))
+
+    return mux
 
 def jobman_insert_nist():
     jobs = produit_croise_jobs(JOB_VALS)
 
-    db = jobman.sql.db(JOBDB_JOBS)
+    db = jobman.sql.db(JOBDB)
     for job in jobs:
         job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
         jobman.sql.insert_dict(job, db)
@@ -250,13 +273,11 @@
     elif len(args) > 0 and args[0] == 'jobman_insert':
         jobman_insert_nist()
 
-    elif len(args) > 0 and args[0] == 'test_job_tree':
-        # dont forget to comment out sql.inserts and make reduce_train_to=100
-        print "TESTING JOB TREE"
-        chanmock = {'COMPLETE':0}
-        hp = copy.copy(DEFAULT_HP_NIST)
-        hp.update({'reduce_train_to':100})
-        jobman_entrypoint(hp, chanmock)
+
+    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0})
+        jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
+
     elif len(args) > 0 and args[0] == 'estimate':
         estimate_total_time()
     else:
```
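For context, `jobman_insert_nist()` relies on `produit_croise_jobs` (imported from `utils`, not touched by this changeset) to turn `JOB_VALS` into one jobman job per hyperparameter combination. The sketch below is only an assumption of that behaviour: a plain cross-product over the value lists, using a hypothetical helper name and plain dicts instead of jobman `DD` objects.

```python
# Sketch (assumption): cross-product expansion of a JOB_VALS-style grid, as
# jobman_insert_nist() appears to expect from produit_croise_jobs().
# The real helper lives in utils.py and may differ (e.g. it may return DD objects).
import itertools

def cross_product_jobs(val_dict):
    """Yield one dict per combination of the value lists in val_dict."""
    keys = sorted(val_dict)
    for combo in itertools.product(*(val_dict[k] for k in keys)):
        yield dict(zip(keys, combo))

if __name__ == "__main__":
    # Reduced grid for illustration only (a subset of the keys in JOB_VALS).
    grid = {'pretraining_lr': [0.1, 0.01],
            'hidden_layers_sizes': [300, 800],
            'num_hidden_layers': [2, 3]}
    jobs = list(cross_product_jobs(grid))
    print(len(jobs))  # 2 * 2 * 2 = 8 combinations for this reduced grid
```

If `produit_croise_jobs` is indeed a plain cross-product, the full `JOB_VALS` grid in this changeset (2·2·2·3·1·1·2·2 value choices) would expand to 96 jobs inserted into the database pointed to by `JOBDB`.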