diff deep/stacked_dae/nist_sda.py @ 185:b9ea8e2d071a

Removed the code for reusing pretraining results (too complicated for too little benefit: the finetuning is what really takes long)
author fsavard
date Fri, 26 Feb 2010 17:45:52 -0500
parents 1f5937e9e530
children d364a130b221
--- a/deep/stacked_dae/nist_sda.py	Fri Feb 26 15:25:44 2010 -0500
+++ b/deep/stacked_dae/nist_sda.py	Fri Feb 26 17:45:52 2010 -0500
@@ -1,71 +1,86 @@
 #!/usr/bin/python
 # coding: utf-8
 
+import ift6266
+import pylearn
+
 import numpy 
 import theano
 import time
+
+import pylearn.version
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
+
 import copy
-
 import sys
+import os
 import os.path
 
-from sgd_optimization import SdaSgdOptimizer
-
 from jobman import DD
 import jobman, jobman.sql
 from pylearn.io import filetensor
 
 from utils import produit_croise_jobs
 
-TEST_CONFIG = False
+from sgd_optimization import SdaSgdOptimizer
+
+SERIES_AVAILABLE = False
+try:
+    from scalar_series import *
+    SERIES_AVAILABLE = True
+except ImportError:
+    print "Could not import Series"
+
+TEST_CONFIG = True
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
 
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda2'
+
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
+REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
 if TEST_CONFIG:
-    JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
     REDUCE_TRAIN_TO = 1000
     MAX_FINETUNING_EPOCHS = 2
+    REDUCE_EVERY = 10
 
-JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
-JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
 EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"
 
-# There used to be
-# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
-# and
-#  'num_hidden_layers':[1,2,3]
-# but this is now handled by a special mechanism in SgdOptimizer
-# to reuse intermediate results (for the same training of lower layers,
-# we can test many finetuning_lr)
-JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
+JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
         'pretraining_epochs_per_layer': [10,20],
         'hidden_layers_sizes': [300,800],
-        'corruption_levels': [0.1,0.2],
+        'corruption_levels': [0.1,0.2,0.3],
         'minibatch_size': [20],
-        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
-FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
-NUM_HIDDEN_LAYERS_VALS = [1,2,3]
+        'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
+        'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out
+        'num_hidden_layers':[2,3]}
 
 # Just useful for tests... minimal number of epochs
 DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
                        'pretraining_lr':0.01,
                        'pretraining_epochs_per_layer':1,
                        'max_finetuning_epochs':1,
-                       'hidden_layers_sizes':[1000],
-                       'corruption_levels':[0.2],
-                       'minibatch_size':20})
+                       'hidden_layers_sizes':1000,
+                       'corruption_levels':0.2,
+                       'minibatch_size':20,
+                       'reduce_train_to':1000,
+                       'num_hidden_layers':1})
 
 def jobman_entrypoint(state, channel):
-    state = copy.copy(state)
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    channel.save()
+
+    workingdir = os.getcwd()
 
     print "Will load NIST"
+    sys.stdout.flush()
+
     nist = NIST(20)
+
     print "NIST loaded"
+    sys.stdout.flush()
 
     rtt = None
     if state.has_key('reduce_train_to'):
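
For reference, produit_croise_jobs imported from utils above expands JOB_VALS into one job dict per hyperparameter combination. A minimal sketch of such a cross product, assuming the real helper behaves like itertools.product over the dict's value lists (the names here are illustrative, not the actual utils code):

    import itertools

    def produit_croise_jobs_sketch(job_vals):
        # {'a': [1, 2], 'b': [3]} -> [{'a': 1, 'b': 3}, {'a': 2, 'b': 3}]
        keys = sorted(job_vals.keys())
        jobs = []
        for combo in itertools.product(*[job_vals[k] for k in keys]):
            jobs.append(dict(zip(keys, combo)))
        return jobs

With the JOB_VALS above (2 pretraining_lr x 2 pretraining_epochs_per_layer x 2 hidden_layers_sizes x 3 corruption_levels x 1 minibatch_size x 1 max_finetuning_epochs x 2 finetuning_lr x 2 num_hidden_layers), that comes to 96 jobs inserted by jobman_insert_nist below.
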
@@ -83,50 +98,58 @@
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
 
-    db = jobman.sql.db(JOBDB_RESULTS)
-    optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\
-                    input_divider=255.0, job_tree=True, results_db=db, \
-                    experiment=EXPERIMENT_PATH, \
-                    finetuning_lr_to_try=FINETUNING_LR_VALS, \
-                    num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
-    optimizer.train()
+    hls = state.hidden_layers_sizes
+    cl = state.corruption_levels
+    nhl = state.num_hidden_layers
+    state.hidden_layers_sizes = [hls] * nhl
+    state.corruption_levels = [cl] * nhl
+
+    # b,b',W for each hidden layer + b,W of last layer (logreg)
+    numparams = nhl * 3 + 2
+    series_mux = None
+    if SERIES_AVAILABLE:
+        series_mux = create_series(workingdir, numparams)
+
+    optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \
+                                    n_ins=n_ins, n_outs=n_outs,\
+                                    input_divider=255.0, series_mux=series_mux)
+
+    optimizer.pretrain()
+    channel.save()
+
+    optimizer.finetune()
+    channel.save()
+
+    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    channel.save()
 
     return channel.COMPLETE
 
-def estimate_pretraining_time(job):
-    job = DD(job)
-    # time spent on pretraining estimated as O(n^2) where n=num hidens
-    # no need to multiply by num_hidden_layers, as results from num=1 
-    # is reused for num=2, or 3, so in the end we get the same time
-    # as if we were training 3 times a single layer
-    # constants:
-    # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
-    # - 12 mins to finetune (per 1 epoch)
-    # basically the job_tree trick gives us a 5 times speedup on the
-    # pretraining time due to reusing for finetuning_lr
-    # and gives us a second x2 speedup for reusing previous layers
-    # to explore num_hidden_layers
-    return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \
-            * job.hidden_layer_sizes * job.hidden_layer_sizes)
+def create_series(basedir, numparams):
+    mux = SeriesMultiplexer()
+
+    # comment out series we don't want to save
+    mux.add_series(AccumulatorSeries(name="reconstruction_error",
+                    reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
+                    mean=True,
+                    directory=basedir, flush_every=1))
 
-def estimate_total_time():
-    jobs = produit_croise_jobs(JOB_VALS)
-    sumtime = 0.0
-    sum_without = 0.0
-    for job in jobs:
-        sumtime += estimate_pretraining_time(job)
-        # 12 mins per epoch * 30 epochs
-        # 5 finetuning_lr per pretraining combination
-    sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
-    sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
-    print "num jobs=", len(jobs)
-    print "estimate", sumtime/60, " hours"
-    print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without
+    mux.add_series(AccumulatorSeries(name="training_error",
+                    reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
+                    mean=True,
+                    directory=basedir, flush_every=1))
+
+    mux.add_series(BaseSeries(name="validation_error", directory=basedir, flush_every=1))
+    mux.add_series(BaseSeries(name="test_error", directory=basedir, flush_every=1))
+
+    mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir))
+
+    return mux
 
 def jobman_insert_nist():
     jobs = produit_croise_jobs(JOB_VALS)
 
-    db = jobman.sql.db(JOBDB_JOBS)
+    db = jobman.sql.db(JOBDB)
     for job in jobs:
         job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
         jobman.sql.insert_dict(job, db)
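
The AccumulatorSeries objects wired up in create_series above buffer one value per minibatch and record a mean every REDUCE_EVERY values. A minimal sketch of that accumulate-then-reduce behaviour, assuming an interface like the calls above (the real scalar_series classes may differ):

    class AccumulatorSketch(object):
        # Buffers appended values; every reduce_every appends,
        # records their mean (mimicking mean=True above) and clears.
        def __init__(self, reduce_every):
            self.reduce_every = reduce_every
            self.buf = []
            self.means = []

        def append(self, value):
            self.buf.append(value)
            if len(self.buf) == self.reduce_every:
                self.means.append(sum(self.buf) / float(len(self.buf)))
                self.buf = []

With flush_every=1 as in the real series above, each reduced value would presumably be written to disk as soon as it is recorded, keeping the on-disk series usable while a job is still running.
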
@@ -250,13 +273,11 @@
 
     elif len(args) > 0 and args[0] == 'jobman_insert':
         jobman_insert_nist()
-    elif len(args) > 0 and args[0] == 'test_job_tree':
-        # dont forget to comment out sql.inserts and make reduce_train_to=100
-        print "TESTING JOB TREE"
-        chanmock = {'COMPLETE':0}
-        hp = copy.copy(DEFAULT_HP_NIST)
-        hp.update({'reduce_train_to':100})
-        jobman_entrypoint(hp, chanmock)
+
+    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
+        chanmock = DD({'COMPLETE':0})
+        jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
+
     elif len(args) > 0 and args[0] == 'estimate':
         estimate_total_time()
     else:
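
One caveat on the test_jobman_entrypoint branch above: jobman_entrypoint calls channel.save(), and a bare DD({'COMPLETE':0}) does not appear to provide a save method, so a fuller mock may be needed. A hypothetical sketch of the minimal channel interface the entrypoint relies on (not part of jobman itself):

    class MockChannel(object):
        # Stands in for a jobman channel: the entrypoint reads
        # channel.COMPLETE and calls channel.save() after each phase.
        COMPLETE = 0

        def save(self):
            pass  # a real channel would persist the state dict here
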