diff deep/stacked_dae/nist_sda.py @ 201:25444fc301e0

Branch merge
author Arnaud Bergeron <abergeron@gmail.com>
date Wed, 03 Mar 2010 16:46:16 -0500
parents e656edaedb48
children 6ea5dcf0541e e1f5f66dd7dd
line wrap: on
line diff
--- a/deep/stacked_dae/nist_sda.py	Tue Mar 02 20:16:30 2010 -0500
+++ b/deep/stacked_dae/nist_sda.py	Wed Mar 03 16:46:16 2010 -0500
@@ -21,7 +21,7 @@
 import jobman, jobman.sql
 from pylearn.io import filetensor
 
-from utils import produit_croise_jobs
+from utils import produit_cartesien_jobs
 
 from sgd_optimization import SdaSgdOptimizer
 
@@ -31,7 +31,7 @@
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
 
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/fsavard_sda2'
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
 
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
@@ -43,6 +43,10 @@
 
 EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
 
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
         'pretraining_epochs_per_layer': [10,20],
         'hidden_layers_sizes': [300,800],
@@ -63,7 +67,11 @@
                        #'reduce_train_to':300,
                        'num_hidden_layers':2})
 
+# Function called by jobman upon launching each job.
+# Its dotted path is the one given when inserting jobs:
+# ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
 def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
     channel.save()
 
@@ -71,10 +79,12 @@
 
     print "Will load NIST"
 
-    nist = NIST(20)
+    nist = NIST(minibatch_size=20)
 
     print "NIST loaded"
 
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
     rtt = None
     if state.has_key('reduce_train_to'):
         rtt = state['reduce_train_to']
@@ -82,7 +92,7 @@
         rtt = REDUCE_TRAIN_TO
 
     if rtt:
-        print "Reducing training set to "+str( rtt)+ " examples"
+        print "Reducing training set to "+str(rtt)+ " examples"
         nist.reduce_train_set(rtt)
 
     train,valid,test = nist.get_tvt()
@@ -91,14 +101,9 @@
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
 
-    hls = state.hidden_layers_sizes
-    cl = state.corruption_levels
-    nhl = state.num_hidden_layers
-    state.hidden_layers_sizes = [hls] * nhl
-    state.corruption_levels = [cl] * nhl
-
-    # b,b',W for each hidden layer + b,W of last layer (logreg)
-    numparams = nhl * 3 + 2
+    # Number of parameters: b, b', W for each hidden layer,
+    # plus b, W for the last layer (logreg)
+    numparams = state.num_hidden_layers * 3 + 2
     series_mux = None
     series_mux = create_series(workingdir, numparams)
 
@@ -114,11 +119,10 @@
     optimizer.finetune()
     channel.save()
 
-    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
-    channel.save()
-
     return channel.COMPLETE
 
+# These Series objects are used to save various statistics
+# during the training.
 def create_series(basedir, numparams):
     mux = SeriesMultiplexer()
 
@@ -140,8 +144,11 @@
 
     return mux
 
+# Insert jobs into the PostgreSQL DB, one for each combination
+# of the hyperparameter values in JOB_VALS above
+# (see the comment on produit_cartesien_jobs() for how it works).
 def jobman_insert_nist():
-    jobs = produit_croise_jobs(JOB_VALS)
+    jobs = produit_cartesien_jobs(JOB_VALS)
 
     db = jobman.sql.db(JOBDB)
     for job in jobs:
@@ -227,35 +234,6 @@
 
     raw_input("Press any key")
 
-# hp for hyperparameters
-def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
-    global DEFAULT_HP_NIST
-    hp = hp and hp or DEFAULT_HP_NIST
-
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20, reduce_train_to=100)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
-    print train[0][15]
-    print type(train[0][1])
-
-
-    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
-
-    n_ins = 32*32
-    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
-    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
-    optimizer.train()
-
 if __name__ == '__main__':
 
     import sys
@@ -277,5 +255,4 @@
     elif len(args) > 0 and args[0] == 'estimate':
         estimate_total_time()
     else:
-        sgd_optimization_nist()
-
+        print "Bad arguments"