changeset 204:e1f5f66dd7dd

Changé le coût de reconstruction pour stabilité numérique, en ajoutant une petite constante dans le log.
author fsavard
date Thu, 04 Mar 2010 08:18:42 -0500
parents e656edaedb48
children 10a801240bfc
files deep/stacked_dae/nist_sda.py deep/stacked_dae/stacked_dae.py
diffstat 2 files changed, 24 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/deep/stacked_dae/nist_sda.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/deep/stacked_dae/nist_sda.py	Thu Mar 04 08:18:42 2010 -0500
@@ -27,22 +27,25 @@
 
 from ift6266.utils.scalar_series import *
 
+##############################################################################
+# GLOBALS
+
 TEST_CONFIG = False
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
-
 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
+EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
 
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
-REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
+# Number of minibatches to process before taking means (of validation error, etc.).
+REDUCE_EVERY = 1000
+
 if TEST_CONFIG:
     REDUCE_TRAIN_TO = 1000
     MAX_FINETUNING_EPOCHS = 2
     REDUCE_EVERY = 10
 
-EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
-
 # Possible values the hyperparameters can take. These are then
 # combined with produit_cartesien_jobs so we get a list of all
 # possible combinations, each one resulting in a job inserted
@@ -67,9 +70,11 @@
                        #'reduce_train_to':300,
                        'num_hidden_layers':2})
 
-# Function called by jobman upon launching each job
-# Its path is the one given when inserting jobs:
-# ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs:
+ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
+'''
 def jobman_entrypoint(state, channel):
     # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
@@ -247,12 +252,12 @@
         jobman_insert_nist()
 
     elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
-        def f():
-            pass
-        chanmock = DD({'COMPLETE':0,'save':f})
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
         jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
 
     elif len(args) > 0 and args[0] == 'estimate':
         estimate_total_time()
+
     else:
         print "Bad arguments"
+
--- a/deep/stacked_dae/stacked_dae.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/deep/stacked_dae/stacked_dae.py	Thu Mar 04 08:18:42 2010 -0500
@@ -138,7 +138,15 @@
     # note : we sum over the size of a datapoint; if we are using minibatches,
     #        L will  be a vector, with one entry per example in minibatch
     #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
-    self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+    #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+
+    # A small epsilon is added inside each log to avoid log(0) in the cost
+    # and 1/0 in its gradient. Conceptually, this means no probability is ever
+    # exactly 0, which doesn't seem that important to me (maybe I'm wrong?).
+    eps = 0.00000001
+    eps_1 = 1-eps
+    self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+                    + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
     # note : L is now a vector, where each element is the cross-entropy cost 
     #        of the reconstruction of the corresponding example of the 
     #        minibatch. We need to compute the average of all these to get