# HG changeset patch
# User fsavard
# Date 1267708722 18000
# Node ID e1f5f66dd7dda8f0c8ff2feaccc7020638f2b516
# Parent  e656edaedb486c452835e3c1fbe10e38b578858a
Changed the reconstruction cost for numerical stability, by adding a small constant inside the log.

diff -r e656edaedb48 -r e1f5f66dd7dd deep/stacked_dae/nist_sda.py
--- a/deep/stacked_dae/nist_sda.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/deep/stacked_dae/nist_sda.py	Thu Mar 04 08:18:42 2010 -0500
@@ -27,22 +27,25 @@
 
 from ift6266.utils.scalar_series import *
 
+##############################################################################
+# GLOBALS
+
 TEST_CONFIG = False
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
-
 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
+EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
 
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
-REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 1000
+
 if TEST_CONFIG:
     REDUCE_TRAIN_TO = 1000
     MAX_FINETUNING_EPOCHS = 2
     REDUCE_EVERY = 10
 
-EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
-
 # Possible values the hyperparameters can take. These are then
 # combined with produit_cartesien_jobs so we get a list of all
 # possible combinations, each one resulting in a job inserted
@@ -67,9 +70,11 @@
         #'reduce_train_to':300,
         'num_hidden_layers':2})
 
-# Function called by jobman upon launching each job
-# Its path is the one given when inserting jobs:
-# ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs:
+ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
+'''
 def jobman_entrypoint(state, channel):
     # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
@@ -247,12 +252,12 @@
         jobman_insert_nist()
 
     elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
-        def f():
-            pass
-        chanmock = DD({'COMPLETE':0,'save':f})
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
        jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
 
     elif len(args) > 0 and args[0] == 'estimate':
         estimate_total_time()
+
     else:
         print "Bad arguments"
+
diff -r e656edaedb48 -r e1f5f66dd7dd deep/stacked_dae/stacked_dae.py
--- a/deep/stacked_dae/stacked_dae.py	Wed Mar 03 12:51:40 2010 -0500
+++ b/deep/stacked_dae/stacked_dae.py	Thu Mar 04 08:18:42 2010 -0500
@@ -138,7 +138,15 @@
         # note : we sum over the size of a datapoint; if we are using minibatches,
         #        L will be a vector, with one entry per example in minibatch
         #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 )
-        self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+        #self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
+
+        # I added this epsilon to avoid getting log(0) and 1/0 in the gradient.
+        # Conceptually this means no reconstruction probability is ever exactly 0,
+        # but that doesn't seem very important to me (maybe I'm wrong?).
+        eps = 0.00000001
+        eps_1 = 1-eps
+        self.L = - T.sum( self.x * T.log(eps + eps_1*self.z) \
+                        + (1-self.x)*T.log(eps + eps_1*(1-self.z)), axis=1 )
         # note : L is now a vector, where each element is the cross-entropy cost
         #        of the reconstruction of the corresponding example of the
         #        minibatch. We need to compute the average of all these to get
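
Note (not part of the changeset): the snippet below is a minimal, standalone NumPy sketch of the stabilized reconstruction cost introduced in the stacked_dae.py hunk above. The function name and the toy x/z values are illustrative assumptions only; it shows that reconstructions of exactly 0 or 1 no longer produce log(0) or a division by zero in the gradient.

import numpy as np

def stabilized_cross_entropy(x, z, eps=1e-8):
    # Squash z into [eps, 1-eps] the same way the patch does, so that
    # log() never sees 0 and the gradient never divides by 0.
    eps_1 = 1.0 - eps
    return -np.sum(x * np.log(eps + eps_1 * z)
                   + (1 - x) * np.log(eps + eps_1 * (1 - z)), axis=1)

# Toy minibatch: two examples, three visible units; z contains exact 0s and 1s,
# which would make the unstabilized cost return inf/nan.
x = np.array([[1., 0., 1.], [0., 1., 0.]])
z = np.array([[1., 0., 0.], [0.5, 1., 0.]])
print(stabilized_cross_entropy(x, z))   # finite values, one per example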