diff deep/stacked_dae/stacked_dae.py @ 192:e656edaedb48

Commented a few things, renamed the produit_croise_jobs function, replaced the cost function (NOT TESTED YET).
author fsavard
date Wed, 03 Mar 2010 12:51:40 -0500
parents 3632e6258642
children e1f5f66dd7dd
line wrap: on
line diff
--- a/deep/stacked_dae/stacked_dae.py	Tue Mar 02 14:47:18 2010 -0500
+++ b/deep/stacked_dae/stacked_dae.py	Wed Mar 03 12:51:40 2010 -0500
@@ -10,6 +10,15 @@
 
 from utils import update_locals
 
+# taken from LeDeepNet/daa.py
+# has a special case when taking log(0) (defined =0)
+# modified to not take the mean anymore
+from theano.tensor.xlogx import xlogx, xlogy0
+# it's target*log(output)
+def binary_cross_entropy(target, output, sum_axis=1):
+    XE = xlogy0(target, output) + xlogy0((1 - target), (1 - output))
+    return -T.sum(XE, axis=sum_axis)
+
 class LogisticRegression(object):
     def __init__(self, input, n_in, n_out):
         # initialize with 0 the weights W as a matrix of shape (n_in, n_out) 
@@ -128,7 +137,8 @@
     # Equation (4)
     # note : we sum over the size of a datapoint; if we are using minibatches,
     #        L will  be a vector, with one entry per example in minibatch
-    self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
+    #self.L = - T.sum( self.x*T.log(self.z) + (1-self.x)*T.log(1-self.z), axis=1 ) 
+    self.L = binary_cross_entropy(target=self.x, output=self.z, sum_axis=1)
     # note : L is now a vector, where each element is the cross-entropy cost 
     #        of the reconstruction of the corresponding example of the 
     #        minibatch. We need to compute the average of all these to get 
@@ -156,6 +166,17 @@
         self.all_params         = []
         self.n_layers           = len(hidden_layers_sizes)
 
+        print "Creating SdA with params:"
+        print "batch_size", batch_size
+        print "hidden_layers_sizes", hidden_layers_sizes
+        print "corruption_levels", corruption_levels
+        print "n_ins", n_ins
+        print "n_outs", n_outs
+        print "pretrain_lr", pretrain_lr
+        print "finetune_lr", finetune_lr
+        print "input_divider", input_divider
+        print "----"
+
         self.shared_divider = theano.shared(numpy.asarray(input_divider, dtype=theano.config.floatX))
 
         if len(hidden_layers_sizes) < 1 :