changeset 632:a11b7fbf3171

Cost should be summed over the units but averaged across the batch. Taking the mean over both axes divides the gradient by the number of units, forcing larger-than-necessary learning rates to compensate (see the sketch below).
author desjagui@atchoum.iro.umontreal.ca
date Wed, 21 Jan 2009 03:20:16 -0500
parents 4e7781972999
children e242c12eb30d
files pylearn/algorithms/cost.py
diffstat 1 files changed, 3 insertions(+), 2 deletions(-)
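
For intuition, a minimal NumPy sketch (shapes, seed, and variable names are hypothetical, not part of the patch) contrasting the two reductions; it assumes the old per-example costs were subsequently averaged over the batch:

    import numpy as np

    # Hypothetical batch: 4 examples, 10 output units each.
    rng = np.random.default_rng(0)
    target = rng.integers(0, 2, size=(4, 10)).astype(float)
    output = rng.uniform(0.1, 0.9, size=(4, 10))

    # Per-element binary cross-entropy, as in the patch.
    XE = target * np.log(output) + (1 - target) * np.log(1 - output)

    # Old behaviour: mean over the unit axis too, so the cost (and its
    # gradient) is divided by the number of units (10 here).
    old_cost = -XE.mean(axis=1).mean()

    # New behaviour: sum over units, mean over the batch axis only.
    new_cost = -np.mean(np.sum(XE, axis=1), axis=0)

    assert np.isclose(old_cost * 10, new_cost)

The two costs differ only by a constant factor equal to the number of units; with the blind mean, that factor would have to be absorbed by the learning rate.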
--- a/pylearn/algorithms/cost.py	Mon Jan 19 18:35:43 2009 -0500
+++ b/pylearn/algorithms/cost.py	Wed Jan 21 03:20:16 2009 -0500
@@ -14,12 +14,13 @@
 def quadratic(target, output, axis=1):
     return T.mean(T.sqr(target - output), axis=axis)
 
-def cross_entropy(target, output, axis=1):
+def cross_entropy(target, output, mean_axis=0, sum_axis=1):
     """
     @todo: This is essentially duplicated as nnet_ops.binary_crossentropy
     @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy
     """
-    return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
+    XE = target * T.log(output) + (1 - target) * T.log(1 - output)
+    return -T.mean(T.sum(XE, axis=sum_axis), axis=mean_axis)
 
 def KL_divergence(target, output):
     """