# HG changeset patch
# User desjagui@atchoum.iro.umontreal.ca
# Date 1232526016 18000
# Node ID a11b7fbf3171d4ae866fdbc0c75dac5abcaba72f
# Parent 4e778197299974dac2933eab0d4659363284bf4a
Cost should be summed over the units, but averaged across batches. Taking the mean blindly results in larger than necessary learning rates.

diff -r 4e7781972999 -r a11b7fbf3171 pylearn/algorithms/cost.py
--- a/pylearn/algorithms/cost.py Mon Jan 19 18:35:43 2009 -0500
+++ b/pylearn/algorithms/cost.py Wed Jan 21 03:20:16 2009 -0500
@@ -14,12 +14,13 @@
 def quadratic(target, output, axis=1):
     return T.mean(T.sqr(target - output), axis=axis)
 
-def cross_entropy(target, output, axis=1):
+def cross_entropy(target, output, mean_axis=0, sum_axis=1):
     """
     @todo: This is essentially duplicated as nnet_ops.binary_crossentropy
     @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy
     """
-    return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
+    XE = target * T.log(output) + (1 - target) * T.log(1 - output)
+    return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis)
 
 def KL_divergence(target, output):
     """