# HG changeset patch
# User Olivier Delalleau
# Date 1243432779 14400
# Node ID 98a99aafd14a4fe424be73729740af4baebefe2f
# Parent  d42b4bcbb5822aacbb218767dff8c17996fd8b4a
# Parent  2881c67026c1456454c0276115783dc36f5cbafe
Merged

diff -r d42b4bcbb582 -r 98a99aafd14a pylearn/algorithms/cost.py
--- a/pylearn/algorithms/cost.py	Wed May 27 09:59:25 2009 -0400
+++ b/pylearn/algorithms/cost.py	Wed May 27 09:59:39 2009 -0400
@@ -17,6 +17,8 @@
 
 def cross_entropy(target, output, mean_axis=0, sum_axis=1):
     """
+    This is the cross-entropy over a binomial event, in which each dimension
+    is an independent binomial trial.
     @todo: This is essentially duplicated as nnet_ops.binary_crossentropy
     @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy
     """
@@ -25,6 +27,8 @@
 
 def KL_divergence(target, output):
     """
+    This is a KL divergence over a binomial event, in which each dimension
+    is an independent binomial trial.
     @note: We do not compute the mean, because if target and output have
     different shapes then the result will be garbled.
     """
diff -r d42b4bcbb582 -r 98a99aafd14a pylearn/algorithms/sgd.py
--- a/pylearn/algorithms/sgd.py	Wed May 27 09:59:25 2009 -0400
+++ b/pylearn/algorithms/sgd.py	Wed May 27 09:59:39 2009 -0400
@@ -5,7 +5,9 @@
 
 class StochasticGradientDescent(theano.Module):
     """Fixed stepsize gradient descent"""
-    def __init__(self, args, cost, params, gradients=None, stepsize=None, updates=None):
+    def __init__(self, args, cost, params,
+            gradients=None, stepsize=None,
+            updates=None, auxout=None):
         """
         :param stepsize: the step to take in (negative) gradient direction
         :type stepsize: None, scalar value, or scalar TensorVariable
@@ -13,6 +15,8 @@
         :param updates: extra symbolic updates to make when evating either step or step_cost
         (these override the gradients if necessary)
         :type updatess: dict Variable -> Variable
+        :type auxout: auxiliary outputs, list containing output symbols to
+                      compute at the same time as cost (for efficiency)
         """
         super(StochasticGradientDescent, self).__init__()
         self.stepsize_init = None
@@ -34,12 +38,12 @@
 
         if updates is not None:
             self._updates.update(updates)
-
+        auxout = auxout if auxout else []
         self.step = theano.Method(
-                args, [],
+                args, auxout,
                 updates=self._updates)
         self.step_cost = theano.Method(
-                args, cost,
+                args, [cost]+auxout,
                 updates=self._updates)
 
     updates = property(lambda self: self._updates.copy())
@@ -52,6 +56,7 @@
 
     :returns: standard minimizer constructor f(args, cost, params, gradient=None)
     """
-    def f(args, cost, params, gradient=None, updates=None):
-        return StochasticGradientDescent(args, cost, params, gradient, stepsize, updates=updates)
+    def f(args, cost, params, gradient=None, updates=None, auxout=None):
+        return StochasticGradientDescent(args, cost, params, gradient, stepsize,
+                updates=updates, auxout=auxout)
     return f
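
The docstring additions in cost.py describe each dimension as an independent binomial (Bernoulli) trial. As a reference for readers of the patch, the sketch below writes out those per-dimension quantities in plain NumPy; the function names, axis arguments, and reductions are illustrative assumptions and are not taken from the pylearn implementation.

    import numpy as np

    def binomial_cross_entropy(target, output, mean_axis=0, sum_axis=1):
        # Treat each dimension as an independent binomial trial:
        # CE = -[t*log(o) + (1-t)*log(1-o)], summed over dimensions (sum_axis)
        # and averaged over examples (mean_axis).
        xe = target * np.log(output) + (1 - target) * np.log(1 - output)
        return -np.mean(np.sum(xe, axis=sum_axis), axis=mean_axis)

    def binomial_kl_divergence(target, output):
        # Per-dimension KL(t || o) = t*log(t/o) + (1-t)*log((1-t)/(1-o)),
        # i.e. the cross-entropy minus the entropy of the target.  No mean is
        # taken here, echoing the docstring's note that averaging would garble
        # the result when target and output have different shapes.
        return (target * np.log(target / output)
                + (1 - target) * np.log((1 - target) / (1 - output)))

Both functions assume targets and outputs lie strictly in (0, 1).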
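
The sgd.py change threads a new auxout argument through to theano.Method so that step returns the requested auxiliary symbols and step_cost returns [cost] plus those symbols, all from the same compiled function as the cost rather than from a second compiled function and a second pass over the data. The fragment below is a rough plain-Python illustration of that intent, not the Theano Module/Method machinery; the model, cost, and update rule are made-up assumptions.

    import numpy as np

    def step_cost(w, x, y, stepsize=0.1, auxout=()):
        # Hypothetical stand-in for the compiled step_cost method: one forward
        # pass is shared by the cost, the parameter update (mirroring
        # updates=self._updates), and any auxiliary outputs requested via
        # auxout (a tuple of callables here, purely for illustration).
        pred = x.dot(w)                       # shared intermediate computation
        err = pred - y
        cost = np.mean(err ** 2)              # made-up squared-error cost
        grad = 2.0 * x.T.dot(err) / len(y)
        w -= stepsize * grad                  # in-place fixed-stepsize update
        aux = [f(pred) for f in auxout]       # auxiliary outputs, no extra pass
        return [cost] + aux

For example, step_cost(w, x, y, auxout=(lambda p: p.mean(),)) returns the cost together with the mean prediction computed from the same forward pass.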