# HG changeset patch
# User Olivier Delalleau
# Date 1243432779 14400
# Node ID 98a99aafd14a4fe424be73729740af4baebefe2f
# Parent  d42b4bcbb5822aacbb218767dff8c17996fd8b4a
# Parent  2881c67026c1456454c0276115783dc36f5cbafe
Merged

diff -r d42b4bcbb582 -r 98a99aafd14a pylearn/algorithms/cost.py
--- a/pylearn/algorithms/cost.py	Wed May 27 09:59:25 2009 -0400
+++ b/pylearn/algorithms/cost.py	Wed May 27 09:59:39 2009 -0400
@@ -17,6 +17,8 @@
 
 def cross_entropy(target, output, mean_axis=0, sum_axis=1):
     """
+    This is the cross-entropy over a binomial event, in which each dimension
+    is an independent binomial trial.
     @todo: This is essentially duplicated as nnet_ops.binary_crossentropy
     @warning: OUTPUT and TARGET are reversed in nnet_ops.binary_crossentropy
     """
@@ -25,6 +27,8 @@
 
 def KL_divergence(target, output):
     """
+    This is a KL divergence over a binomial event, in which each dimension
+    is an independent binomial trial.
     @note: We do not compute the mean, because if target and output have
     different shapes then the result will be garbled.
     """
diff -r d42b4bcbb582 -r 98a99aafd14a pylearn/algorithms/sgd.py
--- a/pylearn/algorithms/sgd.py	Wed May 27 09:59:25 2009 -0400
+++ b/pylearn/algorithms/sgd.py	Wed May 27 09:59:39 2009 -0400
@@ -5,7 +5,9 @@
 
 class StochasticGradientDescent(theano.Module):
     """Fixed stepsize gradient descent"""
-    def __init__(self, args, cost, params, gradients=None, stepsize=None, updates=None):
+    def __init__(self, args, cost, params,
+            gradients=None, stepsize=None,
+            updates=None, auxout=None):
         """
         :param stepsize: the step to take in (negative) gradient direction
         :type stepsize: None, scalar value, or scalar TensorVariable
@@ -13,6 +15,8 @@
         :param updates: extra symbolic updates to make when evating either step or step_cost
         (these override the gradients if necessary)
         :type updatess: dict Variable -> Variable
+        :type auxout: auxiliary outputs, list containing output symbols to
+                      compute at the same time as cost (for efficiency)
         """
         super(StochasticGradientDescent, self).__init__()
         self.stepsize_init = None
@@ -34,12 +38,12 @@
 
         if updates is not None:
             self._updates.update(updates)
-
+        auxout = auxout if auxout else []
         self.step = theano.Method(
-                args, [],
+                args, auxout,
                 updates=self._updates)
         self.step_cost = theano.Method(
-                args, cost,
+                args, [cost]+auxout,
                 updates=self._updates)
 
     updates = property(lambda self: self._updates.copy())
@@ -52,6 +56,7 @@
 
     :returns: standard minimizer constructor f(args, cost, params, gradient=None)
     """
-    def f(args, cost, params, gradient=None, updates=None):
-        return StochasticGradientDescent(args, cost, params, gradient, stepsize, updates=updates)
+    def f(args, cost, params, gradient=None, updates=None, auxout=None):
+        return StochasticGradientDescent(args, cost, params, gradient, stepsize,
+                updates=updates, auxout=auxout)
     return f
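
The docstring additions in cost.py describe each dimension as an independent binomial (Bernoulli) trial. As a reference for readers of the patch, the sketch below writes out those per-dimension quantities in plain NumPy; the function names, axis arguments, and reductions are illustrative assumptions and are not taken from the pylearn implementation.

    import numpy as np

    def binomial_cross_entropy(target, output, mean_axis=0, sum_axis=1):
        # Treat each dimension as an independent binomial trial:
        # CE = -[t*log(o) + (1-t)*log(1-o)], summed over dimensions (sum_axis)
        # and averaged over examples (mean_axis).
        xe = target * np.log(output) + (1 - target) * np.log(1 - output)
        return -np.mean(np.sum(xe, axis=sum_axis), axis=mean_axis)

    def binomial_kl_divergence(target, output):
        # Per-dimension KL(t || o) = t*log(t/o) + (1-t)*log((1-t)/(1-o)),
        # i.e. the cross-entropy minus the entropy of the target.  No mean is
        # taken here, echoing the docstring's note that averaging would garble
        # the result when target and output have different shapes.
        return (target * np.log(target / output)
                + (1 - target) * np.log((1 - target) / (1 - output)))

Both functions assume targets and outputs lie strictly in (0, 1).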
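
The sgd.py change threads a new auxout argument through to theano.Method so that step returns the requested auxiliary symbols and step_cost returns [cost] plus those symbols, all from the same compiled function as the cost rather than from a second compiled function and a second pass over the data. The fragment below is a rough plain-Python illustration of that intent, not the Theano Module/Method machinery; the model, cost, and update rule are made-up assumptions.

    import numpy as np

    def step_cost(w, x, y, stepsize=0.1, auxout=()):
        # Hypothetical stand-in for the compiled step_cost method: one forward
        # pass is shared by the cost, the parameter update (mirroring
        # updates=self._updates), and any auxiliary outputs requested via
        # auxout (a tuple of callables here, purely for illustration).
        pred = x.dot(w)                       # shared intermediate computation
        err = pred - y
        cost = np.mean(err ** 2)              # made-up squared-error cost
        grad = 2.0 * x.T.dot(err) / len(y)
        w -= stepsize * grad                  # in-place fixed-stepsize update
        aux = [f(pred) for f in auxout]       # auxiliary outputs, no extra pass
        return [cost] + aux

For example, step_cost(w, x, y, auxout=(lambda p: p.mean(),)) returns the cost together with the mean prediction computed from the same forward pass.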