# HG changeset patch
# User Joseph Turian
# Date 1236726218 14400
# Node ID 6c602a86e7119b8965a77979c1abebd3e5fce463
# Parent  d8ad0ce259a6fd12f2098402fc9634af7a2eec4d
Refactored poisson loss.

diff -r d8ad0ce259a6 -r 6c602a86e711 pylearn/algorithms/sandbox/cost.py
--- a/pylearn/algorithms/sandbox/cost.py	Tue Mar 10 15:58:52 2009 -0400
+++ b/pylearn/algorithms/sandbox/cost.py	Tue Mar 10 19:03:38 2009 -0400
@@ -28,8 +28,9 @@
         return v
     def impl(self, x):
         return LogFactorial.st_impl(x)
-    def grad(self, (x,), (gz,)):
-        raise NotImplementedError('gradient not defined over discrete values')
+#    def grad(self, (x,), (gz,)):
+#        raise NotImplementedError('gradient not defined over discrete values')
+#        return None
 #        return [gz * (1 + scalar.log(x))]
 #    def c_code(self, node, name, (x,), (z,), sub):
 #        if node.inputs[0].type in [scalar.float32, scalar.float64]:
@@ -42,7 +43,22 @@
 
 logfactorial = tensor.Elemwise(scalar_logfactorial, name='logfactorial')
 
-def nlpoisson(target, output, beta_scale=1, axis=0):
+def poissonlambda(unscaled_output, doclen, beta_scale):
+    """
+    A continuous parameter lambda_i which is the expected number of
+    occurrences of word i in the document. Note that this must be positive,
+    which is why Ranzato and Szummer (2008) use an exponential.
+
+    Yoshua: I don't like exponentials to guarantee positivity. softplus
+    is numerically much better behaved (but you might want to try both
+    to see if it makes a difference).
+
+    @todo: Maybe there are more sensible ways to set the beta_scale.
+    """
+    beta = beta_scale * doclen
+    return beta * tensor.exp(unscaled_output)
+
+def nlpoisson(target, output, beta_scale=1, axis=0, sumloss=True, zerothreshold=0):
     """
     The negative log Poisson regression probability.
     From Ranzato and Szummer (2008).
@@ -58,11 +74,22 @@
 
     Axis is the axis along which we sum the target values, to
     obtain the document length.
+
+    If sumloss, we sum the loss along axis.
+
+    If zerothreshold is non-zero, we threshold the loss:
+    If this target dimension is zero and beta * tensor.exp(output)
+    < zerothreshold, let this loss be zero.
     """
 #    from theano.printing import Print
     doclen = tensor.sum(target, axis=axis)
-    beta = beta_scale * doclen
-    return tensor.sum(beta * tensor.exp(output) - target*output + logfactorial(target), axis=axis)
+    lambdav = poissonlambda(output, doclen, beta_scale)
+    lossterms = lambdav - target*output
+    if sumloss:
+        return tensor.sum(lossterms, axis=axis)
+    else:
+        return lossterms
+#    return tensor.sum(beta * tensor.exp(output) - target*output + logfactorial(target), axis=axis)
 
 
 #import numpy
diff -r d8ad0ce259a6 -r 6c602a86e711 pylearn/algorithms/sandbox/test_cost.py
--- a/pylearn/algorithms/sandbox/test_cost.py	Tue Mar 10 15:58:52 2009 -0400
+++ b/pylearn/algorithms/sandbox/test_cost.py	Tue Mar 10 19:03:38 2009 -0400
@@ -35,7 +35,9 @@
         output = TT.as_tensor([0., 1, 1., 0, 1, 0, 5, 1])
         loss = cost.nlpoisson(target, output)
         (goutput) = TT.grad(loss, [output])
+#        (goutput) = TT.grad(loss, [target])
         f = T.function([], goutput)
+        print f()
        self.failUnless(f() - 33751.7816277 < 1e-5)
 
 if __name__ == '__main__':
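
For reference, a minimal NumPy sketch (not part of the changeset; all names and example
values below are hypothetical) of the loss the refactored nlpoisson computes:
lambda_i = beta_scale * doclen * exp(output_i), with doclen = sum_i target_i, and
loss = sum_i (lambda_i - target_i * output_i). Note that the refactored code path drops
the logfactorial(target) term from the original return expression; that term is constant
in output, so gradients with respect to output are unchanged. A softplus variant is
sketched as well, following Yoshua's remark in the poissonlambda docstring.

import numpy as np

def nlpoisson_reference(target, output, beta_scale=1, axis=0, sumloss=True):
    # Hypothetical NumPy re-implementation of the patched nlpoisson,
    # for sanity-checking values outside of Theano.
    doclen = target.sum(axis=axis)
    beta = beta_scale * doclen            # same scaling as poissonlambda
    lambdav = beta * np.exp(output)       # expected word counts, kept positive by exp
    lossterms = lambdav - target * output # logfactorial(target) omitted, as in
                                          # the patched code; constant w.r.t. output
    return lossterms.sum(axis=axis) if sumloss else lossterms

def softplus_lambda(unscaled_output, doclen, beta_scale=1):
    # Hypothetical alternative positivity constraint per the docstring comment:
    # softplus(x) = log(1 + exp(x)) instead of exp(x).
    # np.logaddexp(0, x) computes this without overflow for large x.
    beta = beta_scale * doclen
    return beta * np.logaddexp(0.0, unscaled_output)

# Hypothetical example values (not the test's; its `target` is outside the hunk):
target = np.array([0., 0., 1., 1., 2., 2., 3., 3.])
output = np.array([0., 1., 1., 0., 1., 0., 5., 1.])
print(nlpoisson_reference(target, output))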