# HG changeset patch
# User James Bergstra <bergstrj@iro.umontreal.ca>
# Date 1209766755 14400
# Node ID 76e5c0f371651b9053c7389b532402d1d8c8dcd0
# Parent  8c2607f387e61ec543df1bbc758ef73cb99a17d5
better docs & precondition testing for cross_entropy_softmax_1hot & friends

diff -r 8c2607f387e6 -r 76e5c0f37165 _nnet_ops.py
--- a/_nnet_ops.py	Mon Apr 21 15:23:49 2008 -0400
+++ b/_nnet_ops.py	Fri May 02 18:19:15 2008 -0400
@@ -23,10 +23,16 @@
     def test0(self):
         y_idx = [0,1,3]
         def output1(a,b):
-            return crossentropy_softmax_1hot(a, b, y_idx)[0:1]
+            return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1]
         TT.verify_grad(self, output1, [numpy.random.rand(3,4),
             numpy.random.rand(4)])
 
+    def test1(self):  # same check as test0, but through the wrapper that takes no bias argument
+        y_idx = [0,1,3]  # one target column index per row of the 3x4 input below
+        def output1(a):
+            return crossentropy_softmax_1hot(a, y_idx)[0:1]  # keep only the first output
+        TT.verify_grad(self, output1, [numpy.random.rand(3,4)])  # only x is varied here
+
 
 
 if __name__ == '__main__':
diff -r 8c2607f387e6 -r 76e5c0f37165 nnet_ops.py
--- a/nnet_ops.py	Mon Apr 21 15:23:49 2008 -0400
+++ b/nnet_ops.py	Fri May 02 18:19:15 2008 -0400
@@ -65,19 +65,30 @@
 #
 
 
-class CrossentropySoftmax1Hot(gof.op.Op):
-    """A special compound Op for the output of neural-net classifiers.
+class CrossentropySoftmax1HotWithBias(gof.op.Op):
+    """A special compound L{Op} for the output of neural-net classifiers.
+
+    @type x: a matrix of floats (32 or 64)
+    @type b: a [row] vector of floats (32 or 64), length is number of cols in x
+    @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
+
+    @precondition: every entry in y_idx is a valid (non-negative) column index into x
 
-    This Op has two outputs:
-    - KL(softmax(x), y)
-    - softmax(x)
+    This L{Op} has two outputs:
+     - KL(softmax(x+b), y)
+     - softmax(x+b)
 
-    x[i] is assumed to be a dense vector
+    
     softmax(x[i]) is the i'th distribution over len(x[i]) options
-    y[i] is an integer index, encoding a 1-hot distribution
+
+    y_idx[i] is an integer index, encoding a 1-hot distribution. 
+    
+    In practice, when we're trying to do classification, we have one row in x
+    and y_idx per example, and y_idx[i] is the index of the (correct) class of the
+    i'th example.
 
     """
-    nin=2
+    nin=3
     nout=2
     def __init__(self, x, b, y_idx, **kwargs):
         x = tensor._as_tensor(x)
@@ -102,7 +113,9 @@
     def perform(self):
         x, b, y_idx = [i.data for i in self.inputs]
         if b.shape[0] != x.shape[1]:
-            raise ValueError('b must have same shape as x[0]')
+            raise ValueError('b must have same number of columns as x')
+        if y_idx.shape[0] != x.shape[0]:
+            raise ValueError('y_idx must have same number of rows as x')
 
         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
@@ -116,8 +129,8 @@
     def grad(self, (x, b, y_idx), (g_nll, g_sm)):
         if g_sm is not None:
             raise NotImplementedError()
-        nll, sm = crossentropy_softmax_1hot(x, b, y_idx)
-        dx = CrossentropySoftmax1HotDx(g_nll, sm, y_idx).outputs[0]
+        nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
+        dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0]
         db = tensor.Sum(dx, axis = [0]).outputs[0]
         return dx, db, None
 
@@ -268,9 +281,10 @@
         }
         """ % dict(locals(), **sub)
 
-crossentropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot)
+crossentropy_softmax_1hot_with_bias = \
+        gof.op.constructor(CrossentropySoftmax1HotWithBias)
 
-class CrossentropySoftmax1HotDx (gof.op.Op):
+class CrossentropySoftmax1HotWithBiasDx (gof.op.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
@@ -348,3 +362,7 @@
         }
         """ % dict(locals(), **sub)
 
+def crossentropy_softmax_1hot(x, y_idx, **kwargs):  # bias-free front end to the *_with_bias op
+    b = tensor.zeros_like(x[0,:])  # zero bias built from row 0 of x (assumes x has at least one row)
+    return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)  # same outputs as the biased op
+