changeset 446:23960ee12b52

Add argmax as a third output of the combined softmax / NLL op.
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Mon, 25 Aug 2008 18:15:43 -0400
parents 6eb0900fb553
children 0392b666320a
files nnet_ops.py
diffstat 1 files changed, 41 insertions(+), 13 deletions(-)
--- a/nnet_ops.py	Fri Aug 22 17:34:06 2008 -0400
+++ b/nnet_ops.py	Mon Aug 25 18:15:43 2008 -0400
@@ -323,7 +323,7 @@
     return softmax_with_bias(x, b, **kwargs)
 
 
-class CrossentropySoftmax1HotWithBias(theano.Op):
+class CrossentropySoftmaxArgmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
 
     @type x: is a matrix of floats (32 or 64)
@@ -332,13 +332,14 @@
 
     @precondition: every entry in y_idx is a valid (non-negative) column index into x
 
-    This L{Op} has two outputs:
+    This L{Op} has three outputs:
      - KL(softmax(x+b), y)
      - softmax(x+b)
+     - argmax(x+b)
 
 
     softmax(x[i]) is the i'th distribution over len(x[i]) options
-
+    argmax(x[i]) is the index of x[i]'s greatest element
     y_idx[i] is an integer index, encoding a 1-hot distribution. 
 
     In practice, when we're trying to do classification, we have one row in x
@@ -347,7 +348,7 @@
 
     """
     nin=3
-    nout=2
+    nout=3
     def __init__(self, **kwargs):
         theano.Op.__init__(self, **kwargs)
 
@@ -366,11 +367,12 @@
             raise ValueError('y_idx must be 1-d tensor of ints')
 
 #       TODO: Is this correct? It used to be y, not y_idx
-        nll = tensor.Tensor(x.type.dtype, 
+        nll = tensor.Tensor(x.type.dtype,
                 y_idx.type.broadcastable).make_result()
 #        nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
-        return theano.Apply(self, [x, b, y_idx], [nll, sm])
+        am = y_idx.type.make_result()
+        return theano.Apply(self, [x, b, y_idx], [nll, sm, am])
     def perform(self, node, input_storage, output_storage):
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
@@ -380,15 +382,18 @@
 
         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
+        am = numpy.zeros_like(y_idx)
         for i in xrange(sm.shape[0]):
             row = x[i] + b
-            sm[i] = numpy.exp(row - numpy.max(row)) #softmax
+            am[i] = numpy.argmax(row)
+            sm[i] = numpy.exp(row - row[am[i]]) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
-            nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
+            nll[i] = -numpy.log(sm[i, y_idx[i]]) #cross-entropy
         output_storage[0][0] = nll
         output_storage[1][0] = sm
-    def grad(self, (x, b, y_idx), (g_nll, g_sm)):
-        if g_sm is not None:
+        output_storage[2][0] = am
+    def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
+        if g_sm is not None or g_am is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
         dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
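
Two details of perform() are worth spelling out. First, the row maximum subtracted before exponentiating is exactly row[am[i]], so the argmax computed for the new third output is reused for free. Second, subtracting that maximum does not change the softmax (the constant factor cancels in the normalisation) but keeps exp() from overflowing on large activations. A small standalone check, not part of the changeset:

    import numpy

    row = numpy.array([1000., 1001., 999.])   # large logits: naive exp() overflows to inf
    m = row[numpy.argmax(row)]                 # the same value perform() reuses as row[am[i]]
    shifted = numpy.exp(row - m)               # largest term is exp(0) == 1.0
    print shifted / shifted.sum()              # ~ [ 0.2447  0.6652  0.0900]

Note also that grad() still only differentiates through the NLL output; requesting a gradient through the softmax or the (discrete, non-differentiable) argmax output raises NotImplementedError.
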
@@ -442,28 +447,40 @@
                 %(fail)s;
             }
         }
+        if ((NULL == %(am)s)
+            || (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0]))
+        {
+            Py_XDECREF(%(am)s);
+            %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(y_idx)s);
+            if(!%(am)s)
+            {
+                PyErr_SetString(PyExc_MemoryError, "failed to alloc am output");
+                %(fail)s;
+            }
+        }
                 """,
                 begin_row_loop,
                 """
             const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
             double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
+            %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
                 """,
                 inside_row_loop,
                 """
             nll_i[0] = - x_i[y_i*Sx]
                        - b_i[y_i*Sb]
                        + log(sum);
+            am_i[0] = row_max_j;
                 """,
                 end_row_loop)
 
 
-    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+    def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
         y_idx_type = node.inputs[2].type.dtype_specs()[1]
+        am_type = y_idx_type
         code_template = ''.join(self.c_code_template())
         return code_template % dict(locals(), **sub)
 
-crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
-
 class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
@@ -552,10 +569,21 @@
         }
         """ % dict(locals(), **sub)
 
+crossentropy_softmax_argmax_1hot_with_bias = \
+    CrossentropySoftmaxArgmax1HotWithBias()
+
+def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
+    return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2]
+
 def crossentropy_softmax_1hot(x, y_idx, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
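
The two functions just above preserve the old interface: crossentropy_softmax_argmax_1hot_with_bias is the shared op instance returning all three outputs, and crossentropy_softmax_1hot_with_bias slices off the first two so existing callers are unaffected. A rough usage sketch, assuming symbolic-variable constructors such as tensor.dmatrix / tensor.dvector / tensor.lvector (the exact spelling may differ in this revision of Theano):

    import theano.tensor as tensor
    import nnet_ops

    x = tensor.dmatrix()       # activations, one row per example (assumed constructor)
    b = tensor.dvector()       # per-class bias
    y_idx = tensor.lvector()   # target class index per example

    # New three-output entry point: NLL, softmax(x+b), argmax(x+b).
    nll, sm, am = nnet_ops.crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx)

    # Backward-compatible wrapper: same graph, only the first two outputs.
    nll2, sm2 = nnet_ops.crossentropy_softmax_1hot_with_bias(x, b, y_idx)
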
 
+
+class MultinomialCrossentropy1Hot(theano.Op):
+    pass
+
+
 def binary_crossentropy(output, target):
     """
     Compute the crossentropy of binary output wrt binary target.