diff nnet_ops.py @ 27:e6c550cb2896

Merging? what?
author bengioy@grenat.iro.umontreal.ca
date Fri, 11 Apr 2008 11:16:09 -0400
parents b63e8c0bf21b
children bf0145fa73e8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nnet_ops.py	Fri Apr 11 11:16:09 2008 -0400
@@ -0,0 +1,81 @@
+import theano
+from theano import tensor, gof, scalar
+import numpy
+
+class ScalarSigmoid(scalar.UnaryScalarOp):  # scalar op for 1 / (1 + exp(-x))
+    def impl(self, x):  # numeric value, evaluated with numpy
+        return 1.0 / (numpy.exp(-x) + 1)
+    def grad(self, (x,), (gz,)):  # one-element tuple: gz * s * (1 - s), s = scalar_sigmoid(x)
+        return (gz * scalar_sigmoid(x) * (1.0 - scalar_sigmoid(x)),)
+    def c_foreach(self, (x,), (z,)):  # C-style statement text storing the sigmoid of x in z
+        return "%(z)s = 1.0 / (1 + exp(-%(x)s));" % {'z': z, 'x': x}
+scalar_sigmoid = gof.op.constructor(ScalarSigmoid)  # callable form used by ScalarSigmoid.grad
+# broadcast() yields four objects; presumably the tensor-level op class and constructor, plus in-place variants (TODO confirm).
+(Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace) = tensor.broadcast(
+    ScalarSigmoid, 'Sigmoid')
+
+
+class CrossentropySoftmax1Hot(gof.op.Op):
+    """A special compound Op for the output of neural-net classifiers.
+
+    This Op has two outputs:
+    - KL(softmax(x), y), computed as -log(softmax(x[i])[y_idx[i]]) for each row i
+    - softmax(x)
+
+    x[i] is assumed to be a dense vector
+    softmax(x[i]) is the i'th distribution over len(x[i]) options
+    y_idx[i] is an integer index, encoding a 1-hot distribution
+    """
+    nin=2
+    nout=2
+    def __init__(self, x, y_idx,**kwargs):
+        x = tensor._as_tensor(x)
+        y_idx = tensor._as_tensor(y_idx)
+        # TODO: confirm that nll should take its broadcastable pattern from y_idx
+        # (perform() writes one nll entry per row of x); both outputs use x's dtype.
+        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
+        sm = tensor.Tensor(x.dtype, x.broadcastable)
+        self.inputs = [x, y_idx]
+        self.outputs = [nll,sm]
+    def perform(self):
+        x, y_idx = [i.data for i in self.inputs]
+        sm = numpy.zeros_like(x) # softmax
+        # allocate with x's dtype so the value matches the output type declared in __init__
+        nll = numpy.zeros(x.shape[0], dtype=x.dtype) #nll(y | softmax(x))
+        for i in xrange(sm.shape[0]):
+            sm[i] = numpy.exp(x[i] - numpy.max(x[i])) # max-shifted so exp() only sees values <= 0
+            sm[i] *= 1.0 / numpy.sum(sm[i]) # normalise the row so it sums to 1
+            nll[i] = -numpy.log( sm[i, y_idx[i]]) # cross-entropy against the 1-hot target
+        self.outputs[0].data = nll
+        self.outputs[1].data = sm
+    def grad(self, (x, y_idx), (g_nll, g_sm)):
+        if g_sm is not None: # only a gradient arriving through nll is supported
+            raise NotImplementedError()
+        nll, sm = crossentropy_softmax_1hot(x, y_idx)
+        dx = CrossentropySoftmax1Hot.Dx(g_nll, sm, y_idx).outputs[0]
+        return dx, None # None: no gradient is returned for y_idx
+
+    class Dx (gof.op.Op):
+        """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
+        nin=3
+        nout=1
+        def __init__(self, dy, sm, y_idx,**kwargs):
+            dy = tensor._as_tensor(dy)
+            sm = tensor._as_tensor(sm)
+            y_idx = tensor._as_tensor(y_idx)
+            self.inputs = [dy, sm, y_idx]
+            self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)] # dx has sm's dtype and pattern
+        def perform(self):
+            dy,sm,y_idx = [i.data for i in self.inputs]
+            dx = numpy.zeros_like(sm)
+            for i in xrange(sm.shape[0]):
+                dx[i] = dy[i] * sm[i] # row i: softmax scaled by the incoming gradient
+                dx[i, y_idx[i]] -= dy[i] # subtract dy[i] at the target column
+            self.outputs[0].data = dx
+        def grad(self, *args):
+            raise NotImplementedError()
+crossentropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot)  # called by CrossentropySoftmax1Hot.grad
+
+# TODO: if this op needs to be faster, write a version of
+# CrossentropySoftmax1Hot that accepts a bias for x.
+