import theano
from theano import tensor, gof, scalar
import numpy

class ScalarSigmoid(scalar.UnaryScalarOp):
    def impl(self, x):
        return 1.0 / (1 + numpy.exp(-x))
    def grad(self, (x,), (gz,)):
        return gz * scalar_sigmoid(x) * (1.0 - scalar_sigmoid(x)),
    def c_foreach(self, (x,), (z,)):
        return "%(z)s = 1.0 / (1 + exp(-%(x)s));" % locals()
scalar_sigmoid = gof.op.constructor(ScalarSigmoid)
Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace \
    = theano.tensor.broadcast(ScalarSigmoid, 'Sigmoid')
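
# A quick sanity check of the identity used by ScalarSigmoid.grad above:
# sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)). This is an illustrative
# standalone numpy sketch (the helper name is hypothetical, not part of the
# Op API), comparing the analytic expression against a centered finite
# difference.
def _check_sigmoid_grad():
    sigma = lambda v: 1.0 / (1 + numpy.exp(-v))
    v = numpy.linspace(-5, 5, 11)
    analytic = sigma(v) * (1.0 - sigma(v))                   # identity in grad()
    eps = 1e-6
    numeric = (sigma(v + eps) - sigma(v - eps)) / (2 * eps)  # centered difference
    assert numpy.allclose(analytic, numeric)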


class CrossentropySoftmax1Hot(gof.op.Op):
    """A special compound Op for the output of neural-net classifiers.

    This Op has two outputs:
    - KL(softmax(x), y)
    - softmax(x)

    x[i] is assumed to be a dense vector
    softmax(x[i]) is the i'th distribution over len(x[i]) options
    y[i] is an integer index, encoding a 1-hot distribution

    """
    nin = 2
    nout = 2
    def __init__(self, x, y_idx, **kwargs):
        x = tensor._as_tensor(x)
        y_idx = tensor._as_tensor(y_idx)
        # TODO: Is this correct? It used to be y, not y_idx
        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
        # nll = Tensor(x.dtype, y.broadcastable)
        sm = tensor.Tensor(x.dtype, x.broadcastable)
        self.inputs = [x, y_idx]
        self.outputs = [nll, sm]
    def perform(self):
        x, y_idx = [i.data for i in self.inputs]
        sm = numpy.zeros_like(x)       # softmax
        nll = numpy.zeros(x.shape[0])  # nll(y | softmax(x))
        for i in xrange(sm.shape[0]):
            sm[i] = numpy.exp(x[i] - numpy.max(x[i]))  # softmax (shifted for stability)
            sm[i] *= 1.0 / numpy.sum(sm[i])            # vector scale
            nll[i] = -numpy.log(sm[i, y_idx[i]])       # cross-entropy
        self.outputs[0].data = nll
        self.outputs[1].data = sm
    def grad(self, (x, y_idx), (g_nll, g_sm)):
        if g_sm is not None:
            raise NotImplementedError()
        nll, sm = cross_entropy_softmax_1hot(x, y_idx)
        dx = CrossentropySoftmax1Hot.Dx(g_nll, sm, y_idx).outputs[0]
        return dx, None
    class Dx(gof.op.Op):
        """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
        nin = 3
        nout = 1
        def __init__(self, dy, sm, y_idx, **kwargs):
            dy = tensor._as_tensor(dy)
            sm = tensor._as_tensor(sm)
            y_idx = tensor._as_tensor(y_idx)
            self.inputs = [dy, sm, y_idx]
            self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)]
        def perform(self):
            dy, sm, y_idx = [i.data for i in self.inputs]
            dx = numpy.zeros_like(sm)
            for i in xrange(sm.shape[0]):
                dx[i] = dy[i] * sm[i]        # vector scale
                dx[i, y_idx[i]] -= dy[i]     # scalar decrement
            self.outputs[0].data = dx
        def grad(self, *args):
            raise NotImplementedError()
cross_entropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot)
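
# The value of fusing softmax with the negative log-likelihood is that the
# gradient wrt x collapses to dy[i] * (softmax(x[i]) - onehot(y_idx[i])),
# which is exactly what Dx.perform computes row by row. Below is an
# illustrative standalone numpy sketch (the helper names are hypothetical,
# outside the Op machinery above) comparing that expression against a
# centered finite difference.
def _check_crossentropy_softmax_grad():
    def nll_of(x, y_idx):
        # mirrors CrossentropySoftmax1Hot.perform
        sm = numpy.exp(x - x.max(axis=1)[:, None])
        sm /= sm.sum(axis=1)[:, None]
        return -numpy.log(sm[numpy.arange(x.shape[0]), y_idx]), sm
    rng = numpy.random.RandomState(0)
    x = rng.randn(4, 3)
    y_idx = rng.randint(0, 3, size=4)
    dy = rng.randn(4)                     # upstream gradient on nll
    nll, sm = nll_of(x, y_idx)
    dx = dy[:, None] * sm                 # vector scale, as in Dx.perform
    dx[numpy.arange(4), y_idx] -= dy      # scalar decrement
    eps = 1e-6
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            xp = x.copy(); xp[i, j] += eps
            xm = x.copy(); xm[i, j] -= eps
            num = numpy.dot(dy, nll_of(xp, y_idx)[0] - nll_of(xm, y_idx)[0]) / (2 * eps)
            assert abs(dx[i, j] - num) < 1e-5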

# TODO: write a version of CrossentropySoftmax1Hot that accepts a bias for x,
#       if this op needs to be faster.
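
# For reference, the fused-bias variant hinted at in the TODO above would only
# change the softmax step of perform. A hypothetical numpy sketch (the bias
# argument `b` is an assumption, not part of the Op as written):
def _softmax_with_bias(x, b):
    z = x + b                                   # add the bias before shifting
    sm = numpy.exp(z - z.max(axis=1)[:, None])  # shifted for numerical stability
    return sm / sm.sum(axis=1)[:, None]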