Mercurial > pylearn
Comparison view of nnet_ops.py @ changeset 24:2e8be9f5412b
Commit message: "added nnet_ops"

author   | bergstrj@iro.umontreal.ca
date     | Thu, 10 Apr 2008 17:25:13 -0400
parents  | (none)
children | b63e8c0bf21b

Comparison (equal / deleted / inserted / replaced) between
changesets 23:526e192b0699 and 24:2e8be9f5412b.
1 import theano | |
2 from theano import tensor, gof, scalar | |
3 import numpy | |
4 | |
class ScalarSigmoid(scalar.UnaryScalarOp):
    """Elementwise logistic sigmoid as a scalar Theano Op: 1 / (1 + exp(-x)).

    The tensor (broadcasted) versions, including in-place variants, are
    constructed below via ``theano.tensor.broadcast``.
    """
    def impl(self, x):
        # Python/numpy forward implementation.
        # NOTE(review): numpy.exp(-x) can overflow for large negative x;
        # presumably the resulting inf yields 0.0 — confirm callers accept
        # the overflow warning this emits.
        return 1.0 / (1 + numpy.exp(-x))
    def grad(self, (x,), (gz,)):
        # d(sigmoid)/dx = sigmoid(x) * (1 - sigmoid(x)); chain rule with the
        # incoming gradient gz. Returns a 1-tuple (one gradient per input).
        return gz * scalar_sigmoid(x) * (1.0 - scalar_sigmoid(x)),
    def c_foreach(self, (x,), (z,)):
        # C source emitted for each scalar element; %(x)s / %(z)s are
        # substituted with the generated variable names via locals().
        return "%(z)s = 1.0 / (1 + exp(-%(x)s));" % locals()
# Callable constructor for the scalar op (used above in grad).
scalar_sigmoid = gof.op.constructor(ScalarSigmoid)
# Elementwise tensor class/constructor pairs, plus in-place variants.
Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace \
    = theano.tensor.broadcast(ScalarSigmoid, 'Sigmoid')
15 | |
16 | |
17 | |
class CrossentropySoftmax1Hot(gof.op.Op):
    """A special compound Op for the output of neural-net classifiers.

    This Op has two outputs:
    - KL(softmax(x), y)
    - softmax(x)

    x[i] is assumed to be a dense vector
    softmax(x[i]) is the i'th distribution over len(x[i]) options
    y[i] is an integer index, encoding a 1-hot distribution

    """
    nin=2    # number of symbolic inputs: x, y_idx
    nout=2   # number of symbolic outputs: nll, softmax(x)
    def __init__(self, x, y_idx,**kwargs):
        # Wrap raw arguments as Theano tensors.
        x = tensor._as_tensor(x)
        y_idx = tensor._as_tensor(y_idx)
        # TODO: Is this correct? It used to be y, not y_idx
        # nll has one entry per row of x, hence y_idx's broadcastable pattern.
        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
        # nll = Tensor(x.dtype, y.broadcastable)
        # Softmax output has the same shape/dtype pattern as x.
        sm = tensor.Tensor(x.dtype, x.broadcastable)
        self.inputs = [x, y_idx]
        self.outputs = [nll,sm]
    def perform(self):
        # Numpy implementation: row-wise softmax and negative log-likelihood.
        # Assumes x is 2-D (batch, classes) and y_idx is a 1-D integer
        # vector of target class indices — implied by the indexing below.
        x, y_idx = [i.data for i in self.inputs]
        sm = numpy.zeros_like(x) # softmax
        nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
        for i in xrange(sm.shape[0]):
            # Subtract the row max before exponentiating for numerical
            # stability, then normalize to a probability distribution.
            sm[i] = numpy.exp(x[i] - numpy.max(x[i])) #softmax
            sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
            nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
        self.outputs[0].data = nll
        self.outputs[1].data = sm
    def grad(self, (x, y_idx), (g_nll, g_sm)):
        # Gradient is only implemented w.r.t. the nll output; a gradient
        # flowing into the softmax output is not supported.
        if g_sm is not None:
            raise NotImplementedError()
        nll, sm = cross_entropy_softmax_1hot(x, y_idx)
        dx = CrossentropySoftmax1Hot.Dx(g_nll, sm, y_idx).outputs[0]
        # No gradient w.r.t. the integer targets.
        return dx, None

    class Dx (gof.op.Op):
        nin=3    # inputs: dy (grad of nll), sm (softmax), y_idx (targets)
        nout=1   # output: dx, gradient w.r.t. x
        """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
        def __init__(self, dy, sm, y_idx,**kwargs):
            dy = tensor._as_tensor(dy)
            sm = tensor._as_tensor(sm)
            y_idx = tensor._as_tensor(y_idx)
            self.inputs = [dy, sm, y_idx]
            # dx has the same shape/dtype pattern as the softmax output.
            self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)]
        def perform(self):
            # dx[i] = dy[i] * (sm[i] - onehot(y_idx[i])): the standard
            # softmax-cross-entropy gradient, computed row by row.
            dy,sm,y_idx = [i.data for i in self.inputs]
            dx = numpy.zeros_like(sm)
            for i in xrange(sm.shape[0]):
                dx[i] = dy[i] * sm[i] #vector scale
                dx[i, y_idx[i]] -= dy[i] #scalar decrement
            self.outputs[0].data = dx
        def grad(self, *args):
            # Second-order gradient not implemented.
            raise NotImplementedError()
# Callable constructor for the compound op (also used inside grad above).
cross_entropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot)
78 | |
79 #TODO: write a version of CrossentropySoftmax1Hot that accepts a bias for x, if | |
80 # this op needs to be faster. | |
81 |