comparison nnet_ops.py @ 117:3ef569b92fba

ported nnet_ops to new theano
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 07 May 2008 15:28:17 -0400
parents 76e5c0f37165
children 2ca8dccba270
comparing 116:9330d941fa1f with 117:3ef569b92fba

 import theano
-from theano import tensor, gof, scalar
+from theano import tensor, scalar
 import numpy

 ############
 #
 # SCALAR OPS
 #

-class ScalarSigmoid(scalar.FloatUnaryScalarOp):
+class ScalarSigmoid(scalar.UnaryScalarOp):
     @staticmethod
     def st_impl(x):
         if x < -30.0:
             return 0.0
         if x > 30.0:
[... unchanged lines omitted ...]
     def impl(self, x):
         return ScalarSigmoid.st_impl(x)
     def grad(self, (x,), (gz,)):
         y = scalar_sigmoid(x)
         return [gz * y * (1.0 - y)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
                 %(x)s < -30.0
                 ? 0.0
                 : %(x)s > 30.0
                 ? 1.0
                 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
-        raise NotImplementedError('only floatingpoint is implemented')
-scalar_sigmoid = gof.op.constructor(ScalarSigmoid)
-Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace =\
-        tensor.broadcast(ScalarSigmoid, 'Sigmoid')
+        return NotImplemented#Error('only floatingpoint is implemented')
+scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
+sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid')
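
For reference, the port replaces the old gof.op.constructor / tensor.broadcast pattern with a scalar op instance (ScalarSigmoid(scalar.upgrade_to_float, ...)) wrapped by tensor.Elemwise; the numerics are unchanged. A minimal standalone NumPy sketch (not part of the changeset, independent of either Theano revision) of the clamped sigmoid in st_impl and a finite-difference check of the gradient rule gz * y * (1.0 - y) used in grad:

    import numpy

    def sigmoid(x):
        # mirrors ScalarSigmoid.st_impl: clamp to avoid overflow in exp()
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return 1.0
        return 1.0 / (1.0 + numpy.exp(-x))

    # central-difference check of d sigmoid(x)/dx == y * (1 - y)
    x, eps = 0.3, 1e-6
    numeric = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)
    y = sigmoid(x)
    assert abs(numeric - y * (1.0 - y)) < 1e-6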

-class ScalarSoftplus(scalar.FloatUnaryScalarOp):
+class ScalarSoftplus(scalar.UnaryScalarOp):
     @staticmethod
     def static_impl(x):
         if x < -30.0:
             return 0.0
         if x > 30.0:
[... unchanged lines omitted ...]
         return numpy.log1p(numpy.exp(x))
     def impl(self, x):
         return ScalarSoftplus.static_impl(x)
     def grad(self, (x,), (gz,)):
         return [gz * scalar_sigmoid(x)]
-    def c_foreach(self, (x,), (z,), sub):
+    def c_code(self, (x,), (z,), sub):
         if 'float' in self.inputs[0].dtype:
             return """%(z)s =
                 %(x)s < -30.0
                 ? 0.0
                 : %(x)s > 30.0
                 ? %(x)s
                 : log1p(exp(%(x)s));""" % locals()
-        raise NotImplementedError('only floating point x is implemented')
-scalar_softplus = gof.op.constructor(ScalarSoftplus)
-Softplus, softplus, SoftplusInplace, softplus_inplace =\
-        tensor.broadcast(ScalarSoftplus, 'Softplus')
+        return NotImplemented#Error('only floating point x is implemented')
+scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
+softplus = tensor.Elemwise(scalar_softplus, name='softplus')

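Like the sigmoid, the softplus clamps its argument: below -30.0 it returns 0.0, and above 30.0 it returns x itself, since log1p(exp(x)) is within rounding error of x there while the naive log(1 + exp(x)) eventually overflows. A small standalone NumPy check of that reasoning (illustration only, not part of the changeset; the tolerance is my own choice):

    import numpy

    def softplus(x):
        # mirrors ScalarSoftplus.static_impl
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return x
        return numpy.log1p(numpy.exp(x))

    # at the clamp boundary the approximation error is negligible
    assert abs(numpy.log1p(numpy.exp(30.0)) - 30.0) < 1e-12
    # far past it, the naive formula overflows while the clamped one does not
    old = numpy.seterr(over='ignore')
    naive = numpy.log(1.0 + numpy.exp(800.0))      # exp overflows -> inf
    numpy.seterr(**old)
    assert numpy.isinf(naive) and softplus(800.0) == 800.0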

 ############
 #
 # TENSOR OPS
 #

-class CrossentropySoftmax1HotWithBias(gof.op.Op):
+class CrossentropySoftmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.

     @type x: is a matrix of floats (32 or 64)
     @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
     @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
[... unchanged lines omitted ...]
     i'th example.

     """
     nin=3
     nout=2
-    def __init__(self, x, b, y_idx, **kwargs):
-        x = tensor._as_tensor(x)
-        b = tensor._as_tensor(b)
-        y_idx = tensor._as_tensor(y_idx)
-        if len(x.broadcastable) != 2 \
-                or x.dtype not in ['float32', 'float64']:
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, x, b, y_idx):
+        x = tensor.as_tensor(x)
+        b = tensor.as_tensor(b)
+        y_idx = tensor.as_tensor(y_idx)
+        if x.type.ndim != 2 \
+                or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('x must be 2-d tensor of floats')
-        if len(b.broadcastable) != 1 \
-                or x.dtype not in ['float32', 'float64']:
+        if b.type.ndim != 1 \
+                or x.type.dtype not in ['float32', 'float64']:
             raise ValueError('x must be 1-d tensor of floats')
-        if len(y_idx.broadcastable) != 1 \
-                or y_idx.dtype not in ['int32', 'int64']:
+        if y_idx.type.ndim != 1 \
+                or y_idx.type.dtype not in ['int32', 'int64']:
             raise ValueError('x must be 1-d tensor of ints')

         # TODO: Is this correct? It used to be y, not y_idx
-        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
+        nll = tensor.Tensor(x.type.dtype,
+                y_idx.type.broadcastable).make_result()
         # nll = Tensor(x.dtype, y.broadcastable)
-        sm = tensor.Tensor(x.dtype, x.broadcastable)
-        self.inputs = [x, b, y_idx]
-        self.outputs = [nll, sm]
-    def perform(self):
-        x, b, y_idx = [i.data for i in self.inputs]
+        sm = x.type.make_result()
+        return theano.Apply(self, [x, b, y_idx],[nll, sm])
+    def perform(self, node, input_storage, output_storage):
+        x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
             raise ValueError('y_idx must have same number of rows as x')

[... unchanged lines omitted ...]
         for i in xrange(sm.shape[0]):
             row = x[i] + b
             sm[i] = numpy.exp(row - numpy.max(row)) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
             nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
-        self.outputs[0].data = nll
-        self.outputs[1].data = sm
+        output_storage[0][0] = nll
+        output_storage[1][0] = sm
     def grad(self, (x, b, y_idx), (g_nll, g_sm)):
         if g_sm is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
-        dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0]
-        db = tensor.Sum(dx, axis = [0]).outputs[0]
+        dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
+        db = tensor.sum(dx, axis = [0])
         return dx, db, None

     def c_headers(self): return ['<iostream>']
     def c_code(self, (x, b, y_idx), (nll, sm), sub):
         # this implementation was lifted from
[... unchanged lines omitted ...]
                 + log(sum);
             //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate?
             //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum);
         }
         """ % dict(locals(), **sub)
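
The perform above fuses a numerically stabilized, row-wise softmax of x + b with the negative log-likelihood of the integer targets: each row is shifted by its maximum before exponentiation, normalized, and the log-probability of the target column is negated. A vectorized standalone NumPy sketch of the same forward computation (illustration only; the helper name is mine, not part of the changeset):

    import numpy

    def crossentropy_softmax_1hot_with_bias_np(x, b, y_idx):
        # x: (n, m) floats, b: (m,) floats, y_idx: (n,) ints
        act = x + b                                  # bias added to every row
        act = act - act.max(axis=1)[:, None]         # shift for stability
        sm = numpy.exp(act)
        sm /= sm.sum(axis=1)[:, None]                # row-wise softmax
        nll = -numpy.log(sm[numpy.arange(x.shape[0]), y_idx])
        return nll, sm

    x = numpy.random.randn(4, 3)
    b = numpy.random.randn(3)
    y_idx = numpy.array([0, 2, 1, 1])
    nll, sm = crossentropy_softmax_1hot_with_bias_np(x, b, y_idx)
    assert numpy.allclose(sm.sum(axis=1), 1.0) and (nll >= 0).all()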
-
-crossentropy_softmax_1hot_with_bias = \
-        gof.op.constructor(CrossentropySoftmax1HotWithBias)
-
-class CrossentropySoftmax1HotWithBiasDx (gof.op.Op):
+crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
+
+class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
-    def __init__(self, dy, sm, y_idx,**kwargs):
-        dy = tensor._as_tensor(dy)
-        sm = tensor._as_tensor(sm)
-        y_idx = tensor._as_tensor(y_idx)
-        self.inputs = [dy, sm, y_idx]
-        self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)]
-    def perform(self):
-        dy,sm,y_idx = [i.data for i in self.inputs]
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self,**kwargs)
+    def make_node(self, dy, sm, y_idx,**kwargs):
+        dy = tensor.as_tensor(dy)
+        sm = tensor.as_tensor(sm)
+        y_idx = tensor.as_tensor(y_idx)
+        return theano.Apply(self, [dy, sm, y_idx],[sm.type.make_result()])
+    def perform(self, node, input_storage, output_storage):
+        dy,sm,y_idx = input_storage
         dx = numpy.zeros_like(sm)
         for i in xrange(sm.shape[0]):
             dx[i] = dy[i] * sm[i] #vector scale
             dx[i, y_idx[i]] -= dy[i] #scalar decrement
-        self.outputs[0].data = dx
+        output_storage[0][0] = dx
     def grad(self, *args):
         raise NotImplementedError()
     def c_code(self, (dnll, sm, y_idx), (dx,), sub):
         return """

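The Dx op's loop computes, per example, dy[i] * sm[i] with dy[i] subtracted at the target column, which is the usual softmax/cross-entropy identity: the gradient of nll[i] with respect to the i'th row of activations is softmax(row) minus the one-hot target, scaled by the incoming gradient. A standalone NumPy sketch equating the loop with that vectorized form (illustration only; both helpers are mine, not part of the changeset):

    import numpy

    def dx_loop(dy, sm, y_idx):
        # mirrors CrossentropySoftmax1HotWithBiasDx.perform
        dx = numpy.zeros_like(sm)
        for i in range(sm.shape[0]):
            dx[i] = dy[i] * sm[i]        # vector scale
            dx[i, y_idx[i]] -= dy[i]     # scalar decrement at the target
        return dx

    def dx_vectorized(dy, sm, y_idx):
        onehot = numpy.zeros_like(sm)
        onehot[numpy.arange(sm.shape[0]), y_idx] = 1.0
        return dy[:, None] * (sm - onehot)

    sm = numpy.random.rand(4, 3)
    sm /= sm.sum(axis=1)[:, None]
    dy = numpy.random.randn(4)
    y_idx = numpy.array([2, 0, 1, 1])
    assert numpy.allclose(dx_loop(dy, sm, y_idx), dx_vectorized(dy, sm, y_idx))

The bias gradient in the parent op's grad is then just the column-wise sum of this dx, matching db = tensor.sum(dx, axis = [0]) above.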