pylearn: comparison of nnet_ops.py @ 117:3ef569b92fba
ported nnet_ops to new theano
author | James Bergstra <bergstrj@iro.umontreal.ca>
date | Wed, 07 May 2008 15:28:17 -0400
parents | 76e5c0f37165
children | 2ca8dccba270
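Note on the change (editorial summary, not part of the changeset): the port replaces the old `gof.op.Op` style, where `__init__` converted the inputs, set `self.inputs`/`self.outputs`, and `perform(self)` read `i.data`, with the new interface visible on the right-hand side of the diff: `make_node` wraps the inputs and returns a `theano.Apply` node, and `perform(self, node, input_storage, output_storage)` writes results into `output_storage`. A minimal sketch of that pattern, using a hypothetical `DoubleOp` that does not appear in nnet_ops.py:

```python
# Minimal sketch of the new-style Op interface adopted in this changeset.
# DoubleOp is a hypothetical example, not part of nnet_ops.py.
import theano
from theano import tensor
import numpy

class DoubleOp(theano.Op):
    def make_node(self, x):
        # graph construction: wrap the input and declare the output type
        x = tensor.as_tensor(x)
        return theano.Apply(self, [x], [x.type.make_result()])
    def perform(self, node, input_storage, output_storage):
        # numeric evaluation: read inputs, write the result into output storage
        x, = input_storage
        output_storage[0][0] = 2 * numpy.asarray(x)
```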
116:9330d941fa1f | 117:3ef569b92fba |
---|---|
1 import theano | 1 import theano |
2 from theano import tensor, gof, scalar | 2 from theano import tensor, scalar |
3 import numpy | 3 import numpy |
4 | 4 |
5 ############ | 5 ############ |
6 # | 6 # |
7 # SCALAR OPS | 7 # SCALAR OPS |
8 # | 8 # |
9 | 9 |
10 class ScalarSigmoid(scalar.FloatUnaryScalarOp): | 10 class ScalarSigmoid(scalar.UnaryScalarOp): |
11 @staticmethod | 11 @staticmethod |
12 def st_impl(x): | 12 def st_impl(x): |
13 if x < -30.0: | 13 if x < -30.0: |
14 return 0.0 | 14 return 0.0 |
15 if x > 30.0: | 15 if x > 30.0: |
18 def impl(self, x): | 18 def impl(self, x): |
19 return ScalarSigmoid.st_impl(x) | 19 return ScalarSigmoid.st_impl(x) |
20 def grad(self, (x,), (gz,)): | 20 def grad(self, (x,), (gz,)): |
21 y = scalar_sigmoid(x) | 21 y = scalar_sigmoid(x) |
22 return [gz * y * (1.0 - y)] | 22 return [gz * y * (1.0 - y)] |
23 def c_foreach(self, (x,), (z,), sub): | 23 def c_code(self, (x,), (z,), sub): |
24 if 'float' in self.inputs[0].dtype: | 24 if 'float' in self.inputs[0].dtype: |
25 return """%(z)s = | 25 return """%(z)s = |
26 %(x)s < -30.0 | 26 %(x)s < -30.0 |
27 ? 0.0 | 27 ? 0.0 |
28 : %(x)s > 30.0 | 28 : %(x)s > 30.0 |
29 ? 1.0 | 29 ? 1.0 |
30 : 1.0 /(1.0+exp(-%(x)s));""" % locals() | 30 : 1.0 /(1.0+exp(-%(x)s));""" % locals() |
31 raise NotImplementedError('only floatingpoint is implemented') | 31 return NotImplemented#Error('only floatingpoint is implemented') |
32 scalar_sigmoid = gof.op.constructor(ScalarSigmoid) | 32 scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid') |
33 Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace =\ | 33 sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid') |
34 tensor.broadcast(ScalarSigmoid, 'Sigmoid') | 34 |
35 | 35 class ScalarSoftplus(scalar.UnaryScalarOp): |
36 class ScalarSoftplus(scalar.FloatUnaryScalarOp): | |
37 @staticmethod | 36 @staticmethod |
38 def static_impl(x): | 37 def static_impl(x): |
39 if x < -30.0: | 38 if x < -30.0: |
40 return 0.0 | 39 return 0.0 |
41 if x > 30.0: | 40 if x > 30.0: |
43 return numpy.log1p(numpy.exp(x)) | 42 return numpy.log1p(numpy.exp(x)) |
44 def impl(self, x): | 43 def impl(self, x): |
45 return ScalarSoftplus.static_impl(x) | 44 return ScalarSoftplus.static_impl(x) |
46 def grad(self, (x,), (gz,)): | 45 def grad(self, (x,), (gz,)): |
47 return [gz * scalar_sigmoid(x)] | 46 return [gz * scalar_sigmoid(x)] |
48 def c_foreach(self, (x,), (z,), sub): | 47 def c_code(self, (x,), (z,), sub): |
49 if 'float' in self.inputs[0].dtype: | 48 if 'float' in self.inputs[0].dtype: |
50 return """%(z)s = | 49 return """%(z)s = |
51 %(x)s < -30.0 | 50 %(x)s < -30.0 |
52 ? 0.0 | 51 ? 0.0 |
53 : %(x)s > 30.0 | 52 : %(x)s > 30.0 |
54 ? %(x)s | 53 ? %(x)s |
55 : log1p(exp(%(x)s));""" % locals() | 54 : log1p(exp(%(x)s));""" % locals() |
56 raise NotImplementedError('only floating point x is implemented') | 55 return NotImplemented#Error('only floating point x is implemented') |
57 scalar_softplus = gof.op.constructor(ScalarSoftplus) | 56 scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus') |
58 Softplus, softplus, SoftplusInplace, softplus_inplace =\ | 57 softplus = tensor.Elemwise(scalar_softplus, name='softplus') |
59 tensor.broadcast(ScalarSoftplus, 'Softplus') | |
60 | 58 |
61 | 59 |
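Both scalar ops clamp their input at |x| > 30 to avoid overflow in `exp`, and `ScalarSoftplus.grad` returns `gz * scalar_sigmoid(x)`. A quick finite-difference check of that derivative (illustrative only, not part of the changeset):

```python
# Check that d/dx log1p(exp(x)) equals the logistic sigmoid, which is the
# identity ScalarSoftplus.grad relies on. Values are arbitrary examples.
import numpy

x, eps = 1.5, 1e-6
numeric = (numpy.log1p(numpy.exp(x + eps)) - numpy.log1p(numpy.exp(x - eps))) / (2 * eps)
analytic = 1.0 / (1.0 + numpy.exp(-x))   # sigmoid(x)
assert abs(numeric - analytic) < 1e-6
```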
62 ############ | 60 ############ |
63 # | 61 # |
64 # TENSOR OPS | 62 # TENSOR OPS |
65 # | 63 # |
66 | 64 |
67 | 65 class CrossentropySoftmax1HotWithBias(theano.Op): |
68 class CrossentropySoftmax1HotWithBias(gof.op.Op): | |
69 """A special compound L{Op} for the output of neural-net classifiers. | 66 """A special compound L{Op} for the output of neural-net classifiers. |
70 | 67 |
71 @type x: is a matrix of floats (32 or 64) | 68 @type x: is a matrix of floats (32 or 64) |
72 @type b: is a [row] vector of floats (32 or 64), length is number of cols in x | 69 @type b: is a [row] vector of floats (32 or 64), length is number of cols in x |
73 @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x | 70 @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x |
88 i'th example. | 85 i'th example. |
89 | 86 |
90 """ | 87 """ |
91 nin=3 | 88 nin=3 |
92 nout=2 | 89 nout=2 |
93 def __init__(self, x, b, y_idx, **kwargs): | 90 def __init__(self, **kwargs): |
94 x = tensor._as_tensor(x) | 91 theano.Op.__init__(self, **kwargs) |
95 b = tensor._as_tensor(b) | 92 |
96 y_idx = tensor._as_tensor(y_idx) | 93 def make_node(self, x, b, y_idx): |
97 if len(x.broadcastable) != 2 \ | 94 x = tensor.as_tensor(x) |
98 or x.dtype not in ['float32', 'float64']: | 95 b = tensor.as_tensor(b) |
| 96 y_idx = tensor.as_tensor(y_idx) |
| 97 if x.type.ndim != 2 \ |
| 98 or x.type.dtype not in ['float32', 'float64']: |
99 raise ValueError('x must be 2-d tensor of floats') | 99 raise ValueError('x must be 2-d tensor of floats') |
100 if len(b.broadcastable) != 1 \ | 100 if b.type.ndim != 1 \ |
101 or x.dtype not in ['float32', 'float64']: | 101 or x.type.dtype not in ['float32', 'float64']: |
102 raise ValueError('x must be 1-d tensor of floats') | 102 raise ValueError('x must be 1-d tensor of floats') |
103 if len(y_idx.broadcastable) != 1 \ | 103 if y_idx.type.ndim != 1 \ |
104 or y_idx.dtype not in ['int32', 'int64']: | 104 or y_idx.type.dtype not in ['int32', 'int64']: |
105 raise ValueError('x must be 1-d tensor of ints') | 105 raise ValueError('x must be 1-d tensor of ints') |
106 | 106 |
107 # TODO: Is this correct? It used to be y, not y_idx | 107 # TODO: Is this correct? It used to be y, not y_idx |
108 nll = tensor.Tensor(x.dtype, y_idx.broadcastable) | 108 nll = tensor.Tensor(x.type.dtype, |
| 109 y_idx.type.broadcastable).make_result() |
109 # nll = Tensor(x.dtype, y.broadcastable) | 110 # nll = Tensor(x.dtype, y.broadcastable) |
110 sm = tensor.Tensor(x.dtype, x.broadcastable) | 111 sm = x.type.make_result() |
111 self.inputs = [x, b, y_idx] | 112 return theano.Apply(self, [x, b, y_idx],[nll, sm]) |
112 self.outputs = [nll, sm] | 113 def perform(self, node, input_storage, output_storage): |
113 def perform(self): | 114 x, b, y_idx = input_storage |
114 x, b, y_idx = [i.data for i in self.inputs] | |
115 if b.shape[0] != x.shape[1]: | 115 if b.shape[0] != x.shape[1]: |
116 raise ValueError('b must have same number of columns as x') | 116 raise ValueError('b must have same number of columns as x') |
117 if y_idx.shape[0] != x.shape[0]: | 117 if y_idx.shape[0] != x.shape[0]: |
118 raise ValueError('y_idx must have same number of rows as x') | 118 raise ValueError('y_idx must have same number of rows as x') |
119 | 119 |
122 for i in xrange(sm.shape[0]): | 122 for i in xrange(sm.shape[0]): |
123 row = x[i] + b | 123 row = x[i] + b |
124 sm[i] = numpy.exp(row - numpy.max(row)) #softmax | 124 sm[i] = numpy.exp(row - numpy.max(row)) #softmax |
125 sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale | 125 sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale |
126 nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy | 126 nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy |
127 self.outputs[0].data = nll | 127 output_storage[0][0] = nll |
128 self.outputs[1].data = sm | 128 output_storage[1][0] = sm |
129 def grad(self, (x, b, y_idx), (g_nll, g_sm)): | 129 def grad(self, (x, b, y_idx), (g_nll, g_sm)): |
130 if g_sm is not None: | 130 if g_sm is not None: |
131 raise NotImplementedError() | 131 raise NotImplementedError() |
132 nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx) | 132 nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx) |
133 dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0] | 133 dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx) |
134 db = tensor.Sum(dx, axis = [0]).outputs[0] | 134 db = tensor.sum(dx, axis = [0]) |
135 return dx, db, None | 135 return dx, db, None |
136 | 136 |
137 def c_headers(self): return ['<iostream>'] | 137 def c_headers(self): return ['<iostream>'] |
138 def c_code(self, (x, b, y_idx), (nll, sm), sub): | 138 def c_code(self, (x, b, y_idx), (nll, sm), sub): |
139 # this implementation was lifted from | 139 # this implementation was lifted from |
278 + log(sum); | 278 + log(sum); |
279 //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate? | 279 //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate? |
280 //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum); | 280 //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum); |
281 } | 281 } |
282 """ % dict(locals(), **sub) | 282 """ % dict(locals(), **sub) |
283 | 283 crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias() |
284 crossentropy_softmax_1hot_with_bias = \ | 284 |
285 gof.op.constructor(CrossentropySoftmax1HotWithBias) | 285 class CrossentropySoftmax1HotWithBiasDx (theano.Op): |
286 | |
287 class CrossentropySoftmax1HotWithBiasDx (gof.op.Op): | |
288 nin=3 | 286 nin=3 |
289 nout=1 | 287 nout=1 |
290 """Gradient wrt x of the CrossentropySoftmax1Hot Op""" | 288 """Gradient wrt x of the CrossentropySoftmax1Hot Op""" |
291 def __init__(self, dy, sm, y_idx,**kwargs): | 289 def __init__(self, **kwargs): |
292 dy = tensor._as_tensor(dy) | 290 theano.Op.__init__(self,**kwargs) |
293 sm = tensor._as_tensor(sm) | 291 def make_node(self, dy, sm, y_idx,**kwargs): |
294 y_idx = tensor._as_tensor(y_idx) | 292 dy = tensor.as_tensor(dy) |
295 self.inputs = [dy, sm, y_idx] | 293 sm = tensor.as_tensor(sm) |
296 self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)] | 294 y_idx = tensor.as_tensor(y_idx) |
297 def perform(self): | 295 return theano.Apply(self, [dy, sm, y_idx],[sm.type.make_result()]) |
298 dy,sm,y_idx = [i.data for i in self.inputs] | 296 def perform(self, node, input_storage, output_storage): |
| 297 dy,sm,y_idx = input_storage |
299 dx = numpy.zeros_like(sm) | 298 dx = numpy.zeros_like(sm) |
300 for i in xrange(sm.shape[0]): | 299 for i in xrange(sm.shape[0]): |
301 dx[i] = dy[i] * sm[i] #vector scale | 300 dx[i] = dy[i] * sm[i] #vector scale |
302 dx[i, y_idx[i]] -= dy[i] #scalar decrement | 301 dx[i, y_idx[i]] -= dy[i] #scalar decrement |
303 self.outputs[0].data = dx | 302 output_storage[0][0] = dx |
304 def grad(self, *args): | 303 def grad(self, *args): |
305 raise NotImplementedError() | 304 raise NotImplementedError() |
306 def c_code(self, (dnll, sm, y_idx), (dx,), sub): | 305 def c_code(self, (dnll, sm, y_idx), (dx,), sub): |
307 return """ | 306 return """ |
308 | 307 |
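The `perform` loop of CrossentropySoftmax1HotWithBiasDx, `dx[i] = dy[i] * sm[i]` followed by `dx[i, y_idx[i]] -= dy[i]`, is the usual softmax cross-entropy gradient `dy[i] * (sm[i] - onehot(y_idx[i]))`. A small check with made-up values (illustrative only, not part of the changeset):

```python
# Check that the Dx perform loop matches the closed-form gradient.
import numpy

dy = numpy.array([2.0])
sm = numpy.array([[0.2, 0.5, 0.3]])
y_idx = numpy.array([1])

dx_loop = dy[0] * sm[0]               # vector scale, as in perform
dx_loop[y_idx[0]] -= dy[0]            # scalar decrement at the target class

onehot = numpy.zeros_like(sm[0])
onehot[y_idx[0]] = 1.0
dx_closed = dy[0] * (sm[0] - onehot)

assert numpy.allclose(dx_loop, dx_closed)
```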