comparison: nnet_ops.py @ 457:34acf8db186d (against 456:131e19dfe793)

Deprecated pylearn.nnet_ops. Use theano.sandbox.nnet_ops instead.

author   Joseph Turian <turian@iro.umontreal.ca>
date     Tue, 07 Oct 2008 18:21:32 -0400
parents  2bb67e978c28
children 2bef0768bc27
In 457:34acf8db186d the module is reduced to a deprecation stub. The new file
begins with

    import sys
    sys.stderr.write("Use theano.sandbox.nnet_ops instead of pylearn.nnet_ops.\n")
    if 0:

and the previous contents of 456:131e19dfe793 are kept, otherwise unchanged,
inside the "if 0:" block so that none of the old module is executed on import.
The old module, reproduced below, was:

## This file contains ops that are not currently integrated in the core of theano.
## Not all of those ops have been thoroughly tested.

import theano
from theano import tensor, scalar
import numpy

############
#
# SCALAR OPS
#
class ScalarSigmoid(scalar.UnaryScalarOp):
    @staticmethod
    def st_impl(x):
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return 1.0
        return 1.0 / (1.0 + numpy.exp(-x))
    def impl(self, x):
        return ScalarSigmoid.st_impl(x)
    def grad(self, (x,), (gz,)):
        y = scalar_sigmoid(x)
        return [gz * y * (1.0 - y)]
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in [scalar.float32, scalar.float64]:
            return """%(z)s =
                %(x)s < -30.0
                ? 0.0
                : %(x)s > 30.0
                ? 1.0
                : 1.0 /(1.0+exp(-%(x)s));""" % locals()
        raise NotImplementedError('only floatingpoint is implemented')
scalar_sigmoid = ScalarSigmoid(scalar.upgrade_to_float, name='scalar_sigmoid')
sigmoid = tensor.Elemwise(scalar_sigmoid, name='sigmoid')

class ScalarSoftplus(scalar.UnaryScalarOp):
    @staticmethod
    def static_impl(x):
        if x < -30.0:
            return 0.0
        if x > 30.0:
            return x
        return numpy.log1p(numpy.exp(x))
    def impl(self, x):
        return ScalarSoftplus.static_impl(x)
    def grad(self, (x,), (gz,)):
        return [gz * scalar_sigmoid(x)]
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in [scalar.float32, scalar.float64]:
            return """%(z)s =
                %(x)s < -30.0
                ? 0.0
                : %(x)s > 30.0
                ? %(x)s
                : log1p(exp(%(x)s));""" % locals()
        raise NotImplementedError('only floating point x is implemented')
scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
softplus = tensor.Elemwise(scalar_softplus, name='softplus')
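As a rough editorial illustration (not part of the changeset): the two scalar
ops above are plain NumPy formulas with a +/-30 cutoff, and ScalarSoftplus.grad
relies on the identity d/dx softplus(x) = sigmoid(x). A minimal sketch, using
hypothetical ref_* helper names:

    # Editorial sketch: NumPy reference versions of the two scalar ops above,
    # including the +/-30 clipping used by st_impl / static_impl.
    import numpy

    def ref_sigmoid(x):
        if x < -30.0: return 0.0
        if x > 30.0:  return 1.0
        return 1.0 / (1.0 + numpy.exp(-x))

    def ref_softplus(x):
        if x < -30.0: return 0.0
        if x > 30.0:  return x
        return numpy.log1p(numpy.exp(x))

    # d/dx softplus(x) = sigmoid(x), which is why ScalarSoftplus.grad
    # returns gz * scalar_sigmoid(x).
    x = 1.3
    eps = 1e-6
    numeric = (ref_softplus(x + eps) - ref_softplus(x - eps)) / (2 * eps)
    assert abs(numeric - ref_sigmoid(x)) < 1e-6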
############
#
# TENSOR OPS
#

class SoftmaxWithBias(theano.Op):
    """
    An L{Op} for the output of neural-net multiclass classifiers.

    @type x: is a matrix of floats (32 or 64)
    @type b: is a [row] vector of floats (32 or 64), length is number of cols in x

    This L{Op}'s output is softmax(x+b).
    softmax(x[i]) is the i'th distribution over len(x[i]) options.
    """

    nin = 2
    nout = 1
    def __init__(self, **kwargs):
        theano.Op.__init__(self, **kwargs)

    def make_node(self, x, b):
        x = tensor.as_tensor(x)
        b = tensor.as_tensor(b)
        if x.type.ndim != 2 \
                or x.type.dtype not in ['float32', 'float64']:
            raise ValueError('x must be 2-d tensor of floats')
        if b.type.ndim != 1 \
                or x.type.dtype not in ['float32', 'float64']:
            raise ValueError('b must be 1-d tensor of floats')

        sm = x.type.make_result()
        return theano.Apply(self, [x, b], [sm])

    def perform(self, node, input_storage, output_storage):
        x, b = input_storage
        if b.shape[0] != x.shape[1]:
            raise ValueError('b must have same number of columns as x')

        sm = numpy.zeros_like(x)
        for i in xrange(sm.shape[0]):
            row = x[i] + b
            sm[i] = numpy.exp(row - numpy.max(row))
            sm[i] *= 1.0 / numpy.sum(sm[i])
        output_storage[0][0] = sm

    def grad(self, (x, b), (g_sm,)):
        sm = softmax_with_bias(x, b)
        dx = SoftmaxWithBiasDx()(g_sm, sm)
        db = tensor.sum(dx, axis = 0)
        return dx, db

    def c_headers(self):
        return ['<iostream>']

    @staticmethod
    def c_code_template():
        # this implementation was lifted from
        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx

        #TODO: put this into a templated function, in the support code
        #TODO: declare the max of each row as an Op output

        #TODO: set error messages for failures in this code

        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
        init_decl = """
        npy_intp* Nx = %(x)s->dimensions;

        if (%(x)s->nd != 2)
        {
            PyErr_SetString(PyExc_ValueError, "a not 2d tensor");
            %(fail)s;
        }
        if (%(b)s->nd != 1)
        {
            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
            %(fail)s;
        }
        if (%(x)s->descr->type_num != PyArray_DOUBLE)
        {
            PyErr_SetString(PyExc_TypeError, "a not float64");
            %(fail)s;
        }
        if (%(b)s->descr->type_num != PyArray_DOUBLE)
        {
            PyErr_SetString(PyExc_TypeError, "b not float64");
            %(fail)s;
        }
        if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
        {
            PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
            %(fail)s;
        }

        if ((NULL == %(sm)s)
            || (%(sm)s->dimensions[0] != %(x)s->dimensions[0])
            || (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
        {
            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s);
            if(!%(sm)s) {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output");
                %(fail)s
            }
        }
        """

        begin_row_loop = """
        for (size_t i = 0; i < Nx[0]; ++i)
        {
            size_t j;
            double sum = 0.0;
            bool discount_max = false;

            const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
            const double* __restrict__ b_i = (double*)(%(b)s->data);
            double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
        """

        inside_row_loop = """
            npy_intp Sx = %(x)s->strides[1]/sizeof(double);
            npy_intp Sb = %(b)s->strides[0]/sizeof(double);
            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);

            size_t row_max_j=0;
            double row_max = x_i[0] + b_i[0];
            // Get the maximum value of the row
            for (j = 0; j < Nx[1]; ++j)
            {
                double row_ij = x_i[j * Sx] + b_i[j * Sb];
                row_max_j = (row_ij > row_max) ? j : row_max_j;
                row_max = (row_ij > row_max) ? row_ij : row_max;
            }

            for (j = 0; j < Nx[1]; ++j)
            {
                double row_ij = x_i[j * Sx] + b_i[j * Sb];
                double sm_ij = exp(row_ij - row_max);
                sum += sm_ij;
                sm_i[j * Ssm] = sm_ij;
            }
            if ( (0.0 == sum) || (isinf(sum)))
            {
                //that was our best...
                %(fail)s;
            }

            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm] *= sum_inv;
            }
        """

        end_row_loop = """
        }
        """

        return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)


    def c_code(self, node, name, (x, b), (sm,), sub):
        code_template = ''.join(self.c_code_template())
        return code_template % dict(locals(), **sub)

softmax_with_bias = SoftmaxWithBias()
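For reference, an editorial sketch (not part of the file): SoftmaxWithBias.perform
computes a row-wise softmax of x+b, subtracting each row's maximum before
exponentiating, exactly as the C template does. The ref_softmax_with_bias name
below is ours:

    # Editorial sketch: the dense NumPy computation performed row by row above,
    # written without the explicit loop.  Shapes assumed: x is (n, k), b is (k,).
    import numpy

    def ref_softmax_with_bias(x, b):
        row = x + b                                    # add bias to every row
        row = row - row.max(axis=1)[:, numpy.newaxis]  # subtract row max for stability
        e = numpy.exp(row)
        return e / e.sum(axis=1)[:, numpy.newaxis]     # each row sums to 1

    x = numpy.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]])
    b = numpy.array([0.5, 0.0, -0.5])
    sm = ref_softmax_with_bias(x, b)
    assert numpy.allclose(sm.sum(axis=1), 1.0)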
class SoftmaxWithBiasDx(theano.Op):
    nin = 2
    nout = 1
    """Gradient wrt x of the SoftmaxWithBias Op"""

    def __init__(self, **kwargs):
        theano.Op.__init__(self, **kwargs)

    def make_node(self, dy, sm, **kwargs):
        dy = tensor.as_tensor(dy)
        sm = tensor.as_tensor(sm)
        return theano.Apply(self, [dy, sm], [sm.type.make_result()])

    def perform(self, node, input_storage, output_storage):
        dy, sm = input_storage
        dx = numpy.zeros_like(sm)
        #dx[i,j] = - (\sum_k dy[i,k] sm[i,k]) sm[i,j] + dy[i,j] sm[i,j]
        for i in xrange(sm.shape[0]):
            dy_times_sm_i = dy[i] * sm[i]
            dx[i] = dy_times_sm_i - sum(dy_times_sm_i) * sm[i]
        output_storage[0][0] = dx

    def grad(self, *args):
        raise NotImplementedError()

    def c_code(self, node, name, (dy, sm), (dx,), sub):
        return '''
        if ((%(dy)s->descr->type_num != PyArray_DOUBLE)
            || (%(sm)s->descr->type_num != PyArray_DOUBLE))
        {
            PyErr_SetString(PyExc_TypeError, "types should be float64, float64");
            %(fail)s;
        }
        if ((%(dy)s->nd != 2)
            || (%(sm)s->nd != 2))
        {
            PyErr_SetString(PyExc_ValueError, "rank error");
            %(fail)s;
        }
        if (%(dy)s->dimensions[0] != %(sm)s->dimensions[0])
        {
            PyErr_SetString(PyExc_ValueError, "dimension mismatch");
            %(fail)s;
        }
        if ((NULL == %(dx)s)
            || (%(dx)s->dimensions[0] != %(sm)s->dimensions[0])
            || (%(dx)s->dimensions[1] != %(sm)s->dimensions[1]))
        {
            Py_XDECREF(%(dx)s);
            %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, PyArray_DIMS(%(sm)s),
                                                        type_num_%(sm)s);
            if (!%(dx)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output");
                %(fail)s;
            }
        }

        for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
        {
            const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i);
            npy_intp Sdy = %(dy)s->strides[1]/sizeof(double);
            const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i);
            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
            double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i);
            npy_intp Sdx = %(dx)s->strides[1]/sizeof(double);

            double sum_dy_times_sm = 0.;
            for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
            {
                dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm];
                sum_dy_times_sm += dx_i[j * Sdx];
            }
            for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
            {
                dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm];
            }
        }
        ''' % dict(locals(), **sub)

def softmax(x, **kwargs):
    b = tensor.zeros_like(x[0,:])
    return softmax_with_bias(x, b, **kwargs)
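Editorial sketch (ours): SoftmaxWithBiasDx.perform implements
dx[i,j] = sm[i,j] * (dy[i,j] - sum_k dy[i,k] sm[i,k]), i.e. the softmax Jacobian
applied to dy. A small finite-difference check of that formula, assuming nothing
beyond NumPy:

    # Editorial sketch: check the dx formula above against a numerical derivative
    # of one softmax entry.
    import numpy

    def ref_softmax(v):
        e = numpy.exp(v - v.max())
        return e / e.sum()

    def ref_softmax_dx(dy, sm):
        return sm * (dy - (dy * sm).sum(axis=1)[:, numpy.newaxis])

    v = numpy.array([0.1, -0.3, 0.7])
    dy = numpy.array([[0.0, 1.0, 0.0]])       # picks out d sm[1] / d v
    dx = ref_softmax_dx(dy, ref_softmax(v)[numpy.newaxis, :])

    eps = 1e-6
    for j in range(3):
        vp = v.copy(); vp[j] += eps
        vm = v.copy(); vm[j] -= eps
        numeric = (ref_softmax(vp)[1] - ref_softmax(vm)[1]) / (2 * eps)
        assert abs(numeric - dx[0, j]) < 1e-5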
class CrossentropySoftmaxArgmax1HotWithBias(theano.Op):
    """A special compound L{Op} for the output of neural-net classifiers.

    @type x: is a matrix of floats (32 or 64)
    @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
    @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x

    @precondition: every entry in y_idx is a valid (non-negative) column index into x

    This L{Op} has three outputs:
     - KL(softmax(x+b), y)
     - softmax(x+b)
     - argmax(x+b)

    softmax(x[i]) is the i'th distribution over len(x[i]) options
    argmax(x) is the index of x's greatest element
    y_idx[i] is an integer index, encoding a 1-hot distribution.

    In practice, when we're trying to do classification, we have one row in x
    and y_idx per example, and y[i] is the index of the (correct) class of the
    i'th example.
    """
    nin=3
    nout=3
    def __init__(self, **kwargs):
        theano.Op.__init__(self, **kwargs)

    def make_node(self, x, b, y_idx):
        x = tensor.as_tensor(x)
        b = tensor.as_tensor(b)
        y_idx = tensor.as_tensor(y_idx)
        if x.type.ndim != 2 \
                or x.type.dtype not in ['float32', 'float64']:
            raise ValueError('x must be 2-d tensor of floats')
        if b.type.ndim != 1 \
                or x.type.dtype not in ['float32', 'float64']:
            raise ValueError('b must be 1-d tensor of floats')
        if y_idx.type.ndim != 1 \
                or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']:
            raise ValueError('y_idx must be 1-d tensor of ints')

        # TODO: Is this correct? It used to be y, not y_idx
        nll = tensor.Tensor(x.type.dtype,
                y_idx.type.broadcastable).make_result()
        # nll = Tensor(x.dtype, y.broadcastable)
        sm = x.type.make_result()
        am = y_idx.type.make_result()
        return theano.Apply(self, [x, b, y_idx], [nll, sm, am])

    def perform(self, node, input_storage, output_storage):
        """
        The math, where x is an input vector, and t is a target index:

        softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j]))
        nll(x,t) = -log(softmax(x)[t])

        We compute this by subtracting off the max of x. This avoids numerical instability.

        m = max_j x[j]
        softmax(x)[i] = exp(x[i] - m) / sum_j(exp(x[j] - m))

        nll = -log(exp(x[t] - m) / sum_j(exp(x[j] - m)))
            = -x[t] + m + log( sum_j(exp(x[j] - m)))
        """
        x, b, y_idx = input_storage
        if b.shape[0] != x.shape[1]:
            raise ValueError('b must have same number of columns as x')
        if y_idx.shape[0] != x.shape[0]:
            raise ValueError('y_idx must have same number of rows as x')

        sm = numpy.zeros_like(x) # softmax
        nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
        am = numpy.zeros_like(y_idx)
        for i in xrange(sm.shape[0]):
            #add the bias vector to the i'th row of x
            row = x[i] + b

            #get the maximum value of i'th row for numerically safe softmax / nll
            am[i] = numpy.argmax(row)
            m = row[am[i]]

            #compute the unnormalized softmax, and normalization constant
            sm[i] = numpy.exp(row - m)
            sum_j = numpy.sum(sm[i]) # sum_j(exp(x[j] - m))

            #normalize our softmax
            sm[i] *= 1.0 / sum_j

            # store the nll
            nll[i] = -row[y_idx[i]] + m + numpy.log(sum_j)

        output_storage[0][0] = nll
        output_storage[1][0] = sm
        output_storage[2][0] = am

    def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
        if g_sm is not None or g_am is not None:
            raise NotImplementedError()
        nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
        dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
        db = tensor.sum(dx, axis = [0])
        return dx, db, None

    def c_headers(self): return ['<iostream>']

    @staticmethod
    def c_code_template():
        # this implementation was lifted from
        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx

        #TODO: put this into a templated function, in the support code
        #TODO: declare the max of each row as an Op output

        #TODO: set error messages for failures in this code

        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
        (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \
            SoftmaxWithBias.c_code_template()
        return (init_decl,
                """
        if (%(y_idx)s->nd != 1)
        {
            PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
            %(fail)s;
        }
        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
            && (%(y_idx)s->descr->type_num != PyArray_INT32)
            && (%(y_idx)s->descr->type_num != PyArray_INT16)
            && (%(y_idx)s->descr->type_num != PyArray_INT8))
        {
            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
            %(fail)s;
        }
        if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0])
        {
            PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
            %(fail)s;
        }

        if ((NULL == %(nll)s) //initial condition
            || (%(nll)s->dimensions[0] != %(y_idx)s->dimensions[0]))
        {
            if (NULL != %(nll)s) Py_XDECREF(%(nll)s);
            %(nll)s = (PyArrayObject*)PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(x)s);
            if(!%(nll)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output");
                %(fail)s;
            }
        }
        if ((NULL == %(am)s)
            || (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0]))
        {
            Py_XDECREF(%(am)s);
            %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(y_idx)s);
            if(!%(am)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc am output");
                %(fail)s;
            }
        }
                """,
                begin_row_loop,
                """
            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
            double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
            %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
                """,
                inside_row_loop,
                """
            nll_i[0] = - x_i[y_i*Sx]
                       - b_i[y_i*Sb]
                       + row_max
                       + log(sum);
            am_i[0] = row_max_j;
                """,
                end_row_loop)


    def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
        y_idx_type = node.inputs[2].type.dtype_specs()[1]
        am_type = y_idx_type
        code_template = ''.join(self.c_code_template())
        return code_template % dict(locals(), **sub)

class CrossentropySoftmax1HotWithBiasDx (theano.Op):
    nin=3
    nout=1
    """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
    def __init__(self, **kwargs):
        theano.Op.__init__(self,**kwargs)
    def make_node(self, dy, sm, y_idx,**kwargs):
        dy = tensor.as_tensor(dy)
        sm = tensor.as_tensor(sm)
        y_idx = tensor.as_tensor(y_idx)
        return theano.Apply(self, [dy, sm, y_idx],[sm.type.make_result()])
    def perform(self, node, input_storage, output_storage):
        dy,sm,y_idx = input_storage
        dx = numpy.zeros_like(sm)
        for i in xrange(sm.shape[0]):
            dx[i] = dy[i] * sm[i] #vector scale
            dx[i, y_idx[i]] -= dy[i] #scalar decrement
        output_storage[0][0] = dx
    def grad(self, *args):
        raise NotImplementedError()
    def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
        y_idx_type = node.inputs[2].type.dtype_specs()[1]
        return """

        if ((%(dnll)s->descr->type_num != PyArray_DOUBLE)
            || (%(sm)s->descr->type_num != PyArray_DOUBLE)
            )
        {
            PyErr_SetString(PyExc_TypeError, "types should be float64, float64, int64");
            %(fail)s;
        }
        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
            && (%(y_idx)s->descr->type_num != PyArray_INT32)
            && (%(y_idx)s->descr->type_num != PyArray_INT16)
            && (%(y_idx)s->descr->type_num != PyArray_INT8))
        {
            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
            %(fail)s;
        }
        if ((%(dnll)s->nd != 1)
            || (%(sm)s->nd != 2)
            || (%(y_idx)s->nd != 1))
        {
            PyErr_SetString(PyExc_ValueError, "rank error");
            %(fail)s;
        }
        if ((%(dnll)s->dimensions[0] != %(sm)s->dimensions[0])
            || (%(dnll)s->dimensions[0] != %(y_idx)s->dimensions[0]))
        {
            PyErr_SetString(PyExc_ValueError, "dimension mismatch");
            %(fail)s;
        }
        if ((NULL == %(dx)s)
            || (%(dx)s->dimensions[0] != %(sm)s->dimensions[0])
            || (%(dx)s->dimensions[1] != %(sm)s->dimensions[1]))
        {
            if (NULL != %(dx)s) Py_XDECREF(%(dx)s);
            %(dx)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(sm)s), type_num_%(sm)s);
            if(!%(dx)s) {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output");
                %(fail)s
            }
        }

        for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
        {
            const double dnll_i = ((double*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];

            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];

            const double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);

            double* __restrict__ dx_i = (double*)(%(dx)s->data + %(dx)s->strides[0] * i);
            npy_intp Sdx = %(dx)s->strides[1]/sizeof(double);

            for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
            {
                dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm];
            }
            if (y_i >= %(dx)s->dimensions[1])
            {
                %(fail)s;
            }
            dx_i[y_i * Sdx] -= dnll_i;
        }
        """ % dict(locals(), **sub)

crossentropy_softmax_argmax_1hot_with_bias = \
    CrossentropySoftmaxArgmax1HotWithBias()

def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
    return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2]

def crossentropy_softmax_1hot(x, y_idx, **kwargs):
    b = tensor.zeros_like(x[0,:])
    return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
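Editorial sketch (ours) of the numerically stable negative log-likelihood spelled
out in the CrossentropySoftmaxArgmax1HotWithBias.perform docstring; ref_nll is a
hypothetical name:

    # Editorial sketch: nll[i] = -row[t] + m + log(sum_j exp(row[j] - m)), which
    # equals -log(softmax(row)[t]) without exponentiating unshifted values.
    import numpy

    def ref_nll(x, b, y_idx):
        nll = numpy.zeros(x.shape[0])
        for i in range(x.shape[0]):
            row = x[i] + b
            m = row.max()
            nll[i] = -row[y_idx[i]] + m + numpy.log(numpy.exp(row - m).sum())
        return nll

    x = numpy.array([[2.0, 1.0, 0.1], [0.0, 5.0, -1.0]])
    b = numpy.zeros(3)
    y = numpy.array([0, 1])
    naive = -numpy.log(numpy.exp(x + b)[numpy.arange(2), y]
                       / numpy.exp(x + b).sum(axis=1))
    assert numpy.allclose(ref_nll(x, b, y), naive)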
class MultinomialCrossentropy1Hot(theano.Op):
    pass


def binary_crossentropy(output, target):
    """
    Compute the crossentropy of binary output wrt binary target.
    @note: We do not sum, crossentropy is computed by component.
    @todo: Rewrite as a scalar, and then broadcast to tensor.
    @todo: This is essentially duplicated as cost.cross_entropy
    @warning: OUTPUT and TARGET are reversed in cost.cross_entropy
    """
    return -(target * tensor.log(output) + (1 - target) * tensor.log(1 - output))
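A quick numeric illustration (ours) of binary_crossentropy's element-wise formula:

    # Editorial sketch: -(t*log(o) + (1-t)*log(1-o)), computed per component,
    # with no summing over components.
    import numpy

    o = numpy.array([0.9, 0.2])
    t = numpy.array([1.0, 0.0])
    ce = -(t * numpy.log(o) + (1 - t) * numpy.log(1 - o))
    # ce[0] = -log(0.9) ~= 0.105, ce[1] = -log(0.8) ~= 0.223
    assert numpy.allclose(ce, [-numpy.log(0.9), -numpy.log(0.8)])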
class Prepend_scalar_constant_to_each_row(theano.Op):
    def __init__(self, val = 0):
        if isinstance(val, float):
            val = scalar.constant(val)
        self.val = val

    def make_node(self, mat):
        #check type of input
        if not isinstance(mat,theano.Result) or not mat.type==tensor.matrix().type:
            raise TypeError("Expected a matrix as input")
        x = tensor.as_tensor(mat)
        y = tensor.as_tensor(self.val)
        if x.type.dtype != y.type.dtype:
            raise TypeError("the value to prepend does not have the same type as the matrix")

        node = theano.Apply(op=self, inputs=[mat], outputs=[tensor.matrix()])
        return node

    def perform(self, node, (mat, ), (output, )):
        new_shape=(mat.shape[0],mat.shape[1]+1)
        if output[0] == None:
            output[0]=numpy.empty(new_shape,dtype=mat.dtype)
            out=output[0]
        else:
            if output[0].shape!=new_shape:
                try:
                    output[0].resize(new_shape)
                except:
                    output[0]=numpy.empty(new_shape, dtype=mat.dtype)
            out=output[0]

        out[:,0].fill(self.val.data)
        out[:,1:]=mat

    def grad(self, (mat,), (goutput,)):
        return goutput[:,1:]

class Prepend_scalar_to_each_row(theano.Op):
    def make_node(self, val, mat):
        #check type of input
        if isinstance(val, float):
            val = scalar.constant(val)
        if not isinstance(mat,theano.Result) or not mat.type==tensor.matrix().type:
            raise TypeError("Expected a matrix as input")
        x = tensor.as_tensor(mat)
        y = tensor.as_tensor(val)
        if x.type.dtype != y.type.dtype:
            raise TypeError("the value to prepend does not have the same type as the matrix")

        node = theano.Apply(op=self, inputs=[val,mat], outputs=[tensor.matrix()])
        return node

    def perform(self, node, (val,mat), (output, )):
        new_shape=(mat.shape[0],mat.shape[1]+1)
        if output[0] == None:
            output[0]=numpy.empty(new_shape,dtype=mat.dtype)
            out=output[0]
        else:
            if output[0].shape!=new_shape:
                try:
                    output[0].resize(new_shape)
                except:
                    output[0]=numpy.empty(new_shape, dtype=mat.dtype)
            out=output[0]
        out[:,0].fill(val)
        out[:,1:]=mat

    def grad(self, (val, mat), (goutput,)):
        return goutput[:,0], goutput[:,1:]

prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.)
prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.)
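Editorial sketch (ours) of the array-level effect the prepend Ops aim for:
adding a constant first column to a matrix, for example a column of ones so a
bias term can be folded into a weight matrix:

    # Editorial sketch: what prepend_1_to_each_row produces for a 2x2 input.
    import numpy

    mat = numpy.array([[1.0, 2.0], [3.0, 4.0]])
    out = numpy.empty((mat.shape[0], mat.shape[1] + 1), dtype=mat.dtype)
    out[:, 0].fill(1.0)   # the prepended constant column
    out[:, 1:] = mat
    assert numpy.allclose(out, [[1.0, 1.0, 2.0], [1.0, 3.0, 4.0]])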
class solve(theano.Op):
    """
    Find the solution to the linear equation Ax=b,
    where A is a 2d matrix and b is a 1d or 2d matrix.
    It uses numpy.linalg.solve to find the solution.
    """

    def make_node(self, A, b):
        if not isinstance(A, theano.Result) or not A.type==tensor.matrix().type:
            raise TypeError("We expected that A had a matrix type")
        if not isinstance(b, theano.Result) or not b.type==tensor.matrix().type:
            raise TypeError("We expected that b had a matrix type")

        node = theano.Apply(op=self, inputs=[A, b], outputs=[tensor.matrix()])
        return node

    def perform(self, node, (A, b), (output, )):
        ret=numpy.linalg.solve(A,b)
        output[0]=ret

    def grad(self, (theta, A, B), (gtheta,)):
        raise NotImplementedError()
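Editorial sketch (ours): the NumPy routine that solve.perform is reaching for is
numpy.linalg.solve, which returns x such that numpy.dot(A, x) equals b:

    # Editorial sketch: solving a small, well-conditioned 2x2 system.
    import numpy

    A = numpy.array([[3.0, 1.0], [1.0, 2.0]])
    b = numpy.array([9.0, 8.0])
    x = numpy.linalg.solve(A, b)
    assert numpy.allclose(numpy.dot(A, x), b)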