comparison nnet_ops.py @ 442:b3315b252824

Finished derivative of softmax gradient.
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Fri, 22 Aug 2008 15:53:34 -0400
parents 18dbc1c11647
children 060c12314734
comparing 441:a2e8de4669cd with 442:b3315b252824
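
For context, the formula this changeset implements follows directly from the softmax Jacobian. Writing s = softmax(x + b) for one row (indices below range within a row; the code's i indexes rows), a sketch of the derivation, added editorially and not part of the changeset:

    \frac{\partial s_j}{\partial x_i} = s_j (\delta_{ij} - s_i)
    \qquad\Longrightarrow\qquad
    dx_i = \sum_j dy_j \frac{\partial s_j}{\partial x_i}
         = dy_i s_i - \Big( \sum_j dy_j s_j \Big) s_i

This is exactly the two-term expression in the code comment below, and the two-pass loop in the C implementation.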
@@ -98,11 +98,11 @@
     def perform(self, node, input_storage, output_storage):
         x, b = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')

-        sm = nympy.zeros_like(x)
+        sm = numpy.zeros_like(x)
         for i in xrange(sm.shape[0]):
             row = x[i] + b
             sm[i] = numpy.exp(row - numpy.max(row))
             sm[i] *= 1.0 / numpy.sum(sm[i])
         output_storage[0][0] = nll
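
The max-subtraction in perform above is the standard numerically stable softmax: shifting a row by its maximum leaves the result unchanged but caps every exponent at zero. A standalone NumPy sketch of the same trick (illustration only, not from the changeset):

    import numpy as np

    def stable_softmax_row(row):
        # exp(row - m) / sum(exp(row - m)) equals exp(row) / sum(exp(row))
        # for any shift m; choosing m = max(row) keeps exp() in range.
        e = np.exp(row - np.max(row))
        return e / e.sum()

    print(stable_softmax_row(np.array([1000., 1001., 1002.])))
    # [ 0.09003057  0.24472847  0.66524096] -- naive exp() would overflow to inf

(Unrelated to this changeset: the final line of perform assigns the undefined name nll where sm appears to be intended; that bug is not touched by this revision.)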
@@ -235,10 +235,90 @@
         code_template = ''.join(self.c_code_template())
         return code_template % dict(locals(), **sub)

 softmax_with_bias = SoftmaxWithBias()

+
+class SoftmaxWithBiasDx(theano.Op):
+    """Gradient wrt x of the SoftmaxWithBias Op"""
+    nin = 2
+    nout = 1
+
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, dy, sm, **kwargs):
+        dy = tensor.as_tensor(dy)
+        sm = tensor.as_tensor(sm)
+        return theano.Apply(self, [dy, sm], [sm.type.make_result()])
+
+    def perform(self, node, input_storage, output_storage):
+        dy, sm = input_storage
+        dx = numpy.zeros_like(sm)
+        #dx[i,j] = - (\sum_k dy[i,k] sm[i,k]) sm[i,j] + dy[i,j] sm[i,j]
+        for i in xrange(sm.shape[0]):
+            dy_times_sm_i = dy[i] * sm[i]
+            dx[i] = dy_times_sm_i - numpy.sum(dy_times_sm_i) * sm[i]
+        output_storage[0][0] = dx
+
+    def grad(self, *args):
+        raise NotImplementedError()
+
+    def c_code(self, node, name, (dy, sm), (dx,), sub):
+        return '''
+        if ((%(dy)s->descr->type_num != PyArray_DOUBLE)
+            || (%(sm)s->descr->type_num != PyArray_DOUBLE))
+        {
+            PyErr_SetString(PyExc_TypeError, "types should be float64, float64");
+            %(fail)s;
+        }
+        if ((%(dy)s->nd != 2)
+            || (%(sm)s->nd != 2))
+        {
+            PyErr_SetString(PyExc_ValueError, "rank error");
+            %(fail)s;
+        }
+        if (%(dy)s->dimensions[0] != %(sm)s->dimensions[0])
+        {
+            PyErr_SetString(PyExc_ValueError, "dimension mismatch");
+            %(fail)s;
+        }
+        if ((NULL == %(dx)s)
+            || (%(dx)s->dimensions[0] != %(sm)s->dimensions[0])
+            || (%(dx)s->dimensions[1] != %(sm)s->dimensions[1]))
+        {
+            Py_XDECREF(%(dx)s);
+            %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, PyArray_DIMS(%(sm)s),
+                                                        type_num_%(sm)s);
+            if (!%(dx)s)
+            {
+                PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output");
+                %(fail)s;
+            }
+        }
+
+        for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
+        {
+            const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i);
+            npy_intp Sdy = %(dy)s->strides[1]/sizeof(double);
+            const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i);
+            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
+            double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i);
+            npy_intp Sdx = %(dx)s->strides[1]/sizeof(double);
+
+            /* first pass: dx = dy * sm, accumulating the row total */
+            double sum_dy_times_sm = 0.;
+            for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
+            {
+                dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm];
+                sum_dy_times_sm += dx_i[j * Sdx];
+            }
+            /* second pass: subtract the row total times sm */
+            for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
+            {
+                dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm];
+            }
+        }
+        ''' % dict(locals(), **sub)


 class CrossentropySoftmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.

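Since SoftmaxWithBiasDx.grad raises NotImplementedError and the gradient formula is easy to get wrong by a sign, a quick NumPy cross-check may help: it re-expresses the new Op's two-pass loop in vectorized form and compares it against a centered finite difference of sum(dy * softmax(x + b)). This is an editorial sketch, not part of the changeset:

    import numpy as np

    def softmax(x, b):
        r = x + b
        e = np.exp(r - r.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    def softmax_dx(dy, sm):
        # vectorized form of the Op's two passes:
        # dx = dy * sm, then dx -= (sum_j dy[i,j]*sm[i,j]) * sm[i] per row
        dx = dy * sm
        dx -= dx.sum(axis=1, keepdims=True) * sm
        return dx

    rng = np.random.RandomState(0)
    x, b, dy = rng.randn(3, 5), rng.randn(5), rng.randn(3, 5)
    dx = softmax_dx(dy, softmax(x, b))

    # centered finite differences of L(x) = sum(dy * softmax(x + b))
    eps, num = 1e-6, np.zeros_like(x)
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            xp, xm = x.copy(), x.copy()
            xp[i, j] += eps
            xm[i, j] -= eps
            num[i, j] = ((dy * softmax(xp, b)).sum()
                         - (dy * softmax(xm, b)).sum()) / (2 * eps)

    print(abs(dx - num).max())   # agreement to roughly 1e-9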