comparison nnet_ops.py @ 447:0392b666320a

fixed c typos, math error in nnet_ops.py
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 27 Aug 2008 17:08:33 -0400
parents 23960ee12b52
children 0961d4b56ec5
@@ -161,12 +161,10 @@
     || (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
 {
     if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
     %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s);
     if(!%(sm)s) {
-        // The normal cleanup code will take care of %(nll)s
-        // Py_XDECREF(%(nll)s); %(nll)s=NULL;
         PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output");
         %(fail)s
     }
 }
 """
@@ -216,14 +214,10 @@
         for (j = 0; j < Nx[1]; ++j)
         {
             sm_i[j * Ssm] *= sum_inv;
         }

-        if (y_i >= Nx[1])
-        {
-            %(fail)s;
-        }
         """

         end_row_loop = """
         }
         """
@@ -294,17 +288,17 @@
     PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output");
     %(fail)s;
 }
 }

-for (size_t i = 0; i < %(dx)s->dimenstions[0]; ++i)
+for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
 {
     const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i);
     npy_intp Sdy = %(dy)s->strides[1]/sizeof(double);
     const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i);
     npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
-    const double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i);
+    double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i);
     npy_intp Sdx = %(dx)s->strides[1]/sizeof(double);

     double sum_dy_times_sm = 0.;
     for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
     {
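The loop here accumulates `sum_dy_times_sm`, the per-row dot product of the incoming gradient with the softmax output, which is the standard backprop identity for a row-wise softmax: dx = sm * (dy - sum_j(dy[j] * sm[j])). A minimal NumPy sketch of the same computation, with illustrative names not taken from the patch:

    import numpy

    def softmax_grad_rows(dy, sm):
        # Reference version of the per-row C loop above:
        # dx[i] = sm[i] * (dy[i] - sum_j(dy[i,j] * sm[i,j]))
        dx = numpy.zeros_like(sm)
        for i in xrange(sm.shape[0]):
            sum_dy_times_sm = numpy.dot(dy[i], sm[i])  # the scalar the C loop accumulates
            dx[i] = sm[i] * (dy[i] - sum_dy_times_sm)
        return dx

The `const` dropped from `dx_i` is the other C typo fix: `dx_i` is the output row being written, unlike the read-only `dy_i` and `sm_i` rows.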
@@ -334,11 +328,10 @@

     This L{Op} has three outputs:
     - KL(softmax(x+b), y)
     - softmax(x+b)
     - argmax(x+b)
-

     softmax(x[i]) is the i'th distribution over len(x[i]) options
     argmax(x) is the index of x's greatest element
     y_idx[i] is an integer index, encoding a 1-hot distribution.

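To make the docstring concrete, here is a hedged NumPy sketch of the three outputs for a single row, using the naive formulas (the Op's actual `perform`, shown in the next hunk, uses the max-subtraction trick instead; the values are toy inputs, not from the patch):

    import numpy

    x_i = numpy.array([1.0, 2.0, 0.5])   # one row of x
    b = numpy.array([0.0, 0.1, 0.0])     # bias
    y = 1                                # target index, encoding a 1-hot distribution

    row = x_i + b
    sm = numpy.exp(row) / numpy.sum(numpy.exp(row))  # softmax(x+b)
    nll = -numpy.log(sm[y])                          # KL(softmax(x+b), y) for 1-hot y
    am = numpy.argmax(row)                           # argmax(x+b)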
@@ -372,25 +365,52 @@
         # nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
         am = y_idx.type.make_result()
         return theano.Apply(self, [x, b, y_idx], [nll, sm, am])
     def perform(self, node, input_storage, output_storage):
+        """
+        The math, where x is an input vector and t is a target index:
+
+            softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j]))
+            nll(x, t) = -log(softmax(x)[t])
+
+        We compute this by subtracting off the max of x, which avoids
+        numerical instability:
+
+            m = max_j x[j]
+            softmax(x)[i] = exp(x[i] - m) / sum_j(exp(x[j] - m))
+
+            nll = -log(exp(x[t] - m) / sum_j(exp(x[j] - m)))
+                = -x[t] + m + log(sum_j(exp(x[j] - m)))
+        """
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
             raise ValueError('y_idx must have same number of rows as x')

         sm = numpy.zeros_like(x)        # softmax
         nll = numpy.zeros(x.shape[0])   # nll(y | softmax(x))
         am = numpy.zeros_like(y_idx)
         for i in xrange(sm.shape[0]):
-            row = x[i] + b
+            # add the bias vector to the i'th row of x
+            row = x[i] + b
+
+            # get the maximum value of the i'th row for a numerically safe softmax / nll
             am[i] = numpy.argmax(row)
-            sm[i] = numpy.exp(row - row[am[i]])  # softmax
-            sm[i] *= 1.0 / numpy.sum(sm[i])      # vector scale
-            nll[i] = -numpy.log(sm[i, y_idx[i]]) # cross-entropy
+            m = row[am[i]]
+
+            # compute the unnormalized softmax and the normalization constant
+            sm[i] = numpy.exp(row - m)
+            sum_j = numpy.sum(sm[i])    # sum_j(exp(x[j] - m))
+
+            # normalize the softmax
+            sm[i] *= 1.0 / sum_j
+
+            # store the nll
+            nll[i] = -row[y_idx[i]] + m + numpy.log(sum_j)
+
         output_storage[0][0] = nll
         output_storage[1][0] = sm
         output_storage[2][0] = am
     def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
         if g_sm is not None or g_am is not None:
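The rewritten `nll[i]` line is the math fix named in the commit message: `-numpy.log(sm[i, y_idx[i]])` becomes `inf` as soon as the normalized softmax entry underflows to zero, while the rearranged `-row[t] + m + log(sum_j)` stays finite. A standalone sketch of the difference, with toy values:

    import numpy

    row = numpy.array([0.0, -2000.0])  # extreme logits
    t = 1                              # target index

    m = row.max()
    e = numpy.exp(row - m)
    sum_j = e.sum()
    sm = e / sum_j                     # sm[t] underflows to exactly 0.0

    naive = -numpy.log(sm[t])                # -log(0.0) -> inf
    stable = -row[t] + m + numpy.log(sum_j)  # 2000.0

    print naive, stable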
467 """, 487 """,
468 inside_row_loop, 488 inside_row_loop,
469 """ 489 """
470 nll_i[0] = - x_i[y_i*Sx] 490 nll_i[0] = - x_i[y_i*Sx]
471 - b_i[y_i*Sb] 491 - b_i[y_i*Sb]
492 + row_max
472 + log(sum); 493 + log(sum);
473 am_i[0] = row_max_j; 494 am_i[0] = row_max_j;
474 """, 495 """,
475 end_row_loop) 496 end_row_loop)
476 497
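The `+ row_max` term added to the C template keeps it consistent with the identity derived in the new `perform` docstring: with the max `m` subtracted inside the exponentials, nll = -(x[t] + b[t]) + m + log(sum_j(exp(x[j] + b[j] - m))). Omitting `m`, as the old code did, left the result too small by exactly the row max. A quick numerical check of the identity (illustrative values):

    import numpy

    row = numpy.array([3.0, 1.0, -0.5])  # x[i] + b for one row
    t = 0
    m = row.max()

    direct = -numpy.log(numpy.exp(row[t]) / numpy.sum(numpy.exp(row)))
    shifted = -row[t] + m + numpy.log(numpy.sum(numpy.exp(row - m)))

    assert numpy.allclose(direct, shifted)  # without "+ m" the two differ by m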