Mercurial > pylearn
comparison nnet_ops.py @ 447:0392b666320a
fixed c typos, math error in nnet_ops.py
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 27 Aug 2008 17:08:33 -0400 |
parents | 23960ee12b52 |
children | 0961d4b56ec5 |
comparison
446:23960ee12b52 | 447:0392b666320a |
---|---|
161 || (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) | 161 || (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) |
162 { | 162 { |
163 if (NULL != %(sm)s) Py_XDECREF(%(sm)s); | 163 if (NULL != %(sm)s) Py_XDECREF(%(sm)s); |
164 %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s); | 164 %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s); |
165 if(!%(sm)s) { | 165 if(!%(sm)s) { |
166 // The normal cleanup code will take care of %(nll)s | |
167 // Py_XDECREF(%(nll)s); %(nll)s=NULL; | |
168 PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); | 166 PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); |
169 %(fail)s | 167 %(fail)s |
170 } | 168 } |
171 } | 169 } |
172 """ | 170 """ |
216 for (j = 0; j < Nx[1]; ++j) | 214 for (j = 0; j < Nx[1]; ++j) |
217 { | 215 { |
218 sm_i[j * Ssm] *= sum_inv; | 216 sm_i[j * Ssm] *= sum_inv; |
219 } | 217 } |
220 | 218 |
221 if (y_i >= Nx[1]) | |
222 { | |
223 %(fail)s; | |
224 } | |
225 """ | 219 """ |
226 | 220 |
227 end_row_loop = """ | 221 end_row_loop = """ |
228 } | 222 } |
229 """ | 223 """ |
294 PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output"); | 288 PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output"); |
295 %(fail)s; | 289 %(fail)s; |
296 } | 290 } |
297 } | 291 } |
298 | 292 |
299 for (size_t i = 0; i < %(dx)s->dimenstions[0]; ++i) | 293 for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) |
300 { | 294 { |
301 const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i); | 295 const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i); |
302 npy_intp Sdy = %(dy)s->strides[1]/sizeof(double); | 296 npy_intp Sdy = %(dy)s->strides[1]/sizeof(double); |
303 const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i); | 297 const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i); |
304 npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); | 298 npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); |
305 const double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i); | 299 double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i); |
306 npy_intp Sdx = %(dx)s->strides[1]/sizeof(double); | 300 npy_intp Sdx = %(dx)s->strides[1]/sizeof(double); |
307 | 301 |
308 double sum_dy_times_sm = 0.; | 302 double sum_dy_times_sm = 0.; |
309 for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) | 303 for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) |
310 { | 304 { |
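The loop in this hunk accumulates `sum_dy_times_sm`, the per-row inner product of `dy` and `sm`. Assuming the remainder of the loop (outside this hunk) applies the usual softmax Jacobian-vector product, a minimal numpy sketch of the whole computation:

```python
import numpy as np

def softmax_grad_rows(dy, sm):
    # dx[i, j] = (dy[i, j] - sum_k dy[i, k] * sm[i, k]) * sm[i, j]
    # `sum_dy_times_sm` in the C loop is the inner sum below, one value per row.
    sum_dy_times_sm = np.sum(dy * sm, axis=1, keepdims=True)
    return (dy - sum_dy_times_sm) * sm
```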
334 | 328 |
335 This L{Op} has three outputs: | 329 This L{Op} has three outputs: |
336 - KL(softmax(x+b), y) | 330 - KL(softmax(x+b), y) |
337 - softmax(x+b) | 331 - softmax(x+b) |
338 - argmax(x+b) | 332 - argmax(x+b) |
339 | |
340 | 333 |
341 softmax(x[i]) is the i'th distribution over len(x[i]) options | 334 softmax(x[i]) is the i'th distribution over len(x[i]) options |
342 argmax(x) is the index of x's greatest element | 335 argmax(x) is the index of x's greatest element |
343 y_idx[i] is an integer index, encoding a 1-hot distribution. | 336 y_idx[i] is an integer index, encoding a 1-hot distribution. |
344 | 337 |
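The docstring above lists the Op's three outputs. A minimal numpy sketch of all three, following the docstring rather than the Op's C implementation (the function name is illustrative):

```python
import numpy as np

def crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx):
    rows = np.arange(x.shape[0])
    row = x + b                          # bias broadcast across rows
    am = np.argmax(row, axis=1)          # argmax(x+b)
    m = row[rows, am]                    # per-row max, for numerical safety
    sm = np.exp(row - m[:, None])
    sm /= sm.sum(axis=1, keepdims=True)  # softmax(x+b)
    nll = -np.log(sm[rows, y_idx])       # KL(softmax(x+b), 1-hot y) = -log sm[y]
    return nll, sm, am
```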
372 # nll = Tensor(x.dtype, y.broadcastable) | 365 # nll = Tensor(x.dtype, y.broadcastable) |
373 sm = x.type.make_result() | 366 sm = x.type.make_result() |
374 am = y_idx.type.make_result() | 367 am = y_idx.type.make_result() |
375 return theano.Apply(self, [x, b, y_idx], [nll, sm, am]) | 368 return theano.Apply(self, [x, b, y_idx], [nll, sm, am]) |
376 def perform(self, node, input_storage, output_storage): | 369 def perform(self, node, input_storage, output_storage): |
370 """ | |
371 The math, where x is an input vector, and t is a target index: | |
372 | |
373 softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j])) | |
374 nll(x,t) = -log(softmax(x)[t]) | |
375 | |
376 We compute this by subtracting off the max of x. This avoids numerical instability. | |
377 | |
378 m = max_j x[j] | |
379 softmax(x)[i] = exp(x[i] -m) / sum_j(exp(x[j] - m)) | |
380 | |
381 nll = -log(exp(x[t] -m) / sum_j(exp(x[j] - m))) | |
382 = -x[t] + m + log( sum_j(exp(x[j] - m))) | |
383 | |
384 """ | |
377 x, b, y_idx = input_storage | 385 x, b, y_idx = input_storage |
378 if b.shape[0] != x.shape[1]: | 386 if b.shape[0] != x.shape[1]: |
379 raise ValueError('b must have same number of columns as x') | 387 raise ValueError('b must have same number of columns as x') |
380 if y_idx.shape[0] != x.shape[0]: | 388 if y_idx.shape[0] != x.shape[0]: |
381 raise ValueError('y_idx must have same number of rows as x') | 389 raise ValueError('y_idx must have same number of rows as x') |
382 | 390 |
383 sm = numpy.zeros_like(x) # softmax | 391 sm = numpy.zeros_like(x) # softmax |
384 nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x)) | 392 nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x)) |
385 am = numpy.zeros_like(y_idx) | 393 am = numpy.zeros_like(y_idx) |
386 for i in xrange(sm.shape[0]): | 394 for i in xrange(sm.shape[0]): |
387 row = x[i] + b | 395 #add the bias vector to the i'th row of x |
| 396 row = x[i] + b |
| 397 |
| 398 #get the maximum value of i'th row for numerically safe softmax / nll |
388 am[i] = numpy.argmax(row) | 399 am[i] = numpy.argmax(row) |
389 sm[i] = numpy.exp(row - row[am[i]]) #softmax | 400 m = row[am[i]] |
390 sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale | 401 |
391 nll[i] = -numpy.log(sm[i, y_idx[i]]) #cross-entropy | 402 #compute the unnormalized softmax, and normalization constant |
| 403 sm[i] = numpy.exp(row - m) |
| 404 sum_j = numpy.sum(sm[i]) # sum_j(exp(x[j] - m)) |
| 405 |
| 406 #normalize the softmax |
| 407 sm[i] *= 1.0 / sum_j |
| 408 |
| 409 # store the nll |
| 410 nll[i] = -row[y_idx[i]] + m + numpy.log(sum_j) |
| 411 |
392 output_storage[0][0] = nll | 412 output_storage[0][0] = nll |
393 output_storage[1][0] = sm | 413 output_storage[1][0] = sm |
394 output_storage[2][0] = am | 414 output_storage[2][0] = am |
395 def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)): | 415 def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)): |
396 if g_sm is not None or g_am is not None: | 416 if g_sm is not None or g_am is not None: |
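The docstring inserted at new lines 370-384 is the reason for the max subtraction: the naive formula overflows for large logits, while the rewritten one stays finite. A quick numpy check, with values chosen to force the overflow:

```python
import numpy as np

x = np.array([1000.0, 0.0, -1000.0])
t = 1  # target index

# Naive form: exp(1000.) overflows to inf, so the nll degenerates (numpy warns).
naive = -np.log(np.exp(x[t]) / np.sum(np.exp(x)))   # inf

# Stabilized form from the docstring: -x[t] + m + log(sum_j(exp(x[j] - m)))
m = x.max()
stable = -x[t] + m + np.log(np.sum(np.exp(x - m)))  # ~1000.0, finite
```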
467 """, | 487 """, |
468 inside_row_loop, | 488 inside_row_loop, |
469 """ | 489 """ |
470 nll_i[0] = - x_i[y_i*Sx] | 490 nll_i[0] = - x_i[y_i*Sx] |
471 - b_i[y_i*Sb] | 491 - b_i[y_i*Sb] |
| 492 + row_max |
472 + log(sum); | 493 + log(sum); |
473 am_i[0] = row_max_j; | 494 am_i[0] = row_max_j; |
474 """, | 495 """, |
475 end_row_loop) | 496 end_row_loop) |
476 | 497 |
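The final hunk is the math error named in the commit message: the C expression for nll_i omitted the `row_max` term that the max-subtraction trick introduces. A small numpy check that the corrected expression agrees with a direct -log(softmax) on a well-conditioned row (values are illustrative):

```python
import numpy as np

x = np.array([0.3, -1.2, 2.0])
b = np.array([0.1, 0.0, -0.5])
t = 2
row = x + b
row_max = row.max()
s = np.sum(np.exp(row - row_max))          # `sum` in the C row loop

nll_fixed = -x[t] - b[t] + row_max + np.log(s)   # the patched expression
nll_direct = -np.log(np.exp(row - row_max)[t] / s)
assert np.allclose(nll_fixed, nll_direct)
```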