pylearn: comparison of nnet_ops.py @ 446:23960ee12b52

Add argmax as output of the big softmax-NLL thingy.

author    Pascal Lamblin <lamblinp@iro.umontreal.ca>
date      Mon, 25 Aug 2008 18:15:43 -0400
parents   060c12314734
children  0392b666320a
--- nnet_ops.py (445:6eb0900fb553)
+++ nnet_ops.py (446:23960ee12b52)
@@ -321,35 +321,36 @@
 def softmax(x, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return softmax_with_bias(x, b, **kwargs)
 
 
-class CrossentropySoftmax1HotWithBias(theano.Op):
+class CrossentropySoftmaxArgmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
 
     @type x: is a matrix of floats (32 or 64)
     @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
     @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
 
     @precondition: every entry in y_idx is a valid (non-negative) column index into x
 
-    This L{Op} has two outputs:
+    This L{Op} has three outputs:
     - KL(softmax(x+b), y)
     - softmax(x+b)
+    - argmax(x+b)
 
 
     softmax(x[i]) is the i'th distribution over len(x[i]) options
-
+    argmax(x) is the index of x's greatest element
     y_idx[i] is an integer index, encoding a 1-hot distribution.
 
     In practice, when we're trying to do classification, we have one row in x
     and y_idx per example, and y[i] is the index of the (correct) class of the
     i'th example.
 
     """
     nin=3
-    nout=2
+    nout=3
     def __init__(self, **kwargs):
         theano.Op.__init__(self, **kwargs)
 
     def make_node(self, x, b, y_idx):
         x = tensor.as_tensor(x)
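
The docstring in the hunk above spells out the fused Op's contract: one pass
over each row of x+b now yields the per-example negative log-likelihood, the
softmax distribution, and the argmax. A minimal NumPy sketch of those
semantics (the helper name is illustrative, not part of the module):

    import numpy

    def reference_outputs(x, b, y_idx):
        # z[i] is the pre-softmax activation of example i
        z = x + b
        # third output: index of each row's greatest element
        am = z.argmax(axis=1)
        # shift by the row max before exp, so the largest term is exp(0)
        e = numpy.exp(z - z.max(axis=1, keepdims=True))
        # second output: row-normalized softmax
        sm = e / e.sum(axis=1, keepdims=True)
        # first output: -log of the probability assigned to the correct class
        nll = -numpy.log(sm[numpy.arange(x.shape[0]), y_idx])
        return nll, sm, am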
@@ -364,33 +365,37 @@
         if y_idx.type.ndim != 1 \
         or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']:
             raise ValueError('y_idx must be 1-d tensor of ints')
 
         # TODO: Is this correct? It used to be y, not y_idx
         nll = tensor.Tensor(x.type.dtype,
                 y_idx.type.broadcastable).make_result()
         # nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
-        return theano.Apply(self, [x, b, y_idx], [nll, sm])
+        am = y_idx.type.make_result()
+        return theano.Apply(self, [x, b, y_idx], [nll, sm, am])
     def perform(self, node, input_storage, output_storage):
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
             raise ValueError('y_idx must have same number of rows as x')
 
         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
+        am = numpy.zeros_like(y_idx)
         for i in xrange(sm.shape[0]):
             row = x[i] + b
-            sm[i] = numpy.exp(row - numpy.max(row)) #softmax
+            am[i] = numpy.argmax(row)
+            sm[i] = numpy.exp(row - row[am[i]]) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
-            nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
+            nll[i] = -numpy.log(sm[i, y_idx[i]]) #cross-entropy
         output_storage[0][0] = nll
         output_storage[1][0] = sm
-    def grad(self, (x, b, y_idx), (g_nll, g_sm)):
-        if g_sm is not None:
+        output_storage[2][0] = am
+    def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
+        if g_sm is not None or g_am is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
         dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
         db = tensor.sum(dx, axis = [0])
         return dx, db, None
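
Two details of this hunk are worth spelling out. First, perform() keeps the
numerically stable softmax: row[am[i]] is by definition the row maximum, so
subtracting it before exp is the same max-shift trick as before, now computed
once and reused for the new argmax output. Second, grad() still delegates to
CrossentropySoftmax1HotWithBiasDx, which implements the standard identity
dNLL/dx = g_nll * (softmax(x+b) - onehot(y_idx)). A hedged NumPy sketch of
that identity (the function name is made up for illustration):

    import numpy

    def reference_grad_x(g_nll, sm, y_idx):
        # scale each softmax row by that example's incoming gradient
        dx = sm * g_nll[:, None]
        # subtract g_nll at the correct class: the one-hot term
        dx[numpy.arange(sm.shape[0]), y_idx] -= g_nll
        return dx

    # db in grad() above is then just dx summed over the batch axis.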
@@ -440,31 +445,43 @@
         {
             PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output");
             %(fail)s;
         }
     }
+    if ((NULL == %(am)s)
+        || (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0]))
+    {
+        Py_XDECREF(%(am)s);
+        %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(y_idx)s);
+        if(!%(am)s)
+        {
+            PyErr_SetString(PyExc_MemoryError, "failed to alloc am output");
+            %(fail)s;
+        }
+    }
     """,
     begin_row_loop,
     """
         const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
         double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
+        %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
     """,
     inside_row_loop,
     """
         nll_i[0] = - x_i[y_i*Sx]
                    - b_i[y_i*Sb]
                    + log(sum);
+        am_i[0] = row_max_j;
     """,
     end_row_loop)
 
 
-    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+    def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
         y_idx_type = node.inputs[2].type.dtype_specs()[1]
+        am_type = y_idx_type
         code_template = ''.join(self.c_code_template())
         return code_template % dict(locals(), **sub)
-
-crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
 
 class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
@@ -550,13 +567,24 @@
         }
         dx_i[y_i * Sdx] -= dnll_i;
     }
     """ % dict(locals(), **sub)
 
+crossentropy_softmax_argmax_1hot_with_bias = \
+    CrossentropySoftmaxArgmax1HotWithBias()
+
+def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
+    return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2]
+
 def crossentropy_softmax_1hot(x, y_idx, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
+
+
+class MultinomialCrossentropy1Hot(theano.Op):
+    pass
+
 
 def binary_crossentropy(output, target):
     """
     Compute the crossentropy of binary output wrt binary target.
     @note: We do not sum, crossentropy is computed by component.
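
The tail of this hunk keeps the old two-output interface alive:
crossentropy_softmax_1hot_with_bias is now a thin wrapper that calls the
three-output Op and slices off the argmax, so existing callers are
unaffected. A sketch of how the two entry points compare (variable names
are illustrative):

    # new fused Op: three outputs
    nll, sm, am = crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx)

    # backward-compatible wrapper: same nll and sm, argmax discarded
    nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)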