comparison nnet_ops.py @ 446:23960ee12b52

Add argmax as output of the big softmax-NLL thingy.
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Mon, 25 Aug 2008 18:15:43 -0400
parents 060c12314734
children 0392b666320a
comparison
445:6eb0900fb553 446:23960ee12b52
diff -r 6eb0900fb553 -r 23960ee12b52 nnet_ops.py
--- a/nnet_ops.py
+++ b/nnet_ops.py
@@ -321,35 +321,36 @@
 def softmax(x, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return softmax_with_bias(x, b, **kwargs)
 
 
-class CrossentropySoftmax1HotWithBias(theano.Op):
+class CrossentropySoftmaxArgmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
 
     @type x: is a matrix of floats (32 or 64)
     @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
     @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
 
     @precondition: every entry in y_idx is a valid (non-negative) column index into x
 
-    This L{Op} has two outputs:
+    This L{Op} has three outputs:
      - KL(softmax(x+b), y)
      - softmax(x+b)
+     - argmax(x+b)
 
 
     softmax(x[i]) is the i'th distribution over len(x[i]) options
-
+    argmax(x) is the index of x's greatest element
     y_idx[i] is an integer index, encoding a 1-hot distribution.
 
     In practice, when we're trying to do classification, we have one row in x
     and y_idx per example, and y[i] is the index of the (correct) class of the
     i'th example.
 
     """
     nin=3
-    nout=2
+    nout=3
     def __init__(self, **kwargs):
         theano.Op.__init__(self, **kwargs)
 
     def make_node(self, x, b, y_idx):
         x = tensor.as_tensor(x)
@@ -364,33 +365,37 @@
         if y_idx.type.ndim != 1 \
         or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']:
             raise ValueError('y_idx must be 1-d tensor of ints')
 
         # TODO: Is this correct? It used to be y, not y_idx
         nll = tensor.Tensor(x.type.dtype,
                 y_idx.type.broadcastable).make_result()
         # nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
-        return theano.Apply(self, [x, b, y_idx], [nll, sm])
+        am = y_idx.type.make_result()
+        return theano.Apply(self, [x, b, y_idx], [nll, sm, am])
     def perform(self, node, input_storage, output_storage):
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
             raise ValueError('y_idx must have same number of rows as x')
 
         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
+        am = numpy.zeros_like(y_idx)
         for i in xrange(sm.shape[0]):
             row = x[i] + b
-            sm[i] = numpy.exp(row - numpy.max(row)) #softmax
+            am[i] = numpy.argmax(row)
+            sm[i] = numpy.exp(row - row[am[i]]) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
-            nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
+            nll[i] = -numpy.log(sm[i, y_idx[i]]) #cross-entropy
         output_storage[0][0] = nll
         output_storage[1][0] = sm
-    def grad(self, (x, b, y_idx), (g_nll, g_sm)):
-        if g_sm is not None:
+        output_storage[2][0] = am
+    def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
+        if g_sm is not None or g_am is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
         dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
         db = tensor.sum(dx, axis = [0])
         return dx, db, None
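
The new perform() computes all three outputs in a single pass over the rows: the argmax of each row is found first, and the same maximal element then shifts the exponentials so the softmax cannot overflow. A minimal standalone NumPy sketch of that computation (the function name and harness are illustrative, not part of the changeset):

    import numpy

    def softmax_nll_argmax(x, b, y_idx):
        # Per row of x: softmax(x+b), -log p(correct class), argmax(x+b),
        # mirroring the loop in perform() above.
        sm = numpy.zeros_like(x)
        nll = numpy.zeros(x.shape[0])
        am = numpy.zeros_like(y_idx)
        for i in range(x.shape[0]):
            row = x[i] + b
            am[i] = numpy.argmax(row)
            # Shifting by the row maximum guards against overflow, so the
            # argmax falls out of the stability trick at no extra cost.
            sm[i] = numpy.exp(row - row[am[i]])
            sm[i] /= numpy.sum(sm[i])
            nll[i] = -numpy.log(sm[i, y_idx[i]])
        return nll, sm, am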
@@ -440,31 +445,43 @@
         {
             PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output");
             %(fail)s;
         }
         }
+        if ((NULL == %(am)s)
+            || (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0]))
+        {
+            Py_XDECREF(%(am)s);
+            %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(y_idx)s);
+            if(!%(am)s)
+            {
+                PyErr_SetString(PyExc_MemoryError, "failed to alloc am output");
+                %(fail)s;
+            }
+        }
         """,
         begin_row_loop,
         """
         const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
         double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
+        %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
         """,
         inside_row_loop,
         """
         nll_i[0] = - x_i[y_i*Sx]
                    - b_i[y_i*Sb]
                    + log(sum);
+        am_i[0] = row_max_j;
         """,
         end_row_loop)
 
 
-    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+    def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
         y_idx_type = node.inputs[2].type.dtype_specs()[1]
+        am_type = y_idx_type
         code_template = ''.join(self.c_code_template())
         return code_template % dict(locals(), **sub)
-
-crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
 
 class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
@@ -550,13 +567,24 @@
             }
             dx_i[y_i * Sdx] -= dnll_i;
         }
         """ % dict(locals(), **sub)
 
+crossentropy_softmax_argmax_1hot_with_bias = \
+        CrossentropySoftmaxArgmax1HotWithBias()
+
+def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
+    return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2]
+
 def crossentropy_softmax_1hot(x, y_idx, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
+
+
+class MultinomialCrossentropy1Hot(theano.Op):
+    pass
+
 
 def binary_crossentropy(output, target):
     """
     Compute the crossentropy of binary output wrt binary target.
     @note: We do not sum, crossentropy is computed by component.
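
The final hunk keeps the old two-output entry point as a thin wrapper over the new three-output op, so existing callers are untouched while new code can also retrieve the argmax. A hypothetical usage sketch (x, b, and y_idx are assumed to be symbolic results satisfying the type checks in make_node; not from this file):

    # New interface: all three outputs of the compound op.
    nll, sm, am = crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx)

    # Old interface is unchanged: the wrapper slices off outputs [0:2].
    nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)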