# HG changeset patch
# User Pascal Lamblin
# Date 1219702543 14400
# Node ID 23960ee12b52492bf7378b4101c0f4f4fa9adb83
# Parent 6eb0900fb55371c38a7862a22ed68661df4243c7
Add argmax as output of the big softmax-NLL thingy.

diff -r 6eb0900fb553 -r 23960ee12b52 nnet_ops.py
--- a/nnet_ops.py Fri Aug 22 17:34:06 2008 -0400
+++ b/nnet_ops.py Mon Aug 25 18:15:43 2008 -0400
@@ -323,7 +323,7 @@
         return softmax_with_bias(x, b, **kwargs)


-class CrossentropySoftmax1HotWithBias(theano.Op):
+class CrossentropySoftmaxArgmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.

     @type x: is a matrix of floats (32 or 64)
@@ -332,13 +332,14 @@

     @precondition: every entry in y_idx is a valid (non-negative) column index into x

-    This L{Op} has two outputs:
+    This L{Op} has three outputs:
     - KL(softmax(x+b), y)
     - softmax(x+b)
+    - argmax(x+b)

     softmax(x[i]) is the i'th distribution over len(x[i]) options
-
+    argmax(x) is the index of x's greatest element

     y_idx[i] is an integer index, encoding a 1-hot distribution.

     In practice, when we're trying to do classification, we have one row in x
@@ -347,7 +348,7 @@

     """
     nin=3
-    nout=2
+    nout=3
     def __init__(self, **kwargs):
         theano.Op.__init__(self, **kwargs)
@@ -366,11 +367,12 @@
             raise ValueError('y_idx must be 1-d tensor of ints')

         # TODO: Is this correct? It used to be y, not y_idx
-        nll = tensor.Tensor(x.type.dtype,
+        nll = tensor.Tensor(x.type.dtype,
                 y_idx.type.broadcastable).make_result()
         # nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
-        return theano.Apply(self, [x, b, y_idx], [nll, sm])
+        am = y_idx.type.make_result()
+        return theano.Apply(self, [x, b, y_idx], [nll, sm, am])
     def perform(self, node, input_storage, output_storage):
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
@@ -380,15 +382,18 @@

         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
+        am = numpy.zeros_like(y_idx)
         for i in xrange(sm.shape[0]):
             row = x[i] + b
-            sm[i] = numpy.exp(row - numpy.max(row)) #softmax
+            am[i] = numpy.argmax(row)
+            sm[i] = numpy.exp(row - row[am[i]]) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
-            nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
+            nll[i] = -numpy.log(sm[i, y_idx[i]]) #cross-entropy
         output_storage[0][0] = nll
         output_storage[1][0] = sm
-    def grad(self, (x, b, y_idx), (g_nll, g_sm)):
-        if g_sm is not None:
+        output_storage[2][0] = am
+    def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
+        if g_sm is not None or g_am is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
         dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
@@ -442,28 +447,40 @@
                 %(fail)s;
             }
         }
+        if ((NULL == %(am)s)
+            || (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0]))
+        {
+            Py_XDECREF(%(am)s);
+            %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(y_idx)s);
+            if(!%(am)s)
+            {
+                PyErr_SetString(PyExc_MemoryError, "failed to alloc am output");
+                %(fail)s;
+            }
+        }
         """,
         begin_row_loop,
         """
            const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
            double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
+            %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
         """,
         inside_row_loop,
         """
            nll_i[0] = - x_i[y_i*Sx] - b_i[y_i*Sb] + log(sum);
+            am_i[0] = row_max_j;
         """,
         end_row_loop)

-    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+    def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
         y_idx_type = node.inputs[2].type.dtype_specs()[1]
+        am_type = y_idx_type
         code_template = ''.join(self.c_code_template())
         return code_template % dict(locals(), **sub)

-crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
-
 class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
@@ -552,10 +569,21 @@
         }
         """ % dict(locals(), **sub)

+crossentropy_softmax_argmax_1hot_with_bias = \
+    CrossentropySoftmaxArgmax1HotWithBias()
+
+def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
+    return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2]
+
 def crossentropy_softmax_1hot(x, y_idx, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
+
+class MultinomialCrossentropy1Hot(theano.Op):
+    pass
+
+
 def binary_crossentropy(output, target):
     """
     Compute the crossentropy of binary output wrt binary target.
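
For reference, here is a minimal NumPy sketch of what the new three-output op
computes per row, mirroring the perform() method added in the patch. It is not
part of the changeset itself; the function name, the float64/int64 dtypes, and
the sample inputs are illustrative assumptions.

import numpy

def crossentropy_softmax_argmax_reference(x, b, y_idx):
    """Return (nll, sm, am): per-row NLL, softmax(x + b), and argmax(x + b)."""
    x = numpy.asarray(x, dtype='float64')        # (n_examples, n_classes) pre-activations
    b = numpy.asarray(b, dtype='float64')        # (n_classes,) bias row vector
    y_idx = numpy.asarray(y_idx, dtype='int64')  # (n_examples,) target class indices

    sm = numpy.zeros_like(x)       # softmax(x + b), filled row by row
    nll = numpy.zeros(x.shape[0])  # -log softmax(x + b)[i, y_idx[i]]
    am = numpy.zeros_like(y_idx)   # argmax(x + b) per row
    for i in range(x.shape[0]):
        row = x[i] + b
        am[i] = numpy.argmax(row)
        sm[i] = numpy.exp(row - row[am[i]])   # shift by the row max (row[am[i]]) for stability
        sm[i] /= numpy.sum(sm[i])
        nll[i] = -numpy.log(sm[i, y_idx[i]])
    return nll, sm, am

nll, sm, am = crossentropy_softmax_argmax_reference(
    [[1.0, 2.0, 0.5], [0.1, 0.2, 3.0]],   # x: two examples, three classes
    [0.0, 0.0, 0.0],                      # b
    [1, 2])                               # y_idx

Reusing row[am[i]] as the stabilizing maximum is what ties the argmax output to
the softmax computation, which is the point of fusing all three outputs into one
op. At the graph level the patch exposes this as
crossentropy_softmax_argmax_1hot_with_bias, and keeps the old two-output
crossentropy_softmax_1hot_with_bias as a thin wrapper over its first two outputs.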