pylearn: comparison of nnet_ops.py @ 446:23960ee12b52

Add argmax as output of the big softmax-NLL thingy.

author    Pascal Lamblin <lamblinp@iro.umontreal.ca>
date      Mon, 25 Aug 2008 18:15:43 -0400
parents   060c12314734
children  0392b666320a
--- nnet_ops.py (445:6eb0900fb553)
+++ nnet_ops.py (446:23960ee12b52)
@@ -321,35 +321,36 @@
 def softmax(x, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return softmax_with_bias(x, b, **kwargs)
 
 
-class CrossentropySoftmax1HotWithBias(theano.Op):
+class CrossentropySoftmaxArgmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
 
     @type x: is a matrix of floats (32 or 64)
     @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
     @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
 
     @precondition: every entry in y_idx is a valid (non-negative) column index into x
 
-    This L{Op} has two outputs:
+    This L{Op} has three outputs:
     - KL(softmax(x+b), y)
     - softmax(x+b)
+    - argmax(x+b)
 
 
     softmax(x[i]) is the i'th distribution over len(x[i]) options
-
+    argmax(x) is the index of x's greatest element
     y_idx[i] is an integer index, encoding a 1-hot distribution.
 
     In practice, when we're trying to do classification, we have one row in x
     and y_idx per example, and y[i] is the index of the (correct) class of the
     i'th example.
 
     """
     nin=3
-    nout=2
+    nout=3
     def __init__(self, **kwargs):
         theano.Op.__init__(self, **kwargs)
 
     def make_node(self, x, b, y_idx):
         x = tensor.as_tensor(x)
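
The docstring in the hunk above spells out the fused Op's contract: one pass
over each row of x+b now yields the per-example negative log-likelihood, the
softmax distribution, and the argmax. A minimal NumPy sketch of those
semantics (the helper name is illustrative, not part of the module):

    import numpy

    def reference_outputs(x, b, y_idx):
        # z[i] is the pre-softmax activation of example i
        z = x + b
        # third output: index of each row's greatest element
        am = z.argmax(axis=1)
        # shift by the row max before exp, so the largest term is exp(0)
        e = numpy.exp(z - z.max(axis=1, keepdims=True))
        # second output: row-normalized softmax
        sm = e / e.sum(axis=1, keepdims=True)
        # first output: -log of the probability assigned to the correct class
        nll = -numpy.log(sm[numpy.arange(x.shape[0]), y_idx])
        return nll, sm, am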
@@ -364,33 +365,37 @@
         if y_idx.type.ndim != 1 \
         or y_idx.type.dtype not in ['int8', 'int16', 'int32', 'int64']:
             raise ValueError('y_idx must be 1-d tensor of ints')
 
         # TODO: Is this correct? It used to be y, not y_idx
         nll = tensor.Tensor(x.type.dtype,
                 y_idx.type.broadcastable).make_result()
         # nll = Tensor(x.dtype, y.broadcastable)
         sm = x.type.make_result()
-        return theano.Apply(self, [x, b, y_idx], [nll, sm])
+        am = y_idx.type.make_result()
+        return theano.Apply(self, [x, b, y_idx], [nll, sm, am])
     def perform(self, node, input_storage, output_storage):
         x, b, y_idx = input_storage
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
         if y_idx.shape[0] != x.shape[0]:
             raise ValueError('y_idx must have same number of rows as x')
 
         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
+        am = numpy.zeros_like(y_idx)
         for i in xrange(sm.shape[0]):
             row = x[i] + b
-            sm[i] = numpy.exp(row - numpy.max(row)) #softmax
+            am[i] = numpy.argmax(row)
+            sm[i] = numpy.exp(row - row[am[i]]) #softmax
             sm[i] *= 1.0 / numpy.sum(sm[i]) #vector scale
-            nll[i] = -numpy.log( sm[i, y_idx[i]]) #cross-entropy
+            nll[i] = -numpy.log(sm[i, y_idx[i]]) #cross-entropy
         output_storage[0][0] = nll
         output_storage[1][0] = sm
-    def grad(self, (x, b, y_idx), (g_nll, g_sm)):
-        if g_sm is not None:
+        output_storage[2][0] = am
+    def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
+        if g_sm is not None or g_am is not None:
             raise NotImplementedError()
         nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
         dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
         db = tensor.sum(dx, axis = [0])
         return dx, db, None
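
Two details of this hunk are worth spelling out. First, perform() keeps the
numerically stable softmax: row[am[i]] is by definition the row maximum, so
subtracting it before exp is the same max-shift trick as before, now computed
once and reused for the new argmax output. Second, grad() still delegates to
CrossentropySoftmax1HotWithBiasDx, which implements the standard identity
dNLL/dx = g_nll * (softmax(x+b) - onehot(y_idx)). A hedged NumPy sketch of
that identity (the function name is made up for illustration):

    import numpy

    def reference_grad_x(g_nll, sm, y_idx):
        # scale each softmax row by that example's incoming gradient
        dx = sm * g_nll[:, None]
        # subtract g_nll at the correct class: the one-hot term
        dx[numpy.arange(sm.shape[0]), y_idx] -= g_nll
        return dx

    # db in grad() above is then just dx summed over the batch axis.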
@@ -440,31 +445,43 @@
         {
             PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output");
             %(fail)s;
         }
     }
+    if ((NULL == %(am)s)
+        || (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0]))
+    {
+        Py_XDECREF(%(am)s);
+        %(am)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(y_idx)s), type_num_%(y_idx)s);
+        if(!%(am)s)
+        {
+            PyErr_SetString(PyExc_MemoryError, "failed to alloc am output");
+            %(fail)s;
+        }
+    }
     """,
     begin_row_loop,
     """
         const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
         double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
+        %(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
     """,
     inside_row_loop,
     """
         nll_i[0] = - x_i[y_i*Sx]
                    - b_i[y_i*Sb]
                    + log(sum);
+        am_i[0] = row_max_j;
     """,
     end_row_loop)
 
 
-    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+    def c_code(self, node, name, (x, b, y_idx), (nll, sm, am), sub):
         y_idx_type = node.inputs[2].type.dtype_specs()[1]
+        am_type = y_idx_type
         code_template = ''.join(self.c_code_template())
         return code_template % dict(locals(), **sub)
-
-crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
 
 class CrossentropySoftmax1HotWithBiasDx (theano.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
@@ -550,13 +567,24 @@
         }
         dx_i[y_i * Sdx] -= dnll_i;
     }
     """ % dict(locals(), **sub)
 
+crossentropy_softmax_argmax_1hot_with_bias = \
+    CrossentropySoftmaxArgmax1HotWithBias()
+
+def crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs):
+    return crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx, **kwargs)[0:2]
+
 def crossentropy_softmax_1hot(x, y_idx, **kwargs):
     b = tensor.zeros_like(x[0,:])
     return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
+
+
+class MultinomialCrossentropy1Hot(theano.Op):
+    pass
+
 
 def binary_crossentropy(output, target):
     """
     Compute the crossentropy of binary output wrt binary target.
     @note: We do not sum, crossentropy is computed by component.
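
The tail of this hunk keeps the old two-output interface alive:
crossentropy_softmax_1hot_with_bias is now a thin wrapper that calls the
three-output Op and slices off the argmax, so existing callers are
unaffected. A sketch of how the two entry points compare (variable names
are illustrative):

    # new fused Op: three outputs
    nll, sm, am = crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx)

    # backward-compatible wrapper: same nll and sm, argmax discarded
    nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)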