pylearn: changeset 442:b3315b252824
Finished derivative of softmax gradient.
author    Pascal Lamblin <lamblinp@iro.umontreal.ca>
date      Fri, 22 Aug 2008 15:53:34 -0400
parents   a2e8de4669cd
children  060c12314734
files     nnet_ops.py
diffstat  1 files changed, 81 insertions(+), 1 deletions(-)
--- a/nnet_ops.py	Thu Aug 21 13:55:43 2008 -0400
+++ b/nnet_ops.py	Fri Aug 22 15:53:34 2008 -0400
@@ -100,7 +100,7 @@
         if b.shape[0] != x.shape[1]:
             raise ValueError('b must have same number of columns as x')
 
-        sm = nympy.zeros_like(x)
+        sm = numpy.zeros_like(x)
         for i in xrange(sm.shape[0]):
             row = x[i] + b
             sm[i] = numpy.exp(row - numpy.max(row))
@@ -238,6 +238,86 @@
 softmax_with_bias = SoftmaxWithBias()
 
+class SoftmaxWithBiasDx(theano.Op):
+    nin = 2
+    nout = 1
+    """Gradient wrt x of the SoftmaxWithBias Op"""
+
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, dy, sm, **kwargs):
+        dy = tensor.as_tensor(dy)
+        sm = tensor.as_tensor(sm)
+        return theano.Apply(self, [dy, sm], [sm.type.make_result()])
+
+    def perform(self, node, input_storage, output_storage):
+        dy, sm = input_storage
+        dx = numpy.zeros_like(sm)
+        # dx[i,j] = - (\sum_k dy[i,k] sm[i,k]) sm[i,j] + dy[i,j] sm[i,j]
+        for i in xrange(sm.shape[0]):
+            dy_times_sm_i = dy[i] * sm[i]
+            dx[i] = dy_times_sm_i - sum(dy_times_sm_i) * sm[i]
+        output_storage[0][0] = dx
+
+    def grad(self, *args):
+        raise NotImplementedError()
+
+    def c_code(self, node, name, (dy, sm), (dx,), sub):
+        return '''
+        if ((%(dy)s->descr->type_num != PyArray_DOUBLE)
+            || (%(sm)s->descr->type_num != PyArray_DOUBLE))
+        {
+            PyErr_SetString(PyExc_TypeError, "types should be float64, float64");
+            %(fail)s;
+        }
+        if ((%(dy)s->nd != 2)
+            || (%(sm)s->nd != 2))
+        {
+            PyErr_SetString(PyExc_ValueError, "rank error");
+            %(fail)s;
+        }
+        if (%(dy)s->dimensions[0] != %(sm)s->dimensions[0])
+        {
+            PyErr_SetString(PyExc_ValueError, "dimension mismatch");
+            %(fail)s;
+        }
+        if ((NULL == %(dx)s)
+            || (%(dx)s->dimensions[0] != %(sm)s->dimensions[0])
+            || (%(dx)s->dimensions[1] != %(sm)s->dimensions[1]))
+        {
+            Py_XDECREF(%(dx)s);
+            %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, PyArray_DIMS(%(sm)s),
+                                                        type_num_%(sm)s);
+            if (!%(dx)s)
+            {
+                PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output");
+                %(fail)s;
+            }
+        }
+
+        for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
+        {
+            const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i);
+            npy_intp Sdy = %(dy)s->strides[1]/sizeof(double);
+            const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i);
+            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
+            double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i);
+            npy_intp Sdx = %(dx)s->strides[1]/sizeof(double);
+
+            double sum_dy_times_sm = 0.;
+            for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
+            {
+                dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm];
+                sum_dy_times_sm += dx_i[j * Sdx];
+            }
+            for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
+            {
+                dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm];
+            }
+        }
+        ''' % dict(locals(), **sub)
+
 class CrossentropySoftmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
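For reference, the formula implemented above in both perform() and the C loop, dx[i,j] = dy[i,j]*sm[i,j] - (sum_k dy[i,k]*sm[i,k])*sm[i,j], can be checked numerically against a finite-difference estimate. The sketch below is not part of this changeset: it uses present-day NumPy (Python 3 rather than the Python 2 of the diff), and the helper name softmax_with_bias_np is purely illustrative, mirroring what SoftmaxWithBias.perform computes.

# Standalone sanity check of the SoftmaxWithBiasDx gradient formula
# (illustrative sketch, not part of this changeset).
import numpy

def softmax_with_bias_np(x, b):
    # Row-wise softmax of x + b, with the usual max subtraction for stability.
    row = x + b
    e = numpy.exp(row - row.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

rng = numpy.random.RandomState(0)
x = rng.randn(3, 5)
b = rng.randn(5)
dy = rng.randn(3, 5)

sm = softmax_with_bias_np(x, b)
# Analytic gradient, as in SoftmaxWithBiasDx.perform:
# dx[i,j] = dy[i,j]*sm[i,j] - (sum_k dy[i,k]*sm[i,k]) * sm[i,j]
dx = dy * sm - (dy * sm).sum(axis=1, keepdims=True) * sm

# Central finite-difference estimate of d/dx of sum(dy * softmax(x + b)).
eps = 1e-6
dx_fd = numpy.zeros_like(x)
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        xp = x.copy(); xp[i, j] += eps
        xm = x.copy(); xm[i, j] -= eps
        dx_fd[i, j] = ((dy * softmax_with_bias_np(xp, b)).sum()
                       - (dy * softmax_with_bias_np(xm, b)).sum()) / (2 * eps)

print(numpy.allclose(dx, dx_fd, atol=1e-6))  # expected: True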