pylearn: comparison of nnet_ops.py @ 442:b3315b252824
Finished derivative of softmax gradient.
author | Pascal Lamblin <lamblinp@iro.umontreal.ca>
date | Fri, 22 Aug 2008 15:53:34 -0400
parents | 18dbc1c11647
children | 060c12314734
441:a2e8de4669cd | 442:b3315b252824 |
---|---|
98 def perform(self, node, input_storage, output_storage): | 98 def perform(self, node, input_storage, output_storage): |
99 x, b = input_storage | 99 x, b = input_storage |
100 if b.shape[0] != x.shape[1]: | 100 if b.shape[0] != x.shape[1]: |
101 raise ValueError('b must have same number of columns as x') | 101 raise ValueError('b must have same number of columns as x') |
102 | 102 |
103 sm = numpy.zeros_like(x) | 103 sm = numpy.zeros_like(x) |
104 for i in xrange(sm.shape[0]): | 104 for i in xrange(sm.shape[0]): |
105 row = x[i] + b | 105 row = x[i] + b |
106 sm[i] = numpy.exp(row - numpy.max(row)) | 106 sm[i] = numpy.exp(row - numpy.max(row)) |
107 sm[i] *= 1.0 / numpy.sum(sm[i]) | 107 sm[i] *= 1.0 / numpy.sum(sm[i]) |
108 output_storage[0][0] = sm | 108 output_storage[0][0] = sm |
235 code_template = ''.join(self.c_code_template()) | 235 code_template = ''.join(self.c_code_template()) |
236 return code_template % dict(locals(), **sub) | 236 return code_template % dict(locals(), **sub) |
237 | 237 |
238 softmax_with_bias = SoftmaxWithBias() | 238 softmax_with_bias = SoftmaxWithBias() |
239 | 239 |
240 | |
241 class SoftmaxWithBiasDx(theano.Op): | |
242 """Gradient wrt x of the SoftmaxWithBias Op""" | |
243 nin = 2 | |
244 nout = 1 | |
245 | |
246 def __init__(self, **kwargs): | |
247 theano.Op.__init__(self, **kwargs) | |
248 | |
249 def make_node(self, dy, sm, **kwargs): | |
250 dy = tensor.as_tensor(dy) | |
251 sm = tensor.as_tensor(sm) | |
252 return theano.Apply(self, [dy, sm], [sm.type.make_result()]) | |
253 | |
254 def perform(self, node, input_storage, output_storage): | |
255 dy, sm = input_storage | |
256 dx = numpy.zeros_like(sm) | |
257 #dx[i,j] = - (\sum_k dy[i,k] sm[i,k]) sm[i,j] + dy[i,j] sm[i,j] | |
258 for i in xrange(sm.shape[0]): | |
259 dy_times_sm_i = dy[i] * sm[i] | |
260 dx[i] = dy_times_sm_i - numpy.sum(dy_times_sm_i) * sm[i] | |
261 output_storage[0][0] = dx | |
262 | |
263 def grad(self, *args): | |
264 raise NotImplementedError() | |
265 | |
266 def c_code(self, node, name, (dy, sm), (dx,), sub): | |
267 return ''' | |
268 if ((%(dy)s->descr->type_num != PyArray_DOUBLE) | |
269 || (%(sm)s->descr->type_num != PyArray_DOUBLE)) | |
270 { | |
271 PyErr_SetString(PyExc_TypeError, "types should be float64, float64"); | |
272 %(fail)s; | |
273 } | |
274 if ((%(dy)s->nd != 2) | |
275 || (%(sm)s->nd != 2)) | |
276 { | |
277 PyErr_SetString(PyExc_ValueError, "rank error"); | |
278 %(fail)s; | |
279 } | |
280 if (%(dy)s->dimensions[0] != %(sm)s->dimensions[0]) | |
281 { | |
282 PyErr_SetString(PyExc_ValueError, "dimension mismatch"); | |
283 %(fail)s; | |
284 } | |
285 if ((NULL == %(dx)s) | |
286 || (%(dx)s->dimensions[0] != %(sm)s->dimensions[0]) | |
287 || (%(dx)s->dimensions[1] != %(sm)s->dimensions[1])) | |
288 { | |
289 Py_XDECREF(%(dx)s); | |
290 %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, PyArray_DIMS(%(sm)s), | |
291 type_num_%(sm)s); | |
292 if (!%(dx)s) | |
293 { | |
294 PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output"); | |
295 %(fail)s; | |
296 } | |
297 } | |
298 | |
299 for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) | |
300 { | |
301 const double* __restrict__ dy_i = (double*) (%(dy)s->data + %(dy)s->strides[0] * i); | |
302 npy_intp Sdy = %(dy)s->strides[1]/sizeof(double); | |
303 const double* __restrict__ sm_i = (double*) (%(sm)s->data + %(sm)s->strides[0] * i); | |
304 npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); | |
305 double* __restrict__ dx_i = (double*) (%(dx)s->data + %(dx)s->strides[0] * i); | |
306 npy_intp Sdx = %(dx)s->strides[1]/sizeof(double); | |
307 | |
308 double sum_dy_times_sm = 0.; | |
309 for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) | |
310 { | |
311 dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm]; | |
312 sum_dy_times_sm += dx_i[j * Sdx]; | |
313 } | |
314 for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) | |
315 { | |
316 dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm]; | |
317 } | |
318 } | |
319 ''' % dict(locals(), **sub) | |
240 | 320 |
241 | 321 |
242 class CrossentropySoftmax1HotWithBias(theano.Op): | 322 class CrossentropySoftmax1HotWithBias(theano.Op): |
243 """A special compound L{Op} for the output of neural-net classifiers. | 323 """A special compound L{Op} for the output of neural-net classifiers. |
244 | 324 |
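For reference, the gradient formula implemented twice in this changeset (once in SoftmaxWithBiasDx.perform() and again in its c_code()) can be checked numerically with plain numpy. The sketch below is not part of the changeset; the helper names softmax_with_bias and softmax_dx are illustrative, and it assumes a numpy recent enough to support the keepdims argument.

import numpy

def softmax_with_bias(x, b):
    # row-wise softmax of x + b, mirroring SoftmaxWithBias.perform() above
    z = x + b
    e = numpy.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def softmax_dx(dy, sm):
    # vectorized form of the loop in SoftmaxWithBiasDx.perform():
    #   dx[i,j] = dy[i,j]*sm[i,j] - (sum_k dy[i,k]*sm[i,k]) * sm[i,j]
    return sm * (dy - (dy * sm).sum(axis=1, keepdims=True))

rng = numpy.random.RandomState(0)
x, b, dy = rng.randn(3, 5), rng.randn(5), rng.randn(3, 5)

# central finite differences of f(x) = sum(dy * softmax(x + b))
eps = 1e-6
num = numpy.zeros_like(x)
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        xp = x.copy(); xp[i, j] += eps
        xm = x.copy(); xm[i, j] -= eps
        num[i, j] = ((dy * softmax_with_bias(xp, b)).sum()
                     - (dy * softmax_with_bias(xm, b)).sum()) / (2 * eps)

print(numpy.allclose(num, softmax_dx(dy, softmax_with_bias(x, b)), atol=1e-5))

The two passes in the C implementation compute the same thing: the first loop stores dy_i[j] * sm_i[j] into dx while accumulating the row sum, and the second loop subtracts that sum times sm_i[j] from each entry.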