Mercurial > pylearn
view nnet_ops.py @ 24:2e8be9f5412b
added nnet_ops
author | bergstrj@iro.umontreal.ca |
---|---|
date | Thu, 10 Apr 2008 17:25:13 -0400 |
parents | |
children | b63e8c0bf21b |
line wrap: on
line source
import theano
from theano import tensor, gof, scalar
import numpy


class ScalarSigmoid(scalar.UnaryScalarOp):
    """Scalar logistic sigmoid: ``1 / (1 + exp(-x))``."""

    def impl(self, x):
        """Return the sigmoid of ``x``, evaluated with numpy."""
        return 1.0 / (1 + numpy.exp(-x))

    def grad(self, inputs, output_grads):
        """Return a 1-tuple with the gradient expression wrt the input.

        ``inputs`` must unpack to ``(x,)`` and ``output_grads`` to ``(gz,)``.
        """
        # Unpack in the body instead of the signature: tuple parameters are
        # Python 2 only syntax (removed by PEP 3113).
        (x,) = inputs
        (gz,) = output_grads
        # Build the sigmoid sub-expression once and reuse it in both factors.
        y = scalar_sigmoid(x)
        return gz * y * (1.0 - y),

    def c_foreach(self, inputs, outputs):
        """Return the C statement assigning sigmoid of the input to the output.

        ``inputs`` must unpack to ``(x,)`` and ``outputs`` to ``(z,)``; both
        are substituted textually into the statement (presumably C variable
        names supplied by the code generator -- confirm against the caller).
        """
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = 1.0 / (1 + exp(-%(x)s));" % {'x': x, 'z': z}


# Constructor helper for the scalar op; ScalarSigmoid.grad relies on this name.
scalar_sigmoid = gof.op.constructor(ScalarSigmoid)
Sigmoid, sigmoid, SigmoidInplace, sigmoid_inplace \
    = theano.tensor.broadcast(ScalarSigmoid, 'Sigmoid')


class CrossentropySoftmax1Hot(gof.op.Op):
    """A special compound Op for the output of neural-net classifiers.

    This Op has two outputs:
     - KL(softmax(x), y)
     - softmax(x)

    x[i] is assumed to be a dense vector
    softmax(x[i]) is the i'th distribution over len(x[i]) options
    y[i] is an integer index, encoding a 1-hot distribution
    """
    nin = 2
    nout = 2

    def __init__(self, x, y_idx, **kwargs):
        """Wrap ``x`` and ``y_idx`` as tensors and declare the two outputs.

        Extra keyword arguments are accepted and ignored.
        """
        x = tensor._as_tensor(x)
        y_idx = tensor._as_tensor(y_idx)
        # TODO: Is this correct? It used to be y, not y_idx
        #       (i.e. Tensor(x.dtype, y.broadcastable)).
        nll = tensor.Tensor(x.dtype, y_idx.broadcastable)
        sm = tensor.Tensor(x.dtype, x.broadcastable)
        self.inputs = [x, y_idx]
        self.outputs = [nll, sm]

    def perform(self):
        """Compute the row-wise softmax of ``x`` and the per-row NLL.

        Stores the negative log-likelihood vector in ``outputs[0].data`` and
        the softmax matrix in ``outputs[1].data``.
        """
        x, y_idx = [i.data for i in self.inputs]
        sm = numpy.zeros_like(x)  # softmax
        # nll(y | softmax(x)); allocated with x's dtype so the buffer agrees
        # with the Tensor(x.dtype, ...) declared for this output in __init__
        # (numpy.zeros would otherwise default to float64).
        nll = numpy.zeros(x.shape[0], dtype=x.dtype)
        for i in range(sm.shape[0]):
            # Subtract the row max before exponentiating so exp() cannot
            # overflow; the softmax value is unchanged by this shift.
            sm[i] = numpy.exp(x[i] - numpy.max(x[i]))  # softmax
            sm[i] *= 1.0 / numpy.sum(sm[i])  # vector scale
            nll[i] = -numpy.log(sm[i, y_idx[i]])  # cross-entropy
        self.outputs[0].data = nll
        self.outputs[1].data = sm

    def grad(self, inputs, output_grads):
        """Return ``(dx, None)``: the gradient wrt ``x`` and none for ``y_idx``.

        ``inputs`` must unpack to ``(x, y_idx)`` and ``output_grads`` to
        ``(g_nll, g_sm)``.

        Raises NotImplementedError if a gradient on the softmax output is
        supplied (``g_sm is not None``).
        """
        # Unpack in the body instead of the signature: tuple parameters are
        # Python 2 only syntax (removed by PEP 3113).
        (x, y_idx) = inputs
        (g_nll, g_sm) = output_grads
        if g_sm is not None:
            raise NotImplementedError()
        nll, sm = cross_entropy_softmax_1hot(x, y_idx)
        dx = CrossentropySoftmax1Hot.Dx(g_nll, sm, y_idx).outputs[0]
        return dx, None

    class Dx(gof.op.Op):
        """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
        nin = 3
        nout = 1

        def __init__(self, dy, sm, y_idx, **kwargs):
            """Wrap the three inputs as tensors and declare the ``dx`` output.

            Extra keyword arguments are accepted and ignored.
            """
            dy = tensor._as_tensor(dy)
            sm = tensor._as_tensor(sm)
            y_idx = tensor._as_tensor(y_idx)
            self.inputs = [dy, sm, y_idx]
            self.outputs = [tensor.Tensor(sm.dtype, sm.broadcastable)]

        def perform(self):
            """Compute ``dx[i] = dy[i] * (sm[i] - onehot(y_idx[i]))`` per row.

            The result is stored in ``outputs[0].data``.
            """
            dy, sm, y_idx = [i.data for i in self.inputs]
            dx = numpy.zeros_like(sm)
            for i in range(sm.shape[0]):
                dx[i] = dy[i] * sm[i]  # vector scale
                dx[i, y_idx[i]] -= dy[i]  # scalar decrement
            self.outputs[0].data = dx

        def grad(self, *args):
            """Second-order gradient is not implemented."""
            raise NotImplementedError()


cross_entropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot)

# TODO: write a version of CrossentropySoftmax1Hot that accepts a bias for x,
#       if this op needs to be faster.