Mercurial > pylearn
view pylearn/algorithms/sandbox/cost.py @ 1498:0f326860210e
Merged
author | Olivier Delalleau <delallea@iro> |
---|---|
date | Thu, 01 Sep 2011 13:35:15 -0400 |
parents | 576eb7f77c35 |
children |
line wrap: on
line source
"""Cost functions.

@note: All of these functions return one cost per example. So it is your
job to perform a tensor.sum over the individual example losses.
"""

import theano as T
from theano import tensor, scalar
from theano import gof
import numpy


class UndefinedGradient(Exception):
    """
    Raised by UndefinedGradientOp to indicate that the gradient is
    undefined mathematically.
    """
    pass


class UndefinedGradientOp(gof.Op):
    """Op whose evaluation signals a mathematically undefined gradient."""

    def perform(self, x=None):
        # BUGFIX: the original tested ``if x is not None`` but raised the
        # identical exception in both branches; a single unconditional
        # raise is equivalent and removes the dead branch.
        raise UndefinedGradient(x)

undefined_gradient = UndefinedGradientOp()


class LogFactorial(scalar.UnaryScalarOp):
    """
    Compute log x!.

    @todo: Rewrite so that it uses INTs not FLOATs.
    @todo: Move this to Theano.
    @todo: This function is slow, probably want to cache the values.
    """

    @staticmethod
    def st_impl(x):
        # log 0! = log 1! = 0; for x >= 2, log x! = sum_{i=2..x} log i.
        if not isinstance(x, (int, long)):  # Python 2: accept int and long
            raise TypeError('type(x) = %s, must be int or long' % type(x))
        v = 0.0
        # BUGFIX: the original loop accumulated numpy.log(x) over range(x),
        # which computes x*log(x), NOT log(x!).  Summing log(i) for
        # i = 2..x gives the correct log-factorial; the empty range also
        # covers x in {0, 1} (and non-positive x) by returning 0.0, so the
        # original early-return for x == 0 is subsumed.
        for i in range(2, x + 1):
            v += numpy.log(i)
        return v

    def impl(self, x):
        return LogFactorial.st_impl(x)

    def grad(self, inputs, output_gradients):
        # log x! is only defined on integer counts, so there is no
        # meaningful derivative; building the gradient graph routes
        # through an op that raises UndefinedGradient when evaluated.
        # (Positional (inputs, grads) matches Theano's Op.grad protocol,
        # replacing the py2-only tuple-unpacking parameter syntax.)
        undefined_gradient(self)

# BUGFIX: op name string had a typo ('scalar_logfactoral').
scalar_logfactorial = LogFactorial(scalar.upgrade_to_float,
                                   name='scalar_logfactorial')
logfactorial = tensor.Elemwise(scalar_logfactorial, name='logfactorial')


def poissonlambda(unscaled_output, doclen, beta_scale):
    """
    A continuous parameter lambda_i which is the expected number of
    occurrences of word i in the document. Note how this must be
    positive, and that is why Ranzato and Szummer (2008) use an
    exponential.

    Yoshua: I don't like exponentials to guarantee positivity. softplus
    is numerically much better behaved (but you might want to try both
    to see if it makes a difference).

    @todo: Maybe there are more sensible ways to set the beta_scale.
    """
    beta = beta_scale * doclen
    return beta * tensor.exp(unscaled_output)


def nlpoisson(target, output, beta_scale=1, axis=0, sumloss=True,
              zerothreshold=0):
    """
    The negative log Poisson regression probability. From Ranzato and
    Szummer (2008).

    Output should be of the form Weight*code+bias, i.e. unsquashed.
    NB this is different than the formulation in Salakhutdinov and
    Hinton (2007), in which the output is softmax'ed and multiplied by
    the input document length. That is also what Welling et. al (2005)
    do. It would be useful to try the softmax, because it is more
    well-behaved.

    There is a beta term that is proportional to document length. We are
    not sure what beta scale is used by the authors. We use 1 as the
    default, but this value might be inappropriate. For numerical
    reasons, Yoshua recommends choosing beta such that the lambda is
    expected to be around 1 for words that have a non-zero count. So he
    would take:
        beta = document_size / unique_words_per_document
    I am not sure the above math is correct, I need to talk to him.

    Yoshua notes that ``there is a x_i log(beta) term missing, if you
    compare with eqn 2 (i.e., take the log). They did not include in 3
    because it does not depend on the parameters, so the gradient wrt it
    would be 0. But if you really want log-likelihood it should be
    included.'' If you want a true log-likelihood, you probably should
    actually compute the derivative of the entire eqn 2.

    Axis is the axis along which we sum the target values, to obtain the
    document length.

    If sumloss, we sum the loss along axis.

    If zerothreshold is non-zero, we threshold the loss: If this target
    dimension is zero and beta * tensor.exp(output) < zerothreshold, let
    this loss be zero.

    @todo: Include logfactorial term
    """
    # @todo: make sure target's dtype is int32 or int64 before summing.
    doclen = tensor.sum(target, axis=axis)
    lambdav = poissonlambda(output, doclen, beta_scale)
    lossterms = lambdav - target * output
    if zerothreshold:
        # BUGFIX: zerothreshold was documented above but ignored by the
        # original body.  Zero out the loss where the target count is
        # zero and lambda is already below the threshold.  Guarded by a
        # Python-level ``if``, so the default (zerothreshold=0) path is
        # behaviorally identical to the original.
        zeromask = tensor.and_(tensor.eq(target, 0),
                               tensor.lt(lambdav, zerothreshold))
        lossterms = tensor.switch(zeromask, 0.0, lossterms)
    if sumloss:
        return tensor.sum(lossterms, axis=axis)
    else:
        return lossterms