# HG changeset patch # User James Bergstra # Date 1209766755 14400 # Node ID 76e5c0f371651b9053c7389b532402d1d8c8dcd0 # Parent 8c2607f387e61ec543df1bbc758ef73cb99a17d5 better docs & precondition testing for cross_entropy_softmax_1hot & friends diff -r 8c2607f387e6 -r 76e5c0f37165 _nnet_ops.py --- a/_nnet_ops.py Mon Apr 21 15:23:49 2008 -0400 +++ b/_nnet_ops.py Fri May 02 18:19:15 2008 -0400 @@ -23,10 +23,16 @@ def test0(self): y_idx = [0,1,3] def output1(a,b): - return crossentropy_softmax_1hot(a, b, y_idx)[0:1] + return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0:1] TT.verify_grad(self, output1, [numpy.random.rand(3,4), numpy.random.rand(4)]) + def test1(self): + y_idx = [0,1,3] + def output1(a): + return crossentropy_softmax_1hot(a, y_idx)[0:1] + TT.verify_grad(self, output1, [numpy.random.rand(3,4)]) + if __name__ == '__main__': diff -r 8c2607f387e6 -r 76e5c0f37165 nnet_ops.py --- a/nnet_ops.py Mon Apr 21 15:23:49 2008 -0400 +++ b/nnet_ops.py Fri May 02 18:19:15 2008 -0400 @@ -65,19 +65,30 @@ # -class CrossentropySoftmax1Hot(gof.op.Op): - """A special compound Op for the output of neural-net classifiers. +class CrossentropySoftmax1HotWithBias(gof.op.Op): + """A special compound L{Op} for the output of neural-net classifiers. + + @type x: is a matrix of floats (32 or 64) + @type b: is a [row] vector of floats (32 or 64), length is number of cols in x + @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x + + @precondition: every entry in y_idx is a valid (non-negative) column index into x - This Op has two outputs: - - KL(softmax(x), y) - - softmax(x) + This L{Op} has two outputs: + - KL(softmax(x+b), y) + - softmax(x+b) - x[i] is assumed to be a dense vector + softmax(x[i]) is the i'th distribution over len(x[i]) options - y[i] is an integer index, encoding a 1-hot distribution + + y_idx[i] is an integer index, encoding a 1-hot distribution. 
+ + In practice, when we're trying to do classification, we have one row in x + and y_idx per example, and y_idx[i] is the index of the (correct) class of the + i'th example. """ - nin=2 + nin=3 nout=2 def __init__(self, x, b, y_idx, **kwargs): x = tensor._as_tensor(x) @@ -102,7 +113,9 @@ def perform(self): x, b, y_idx = [i.data for i in self.inputs] if b.shape[0] != x.shape[1]: - raise ValueError('b must have same shape as x[0]') + raise ValueError('b must have same number of columns as x') + if y_idx.shape[0] != x.shape[0]: + raise ValueError('y_idx must have same number of rows as x') sm = numpy.zeros_like(x) # softmax nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x)) @@ -116,8 +129,8 @@ def grad(self, (x, b, y_idx), (g_nll, g_sm)): if g_sm is not None: raise NotImplementedError() - nll, sm = crossentropy_softmax_1hot(x, b, y_idx) - dx = CrossentropySoftmax1HotDx(g_nll, sm, y_idx).outputs[0] + nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx) + dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0] db = tensor.Sum(dx, axis = [0]).outputs[0] return dx, db, None @@ -268,9 +281,10 @@ } """ % dict(locals(), **sub) -crossentropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot) +crossentropy_softmax_1hot_with_bias = \ + gof.op.constructor(CrossentropySoftmax1HotWithBias) -class CrossentropySoftmax1HotDx (gof.op.Op): +class CrossentropySoftmax1HotWithBiasDx (gof.op.Op): nin=3 nout=1 """Gradient wrt x of the CrossentropySoftmax1Hot Op""" @@ -348,3 +362,7 @@ } """ % dict(locals(), **sub) +def crossentropy_softmax_1hot(x, y_idx, **kwargs): + b = tensor.zeros_like(x[0,:]) + return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs) +