diff nnet_ops.py @ 70:76e5c0f37165
better docs & precondition testing for cross_entropy_softmax_1hot & friends
author:   James Bergstra <bergstrj@iro.umontreal.ca>
date:     Fri, 02 May 2008 18:19:15 -0400
parents:  8c2607f387e6
children: 3ef569b92fba
--- a/nnet_ops.py	Mon Apr 21 15:23:49 2008 -0400
+++ b/nnet_ops.py	Fri May 02 18:19:15 2008 -0400
@@ -65,19 +65,30 @@
 #
-class CrossentropySoftmax1Hot(gof.op.Op):
-    """A special compound Op for the output of neural-net classifiers.
+class CrossentropySoftmax1HotWithBias(gof.op.Op):
+    """A special compound L{Op} for the output of neural-net classifiers.
+
+    @type x: is a matrix of floats (32 or 64)
+    @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
+    @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
+
+    @precondition: every entry in y_idx is a valid (non-negative) column index into x
 
-    This Op has two outputs:
-     - KL(softmax(x), y)
-     - softmax(x)
+    This L{Op} has two outputs:
+     - KL(softmax(x+b), y)
+     - softmax(x+b)
 
-    x[i] is assumed to be a dense vector
+    softmax(x[i]) is the i'th distribution over len(x[i]) options
-    y[i] is an integer index, encoding a 1-hot distribution
+
+    y_idx[i] is an integer index, encoding a 1-hot distribution.
+
+    In practice, when we're trying to do classification, we have one row in x
+    and y_idx per example, and y[i] is the index of the (correct) class of the
+    i'th example.
 
     """
-    nin=2
+    nin=3
    nout=2
     def __init__(self, x, b, y_idx, **kwargs):
         x = tensor._as_tensor(x)
@@ -102,7 +113,9 @@
     def perform(self):
         x, b, y_idx = [i.data for i in self.inputs]
         if b.shape[0] != x.shape[1]:
-            raise ValueError('b must have same shape as x[0]')
+            raise ValueError('b must have same number of columns as x')
+        if y_idx.shape[0] != x.shape[0]:
+            raise ValueError('y_idx must have same number of rows as x')
 
         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
@@ -116,8 +129,8 @@
     def grad(self, (x, b, y_idx), (g_nll, g_sm)):
         if g_sm is not None:
             raise NotImplementedError()
-        nll, sm = crossentropy_softmax_1hot(x, b, y_idx)
-        dx = CrossentropySoftmax1HotDx(g_nll, sm, y_idx).outputs[0]
+        nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
+        dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0]
         db = tensor.Sum(dx, axis = [0]).outputs[0]
         return dx, db, None
@@ -268,9 +281,10 @@
         }
         """ % dict(locals(), **sub)
 
-crossentropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot)
+crossentropy_softmax_1hot_with_bias = \
+        gof.op.constructor(CrossentropySoftmax1HotWithBias)
 
-class CrossentropySoftmax1HotDx (gof.op.Op):
+class CrossentropySoftmax1HotWithBiasDx (gof.op.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
@@ -348,3 +362,7 @@
         }
         """ % dict(locals(), **sub)
 
+def crossentropy_softmax_1hot(x, y_idx, **kwargs):
+    b = tensor.zeros_like(x[0,:])
+    return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
+
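
For readers who want the semantics without decoding the diff: because y is a 1-hot
distribution, KL(softmax(x+b), y) reduces to the negative log-likelihood of the
correct class, -log(softmax(x+b)[i, y_idx[i]]). Below is a minimal plain-numpy
sketch of what the fused op computes; the _np-suffixed name is illustrative only
(the real Op also carries C implementations), and the max-subtraction is the usual
stabilization trick, assumed rather than quoted from this changeset.

    import numpy

    def crossentropy_softmax_1hot_with_bias_np(x, b, y_idx):
        """Illustrative numpy sketch of the Op's two outputs, (nll, sm)."""
        # preconditions mirrored from the Op's perform()
        if b.shape[0] != x.shape[1]:
            raise ValueError('b must have same number of columns as x')
        if y_idx.shape[0] != x.shape[0]:
            raise ValueError('y_idx must have same number of rows as x')
        z = x + b                              # row-vector bias broadcasts over rows
        z = z - z.max(axis=1, keepdims=True)   # shift each row for numerical stability
        e = numpy.exp(z)
        sm = e / e.sum(axis=1, keepdims=True)  # row-wise softmax(x+b)
        # 1-hot target: KL(sm[i], y[i]) == -log(sm[i, y_idx[i]])
        nll = -numpy.log(sm[numpy.arange(x.shape[0]), y_idx])
        return nll, sm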
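The grad() method builds CrossentropySoftmax1HotWithBiasDx for dx and gets db by
summing dx over rows (tensor.Sum(dx, axis=[0])), since the bias enters every
example identically. For 1-hot targets the gradient has the familiar closed form
d nll[i] / d x[i] = sm[i] - onehot(y_idx[i]), scaled by the incoming gradient
g_nll[i]. A hedged numpy sketch of that rule (again, the helper name is
hypothetical, not part of pylearn):

    def crossentropy_softmax_1hot_with_bias_dx_np(g_nll, sm, y_idx):
        """Illustrative sketch: dx[i] = g_nll[i] * (sm[i] - onehot(y_idx[i]))."""
        dx = sm.copy()
        dx[numpy.arange(sm.shape[0]), y_idx] -= 1.0  # subtract the 1-hot target
        dx *= g_nll[:, None]                         # chain rule: scale each row
        return dx

    # db then follows by summing over examples, matching grad() above:
    # db = dx.sum(axis=0)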