comparison nnet_ops.py @ 70:76e5c0f37165

better docs & precondition testing for cross_entropy_softmax_1hot & friends
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 02 May 2008 18:19:15 -0400
parents 8c2607f387e6
children 3ef569b92fba
@@ -63,23 +63,34 @@
 #
 # TENSOR OPS
 #
 
 
-class CrossentropySoftmax1Hot(gof.op.Op):
-    """A special compound Op for the output of neural-net classifiers.
+class CrossentropySoftmax1HotWithBias(gof.op.Op):
+    """A special compound L{Op} for the output of neural-net classifiers.
 
-    This Op has two outputs:
-     - KL(softmax(x), y)
-     - softmax(x)
+    @type x: is a matrix of floats (32 or 64)
+    @type b: is a [row] vector of floats (32 or 64), length is number of cols in x
+    @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x
 
-    x[i] is assumed to be a dense vector
+    @precondition: every entry in y_idx is a valid (non-negative) column index into x
+
+    This L{Op} has two outputs:
+     - KL(softmax(x+b), y)
+     - softmax(x+b)
+
+
     softmax(x[i]) is the i'th distribution over len(x[i]) options
-    y[i] is an integer index, encoding a 1-hot distribution
+
+    y_idx[i] is an integer index, encoding a 1-hot distribution.
+
+    In practice, when we're trying to do classification, we have one row in x
+    and y_idx per example, and y[i] is the index of the (correct) class of the
+    i'th example.
 
     """
-    nin=2
+    nin=3
     nout=2
     def __init__(self, x, b, y_idx, **kwargs):
        x = tensor._as_tensor(x)
        b = tensor._as_tensor(b)
        y_idx = tensor._as_tensor(y_idx)
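The rewritten docstring pins down exactly what the two outputs are. As a point of reference, here is a minimal numpy sketch of that math, with a row-wise max-shift for numerical stability (an illustration of the formulas only, not the Op's actual implementation; the function name is ours):

    import numpy as np

    def softmax_1hot_nll(x, b, y_idx):
        # softmax(x + b), computed row by row; the max-shift keeps exp() in range
        a = x + b
        a = a - a.max(axis=1, keepdims=True)
        sm = np.exp(a)
        sm /= sm.sum(axis=1, keepdims=True)
        # against a 1-hot target, KL(softmax(x+b), y) reduces to -log sm[i, y_idx[i]]
        nll = -np.log(sm[np.arange(x.shape[0]), y_idx])
        return nll, sm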
@@ -100,11 +111,13 @@
         self.inputs = [x, b, y_idx]
         self.outputs = [nll, sm]
     def perform(self):
         x, b, y_idx = [i.data for i in self.inputs]
         if b.shape[0] != x.shape[1]:
-            raise ValueError('b must have same shape as x[0]')
+            raise ValueError('b must have same number of columns as x')
+        if y_idx.shape[0] != x.shape[0]:
+            raise ValueError('y_idx must have same number of rows as x')
 
         sm = numpy.zeros_like(x) # softmax
         nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x))
         for i in xrange(sm.shape[0]):
             row = x[i] + b
@@ -114,12 +127,12 @@
         self.outputs[0].data = nll
         self.outputs[1].data = sm
     def grad(self, (x, b, y_idx), (g_nll, g_sm)):
         if g_sm is not None:
             raise NotImplementedError()
-        nll, sm = crossentropy_softmax_1hot(x, b, y_idx)
-        dx = CrossentropySoftmax1HotDx(g_nll, sm, y_idx).outputs[0]
+        nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
+        dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0]
         db = tensor.Sum(dx, axis = [0]).outputs[0]
         return dx, db, None
 
     def c_headers(self): return ['<iostream>']
     def c_code(self, (x, b, y_idx), (nll, sm), sub):
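The grad method delegates the input gradient to CrossentropySoftmax1HotWithBiasDx and gets the bias gradient by summing dx over rows. The underlying math is the standard softmax/cross-entropy gradient, sketched here in numpy (our naming, for illustration); the subtraction at the target index is what the C line `dx_i[y_i * Sdx] -= dnll_i;` further down performs:

    import numpy as np

    def crossentropy_softmax_grads(g_nll, sm, y_idx):
        # d nll[i] / d x[i, j] = sm[i, j] - 1{j == y_idx[i]}, scaled by g_nll[i]
        dx = sm * g_nll[:, None]
        dx[np.arange(dx.shape[0]), y_idx] -= g_nll
        # db mirrors tensor.Sum(dx, axis=[0]): the bias is shared by every row
        db = dx.sum(axis=0)
        return dx, db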
@@ -266,13 +279,14 @@
             //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate?
             //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum);
         }
         """ % dict(locals(), **sub)
 
-crossentropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot)
-
-class CrossentropySoftmax1HotDx (gof.op.Op):
+crossentropy_softmax_1hot_with_bias = \
+        gof.op.constructor(CrossentropySoftmax1HotWithBias)
+
+class CrossentropySoftmax1HotWithBiasDx (gof.op.Op):
     nin=3
     nout=1
     """Gradient wrt x of the CrossentropySoftmax1Hot Op"""
     def __init__(self, dy, sm, y_idx,**kwargs):
         dy = tensor._as_tensor(dy)
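Both ops are exposed through gof.op.constructor. The gof internals are not part of this changeset; judging from the call sites (crossentropy_softmax_1hot_with_bias(x, b, y_idx) yields the pair nll, sm, while grad reaches into .outputs[0]), the helper plausibly behaves like the sketch below. This is an assumption about gof, not its actual code:

    def constructor(op_class):
        # hypothetical: instantiate the Op and hand back its output(s)
        def call(*inputs, **kwargs):
            op = op_class(*inputs, **kwargs)
            return op.outputs if len(op.outputs) > 1 else op.outputs[0]
        return call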
@@ -346,5 +360,9 @@
             }
             dx_i[y_i * Sdx] -= dnll_i;
         }
         """ % dict(locals(), **sub)
 
+def crossentropy_softmax_1hot(x, y_idx, **kwargs):
+    b = tensor.zeros_like(x[0,:])
+    return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs)
+
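The new crossentropy_softmax_1hot keeps the old two-argument interface working by filling in a zero bias of the right length (tensor.zeros_like(x[0,:]) is one zero per column of x). A quick way to sanity-check the pieces end to end is a finite-difference test against the numpy sketches above (again purely illustrative, reusing softmax_1hot_nll and crossentropy_softmax_grads):

    import numpy as np

    rng = np.random.RandomState(0)
    x = rng.randn(4, 3)
    b = rng.randn(3)
    y = np.array([0, 2, 1, 2])

    nll, sm = softmax_1hot_nll(x, b, y)
    # gradient of nll.sum() wrt x: feed an all-ones upstream gradient
    dx, db = crossentropy_softmax_grads(np.ones_like(nll), sm, y)

    eps = 1e-6
    num = np.zeros_like(x)
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            xp = x.copy()
            xp[i, j] += eps
            num[i, j] = (softmax_1hot_nll(xp, b, y)[0].sum() - nll.sum()) / eps
    assert np.allclose(dx, num, atol=1e-4)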