pylearn: comparison nnet_ops.py @ 70:76e5c0f37165
better docs & precondition testing for cross_entropy_softmax_1hot & friends
author | James Bergstra <bergstrj@iro.umontreal.ca> |
date | Fri, 02 May 2008 18:19:15 -0400 |
parents | 8c2607f387e6 |
children | 3ef569b92fba |
69:8c2607f387e6 | 70:76e5c0f37165 |
---|---|
63 # | 63 # |
64 # TENSOR OPS | 64 # TENSOR OPS |
65 # | 65 # |
66 | 66 |
67 | 67 |
68 class CrossentropySoftmax1Hot(gof.op.Op): | 68 class CrossentropySoftmax1HotWithBias(gof.op.Op): |
69 """A special compound Op for the output of neural-net classifiers. | 69 """A special compound L{Op} for the output of neural-net classifiers. |
70 | 70 |
71 This Op has two outputs: | 71 @type x: is a matrix of floats (32 or 64) |
72 - KL(softmax(x), y) | 72 @type b: is a [row] vector of floats (32 or 64), length is number of cols in x |
73 - softmax(x) | 73 @type y_idx: a [column] vector of int (32 or 64), length is number of rows in x |
74 | 74 |
75 x[i] is assumed to be a dense vector | 75 @precondition: every entry in y_idx is a valid (non-negative) column index into x |
| 76 |
| 77 This L{Op} has two outputs: |
| 78 - KL(softmax(x+b), y) |
| 79 - softmax(x+b) |
| 80 |
| 81 |
76 softmax(x[i]) is the i'th distribution over len(x[i]) options | 82 softmax(x[i]) is the i'th distribution over len(x[i]) options |
77 y[i] is an integer index, encoding a 1-hot distribution | 83 |
| 84 y_idx[i] is an integer index, encoding a 1-hot distribution. |
| 85 |
| 86 In practice, when we're trying to do classification, we have one row in x |
| 87 and y_idx per example, and y[i] is the index of the (correct) class of the |
| 88 i'th example. |
78 | 89 |
79 """ | 90 """ |
80 nin=2 | 91 nin=3 |
81 nout=2 | 92 nout=2 |
82 def __init__(self, x, b, y_idx, **kwargs): | 93 def __init__(self, x, b, y_idx, **kwargs): |
83 x = tensor._as_tensor(x) | 94 x = tensor._as_tensor(x) |
84 b = tensor._as_tensor(b) | 95 b = tensor._as_tensor(b) |
85 y_idx = tensor._as_tensor(y_idx) | 96 y_idx = tensor._as_tensor(y_idx) |
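The docstring in this hunk spells out the op's contract: x is a matrix with one row per example, b is a bias row vector, y_idx holds the correct class index for each row, and the two outputs are the per-row NLL and softmax(x+b). As a rough reference (not part of the diff; the function name and NumPy details are mine), the same computation in plain NumPy looks roughly like this:

```python
import numpy

def crossentropy_softmax_1hot_with_bias_ref(x, b, y_idx):
    """Hypothetical NumPy sketch of the op's two outputs: (nll, softmax(x + b))."""
    x = numpy.asarray(x, dtype=float)        # matrix: one row per example
    b = numpy.asarray(b, dtype=float)        # row vector: one bias per column of x
    y_idx = numpy.asarray(y_idx, dtype=int)  # correct class index for each row
    # preconditions mirrored from perform() and the docstring
    assert b.shape[0] == x.shape[1]
    assert y_idx.shape[0] == x.shape[0]
    assert ((0 <= y_idx) & (y_idx < x.shape[1])).all()
    sm = numpy.zeros_like(x)                 # softmax(x + b), row by row
    nll = numpy.zeros(x.shape[0])            # -log softmax(x + b)[i, y_idx[i]]
    for i in range(x.shape[0]):
        row = x[i] + b
        e = numpy.exp(row - row.max())       # shift by the row max for stability
        sm[i] = e / e.sum()
        nll[i] = -numpy.log(sm[i, y_idx[i]])
    return nll, sm
```

With a 1-hot target, the cross-entropy mentioned in the docstring reduces to minus the log-probability assigned to the correct class, which is exactly what perform() stores in nll.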
100 self.inputs = [x, b, y_idx] | 111 self.inputs = [x, b, y_idx] |
101 self.outputs = [nll, sm] | 112 self.outputs = [nll, sm] |
102 def perform(self): | 113 def perform(self): |
103 x, b, y_idx = [i.data for i in self.inputs] | 114 x, b, y_idx = [i.data for i in self.inputs] |
104 if b.shape[0] != x.shape[1]: | 115 if b.shape[0] != x.shape[1]: |
105 raise ValueError('b must have same shape as x[0]') | 116 raise ValueError('b must have same number of columns as x') |
| 117 if y_idx.shape[0] != x.shape[0]: |
| 118 raise ValueError('y_idx must have same number of rows as x') |
106 | 119 |
107 sm = numpy.zeros_like(x) # softmax | 120 sm = numpy.zeros_like(x) # softmax |
108 nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x)) | 121 nll = numpy.zeros(x.shape[0]) #nll(y | softmax(x)) |
109 for i in xrange(sm.shape[0]): | 122 for i in xrange(sm.shape[0]): |
110 row = x[i] + b | 123 row = x[i] + b |
114 self.outputs[0].data = nll | 127 self.outputs[0].data = nll |
115 self.outputs[1].data = sm | 128 self.outputs[1].data = sm |
116 def grad(self, (x, b, y_idx), (g_nll, g_sm)): | 129 def grad(self, (x, b, y_idx), (g_nll, g_sm)): |
117 if g_sm is not None: | 130 if g_sm is not None: |
118 raise NotImplementedError() | 131 raise NotImplementedError() |
119 nll, sm = crossentropy_softmax_1hot(x, b, y_idx) | 132 nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx) |
120 dx = CrossentropySoftmax1HotDx(g_nll, sm, y_idx).outputs[0] | 133 dx = CrossentropySoftmax1HotWithBiasDx(g_nll, sm, y_idx).outputs[0] |
121 db = tensor.Sum(dx, axis = [0]).outputs[0] | 134 db = tensor.Sum(dx, axis = [0]).outputs[0] |
122 return dx, db, None | 135 return dx, db, None |
123 | 136 |
124 def c_headers(self): return ['<iostream>'] | 137 def c_headers(self): return ['<iostream>'] |
125 def c_code(self, (x, b, y_idx), (nll, sm), sub): | 138 def c_code(self, (x, b, y_idx), (nll, sm), sub): |
266 //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate? | 279 //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate? |
267 //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum); | 280 //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum); |
268 } | 281 } |
269 """ % dict(locals(), **sub) | 282 """ % dict(locals(), **sub) |
270 | 283 |
271 crossentropy_softmax_1hot = gof.op.constructor(CrossentropySoftmax1Hot) | 284 crossentropy_softmax_1hot_with_bias = \ |
272 | 285 gof.op.constructor(CrossentropySoftmax1HotWithBias) |
273 class CrossentropySoftmax1HotDx (gof.op.Op): | 286 |
| 287 class CrossentropySoftmax1HotWithBiasDx (gof.op.Op): |
274 nin=3 | 288 nin=3 |
275 nout=1 | 289 nout=1 |
276 """Gradient wrt x of the CrossentropySoftmax1Hot Op""" | 290 """Gradient wrt x of the CrossentropySoftmax1Hot Op""" |
277 def __init__(self, dy, sm, y_idx,**kwargs): | 291 def __init__(self, dy, sm, y_idx,**kwargs): |
278 dy = tensor._as_tensor(dy) | 292 dy = tensor._as_tensor(dy) |
346 } | 360 } |
347 dx_i[y_i * Sdx] -= dnll_i; | 361 dx_i[y_i * Sdx] -= dnll_i; |
348 } | 362 } |
349 """ % dict(locals(), **sub) | 363 """ % dict(locals(), **sub) |
350 | 364 |
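For reference, the gradient wrt x that the Dx op's C code computes (scale each softmax row by the incoming gradient, then `dx_i[y_i * Sdx] -= dnll_i` subtracts it at the target column) can be written in a few lines of NumPy; the function name below is hypothetical and only meant to make the rule explicit. db in grad() above is then the column-wise sum of this dx.

```python
import numpy

def crossentropy_softmax_1hot_with_bias_dx_ref(dnll, sm, y_idx):
    """Hypothetical NumPy sketch of the Dx op: dx[i] = dnll[i] * (sm[i] - 1hot(y_idx[i]))."""
    dnll = numpy.asarray(dnll, dtype=float)   # incoming gradient on the nll output
    sm = numpy.asarray(sm, dtype=float)       # softmax(x + b) from the forward pass
    y_idx = numpy.asarray(y_idx, dtype=int)
    dx = dnll[:, None] * sm                          # scale each softmax row
    dx[numpy.arange(dx.shape[0]), y_idx] -= dnll     # subtract at the correct class
    return dx

# db = dx.sum(axis=0)   # the tensor.Sum(dx, axis=[0]) in grad()
```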
| 365 def crossentropy_softmax_1hot(x, y_idx, **kwargs): |
| 366 b = tensor.zeros_like(x[0,:]) |
| 367 return crossentropy_softmax_1hot_with_bias(x, b, y_idx, **kwargs) |
| 368 |
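The new crossentropy_softmax_1hot helper (new lines 365-367) simply passes a zero bias to the with-bias op, so the no-bias case needs no Op of its own. A tiny NumPy check of that reduction, with made-up data values purely for illustration:

```python
import numpy

x = numpy.array([[1.0, 2.0, 0.5],
                 [0.1, 0.2, 3.0]])       # two examples, three classes (made-up values)
y_idx = numpy.array([1, 2])              # correct class per row
b = numpy.zeros_like(x[0, :])            # what the new wrapper passes for the bias

row = x + b                              # with b == 0 this is just x
e = numpy.exp(row - row.max(axis=1)[:, None])
sm = e / e.sum(axis=1)[:, None]          # softmax, row by row
nll = -numpy.log(sm[numpy.arange(x.shape[0]), y_idx])
```

Since b is all zeros, softmax(x + b) is just softmax(x), so the wrapper and the with-bias op agree on both outputs.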