pylearn: comparison of nnet_ops.py @ 440:18dbc1c11647
Work on softmax operators

author   | Pascal Lamblin <lamblinp@iro.umontreal.ca> |
date     | Thu, 21 Aug 2008 13:55:16 -0400 |
parents  | 43d9aa93934e |
children | b3315b252824 |
433:200a5b0e24ea | 440:18dbc1c11647 |
63 ############ | 63 ############ |
64 # | 64 # |
65 # TENSOR OPS | 65 # TENSOR OPS |
66 # | 66 # |
67 | 67 |
68 | |
69 class SoftmaxWithBias(theano.Op): | |
70 """ | |
71 An L{Op} for the output of neural-net multiclass classifiers. | |
72 | |
73 @type x: a matrix of floats (32 or 64) | |
74 @type b: a [row] vector of floats (32 or 64); its length is the number of cols in x | |
75 | |
76 This L{Op}'s output is softmax(x+b). | |
77 softmax(x[i]) is the i'th distribution over len(x[i]) options. | |
78 """ | |
79 | |
80 nin = 2 | |
81 nout = 1 | |
82 def __init__(self, **kwargs): | |
83 theano.Op.__init__(self, **kwargs) | |
84 | |
85 def make_node(self, x, b): | |
86 x = tensor.as_tensor(x) | |
87 b = tensor.as_tensor(b) | |
88 if x.type.ndim != 2 \ | |
89 or x.type.dtype not in ['float32', 'float64']: | |
90 raise ValueError('x must be 2-d tensor of floats') | |
91 if b.type.ndim != 1 \ | |
92 or b.type.dtype not in ['float32', 'float64']: | |
93 raise ValueError('b must be 1-d tensor of floats') | |
94 | |
95 sm = x.type.make_result() | |
96 return theano.Apply(self, [x, b], [sm]) | |
97 | |
98 def perform(self, node, input_storage, output_storage): | |
99 x, b = input_storage | |
100 if b.shape[0] != x.shape[1]: | |
101 raise ValueError('b must have the same length as the number of columns in x') | |
102 | |
103 sm = numpy.zeros_like(x) | |
104 for i in xrange(sm.shape[0]): | |
105 row = x[i] + b | |
106 sm[i] = numpy.exp(row - numpy.max(row)) | |
107 sm[i] *= 1.0 / numpy.sum(sm[i]) | |
108 output_storage[0][0] = sm | |
109 | |
110 def grad(self, (x, b), (g_sm,)): | |
111 sm = softmax_with_bias(x, b) | |
112 dx = SoftmaxWithBiasDx()(g_sm, sm) | |
113 db = tensor.sum(dx, axis = 0) | |
114 return dx, db | |
115 | |
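SoftmaxWithBiasDx is defined further down in this module and is not shown in this hunk. As a rough guide to what grad relies on, here is a minimal NumPy sketch, with an illustrative name, of the row-wise softmax vector-Jacobian product such an Op has to compute; db then follows as dx.sum(axis=0), matching the tensor.sum(dx, axis=0) above:

    import numpy

    def softmax_dx_sketch(g_sm, sm):
        # VJP of a row-wise softmax: for each row i,
        #   dx[i] = sm[i] * (g_sm[i] - <g_sm[i], sm[i]>)
        inner = (g_sm * sm).sum(axis=1).reshape(-1, 1)
        return sm * (g_sm - inner)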
116 def c_headers(self): | |
117 return ['<iostream>'] | |
118 | |
119 @staticmethod | |
120 def c_code_template(): | |
121 # this implementation was lifted from | |
122 # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx | |
123 | |
124 #TODO: put this into a templated function, in the support code | |
125 #TODO: declare the max of each row as an Op output | |
126 | |
127 #TODO: set error messages for failures in this code | |
128 | |
129 #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] | |
130 init_decl = """ | |
131 npy_intp* Nx = %(x)s->dimensions; | |
132 | |
133 if (%(x)s->nd != 2) | |
134 { | |
135 PyErr_SetString(PyExc_ValueError, "x not 2d tensor"); | |
136 %(fail)s; | |
137 } | |
138 if (%(b)s->nd != 1) | |
139 { | |
140 PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); | |
141 %(fail)s; | |
142 } | |
143 if (%(x)s->descr->type_num != PyArray_DOUBLE) | |
144 { | |
145 PyErr_SetString(PyExc_TypeError, "x not float64"); | |
146 %(fail)s; | |
147 } | |
148 if (%(b)s->descr->type_num != PyArray_DOUBLE) | |
149 { | |
150 PyErr_SetString(PyExc_TypeError, "b not float64"); | |
151 %(fail)s; | |
152 } | |
153 if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])) | |
154 { | |
155 PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); | |
156 %(fail)s; | |
157 } | |
158 | |
159 if ((NULL == %(sm)s) | |
160 || (%(sm)s->dimensions[0] != %(x)s->dimensions[0]) | |
161 || (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) | |
162 { | |
163 if (NULL != %(sm)s) Py_XDECREF(%(sm)s); | |
164 %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s); | |
165 if(!%(sm)s) { | |
166 // sm is this Op's only output, so there is | |
167 // nothing else to clean up on failure. | |
168 PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); | |
169 %(fail)s | |
170 } | |
171 } | |
172 """ | |
173 | |
174 begin_row_loop = """ | |
175 for (size_t i = 0; i < Nx[0]; ++i) | |
176 { | |
177 size_t j; | |
178 double sum = 0.0; | |
179 | |
180 | |
181 const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); | |
182 const double* __restrict__ b_i = (double*)(%(b)s->data); | |
183 double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); | |
184 """ | |
185 | |
186 inside_row_loop = """ | |
187 npy_intp Sx = %(x)s->strides[1]/sizeof(double); | |
188 npy_intp Sb = %(b)s->strides[0]/sizeof(double); | |
189 npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); | |
190 | |
191 size_t row_max_j=0; | |
192 double row_max = x_i[0] + b_i[0]; | |
193 // Get the maximum value of the row | |
194 for (j = 0; j < Nx[1]; ++j) | |
195 { | |
196 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
197 row_max_j = (row_ij > row_max) ? j : row_max_j; | |
198 row_max = (row_ij > row_max) ? row_ij : row_max; | |
199 } | |
200 | |
201 for (j = 0; j < Nx[1]; ++j) | |
202 { | |
203 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
204 double sm_ij = exp(row_ij - row_max); | |
205 sum += sm_ij; | |
206 sm_i[j * Ssm] = sm_ij; | |
207 } | |
208 if ( (0.0 == sum) || (isinf(sum))) | |
209 { | |
210 //that was our best... | |
211 %(fail)s; | |
212 } | |
213 | |
214 //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); | |
215 double sum_inv = 1.0 / sum; | |
216 for (j = 0; j < Nx[1]; ++j) | |
217 { | |
218 sm_i[j * Ssm] *= sum_inv; | |
219 } | |
220 | |
221 // NB: no y_i bounds check here: y_i exists only | |
222 // in the crossentropy op's template, and this | |
223 // shared block must also compile for | |
224 // SoftmaxWithBias, which has no y_idx input. | |
225 """ | |
226 | |
227 end_row_loop = """ | |
228 } | |
229 """ | |
230 | |
231 return (init_decl, begin_row_loop, inside_row_loop, end_row_loop) | |
232 | |
233 | |
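The four strings returned above are Python %-format templates; Theano's C code generator later substitutes the C names of the inputs, the outputs, and the failure handler. A minimal sketch of that substitution step, using made-up variable names, also shows why a literal '%' inside the C code would have to be written as '%%':

    # Hypothetical names: 'V3', 'V5', 'V1' and the fail snippet stand in
    # for whatever the compiler actually generates.
    template = """
    if (%(x)s->nd != 2)
    {
        PyErr_SetString(PyExc_ValueError, "x not 2d tensor");
        %(fail)s;
    }
    """
    specialized = template % dict(x='V3', b='V5', sm='V1',
                                  fail='{ goto __label_1; }')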
234 def c_code(self, node, name, (x, b), (sm,), sub): | |
235 code_template = ''.join(self.c_code_template()) | |
236 return code_template % dict(locals(), **sub) | |
237 | |
238 softmax_with_bias = SoftmaxWithBias() | |
239 | |
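For reference, a minimal NumPy sketch of what softmax_with_bias computes (the helper name is illustrative, not part of the module). Like both perform and the C template, it subtracts each row's maximum before exponentiating, so exp() cannot overflow:

    import numpy

    def softmax_with_bias_ref(x, b):
        z = x + b                                    # b broadcasts across rows
        e = numpy.exp(z - z.max(axis=1).reshape(-1, 1))
        return e / e.sum(axis=1).reshape(-1, 1)

    x = numpy.random.randn(3, 5)
    b = numpy.random.randn(5)
    sm = softmax_with_bias_ref(x, b)
    assert numpy.allclose(sm.sum(axis=1), 1.0)       # each row is a distribution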
240 | |
241 | |
68 class CrossentropySoftmax1HotWithBias(theano.Op): | 242 class CrossentropySoftmax1HotWithBias(theano.Op): |
69 """A special compound L{Op} for the output of neural-net classifiers. | 243 """A special compound L{Op} for the output of neural-net classifiers. |
70 | 244 |
71 @type x: a matrix of floats (32 or 64) | 245 @type x: a matrix of floats (32 or 64) |
72 @type b: a [row] vector of floats (32 or 64); its length is the number of cols in x | 246 @type b: a [row] vector of floats (32 or 64); its length is the number of cols in x |
76 | 250 |
77 This L{Op} has two outputs: | 251 This L{Op} has two outputs: |
78 - KL(softmax(x+b), y) | 252 - KL(softmax(x+b), y) |
79 - softmax(x+b) | 253 - softmax(x+b) |
80 | 254 |
81 | 255 |
82 softmax(x[i]) is the i'th distribution over len(x[i]) options | 256 softmax(x[i]) is the i'th distribution over len(x[i]) options |
83 | 257 |
84 y_idx[i] is an integer index, encoding a 1-hot distribution. | 258 y_idx[i] is an integer index, encoding a 1-hot distribution. |
85 | 259 |
86 In practice, when we're trying to do classification, we have one row in x | 260 In practice, when we're trying to do classification, we have one row in x |
87 and y_idx per example, and y_idx[i] is the index of the (correct) class of the | 261 and y_idx per example, and y_idx[i] is the index of the (correct) class of the |
88 i'th example. | 262 i'th example. |
89 | 263 |
90 """ | 264 """ |
136 dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx) | 310 dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx) |
137 db = tensor.sum(dx, axis = [0]) | 311 db = tensor.sum(dx, axis = [0]) |
138 return dx, db, None | 312 return dx, db, None |
139 | 313 |
140 def c_headers(self): return ['<iostream>'] | 314 def c_headers(self): return ['<iostream>'] |
141 def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub): | 315 |
316 @staticmethod | |
317 def c_code_template(): | |
142 # this implementation was lifted from | 318 # this implementation was lifted from |
143 # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx | 319 # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx |
144 | 320 |
145 #TODO: put this into a templated function, in the support code | 321 #TODO: put this into a templated function, in the support code |
146 #TODO: declare the max of each row as an Op output | 322 #TODO: declare the max of each row as an Op output |
147 | 323 |
148 #TODO: set error messages for failures in this code | 324 #TODO: set error messages for failures in this code |
149 | 325 |
150 #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] | 326 #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] |
151 y_idx_type = node.inputs[2].type.dtype_specs()[1] | 327 (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \ |
152 | 328 SoftmaxWithBias.c_code_template() |
153 return """ | 329 return (init_decl, |
154 npy_intp* Nx = %(x)s->dimensions; | 330 """ |
155 | |
156 if (%(x)s->nd != 2) | |
157 { | |
158 PyErr_SetString(PyExc_ValueError, "a not 2d tensor"); | |
159 %(fail)s; | |
160 } | |
161 if (%(b)s->nd != 1) | |
162 { | |
163 PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); | |
164 %(fail)s; | |
165 } | |
166 if (%(y_idx)s->nd != 1) | 331 if (%(y_idx)s->nd != 1) |
167 { | 332 { |
168 PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); | 333 PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); |
169 %(fail)s; | |
170 } | |
171 if (%(x)s->descr->type_num != PyArray_DOUBLE) | |
172 { | |
173 PyErr_SetString(PyExc_TypeError, "a not float64"); | |
174 %(fail)s; | |
175 } | |
176 if (%(b)s->descr->type_num != PyArray_DOUBLE) | |
177 { | |
178 PyErr_SetString(PyExc_TypeError, "b not float64"); | |
179 %(fail)s; | 334 %(fail)s; |
180 } | 335 } |
181 if ((%(y_idx)s->descr->type_num != PyArray_INT64) | 336 if ((%(y_idx)s->descr->type_num != PyArray_INT64) |
182 && (%(y_idx)s->descr->type_num != PyArray_INT32) | 337 && (%(y_idx)s->descr->type_num != PyArray_INT32) |
183 && (%(y_idx)s->descr->type_num != PyArray_INT16) | 338 && (%(y_idx)s->descr->type_num != PyArray_INT16) |
184 && (%(y_idx)s->descr->type_num != PyArray_INT8)) | 339 && (%(y_idx)s->descr->type_num != PyArray_INT8)) |
185 { | 340 { |
186 PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); | 341 PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); |
187 %(fail)s; | 342 %(fail)s; |
188 } | 343 } |
189 if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]) | 344 if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0]) |
190 || (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0])) | |
191 { | 345 { |
192 PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); | 346 PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); |
193 %(fail)s; | 347 %(fail)s; |
194 } | 348 } |
195 | 349 |
202 { | 356 { |
203 PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output"); | 357 PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output"); |
204 %(fail)s; | 358 %(fail)s; |
205 } | 359 } |
206 } | 360 } |
207 if ((NULL == %(sm)s) | 361 """, |
208 || (%(sm)s->dimensions[0] != %(x)s->dimensions[0]) | 362 begin_row_loop, |
209 || (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) | 363 """ |
210 { | |
211 if (NULL != %(sm)s) Py_XDECREF(%(sm)s); | |
212 %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s); | |
213 if(!%(sm)s) { | |
214 // The normal cleanup code will take care of %(nll)s | |
215 // Py_XDECREF(%(nll)s); %(nll)s=NULL; | |
216 PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); | |
217 %(fail)s | |
218 } | |
219 } | |
220 | |
221 for (size_t i = 0; i < Nx[0]; ++i) | |
222 { | |
223 size_t j; | |
224 double sum = 0.0; | |
225 bool discount_max = false; | |
226 | |
227 const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); | |
228 const double* __restrict__ b_i = (double*)(%(b)s->data); | |
229 const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; | 364 const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; if (y_i >= Nx[1]) { %(fail)s; } |
230 double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); | |
231 double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i); | 365 double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i); |
232 | 366 """, |
233 npy_intp Sx = %(x)s->strides[1]/sizeof(double); | 367 inside_row_loop, |
234 npy_intp Sb = %(b)s->strides[0]/sizeof(double); | 368 """ |
235 npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); | 369 nll_i[0] = - x_i[y_i*Sx] |
236 | |
237 size_t row_max_j=0; | |
238 double row_max = x_i[0] + b_i[0]; | |
239 //try to compute sum and sm the easy way | |
240 for (j = 0; j < Nx[1]; ++j) | |
241 { | |
242 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
243 row_max_j = (row_ij > row_max) ? j : row_max_j; | |
244 row_max = (row_ij > row_max) ? row_ij : row_max; | |
245 | |
246 double sm_ij = exp(row_ij); | |
247 sum += sm_ij; | |
248 sm_i[j * Ssm] = sm_ij; | |
249 } | |
250 if ((0.0 == sum) || (isinf(sum))) | |
251 { | |
252 //our cheap trick didn't work... try again and do it better. | |
253 discount_max = true; | |
254 sum = 0.0; //reset sum and recompute.... | |
255 for (j = 0; j < Nx[1]; ++j) | |
256 { | |
257 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
258 | |
259 double sm_ij = exp(row_ij - row_max); | |
260 sum += sm_ij; | |
261 sm_i[j * Ssm] = sm_ij; | |
262 } | |
263 if ( (0.0 == sum) || (isinf(sum))) | |
264 { | |
265 //that was our best... | |
266 %(fail)s; | |
267 } | |
268 //if we still can't sum it up, we're screwed. | |
269 //So far, this assertion has never failed... | |
270 } | |
271 | |
272 //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); | |
273 double sum_inv = 1.0 / sum; | |
274 for (j = 0; j < Nx[1]; ++j) | |
275 { | |
276 sm_i[j * Ssm] *= sum_inv; | |
277 } | |
278 | |
279 if (y_i >= Nx[1]) | |
280 { | |
281 %(fail)s; | |
282 } | |
283 | |
284 nll_i[0] = - x_i[y_i*Sx] | |
285 - b_i[y_i*Sb] | 370 - b_i[y_i*Sb] |
286 + (discount_max ? row_max : 0.0) | |
287 + log(sum); | 371 + row_max + log(sum); // the shared loop always subtracts row_max |
288 //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate? | 372 """, |
289 //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum); | 373 end_row_loop) |
290 } | 374 |
291 """ % dict(locals(), **sub) | 375 |
376 def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub): | |
377 y_idx_type = node.inputs[2].type.dtype_specs()[1] | |
378 code_template = ''.join(self.c_code_template()) | |
379 return code_template % dict(locals(), **sub) | |
380 | |
292 crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias() | 381 crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias() |
293 | 382 |
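The stable NLL expression in the template is worth spelling out. The shared loop computes sum = sum_j exp(row[j] - row_max), so log sum_j exp(row[j]) = row_max + log(sum), and the negative log-likelihood of class y is -(x+b)[y] + row_max + log(sum). A quick numerical check, with values chosen so that a naive exp() would overflow:

    import numpy

    row = numpy.array([1000.0, 1001.0, 1002.0])   # x[i] + b for one row
    y = 2
    row_max = row.max()
    s = numpy.exp(row - row_max).sum()
    nll = -row[y] + row_max + numpy.log(s)        # same identity as the C code
    assert numpy.isfinite(nll)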
294 class CrossentropySoftmax1HotWithBiasDx (theano.Op): | 383 class CrossentropySoftmax1HotWithBiasDx (theano.Op): |
295 nin=3 | 384 nin=3 |
296 nout=1 | 385 nout=1 |