Mercurial > pylearn
comparison pylearn/formulas/costs.py @ 1399:d4a35c1c0a23
adding MultiHingeLoss cost function
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Thu, 13 Jan 2011 17:52:49 -0500 |
parents | 63fe96ede21d |
children | f9066ff6e198 |
comparison
equal
deleted
inserted
replaced
1398:1934ba31b7d9 | 1399:d4a35c1c0a23 |
---|---|
165 | 165 |
166 | 166 |
167 # This file seems like it has some overlap with theano.tensor.nnet. Which functions should go | 167 # This file seems like it has some overlap with theano.tensor.nnet. Which functions should go |
168 # in which file? | 168 # in which file? |
169 | 169 |
170 from theano import gof | |
171 from theano.tensor.tsor_apply import Apply | |
172 from theano import tensor | |
173 import numpy as np | |
174 | |
class MultiHingeMargin(gof.Op):
    """
    A hinge loss for multiclass predictions.

    For each row ``X[i]`` and integer label index ``yidx[i]``, the first
    output is

        z[i] = max(0, 1 - margin)

    where ``margin = X[i, yidx[i]] - max_{j != yidx[i]} X[i, j]`` is the gap
    between the score of the true class and the best competing class.

    A second output, ``winners``, holds per row a -1 at the true label and a
    +1 at the best competing label whenever the margin constraint is violated
    (0 elsewhere); it is consumed internally by :meth:`grad`.
    """
    # Calling the Op returns only the hinge-loss vector (output 0) by default.
    default_output = 0

    def __eq__(self, other):
        # The Op carries no parameters, so instances of the same type are
        # interchangeable (required for Theano's graph merging).
        return type(self) == type(other)

    def __hash__(self):
        return tensor.hashtype(self)

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, X, yidx):
        """Validate inputs and build the Apply node.

        :param X: matrix of per-class scores, one row per example
        :param yidx: integer vector of true class indices, one per row of X
        :raises TypeError: if X is not a matrix, yidx is not a vector, or
            yidx is not of an integer dtype
        """
        X_ = tensor.as_tensor_variable(X)
        yidx_ = tensor.as_tensor_variable(yidx)
        # Validate the *converted* variables so that plain lists/arrays get
        # the intended TypeError rather than an AttributeError.
        if X_.type.ndim != 2:
            raise TypeError('X must be matrix')
        if yidx_.type.ndim != 1:
            raise TypeError('yidx must be vector')
        if 'int' not in str(yidx_.type.dtype):
            raise TypeError("yidx must be integers, it's a vector of class labels")
        hinge_loss = tensor.vector(dtype=X_.dtype)
        winners = X_.type()
        return Apply(self, [X_, yidx_], [hinge_loss, winners])

    def perform(self, node, input_storage, out):
        """Pure-NumPy fallback implementation (mirrors :meth:`c_code`)."""
        X, yidx = input_storage
        toplabel = X.shape[1] - 1
        out[0][0] = z = np.zeros_like(X[:, 0])
        out[1][0] = w = np.zeros_like(X)
        for i, Xi in enumerate(X):
            yi = yidx[i]
            # Locate the highest-scoring class other than yi, avoiding a
            # temporary copy of the row by slicing around yi.
            # NOTE(review): assumes X has at least 2 columns — with a single
            # class the argmax over an empty slice would raise; confirm
            # callers never pass a one-column X.
            if yi == 0:
                next_best = Xi[1:].argmax() + 1
            elif yi == toplabel:
                next_best = Xi[:toplabel].argmax()
            else:
                next_best0 = Xi[:yi].argmax()
                next_best1 = Xi[yi + 1:].argmax() + yi + 1
                next_best = next_best0 if Xi[next_best0] > Xi[next_best1] else next_best1
            margin = Xi[yi] - Xi[next_best]
            if margin < 1:
                # Margin constraint violated: record the loss and mark the
                # two competing entries for use by grad().
                z[i] = 1 - margin
                w[i, yi] = -1
                w[i, next_best] = 1

    def grad(self, inputs, g_outs):
        """Gradient wrt X, routed through the precomputed ``winners`` output.

        yidx is an integer label vector, so its gradient slot is None.
        """
        z = self(*inputs)
        w = z.owner.outputs[1]
        gz, gw = g_outs
        if gw is not None:
            # No gradient is defined through the auxiliary ``winners`` output.
            raise NotImplementedError()
        # Broadcast the per-row loss gradient across the +-1 winner mask.
        gX = gz.dimshuffle(0, 'x') * w
        return [gX, None]

    def c_code_cache_version(self):
        # Unchanged: the generated C below is byte-identical to version (1,).
        return (1,)

    def c_code(self, node, name, inp, outp, sub):
        # Unpack inside the body: tuple parameters in the signature are
        # Python-2-only syntax and fail to parse on Python 3.
        X, y_idx = inp
        z, w = outp
        return '''
        if ((%(X)s->descr->type_num != PyArray_DOUBLE) && (%(X)s->descr->type_num != PyArray_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError, "types should be float or float64");
            %(fail)s;
        }
        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
            && (%(y_idx)s->descr->type_num != PyArray_INT32)
            && (%(y_idx)s->descr->type_num != PyArray_INT16)
            && (%(y_idx)s->descr->type_num != PyArray_INT8))
        {
            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
            %(fail)s;
        }
        if ((%(X)s->nd != 2)
            || (%(y_idx)s->nd != 1))
        {
            PyErr_SetString(PyExc_ValueError, "rank error");
            %(fail)s;
        }
        if (%(X)s->dimensions[0] != %(y_idx)s->dimensions[0])
        {
            PyErr_SetString(PyExc_ValueError, "dy.shape[0] != sm.shape[0]");
            %(fail)s;
        }
        if ((NULL == %(z)s)
            || (%(z)s->dimensions[0] != %(X)s->dimensions[0]))
        {
            Py_XDECREF(%(z)s);
            %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(X)s),
                                                       type_num_%(X)s);
            if (!%(z)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output");
                %(fail)s;
            }
        }
        if ((NULL == %(w)s)
            || (%(w)s->dimensions[0] != %(X)s->dimensions[0])
            || (%(w)s->dimensions[1] != %(X)s->dimensions[1]))
        {
            Py_XDECREF(%(w)s);
            %(w)s = (PyArrayObject*) PyArray_SimpleNew(2, PyArray_DIMS(%(X)s),
                                                       type_num_%(X)s);
            if (!%(w)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc dx output");
                %(fail)s;
            }
        }

        for (size_t i = 0; i < %(X)s->dimensions[0]; ++i)
        {
            const dtype_%(X)s* __restrict__ X_i = (dtype_%(X)s*) (%(X)s->data + %(X)s->strides[0] * i);
            npy_intp SX = %(X)s->strides[1]/sizeof(dtype_%(X)s);

            dtype_%(w)s* __restrict__ w_i = (dtype_%(w)s*) (%(w)s->data + %(w)s->strides[0] * i);
            npy_intp Sw = %(w)s->strides[1]/sizeof(dtype_%(w)s);

            const dtype_%(y_idx)s y_i = ((dtype_%(y_idx)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];

            dtype_%(X)s X_i_max = X_i[0];
            dtype_%(X)s X_at_y_i = X_i[0];
            size_t X_i_argmax = 0;
            size_t j = 1;
            w_i[0] = 0;

            if (y_i == 0)
            {
                X_i_max = X_i[SX];
                X_i_argmax = 1;
                w_i[Sw] = 0;
            }
            for (; j < %(X)s->dimensions[1]; ++j)
            {
                dtype_%(X)s X_ij = X_i[j*SX];
                if (j == y_i)
                {
                    X_at_y_i = X_ij;
                }
                else if (X_ij > X_i_max)
                {
                    X_i_max = X_ij;
                    X_i_argmax = j;
                }
                w_i[j*Sw] = 0;
            }
            if (0 < 1 - X_at_y_i + X_i_max)
            {
                ((dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * i))[0]
                    = 1 - X_at_y_i + X_i_max;
                w_i[y_i*Sw] = -1;
                w_i[X_i_argmax*Sw] = 1;
            }
        }
        ''' % dict(locals(), **sub)
multi_hinge_margin = MultiHingeMargin()