comparison pylearn/formulas/costs.py @ 1399:d4a35c1c0a23

adding MultiHingeLoss cost function
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 13 Jan 2011 17:52:49 -0500
parents 63fe96ede21d
children f9066ff6e198

# This file seems like it has some overlap with theano.tensor.nnet. Which functions should go
# in which file?

from theano import gof
from theano.gof import Apply
from theano import tensor
import numpy as np


class MultiHingeMargin(gof.Op):
    """
    This is a hinge loss function for multiclass predictions.

    For each row X[i] and label index yidx[i], the first output is

        z[i] = max(0, 1 - margin)

    where margin is the difference between X[i, yidx[i]] and the largest
    other element of X[i].

    The second output, winners, is a matrix with the same shape as X that is
    -1 at (i, yidx[i]) and +1 at the runner-up position in rows with positive
    loss, and 0 elsewhere; grad() uses it to propagate the gradient.
    """
    default_output = 0

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, X, yidx):
        X_ = tensor.as_tensor_variable(X)
        yidx_ = tensor.as_tensor_variable(yidx)
        if X_.type.ndim != 2:
            raise TypeError('X must be a matrix')
        if yidx_.type.ndim != 1:
            raise TypeError('yidx must be a vector')
        if 'int' not in str(yidx_.type.dtype):
            raise TypeError('yidx must be a vector of integer class labels')
        hinge_loss = tensor.vector(dtype=X_.dtype)
        winners = X_.type()
        return Apply(self, [X_, yidx_], [hinge_loss, winners])
    def perform(self, node, input_storage, out):
        X, yidx = input_storage
        toplabel = X.shape[1] - 1
        out[0][0] = z = np.zeros_like(X[:, 0])
        out[1][0] = w = np.zeros_like(X)
        for i, Xi in enumerate(X):
            yi = yidx[i]
            # best-scoring label other than the true one
            if yi == 0:
                next_best = Xi[1:].argmax() + 1
            elif yi == toplabel:
                next_best = Xi[:toplabel].argmax()
            else:
                next_best0 = Xi[:yi].argmax()
                next_best1 = Xi[yi + 1:].argmax() + yi + 1
                next_best = next_best0 if Xi[next_best0] > Xi[next_best1] else next_best1
            margin = Xi[yi] - Xi[next_best]
            if margin < 1:
                z[i] = 1 - margin
                w[i, yi] = -1
                w[i, next_best] = 1

    def grad(self, inputs, g_outs):
        z = self(*inputs)
        w = z.owner.outputs[1]
        gz, gw = g_outs
        if gw is not None:
            raise NotImplementedError()
        gX = gz.dimshuffle(0, 'x') * w
        return [gX, None]
    def c_code_cache_version(self):
        return (1,)

    def c_code(self, node, name, inp, out, sub):
        X, y_idx = inp
        z, w = out
        return '''
        if ((%(X)s->descr->type_num != PyArray_DOUBLE) && (%(X)s->descr->type_num != PyArray_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError, "X should be float32 or float64");
            %(fail)s;
        }
        if ((%(y_idx)s->descr->type_num != PyArray_INT64)
            && (%(y_idx)s->descr->type_num != PyArray_INT32)
            && (%(y_idx)s->descr->type_num != PyArray_INT16)
            && (%(y_idx)s->descr->type_num != PyArray_INT8))
        {
            PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
            %(fail)s;
        }
        if ((%(X)s->nd != 2)
            || (%(y_idx)s->nd != 1))
        {
            PyErr_SetString(PyExc_ValueError, "rank error");
            %(fail)s;
        }
        if (%(X)s->dimensions[0] != %(y_idx)s->dimensions[0])
        {
            PyErr_SetString(PyExc_ValueError, "X.shape[0] != y_idx.shape[0]");
            %(fail)s;
        }
        if ((NULL == %(z)s)
            || (%(z)s->dimensions[0] != %(X)s->dimensions[0]))
        {
            Py_XDECREF(%(z)s);
            %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(X)s),
                                                       type_num_%(X)s);
            if (!%(z)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
                %(fail)s;
            }
        }
        if ((NULL == %(w)s)
            || (%(w)s->dimensions[0] != %(X)s->dimensions[0])
            || (%(w)s->dimensions[1] != %(X)s->dimensions[1]))
        {
            Py_XDECREF(%(w)s);
            %(w)s = (PyArrayObject*) PyArray_SimpleNew(2, PyArray_DIMS(%(X)s),
                                                       type_num_%(X)s);
            if (!%(w)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc w output");
                %(fail)s;
            }
        }

        for (size_t i = 0; i < %(X)s->dimensions[0]; ++i)
        {
            const dtype_%(X)s* __restrict__ X_i = (dtype_%(X)s*) (%(X)s->data + %(X)s->strides[0] * i);
            npy_intp SX = %(X)s->strides[1]/sizeof(dtype_%(X)s);

            dtype_%(w)s* __restrict__ w_i = (dtype_%(w)s*) (%(w)s->data + %(w)s->strides[0] * i);
            npy_intp Sw = %(w)s->strides[1]/sizeof(dtype_%(w)s);

            const dtype_%(y_idx)s y_i = ((dtype_%(y_idx)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];

            dtype_%(X)s X_i_max = X_i[0];
            dtype_%(X)s X_at_y_i = X_i[0];
            size_t X_i_argmax = 0;
            size_t j = 1;
            w_i[0] = 0;

            if (y_i == 0)
            {
                X_i_max = X_i[SX];
                X_i_argmax = 1;
                w_i[Sw] = 0;
            }
            for (; j < %(X)s->dimensions[1]; ++j)
            {
                dtype_%(X)s X_ij = X_i[j*SX];
                if (j == y_i)
                {
                    X_at_y_i = X_ij;
                }
                else if (X_ij > X_i_max)
                {
                    X_i_max = X_ij;
                    X_i_argmax = j;
                }
                w_i[j*Sw] = 0;
            }
            if (0 < 1 - X_at_y_i + X_i_max)
            {
                ((dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * i))[0]
                    = 1 - X_at_y_i + X_i_max;
                w_i[y_i*Sw] = -1;
                w_i[X_i_argmax*Sw] = 1;
            }
        }
        ''' % dict(locals(), **sub)


multi_hinge_margin = MultiHingeMargin()
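
A minimal self-check sketch (not part of the changeset): it assumes the file is importable as pylearn.formulas.costs and compares the op's first output against a plain NumPy computation of the same loss. The names f, X_val, y_val and the random data are illustrative only.

import numpy as np
import theano
import theano.tensor as T

from pylearn.formulas.costs import multi_hinge_margin  # assumed import path

X = T.matrix('X')
y = T.ivector('y')
loss = multi_hinge_margin(X, y)   # default_output=0: the per-row hinge loss
f = theano.function([X, y], loss)

rng = np.random.RandomState(0)
X_val = rng.randn(5, 4).astype(X.dtype)
y_val = rng.randint(0, 4, size=5).astype('int32')

# Reference: z[i] = max(0, 1 - (X[i, y[i]] - max_{j != y[i]} X[i, j]))
masked = X_val.copy()
masked[np.arange(5), y_val] = -np.inf
ref = np.maximum(0, 1 - (X_val[np.arange(5), y_val] - masked.max(axis=1)))

assert np.allclose(f(X_val, y_val), ref)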