pylearn: comparison of nnet_ops.py @ 440:18dbc1c11647
Work on softmax operators

author   | Pascal Lamblin <lamblinp@iro.umontreal.ca> |
date     | Thu, 21 Aug 2008 13:55:16 -0400 |
parents  | 43d9aa93934e |
children | b3315b252824 |
433:200a5b0e24ea | 440:18dbc1c11647 |
63 ############ | 63 ############ |
64 # | 64 # |
65 # TENSOR OPS | 65 # TENSOR OPS |
66 # | 66 # |
67 | 67 |
68 | |
69 class SoftmaxWithBias(theano.Op): | |
70 """ | |
71 An L{Op} for the output of neural-net multiclass classifiers. | |
72 | |
73 @type x: a matrix of floats (32 or 64) | |
74 @type b: a [row] vector of floats (32 or 64); its length is the number of cols in x | |
75 | |
76 This L{Op}'s output is softmax(x+b). | |
77 softmax(x[i]) is the i'th distribution over len(x[i]) options. | |
78 """ | |
79 | |
80 nin = 2 | |
81 nout = 1 | |
82 def __init__(self, **kwargs): | |
83 theano.Op.__init__(self, **kwargs) | |
84 | |
85 def make_node(self, x, b): | |
86 x = tensor.as_tensor(x) | |
87 b = tensor.as_tensor(b) | |
88 if x.type.ndim != 2 \ | |
89 or x.type.dtype not in ['float32', 'float64']: | |
90 raise ValueError('x must be 2-d tensor of floats') | |
91 if b.type.ndim != 1 \ | |
92 or b.type.dtype not in ['float32', 'float64']: | |
93 raise ValueError('b must be 1-d tensor of floats') | |
94 | |
95 sm = x.type.make_result() | |
96 return theano.Apply(self, [x, b], [sm]) | |
97 | |
98 def perform(self, node, input_storage, output_storage): | |
99 x, b = input_storage | |
100 if b.shape[0] != x.shape[1]: | |
101 raise ValueError('b must have the same length as the number of columns in x') | |
102 | |
103 sm = numpy.zeros_like(x) | |
104 for i in xrange(sm.shape[0]): | |
105 row = x[i] + b | |
106 sm[i] = numpy.exp(row - numpy.max(row)) | |
107 sm[i] *= 1.0 / numpy.sum(sm[i]) | |
108 output_storage[0][0] = sm | |
109 | |
110 def grad(self, (x, b), (g_sm,)): | |
111 sm = softmax_with_bias(x, b) | |
112 dx = SoftmaxWithBiasDx()(g_sm, sm) | |
113 db = tensor.sum(dx, axis = 0) | |
114 return dx, db | |
115 | |
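SoftmaxWithBiasDx is defined further down in this module and is not shown in this hunk. As a rough guide to what grad relies on, here is a minimal NumPy sketch, with an illustrative name, of the row-wise softmax vector-Jacobian product such an Op has to compute; db then follows as dx.sum(axis=0), matching the tensor.sum(dx, axis=0) above:

    import numpy

    def softmax_dx_sketch(g_sm, sm):
        # VJP of a row-wise softmax: for each row i,
        #   dx[i] = sm[i] * (g_sm[i] - <g_sm[i], sm[i]>)
        inner = (g_sm * sm).sum(axis=1).reshape(-1, 1)
        return sm * (g_sm - inner)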
116 def c_headers(self): | |
117 return ['<iostream>'] | |
118 | |
119 @staticmethod | |
120 def c_code_template(): | |
121 # this implementation was lifted from | |
122 # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx | |
123 | |
124 #TODO: put this into a templated function, in the support code | |
125 #TODO: declare the max of each row as an Op output | |
126 | |
127 #TODO: set error messages for failures in this code | |
128 | |
129 #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] | |
130 init_decl = """ | |
131 npy_intp* Nx = %(x)s->dimensions; | |
132 | |
133 if (%(x)s->nd != 2) | |
134 { | |
135 PyErr_SetString(PyExc_ValueError, "x not 2d tensor"); | |
136 %(fail)s; | |
137 } | |
138 if (%(b)s->nd != 1) | |
139 { | |
140 PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); | |
141 %(fail)s; | |
142 } | |
143 if (%(x)s->descr->type_num != PyArray_DOUBLE) | |
144 { | |
145 PyErr_SetString(PyExc_TypeError, "x not float64"); | |
146 %(fail)s; | |
147 } | |
148 if (%(b)s->descr->type_num != PyArray_DOUBLE) | |
149 { | |
150 PyErr_SetString(PyExc_TypeError, "b not float64"); | |
151 %(fail)s; | |
152 } | |
153 if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])) | |
154 { | |
155 PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); | |
156 %(fail)s; | |
157 } | |
158 | |
159 if ((NULL == %(sm)s) | |
160 || (%(sm)s->dimensions[0] != %(x)s->dimensions[0]) | |
161 || (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) | |
162 { | |
163 if (NULL != %(sm)s) Py_XDECREF(%(sm)s); | |
164 %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s); | |
165 if(!%(sm)s) { | |
166 // sm is this Op's only output, so there is | |
167 // nothing else to clean up on failure. | |
168 PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); | |
169 %(fail)s | |
170 } | |
171 } | |
172 """ | |
173 | |
174 begin_row_loop = """ | |
175 for (size_t i = 0; i < Nx[0]; ++i) | |
176 { | |
177 size_t j; | |
178 double sum = 0.0; | |
179 | |
180 | |
181 const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); | |
182 const double* __restrict__ b_i = (double*)(%(b)s->data); | |
183 double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); | |
184 """ | |
185 | |
186 inside_row_loop = """ | |
187 npy_intp Sx = %(x)s->strides[1]/sizeof(double); | |
188 npy_intp Sb = %(b)s->strides[0]/sizeof(double); | |
189 npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); | |
190 | |
191 size_t row_max_j=0; | |
192 double row_max = x_i[0] + b_i[0]; | |
193 // Get the maximum value of the row | |
194 for (j = 0; j < Nx[1]; ++j) | |
195 { | |
196 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
197 row_max_j = (row_ij > row_max) ? j : row_max_j; | |
198 row_max = (row_ij > row_max) ? row_ij : row_max; | |
199 } | |
200 | |
201 for (j = 0; j < Nx[1]; ++j) | |
202 { | |
203 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
204 double sm_ij = exp(row_ij - row_max); | |
205 sum += sm_ij; | |
206 sm_i[j * Ssm] = sm_ij; | |
207 } | |
208 if ( (0.0 == sum) || (isinf(sum))) | |
209 { | |
210 //that was our best... | |
211 %(fail)s; | |
212 } | |
213 | |
214 //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); | |
215 double sum_inv = 1.0 / sum; | |
216 for (j = 0; j < Nx[1]; ++j) | |
217 { | |
218 sm_i[j * Ssm] *= sum_inv; | |
219 } | |
220 | |
221 // NB: no y_i bounds check here: y_i exists only | |
222 // in the crossentropy op's template, and this | |
223 // shared block must also compile for | |
224 // SoftmaxWithBias, which has no y_idx input. | |
225 """ | |
226 | |
227 end_row_loop = """ | |
228 } | |
229 """ | |
230 | |
231 return (init_decl, begin_row_loop, inside_row_loop, end_row_loop) | |
232 | |
233 | |
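The four strings returned above are Python %-format templates; Theano's C code generator later substitutes the C names of the inputs, the outputs, and the failure handler. A minimal sketch of that substitution step, using made-up variable names, also shows why a literal '%' inside the C code would have to be written as '%%':

    # Hypothetical names: 'V3', 'V5', 'V1' and the fail snippet stand in
    # for whatever the compiler actually generates.
    template = """
    if (%(x)s->nd != 2)
    {
        PyErr_SetString(PyExc_ValueError, "x not 2d tensor");
        %(fail)s;
    }
    """
    specialized = template % dict(x='V3', b='V5', sm='V1',
                                  fail='{ goto __label_1; }')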
234 def c_code(self, node, name, (x, b), (sm,), sub): | |
235 code_template = ''.join(self.c_code_template()) | |
236 return code_template % dict(locals(), **sub) | |
237 | |
238 softmax_with_bias = SoftmaxWithBias() | |
239 | |
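For reference, a minimal NumPy sketch of what softmax_with_bias computes (the helper name is illustrative, not part of the module). Like both perform and the C template, it subtracts each row's maximum before exponentiating, so exp() cannot overflow:

    import numpy

    def softmax_with_bias_ref(x, b):
        z = x + b                                    # b broadcasts across rows
        e = numpy.exp(z - z.max(axis=1).reshape(-1, 1))
        return e / e.sum(axis=1).reshape(-1, 1)

    x = numpy.random.randn(3, 5)
    b = numpy.random.randn(5)
    sm = softmax_with_bias_ref(x, b)
    assert numpy.allclose(sm.sum(axis=1), 1.0)       # each row is a distribution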
240 | |
241 | |
68 class CrossentropySoftmax1HotWithBias(theano.Op): | 242 class CrossentropySoftmax1HotWithBias(theano.Op): |
69 """A special compound L{Op} for the output of neural-net classifiers. | 243 """A special compound L{Op} for the output of neural-net classifiers. |
70 | 244 |
71 @type x: a matrix of floats (32 or 64) | 245 @type x: a matrix of floats (32 or 64) |
72 @type b: a [row] vector of floats (32 or 64); its length is the number of cols in x | 246 @type b: a [row] vector of floats (32 or 64); its length is the number of cols in x |
76 | 250 |
77 This L{Op} has two outputs: | 251 This L{Op} has two outputs: |
78 - KL(softmax(x+b), y) | 252 - KL(softmax(x+b), y) |
79 - softmax(x+b) | 253 - softmax(x+b) |
80 | 254 |
81 | 255 |
82 softmax(x[i]) is the i'th distribution over len(x[i]) options | 256 softmax(x[i]) is the i'th distribution over len(x[i]) options |
83 | 257 |
84 y_idx[i] is an integer index, encoding a 1-hot distribution. | 258 y_idx[i] is an integer index, encoding a 1-hot distribution. |
85 | 259 |
86 In practice, when we're trying to do classification, we have one row in x | 260 In practice, when we're trying to do classification, we have one row in x |
87 and y_idx per example, and y_idx[i] is the index of the (correct) class of the | 261 and y_idx per example, and y_idx[i] is the index of the (correct) class of the |
88 i'th example. | 262 i'th example. |
89 | 263 |
90 """ | 264 """ |
136 dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx) | 310 dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx) |
137 db = tensor.sum(dx, axis = [0]) | 311 db = tensor.sum(dx, axis = [0]) |
138 return dx, db, None | 312 return dx, db, None |
139 | 313 |
140 def c_headers(self): return ['<iostream>'] | 314 def c_headers(self): return ['<iostream>'] |
141 def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub): | 315 |
316 @staticmethod | |
317 def c_code_template(): | |
142 # this implementation was lifted from | 318 # this implementation was lifted from |
143 # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx | 319 # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx |
144 | 320 |
145 #TODO: put this into a templated function, in the support code | 321 #TODO: put this into a templated function, in the support code |
146 #TODO: declare the max of each row as an Op output | 322 #TODO: declare the max of each row as an Op output |
147 | 323 |
148 #TODO: set error messages for failures in this code | 324 #TODO: set error messages for failures in this code |
149 | 325 |
150 #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] | 326 #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] |
151 y_idx_type = node.inputs[2].type.dtype_specs()[1] | 327 (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \ |
152 | 328 SoftmaxWithBias.c_code_template() |
153 return """ | 329 return (init_decl, |
154 npy_intp* Nx = %(x)s->dimensions; | 330 """ |
155 | |
156 if (%(x)s->nd != 2) | |
157 { | |
158 PyErr_SetString(PyExc_ValueError, "a not 2d tensor"); | |
159 %(fail)s; | |
160 } | |
161 if (%(b)s->nd != 1) | |
162 { | |
163 PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); | |
164 %(fail)s; | |
165 } | |
166 if (%(y_idx)s->nd != 1) | 331 if (%(y_idx)s->nd != 1) |
167 { | 332 { |
168 PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); | 333 PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); |
169 %(fail)s; | |
170 } | |
171 if (%(x)s->descr->type_num != PyArray_DOUBLE) | |
172 { | |
173 PyErr_SetString(PyExc_TypeError, "a not float64"); | |
174 %(fail)s; | |
175 } | |
176 if (%(b)s->descr->type_num != PyArray_DOUBLE) | |
177 { | |
178 PyErr_SetString(PyExc_TypeError, "b not float64"); | |
179 %(fail)s; | 334 %(fail)s; |
180 } | 335 } |
181 if ((%(y_idx)s->descr->type_num != PyArray_INT64) | 336 if ((%(y_idx)s->descr->type_num != PyArray_INT64) |
182 && (%(y_idx)s->descr->type_num != PyArray_INT32) | 337 && (%(y_idx)s->descr->type_num != PyArray_INT32) |
183 && (%(y_idx)s->descr->type_num != PyArray_INT16) | 338 && (%(y_idx)s->descr->type_num != PyArray_INT16) |
184 && (%(y_idx)s->descr->type_num != PyArray_INT8)) | 339 && (%(y_idx)s->descr->type_num != PyArray_INT8)) |
185 { | 340 { |
186 PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); | 341 PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64"); |
187 %(fail)s; | 342 %(fail)s; |
188 } | 343 } |
189 if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]) | 344 if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0]) |
190 || (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0])) | |
191 { | 345 { |
192 PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); | 346 PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments"); |
193 %(fail)s; | 347 %(fail)s; |
194 } | 348 } |
195 | 349 |
202 { | 356 { |
203 PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output"); | 357 PyErr_SetString(PyExc_MemoryError, "failed to alloc nll output"); |
204 %(fail)s; | 358 %(fail)s; |
205 } | 359 } |
206 } | 360 } |
207 if ((NULL == %(sm)s) | 361 """, |
208 || (%(sm)s->dimensions[0] != %(x)s->dimensions[0]) | 362 begin_row_loop, |
209 || (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) | 363 """ |
210 { | |
211 if (NULL != %(sm)s) Py_XDECREF(%(sm)s); | |
212 %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s); | |
213 if(!%(sm)s) { | |
214 // The normal cleanup code will take care of %(nll)s | |
215 // Py_XDECREF(%(nll)s); %(nll)s=NULL; | |
216 PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output"); | |
217 %(fail)s | |
218 } | |
219 } | |
220 | |
221 for (size_t i = 0; i < Nx[0]; ++i) | |
222 { | |
223 size_t j; | |
224 double sum = 0.0; | |
225 bool discount_max = false; | |
226 | |
227 const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); | |
228 const double* __restrict__ b_i = (double*)(%(b)s->data); | |
229 const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; | 364 const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; if (y_i >= Nx[1]) { %(fail)s; } |
230 double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); | |
231 double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i); | 365 double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i); |
232 | 366 """, |
233 npy_intp Sx = %(x)s->strides[1]/sizeof(double); | 367 inside_row_loop, |
234 npy_intp Sb = %(b)s->strides[0]/sizeof(double); | 368 """ |
235 npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); | 369 nll_i[0] = - x_i[y_i*Sx] |
236 | |
237 size_t row_max_j=0; | |
238 double row_max = x_i[0] + b_i[0]; | |
239 //try to compute sum and sm the easy way | |
240 for (j = 0; j < Nx[1]; ++j) | |
241 { | |
242 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
243 row_max_j = (row_ij > row_max) ? j : row_max_j; | |
244 row_max = (row_ij > row_max) ? row_ij : row_max; | |
245 | |
246 double sm_ij = exp(row_ij); | |
247 sum += sm_ij; | |
248 sm_i[j * Ssm] = sm_ij; | |
249 } | |
250 if ((0.0 == sum) || (isinf(sum))) | |
251 { | |
252 //our cheap trick didn't work... try again and do it better. | |
253 discount_max = true; | |
254 sum = 0.0; //reset sum and recompute.... | |
255 for (j = 0; j < Nx[1]; ++j) | |
256 { | |
257 double row_ij = x_i[j * Sx] + b_i[j * Sb]; | |
258 | |
259 double sm_ij = exp(row_ij - row_max); | |
260 sum += sm_ij; | |
261 sm_i[j * Ssm] = sm_ij; | |
262 } | |
263 if ( (0.0 == sum) || (isinf(sum))) | |
264 { | |
265 //that was our best... | |
266 %(fail)s; | |
267 } | |
268 //if we still can't sum it up, we're screwed. | |
269 //So far, this assertion has never failed... | |
270 } | |
271 | |
272 //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); | |
273 double sum_inv = 1.0 / sum; | |
274 for (j = 0; j < Nx[1]; ++j) | |
275 { | |
276 sm_i[j * Ssm] *= sum_inv; | |
277 } | |
278 | |
279 if (y_i >= Nx[1]) | |
280 { | |
281 %(fail)s; | |
282 } | |
283 | |
284 nll_i[0] = - x_i[y_i*Sx] | |
285 - b_i[y_i*Sb] | 370 - b_i[y_i*Sb] |
286 + (discount_max ? row_max : 0.0) | |
287 + log(sum); | 371 + row_max + log(sum); // the shared loop always subtracts row_max |
288 //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate? | 372 """, |
289 //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum); | 373 end_row_loop) |
290 } | 374 |
291 """ % dict(locals(), **sub) | 375 |
376 def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub): | |
377 y_idx_type = node.inputs[2].type.dtype_specs()[1] | |
378 code_template = ''.join(self.c_code_template()) | |
379 return code_template % dict(locals(), **sub) | |
380 | |
292 crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias() | 381 crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias() |
293 | 382 |
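The stable NLL expression in the template is worth spelling out. The shared loop computes sum = sum_j exp(row[j] - row_max), so log sum_j exp(row[j]) = row_max + log(sum), and the negative log-likelihood of class y is -(x+b)[y] + row_max + log(sum). A quick numerical check, with values chosen so that a naive exp() would overflow:

    import numpy

    row = numpy.array([1000.0, 1001.0, 1002.0])   # x[i] + b for one row
    y = 2
    row_max = row.max()
    s = numpy.exp(row - row_max).sum()
    nll = -row[y] + row_max + numpy.log(s)        # same identity as the C code
    assert numpy.isfinite(nll)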
294 class CrossentropySoftmax1HotWithBiasDx (theano.Op): | 383 class CrossentropySoftmax1HotWithBiasDx (theano.Op): |
295 nin=3 | 384 nin=3 |
296 nout=1 | 385 nout=1 |