# HG changeset patch
# User Pascal Lamblin
# Date 1219341316 14400
# Node ID 18dbc1c11647b5ef65e1c16d0b7566626b92d9d0
# Parent  200a5b0e24ea93527b433e65c6fdf719487c66b9
Work on softmax operators

diff -r 200a5b0e24ea -r 18dbc1c11647 __init__.py
--- a/__init__.py	Thu Jul 31 17:25:35 2008 -0400
+++ b/__init__.py	Thu Aug 21 13:55:16 2008 -0400
@@ -1,6 +1,7 @@
 import filetensor
 import nnet_ops
 import version
+import learner
 
 from lookup_list import LookupList

diff -r 200a5b0e24ea -r 18dbc1c11647 nnet_ops.py
--- a/nnet_ops.py	Thu Jul 31 17:25:35 2008 -0400
+++ b/nnet_ops.py	Thu Aug 21 13:55:16 2008 -0400
@@ -65,6 +65,180 @@
 
 # TENSOR OPS
 #
+
+class SoftmaxWithBias(theano.Op):
+    """
+    An L{Op} for the output of neural-net multiclass classifiers.
+
+    @type x: a matrix of floats (32 or 64)
+    @type b: a [row] vector of floats (32 or 64), whose length is the number of columns of x
+
+    This L{Op}'s output is softmax(x+b).
+    softmax(x[i]) is the i'th distribution over len(x[i]) options.
+    """
+
+    nin = 2
+    nout = 1
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, x, b):
+        x = tensor.as_tensor(x)
+        b = tensor.as_tensor(b)
+        if x.type.ndim != 2 \
+                or x.type.dtype not in ['float32', 'float64']:
+            raise ValueError('x must be a 2-d tensor of floats')
+        if b.type.ndim != 1 \
+                or b.type.dtype not in ['float32', 'float64']:
+            raise ValueError('b must be a 1-d tensor of floats')
+
+        sm = x.type.make_result()
+        return theano.Apply(self, [x, b], [sm])
+
+    def perform(self, node, input_storage, output_storage):
+        x, b = input_storage
+        if b.shape[0] != x.shape[1]:
+            raise ValueError('b must have the same length as the number of columns of x')
+
+        sm = numpy.zeros_like(x)
+        for i in xrange(sm.shape[0]):
+            row = x[i] + b
+            sm[i] = numpy.exp(row - numpy.max(row))
+            sm[i] *= 1.0 / numpy.sum(sm[i])
+        output_storage[0][0] = sm
+
+    def grad(self, (x, b), (g_sm,)):
+        sm = softmax_with_bias(x, b)
+        dx = SoftmaxWithBiasDx()(g_sm, sm)
+        db = tensor.sum(dx, axis = 0)
+        return dx, db
+
+    def c_headers(self):
+        return ['<iostream>']
+
+    @staticmethod
+    def c_code_template():
+        # this implementation was lifted from
+        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
+
+        #TODO: put this into a templated function, in the support code
+        #TODO: declare the max of each row as an Op output
+
+        #TODO: set error messages for failures in this code
+
+        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
+        init_decl = """
+        npy_intp* Nx = %(x)s->dimensions;
+
+        if (%(x)s->nd != 2)
+        {
+            PyErr_SetString(PyExc_ValueError, "a not 2d tensor");
+            %(fail)s;
+        }
+        if (%(b)s->nd != 1)
+        {
+            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
+            %(fail)s;
+        }
+        if (%(x)s->descr->type_num != PyArray_DOUBLE)
+        {
+            PyErr_SetString(PyExc_TypeError, "a not float64");
+            %(fail)s;
+        }
+        if (%(b)s->descr->type_num != PyArray_DOUBLE)
+        {
+            PyErr_SetString(PyExc_TypeError, "b not float64");
+            %(fail)s;
+        }
+        if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
+        {
+            PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
+            %(fail)s;
+        }
+
+        if ((NULL == %(sm)s)
+            || (%(sm)s->dimensions[0] != %(x)s->dimensions[0])
+            || (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
+        {
+            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
+            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s);
+            if(!%(sm)s) {
+                PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output");
+                %(fail)s
+            }
+        }
+        """
+
+        begin_row_loop = """
""" + for (size_t i = 0; i < Nx[0]; ++i) + { + size_t j; + double sum = 0.0; + bool discount_max = false; + + const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i); + const double* __restrict__ b_i = (double*)(%(b)s->data); + double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i); + """ + + inside_row_loop = """ + npy_intp Sx = %(x)s->strides[1]/sizeof(double); + npy_intp Sb = %(b)s->strides[0]/sizeof(double); + npy_intp Ssm = %(sm)s->strides[1]/sizeof(double); + + size_t row_max_j=0; + double row_max = x_i[0] + b_i[0]; + // Get the maximum value of the row + for (j = 0; j < Nx[1]; ++j) + { + double row_ij = x_i[j * Sx] + b_i[j * Sb]; + row_max_j = (row_ij > row_max) ? j : row_max_j; + row_max = (row_ij > row_max) ? row_ij : row_max; + } + + for (j = 0; j < Nx[1]; ++j) + { + double row_ij = x_i[j * Sx] + b_i[j * Sb]; + double sm_ij = exp(row_ij - row_max); + sum += sm_ij; + sm_i[j * Ssm] = sm_ij; + } + if ( (0.0 == sum) || (isinf(sum))) + { + //that was our best... + %(fail)s; + } + + //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n); + double sum_inv = 1.0 / sum; + for (j = 0; j < Nx[1]; ++j) + { + sm_i[j * Ssm] *= sum_inv; + } + + if (y_i >= Nx[1]) + { + %(fail)s; + } + """ + + end_row_loop = """ + } + """ + + return (init_decl, begin_row_loop, inside_row_loop, end_row_loop) + + + def c_code(self, node, name, (x, b), (sm,), sub): + code_template = ''.join(self.c_code_template()) + return code_template % dict(locals(), **sub) + +softmax_with_bias = SoftmaxWithBias() + + + class CrossentropySoftmax1HotWithBias(theano.Op): """A special compound L{Op} for the output of neural-net classifiers. @@ -78,11 +252,11 @@ - KL(softmax(x+b), y) - softmax(x+b) - + softmax(x[i]) is the i'th distribution over len(x[i]) options y_idx[i] is an integer index, encoding a 1-hot distribution. - + In practice, when we're trying to do classification, we have one row in x and y_idx per example, and y[i] is the index of the (correct) class of the i'th example. 
@@ -138,7 +312,9 @@
         return dx, db, None
 
     def c_headers(self): return ['<iostream>']
-    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+
+    @staticmethod
+    def c_code_template():
         # this implementation was lifted from
         # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
 
@@ -148,36 +324,15 @@
         #TODO: set error messages for failures in this code
 
         #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
-        y_idx_type = node.inputs[2].type.dtype_specs()[1]
-
-        return """
-        npy_intp* Nx = %(x)s->dimensions;
-
-        if (%(x)s->nd != 2)
-        {
-            PyErr_SetString(PyExc_ValueError, "a not 2d tensor");
-            %(fail)s;
-        }
-        if (%(b)s->nd != 1)
-        {
-            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
-            %(fail)s;
-        }
+        (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \
+            SoftmaxWithBias.c_code_template()
+        return (init_decl,
+                """
         if (%(y_idx)s->nd != 1)
         {
             PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
             %(fail)s;
         }
-        if (%(x)s->descr->type_num != PyArray_DOUBLE)
-        {
-            PyErr_SetString(PyExc_TypeError, "a not float64");
-            %(fail)s;
-        }
-        if (%(b)s->descr->type_num != PyArray_DOUBLE)
-        {
-            PyErr_SetString(PyExc_TypeError, "b not float64");
-            %(fail)s;
-        }
         if ((%(y_idx)s->descr->type_num != PyArray_INT64)
             && (%(y_idx)s->descr->type_num != PyArray_INT32)
             && (%(y_idx)s->descr->type_num != PyArray_INT16)
@@ -186,8 +341,7 @@
             PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
             %(fail)s;
         }
-        if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])
-            || (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0]))
+        if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0])
         {
             PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
             %(fail)s;
@@ -204,91 +358,26 @@
                 %(fail)s;
             }
         }
-        if ((NULL == %(sm)s)
-            || (%(sm)s->dimensions[0] != %(x)s->dimensions[0])
-            || (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
-        {
-            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
-            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s);
-            if(!%(sm)s) {
-                // The normal cleanup code will take care of %(nll)s
-                // Py_XDECREF(%(nll)s); %(nll)s=NULL;
-                PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output");
-                %(fail)s
-            }
-        }
-
-        for (size_t i = 0; i < Nx[0]; ++i)
-        {
-            size_t j;
-            double sum = 0.0;
-            bool discount_max = false;
-
-            const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
-            const double* __restrict__ b_i = (double*)(%(b)s->data);
+                """,
+                begin_row_loop,
+                """
             const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
-            double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
             double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
-
-            npy_intp Sx = %(x)s->strides[1]/sizeof(double);
-            npy_intp Sb = %(b)s->strides[0]/sizeof(double);
-            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
-
-            size_t row_max_j=0;
-            double row_max = x_i[0] + b_i[0];
-            //try to compute sum and sm the easy way
-            for (j = 0; j < Nx[1]; ++j)
-            {
-                double row_ij = x_i[j * Sx] + b_i[j * Sb];
-                row_max_j = (row_ij > row_max) ? j : row_max_j;
-                row_max = (row_ij > row_max) ? row_ij : row_max;
+                """,
+                inside_row_loop,
+                """
+            if (y_i >= Nx[1])
+            {
+                %(fail)s;
+            }
+
+            nll_i[0] = - x_i[y_i*Sx]
+                       - b_i[y_i*Sb]
+                       + row_max
+                       + log(sum);
+                """,
+                end_row_loop)
 
-                double sm_ij = exp(row_ij);
-                sum += sm_ij;
-                sm_i[j * Ssm] = sm_ij;
-            }
-            if ((0.0 == sum) || (isinf(sum)))
-            {
-                //our cheap trick didn't work... try again and do it better.
-                discount_max = true;
-                sum = 0.0; //reset sum and recompute....
-                for (j = 0; j < Nx[1]; ++j)
-                {
-                    double row_ij = x_i[j * Sx] + b_i[j * Sb];
-                    double sm_ij = exp(row_ij - row_max);
-                    sum += sm_ij;
-                    sm_i[j * Ssm] = sm_ij;
-                }
-                if ( (0.0 == sum) || (isinf(sum)))
-                {
-                    //that was our best...
-                    %(fail)s;
-                }
-                //if we still can't sum it up, we're screwed.
-                //So far, this assertion has never failed...
-            }
+    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+        y_idx_type = node.inputs[2].type.dtype_specs()[1]
+        code_template = ''.join(self.c_code_template())
+        return code_template % dict(locals(), **sub)
 
-            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
-            double sum_inv = 1.0 / sum;
-            for (j = 0; j < Nx[1]; ++j)
-            {
-                sm_i[j * Ssm] *= sum_inv;
-            }
-
-            if (y_i >= Nx[1])
-            {
-                %(fail)s;
-            }
-
-            nll_i[0] = - x_i[y_i*Sx]
-                       - b_i[y_i*Sb]
-                       + (discount_max ? row_max : 0.0)
-                       + log(sum);
-            //mat_at(y,i,0) = -log( mat_at(s,i,t[i])); //less accurate?
-            //mat_at(y,i,0) = - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum);
-        }
-        """ % dict(locals(), **sub)
 
 crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
 
 class CrossentropySoftmax1HotWithBiasDx (theano.Op):
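
As a reference for the arithmetic the C templates implement, here is a minimal NumPy sketch (not part of the patch; the function names are illustrative) of softmax(x+b) and of the negative log-likelihood computed with the same row-max trick:

    import numpy

    def softmax_with_bias_ref(x, b):
        # Row-wise softmax of x + b, stabilized by subtracting the row maximum.
        sm = numpy.zeros_like(x)
        for i in range(x.shape[0]):
            row = x[i] + b
            sm[i] = numpy.exp(row - numpy.max(row))
            sm[i] /= numpy.sum(sm[i])
        return sm

    def crossentropy_softmax_1hot_ref(x, b, y_idx):
        # nll_i = -row[y_i] + row_max + log(sum(exp(row - row_max))), as in the C template.
        sm = softmax_with_bias_ref(x, b)
        nll = numpy.zeros(x.shape[0])
        for i in range(x.shape[0]):
            row = x[i] + b
            row_max = numpy.max(row)
            s = numpy.sum(numpy.exp(row - row_max))
            nll[i] = - row[y_idx[i]] + row_max + numpy.log(s)
        return nll, sm

    x = numpy.random.randn(3, 5)
    b = numpy.random.randn(5)
    y = numpy.array([0, 2, 4])
    nll, sm = crossentropy_softmax_1hot_ref(x, b, y)
    # The stabilized nll agrees with -log of the corresponding softmax entries.
    assert numpy.allclose(nll, -numpy.log(sm[numpy.arange(3), y]))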