changeset 440:18dbc1c11647

Work on softmax operators
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Thu, 21 Aug 2008 13:55:16 -0400
parents 200a5b0e24ea
children a2e8de4669cd
files __init__.py nnet_ops.py
diffstat 2 files changed, 200 insertions(+), 110 deletions(-)
--- a/__init__.py	Thu Jul 31 17:25:35 2008 -0400
+++ b/__init__.py	Thu Aug 21 13:55:16 2008 -0400
@@ -1,6 +1,7 @@
 import filetensor
 import nnet_ops
 import version
+import learner
 
 from lookup_list import LookupList
 
--- a/nnet_ops.py	Thu Jul 31 17:25:35 2008 -0400
+++ b/nnet_ops.py	Thu Aug 21 13:55:16 2008 -0400
@@ -65,6 +65,180 @@
 # TENSOR OPS
 #
 
+
+class SoftmaxWithBias(theano.Op):
+    """
+    An L{Op} for the output of neural-net multiclass classifiers.
+
+    @type x: a matrix of floats (float32 or float64)
+    @type b: a vector of floats (float32 or float64) whose length is the number of columns in x
+
+    This L{Op}'s output is softmax(x+b).
+    softmax(x[i]) is the i'th distribution over len(x[i]) options.
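+
+    Illustrative usage sketch (not part of this changeset's tests; assumes
+    the theano.tensor API of this era):
+
+        x = tensor.dmatrix()
+        b = tensor.dvector()
+        sm = softmax_with_bias(x, b)
+        # each row of sm is a probability distribution that sums to 1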
+    """
+
+    nin = 2
+    nout = 1
+    def __init__(self, **kwargs):
+        theano.Op.__init__(self, **kwargs)
+
+    def make_node(self, x, b):
+        x = tensor.as_tensor(x)
+        b = tensor.as_tensor(b)
+        if x.type.ndim != 2 \
+                or x.type.dtype not in ['float32', 'float64']:
+            raise ValueError('x must be 2-d tensor of floats')
+        if b.type.ndim != 1 \
+                or b.type.dtype not in ['float32', 'float64']:
+            raise ValueError('b must be 1-d tensor of floats')
+
+        sm = x.type.make_result()
+        return theano.Apply(self, [x, b], [sm])
+
+    def perform(self, node, input_storage, output_storage):
+        x, b = input_storage
+        if b.shape[0] != x.shape[1]:
+            raise ValueError('length of b must equal the number of columns in x')
+
+        sm = numpy.zeros_like(x)
+        for i in xrange(sm.shape[0]):
+            row = x[i] + b
+            # subtract the row max before exponentiating, for numerical stability
+            sm[i] = numpy.exp(row - numpy.max(row))
+            sm[i] *= 1.0 / numpy.sum(sm[i])
+        output_storage[0][0] = sm
+
+    def grad(self, (x, b), (g_sm,)):
+        sm = softmax_with_bias(x, b)
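+        # SoftmaxWithBiasDx (assumed to be defined alongside this Op) should
+        # implement the usual softmax backward pass:
+        #   dx[i] = sm[i] * (g_sm[i] - dot(g_sm[i], sm[i]))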
+        dx = SoftmaxWithBiasDx()(g_sm, sm)
+        db = tensor.sum(dx, axis=0)
+        return dx, db
+
+    def c_headers(self):
+        return ['<iostream>']
+
+    @staticmethod
+    def c_code_template():
+        # this implementation was lifted from
+        # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
+
+        #TODO: put this into a templated function, in the support code
+        #TODO: declare the max of each row as an Op output
+
+        #TODO: set error messages for failures in this code
+
+        #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
+        init_decl = """
+        npy_intp* Nx = %(x)s->dimensions;
+
+        if (%(x)s->nd != 2)
+        {
+            PyErr_SetString(PyExc_ValueError, "a not 2d tensor");
+            %(fail)s;
+        }
+        if (%(b)s->nd != 1)
+        {
+            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
+            %(fail)s;
+        }
+        if (%(x)s->descr->type_num != PyArray_DOUBLE)
+        {
+            PyErr_SetString(PyExc_TypeError, "a not float64");
+            %(fail)s;
+        }
+        if (%(b)s->descr->type_num != PyArray_DOUBLE)
+        {
+            PyErr_SetString(PyExc_TypeError, "b not float64");
+            %(fail)s;
+        }
+        if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
+        {
+            PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
+            %(fail)s;
+        }
+
+        if ((NULL == %(sm)s)
+            || (%(sm)s->dimensions[0] != %(x)s->dimensions[0])
+            || (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
+        {
+            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
+            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s);
+            if(!%(sm)s) {
+                PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output");
+                %(fail)s
+            }
+        }
+        """
+
+        begin_row_loop = """
+        for (size_t i = 0; i < Nx[0]; ++i)
+        {
+            size_t j;
+            double sum = 0.0;
+
+            const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
+            const double* __restrict__ b_i = (double*)(%(b)s->data);
+            double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
+        """
+
+        inside_row_loop = """
+            npy_intp Sx = %(x)s->strides[1]/sizeof(double);
+            npy_intp Sb = %(b)s->strides[0]/sizeof(double);
+            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
+
+            size_t row_max_j=0;
+            double row_max = x_i[0] + b_i[0];
+            // Get the maximum value of the row
+            for (j = 0; j < Nx[1]; ++j)
+            {
+                double row_ij = x_i[j * Sx] +  b_i[j * Sb];
+                row_max_j = (row_ij > row_max) ? j : row_max_j;
+                row_max   = (row_ij > row_max) ? row_ij : row_max;
+            }
+
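+            // Subtracting row_max before exp() keeps every exponent <= 0, so
+            // the sum cannot overflow; row_max is added back wherever the
+            // true log-sum-exp is needed.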
+            for (j = 0; j < Nx[1]; ++j)
+            {
+                double row_ij = x_i[j * Sx] +  b_i[j * Sb];
+                double sm_ij = exp(row_ij - row_max);
+                sum += sm_ij;
+                sm_i[j * Ssm] = sm_ij;
+            }
+            if ((0.0 == sum) || (isinf(sum)))
+            {
+                //that was our best...
+                PyErr_SetString(PyExc_FloatingPointError,
+                                "softmax row sum is zero or infinite");
+                %(fail)s;
+            }
+
+            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
+            double sum_inv = 1.0 / sum;
+            for (j = 0; j < Nx[1]; ++j)
+            {
+                sm_i[j * Ssm] *= sum_inv;
+            }
+
+        """
+
+        end_row_loop = """
+        }
+        """
+
+        return (init_decl, begin_row_loop, inside_row_loop, end_row_loop)
+
+
+    def c_code(self, node, name, (x, b), (sm,), sub):
+        code_template = ''.join(self.c_code_template())
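+        # %(x)s, %(b)s, %(sm)s come from locals(); %(fail)s comes from sub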
+        return code_template % dict(locals(), **sub)
+
+softmax_with_bias = SoftmaxWithBias()
+
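+# Reference semantics in pure numpy (illustrative only; x is 2-d, b is 1-d):
+#   e = numpy.exp(x + b - (x + b).max(axis=1)[:, None])
+#   sm = e / e.sum(axis=1)[:, None]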
+
+
 class CrossentropySoftmax1HotWithBias(theano.Op):
     """A special compound L{Op} for the output of neural-net classifiers.
 
@@ -78,11 +252,11 @@
      - KL(softmax(x+b), y)
      - softmax(x+b)
 
-    
+
     softmax(x[i]) is the i'th distribution over len(x[i]) options
 
     y_idx[i] is an integer index, encoding a 1-hot distribution. 
-    
+
     In practice, when we're trying to do classification, we have one row in x
     and y_idx per example, and y[i] is the index of the (correct) class of the
     i'th example.
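+
+    Illustrative sketch (added for clarity; assumes the theano.tensor API
+    of this era):
+
+        x = tensor.dmatrix()      # scores, one row per example
+        b = tensor.dvector()      # per-class bias
+        y_idx = tensor.lvector()  # correct class index for each example
+        nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)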
@@ -138,7 +312,9 @@
         return dx, db, None
 
     def c_headers(self): return ['<iostream>']
-    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+
+    @staticmethod
+    def c_code_template():
         # this implementation was lifted from
         # /u/bergstrj/cvs/bergstrj/src/feb07/nn.cxx
 
@@ -148,36 +324,15 @@
         #TODO: set error messages for failures in this code
 
         #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
-        y_idx_type = node.inputs[2].type.dtype_specs()[1]
-
-        return """
-        npy_intp* Nx = %(x)s->dimensions;
-
-        if (%(x)s->nd != 2)
-        {
-            PyErr_SetString(PyExc_ValueError, "a not 2d tensor");
-            %(fail)s;
-        }
-        if (%(b)s->nd != 1)
-        {
-            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
-            %(fail)s;
-        }
+        (init_decl, begin_row_loop, inside_row_loop, end_row_loop) = \
+                SoftmaxWithBias.c_code_template()
+        return (init_decl,
+                """
         if (%(y_idx)s->nd != 1)
         {
             PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
             %(fail)s;
         }
-        if (%(x)s->descr->type_num != PyArray_DOUBLE)
-        {
-            PyErr_SetString(PyExc_TypeError, "a not float64");
-            %(fail)s;
-        }
-        if (%(b)s->descr->type_num != PyArray_DOUBLE)
-        {
-            PyErr_SetString(PyExc_TypeError, "b not float64");
-            %(fail)s;
-        }
         if ((%(y_idx)s->descr->type_num != PyArray_INT64)
             && (%(y_idx)s->descr->type_num != PyArray_INT32)
             && (%(y_idx)s->descr->type_num != PyArray_INT16)
@@ -186,8 +341,7 @@
             PyErr_SetString(PyExc_TypeError, "y_idx not int8, int16, int32, or int64");
             %(fail)s;
         }
-        if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])
-         || (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0]))
+        if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0])
         {
             PyErr_SetString(PyExc_ValueError, "dimension mismatch in arguments");
             %(fail)s;
@@ -204,91 +358,26 @@
                 %(fail)s;
             }
         }
-        if ((NULL == %(sm)s)
-            || (%(sm)s->dimensions[0] != %(x)s->dimensions[0])
-            || (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
-        {
-            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
-            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), type_num_%(x)s);
-            if(!%(sm)s) {
-                // The normal cleanup code will take care of %(nll)s
-                // Py_XDECREF(%(nll)s); %(nll)s=NULL;
-                PyErr_SetString(PyExc_MemoryError, "failed to alloc sm output");
-                %(fail)s
-            }
-        }
-
-        for (size_t i = 0; i < Nx[0]; ++i)
-        {
-            size_t j;
-            double sum = 0.0;
-            bool  discount_max = false;
-
-            const double* __restrict__ x_i = (double*)(%(x)s->data + %(x)s->strides[0] * i);
-            const double* __restrict__ b_i = (double*)(%(b)s->data);
+                """,
+                begin_row_loop,
+                """
             const %(y_idx_type)s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
-            double* __restrict__ sm_i = (double*)(%(sm)s->data + %(sm)s->strides[0] * i);
             double* __restrict__ nll_i = (double*)(%(nll)s->data + %(nll)s->strides[0] * i);
-
-            npy_intp Sx = %(x)s->strides[1]/sizeof(double);
-            npy_intp Sb = %(b)s->strides[0]/sizeof(double);
-            npy_intp Ssm = %(sm)s->strides[1]/sizeof(double);
-
-            size_t row_max_j=0;
-            double row_max = x_i[0] + b_i[0];
-            //try to compute sum and sm the easy way
-            for (j = 0; j < Nx[1]; ++j)
-            {
-                double row_ij = x_i[j * Sx] +  b_i[j * Sb];
-                row_max_j = (row_ij > row_max) ? j : row_max_j;
-                row_max   = (row_ij > row_max) ? row_ij : row_max;
+                """,
+                inside_row_loop,
+                """
+            if (y_i >= Nx[1])
+            {
+                PyErr_SetString(PyExc_ValueError, "y_i >= number of columns (classes)");
+                %(fail)s;
+            }
+            // add row_max back: the shared inner loop computed
+            // sum = sum_j exp(row_j - row_max)
+            nll_i[0] = - x_i[y_i*Sx]
+                       - b_i[y_i*Sb]
+                       + row_max
+                       + log(sum);
+                """,
+                end_row_loop)
 
-                double sm_ij = exp(row_ij);
-                sum += sm_ij;
-                sm_i[j * Ssm] = sm_ij;
-            }
-            if ((0.0 == sum) || (isinf(sum))) 
-            {
-                //our cheap trick didn't work... try again and do it better.
-                discount_max = true;
-                sum = 0.0; //reset sum and recompute....
-                for (j = 0; j < Nx[1]; ++j)
-                {
-                    double row_ij = x_i[j * Sx] +  b_i[j * Sb];
 
-                    double sm_ij = exp(row_ij - row_max);
-                    sum += sm_ij;
-                    sm_i[j * Ssm] = sm_ij;
-                }
-                if ( (0.0 == sum) || (isinf(sum)))
-                { 
-                    //that was our best... 
-                    %(fail)s;
-                }
-                //if we still can't sum it up, we're screwed.
-                //So far, this assertion has never failed...
-            }
+    def c_code(self, node, name, (x, b, y_idx), (nll, sm), sub):
+        y_idx_type = node.inputs[2].type.dtype_specs()[1]
+        code_template = ''.join(self.c_code_template())
+        return code_template % dict(locals(), **sub)
 
-            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
-            double sum_inv = 1.0 / sum;
-            for (j = 0; j < Nx[1]; ++j)
-            {
-                sm_i[j * Ssm] *= sum_inv;
-            }
-
-            if (y_i >= Nx[1])
-            {
-                %(fail)s;
-            }
-
-            nll_i[0] = - x_i[y_i*Sx] 
-                       - b_i[y_i*Sb]
-                       + (discount_max ? row_max : 0.0)
-                       + log(sum);
-              //mat_at(y,i,0) = -log( mat_at(s,i,t[i]));  //less accurate?
-              //mat_at(y,i,0) =  - mat_at(x,i,t[i]) - mat_at(b,0,t[i]) + (discount_max ? maxi : 0.0) + log(sum);
-        }
-        """ % dict(locals(), **sub)
 crossentropy_softmax_1hot_with_bias = CrossentropySoftmax1HotWithBias()
 
 class CrossentropySoftmax1HotWithBiasDx (theano.Op):