changeset 843:c19085585464

kouh2008: bound exponents with sigmoid and biases with softsign, add optional softmax weight coupling, L1/L2 penalty attributes, and a weight-visualization method
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 22 Oct 2009 18:53:50 -0400
parents 3c1fb6f14a14
children b2948ae5087c
files pylearn/shared/layers/kouh2008.py
diffstat 1 file changed, 136 insertions(+), 20 deletions(-)
--- a/pylearn/shared/layers/kouh2008.py	Thu Oct 22 18:53:16 2009 -0400
+++ b/pylearn/shared/layers/kouh2008.py	Thu Oct 22 18:53:50 2009 -0400
@@ -16,14 +16,21 @@
 # in the range 1-3, but gradient descent may overstep that range.
 
 # TODO: Use updates() to clamp exponents p and q to sensible range
-
+import logging
+_logger = logging.getLogger('pylearn.shared.layers.kouh2008')
 import numpy
 import theano
 from theano import tensor
 from theano.tensor.nnet import softplus
+from theano.sandbox.softsign import softsign
 from theano.compile.sandbox import shared
 from .util import add_logging, update_locals
 
+try:
+    from PIL import Image
+except ImportError:
+    pass  # Image is only needed by img_from_weights below
+
 def _shared_uniform(rng, low, high, size, dtype, name=None):
     return shared(
             numpy.asarray(
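The hunk ends mid-helper; a minimal sketch of how _shared_uniform plausibly completes, inferred only from the visible shared(numpy.asarray(...)) pattern (the body below is an assumption, not part of the changeset):

    def _shared_uniform(rng, low, high, size, dtype, name=None):
        # wrap a freshly drawn uniform ndarray in a theano shared variable
        return shared(
                numpy.asarray(
                    rng.uniform(low=low, high=high, size=size),
                    dtype=dtype),
                name=name)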
@@ -37,24 +44,26 @@
     :param w_list: a list of N output weights of shape (n_out,)
     :param p: a tensor of exponents of shape (n_out,)
     :param q: a tensor of exponents of shape (n_out,)
+    :param r: a tensor of exponents of shape (n_out,)
     :param k: a tensor of biases of shape (n_out,)
 
     output - a tensor of activations of shape (n_examples, n_out)
     """
 
-    def __init__(self, w_list, x_list, p, q, r, k, params, updates):
-        """Transcription of equation 2.1 from paper that appears on page 1434.
+    def __init__(self, w_list, x_list, p, q, r, k, params, updates, eps=1.0e-6):
+        """Transcription of equation 2.1 from paper (page 1434).
         """
         if len(w_list) != len(x_list):
             raise ValueError('w_list must have same len as x_list')
         output = (sum(w * tensor.pow(x, p) for (w,x) in zip(w_list, x_list)))\
-                / (k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))
+                / (numpy.asarray(eps, dtype=k.type.dtype) + k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))
 
         assert output.type.ndim == 2
         update_locals(self, locals())
+        _logger.debug('output dtype %s', output.dtype)
 
     @classmethod
-    def new(cls, rng, x_list, n_out, dtype=None, params=[], updates=[]):
+    def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], exponent_range=(1.0, 3.0)):
         """
         """
         if dtype is None:
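For reference, the expression built in __init__ above computes, per output unit, output = sum_i(w_i * x_i**p) / (eps + k + (sum_i x_i**q)**r). A minimal pure-numpy sketch of that forward pass; shapes, seeds and sampling ranges are illustrative only:

    import numpy

    rng = numpy.random.RandomState(0)
    n_examples, n_out, n_terms = 4, 3, 2
    eps = 1.0e-6

    # positive 'simple cell' responses, one (n_examples, n_out) block per term
    x_list = [rng.rand(n_examples, n_out) for _ in range(n_terms)]
    w_list = [rng.uniform(-1, 1, size=n_out) for _ in range(n_terms)]
    p = rng.uniform(1, 3, size=n_out)
    q = rng.uniform(1, 3, size=n_out)
    r = rng.uniform(1.0/3, 1.0, size=n_out)   # the range new_expbounds enforces
    k = rng.uniform(-0.3, 0.3, size=n_out)

    numerator = sum(w * x**p for (w, x) in zip(w_list, x_list))
    denominator = eps + k + sum(x**q for x in x_list)**r
    output = numerator / denominator
    assert output.shape == (n_examples, n_out)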
@@ -64,18 +73,61 @@
         def shared_uniform(low, high, size, name): 
             return _shared_uniform(rng, low, high, size, dtype, name)
 
-        w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i'%i)
-                for i in xrange(n_terms)]
-        p = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='p')
-        q = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='q')
-        r = shared_uniform(low=0.3, high=0.8, size=(n_out,), name='r')
-        k = shared_uniform(low=-0.3, high=0.3, size=(n_out,), name='k')
-        return cls(w_list, x_list, p, q, r, k,
-                params = [p, q, r, k] + w_list + params,
-                updates=updates)
+        use_softmax_w = True  # couple each unit's N weights through a softmax: positive, summing to 1
+
+        if use_softmax_w:
+            w = shared_uniform(low=-.1, high=.1, size=(n_out, n_terms), name='Kouh2008::w')
+            w_sm = theano.tensor.nnet.softmax(w)
+            w_list = [w_sm[:,i] for i in xrange(n_terms)]
+            w_l1 = abs(w).sum()
+            w_l2_sqr = (w**2).sum()
+        else:
+            w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i'%i)
+                    for i in xrange(n_terms)]
+            w_l1 = sum(abs(wi).sum() for wi in w_list)
+            w_l2_sqr = sum((wi**2).sum() for wi in w_list)
+
+        e_range_low, e_range_high = exponent_range
+        e_range_low = numpy.asarray(e_range_low, dtype=dtype)
+        e_range_high = numpy.asarray(e_range_high, dtype=dtype)
+        e_range_mag = e_range_high - e_range_low
+        if e_range_mag < 0:
+            raise ValueError('exponent range must have low <= high')
+
+        p_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='p')
+        q_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='q') 
+        r_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='r')
+        k_unbounded = shared_uniform(low=-0.2, high=0.2, size=(n_out,), name='k') # biases
+
+        p = tensor.nnet.sigmoid(p_unbounded) * e_range_mag + e_range_low
+        q = tensor.nnet.sigmoid(q_unbounded) * e_range_mag + e_range_low
+        r = tensor.nnet.sigmoid(r_unbounded) * \
+                numpy.asarray(1.0/e_range_low - 1.0/e_range_high, dtype=dtype) \
+                + numpy.asarray(1.0/e_range_high, dtype=dtype)
+
+        k = softsign(k_unbounded)
+
+        if use_softmax_w:
+            rval = cls(w_list, x_list, p, q, r, k,
+                    params = [p_unbounded, q_unbounded, r_unbounded, k_unbounded, w] + params,  # train k_unbounded, not derived k
+                    updates=updates)
+        else:
+            rval = cls(w_list, x_list, p, q, r, k,
+                    params = [p_unbounded, q_unbounded, r_unbounded, k_unbounded] + w_list + params,
+                    updates=updates)
+        rval.p_unbounded = p_unbounded
+        rval.q_unbounded = q_unbounded
+        rval.r_unbounded = r_unbounded
+        rval.k_unbounded = k_unbounded
+        rval.exp_l1 = abs(p_unbounded).sum() + abs(q_unbounded).sum() + abs(r_unbounded).sum()
+        rval.exp_l2_sqr = (p_unbounded**2).sum() + (q_unbounded**2).sum() + (r_unbounded**2).sum()
+        rval.w_l1 = w_l1
+        rval.w_l2_sqr = w_l2_sqr
+        return rval
 
     @classmethod
-    def new_filters(cls, rng, input, n_in, n_out, n_terms, dtype=None):
+    def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None, eps=1e-1,
+            exponent_range=(1.0, 3.0), filter_range=1.0):
         """Return a KouhLayer instance with random parameters
 
         The parameters are drawn on a range [typically] suitable for fine-tuning by gradient
@@ -93,6 +145,9 @@
         :param n_terms: each of the n_out complex-cell firing rates will be determined from
         this many 'simple cell' responses.
 
+        :param eps: this amount is added to the softsign-squashed filter responses as a
+        baseline firing rate, which prevents a subsequent error in the gradient of ``pow(0, p)``.
+
         :returns: KouhLayer instance with freshly-allocated random weights.
 
         """
@@ -101,19 +156,80 @@
 
         if dtype is None:
             dtype = input.dtype
+        _logger.debug('dtype %s', dtype)
 
         def shared_uniform(low, high, size, name): 
             return _shared_uniform(rng, low, high, size, dtype, name)
 
-        f_list = [shared_uniform(low=-2.0/n_in, high=2.0/n_in, size=(n_in, n_out), name='f_%i'%i)
+        f_list = [shared_uniform(low=-2.0/numpy.sqrt(n_in), high=2.0/numpy.sqrt(n_in), size=(n_in, n_out), name='f_%i'%i)
                 for i in xrange(n_terms)]
 
-        x_list = [softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)]
+        b_list = [shared_uniform(low=0, high=.01, size=(n_out,), name='b_%i'%i)
+                for i in xrange(n_terms)]
+        # keep responses strictly positive, in (eps + filter_range/2, eps + 3*filter_range/2);
+        # this replaces the earlier eps + softplus(dot(input, f)) formulation
+        filter_range = numpy.asarray(filter_range, dtype=dtype)
+        half_filter_range = numpy.asarray(filter_range/2, dtype=dtype)
+        x_list = [numpy.asarray(filter_range + eps, dtype=dtype)
+                + half_filter_range * softsign(tensor.dot(input, f_list[i]) + b_list[i])
+                for i in xrange(n_terms)]
 
-        rval = cls.new(rng, x_list, n_out, dtype=dtype, params=f_list)
+        rval = cls.new_expbounds(rng, x_list, n_out, dtype=dtype, params=f_list + b_list,
+                exponent_range=exponent_range)
+        rval.f_list = f_list
         rval.input = input #add the input to the returned object
-        rval.l1 = sum(abs(fi).sum() for fi in f_list)
-        rval.l2_sqr = sum((fi**2).sum() for fi in f_list)
+        rval.filter_l1 = sum(abs(fi).sum() for fi in f_list)
+        rval.filter_l2_sqr = sum((fi**2).sum() for fi in f_list)
         return rval
 
+    def img_from_weights(self, rows=None, cols=None, row_gap=1, col_gap=1, eps=1e-4):
+        """Return an RGB image (PIL) that visualizes all the weights in the layer.
+
+        Assumes self.filter_shape, self.w, self.n_E_quadratic and
+        self.n_S_quadratic have been set elsewhere; they are not assigned in this file.
+        """
+
+        n_in, n_out = self.f_list[0].value.shape
+
+        if rows is None and cols is None:
+            rows = int(numpy.sqrt(n_out))
+        if cols is None:
+            cols = n_out // rows + (1 if n_out % rows else 0)
+        if rows is None:
+            rows = n_out // cols + (1 if n_out % cols else 0)
+
+        filter_shape = self.filter_shape
+        height = rows * (row_gap + filter_shape[0]) - row_gap
+        width = cols * (col_gap + filter_shape[1]) - col_gap
+
+        out_array = numpy.zeros((height, width, 3), dtype='uint8')
+
+        w = self.w.value
+        w_col = 0
+        def pixel_range(x):
+            # rescale x to 0..255; eps guards against division by zero on constant tiles
+            return 255 * (x - x.min()) / (x.max() - x.min() + eps)
+
+        for r in xrange(rows):
+            out_r_low = r*(row_gap + filter_shape[0])
+            out_r_high = out_r_low + filter_shape[0]
+            for c in xrange(cols):
+                out_c_low = c*(col_gap + filter_shape[1])
+                out_c_high = out_c_low + filter_shape[1]
+                out_tile = out_array[out_r_low:out_r_high, out_c_low:out_c_high,:]
+
+                if c % 3 == 0: # linear filter
+                    if w_col < w.shape[1]:
+                        out_tile[...] = pixel_range(w[:,w_col]).reshape(filter_shape+(1,))
+                        w_col += 1
+                if c % 3 == 1: # E filters
+                    if w_col < w.shape[1]:
+                        #filters after the 3rd do not get rendered, but are skipped over.
+                        #  there are only 3 colour channels.
+                        for i in xrange(min(self.n_E_quadratic,3)):
+                            out_tile[:,:,i] = pixel_range(w[:,w_col+i]).reshape(filter_shape)
+                        w_col += self.n_E_quadratic
+                if c % 3 == 2: # S filters
+                    if w_col < w.shape[1]:
+                        #filters after the 3rd do not get rendered, but are skipped over.
+                        #  there are only 3 colour channels.
+                        for i in xrange(min(self.n_S_quadratic,3)):
+                            out_tile[:,:,2-i] = pixel_range(w[:,w_col+i]).reshape(filter_shape)
+                        w_col += self.n_S_quadratic
+        return Image.fromarray(out_array, 'RGB')
+
+add_logging(Kouh2008)
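
A hypothetical end-to-end use of the new constructor and the penalty attributes it exposes; the cost weighting below is illustrative, not from the changeset:

    import numpy
    from theano import tensor
    from pylearn.shared.layers.kouh2008 import Kouh2008

    rng = numpy.random.RandomState(23)
    x = tensor.matrix('x')   # (n_examples, n_in) minibatch

    layer = Kouh2008.new_filters_expbounds(rng, x, n_in=100, n_out=16, n_terms=4)

    # the newly exposed attributes make regularized costs one-liners
    cost = layer.output.sum() \
            + 1e-4 * layer.filter_l2_sqr \
            + 1e-4 * layer.exp_l1 \
            + 1e-4 * layer.w_l1
    # gradients are then taken w.r.t. layer.params as usual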