changeset 861:07a06c2f9408

reparametrization of kouh2008
author James Bergstra <bergstrj@iro.umontreal.ca>
date Tue, 03 Nov 2009 15:25:08 -0500
parents bf2f71084d59
children 882b4169e2b1
files pylearn/shared/layers/kouh2008.py
diffstat 1 files changed, 99 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/shared/layers/kouh2008.py	Tue Nov 03 15:24:27 2009 -0500
+++ b/pylearn/shared/layers/kouh2008.py	Tue Nov 03 15:25:08 2009 -0500
@@ -12,12 +12,9 @@
 
 """
 
-## optimizing this model may be difficult-- paper talks about using exponents p and q in
-# in the range 1-3, but gradient descent may overstep that range.
-
-# TODO: Use updates() to clamp exponents p and q to sensible range
 import logging
 _logger = logging.getLogger('pylearn.shared.layers.kouh2008')
+
 import numpy
 import theano
 from theano import tensor
@@ -50,26 +47,56 @@
     output - a tensor of activations of shape (n_examples, n_out)
     """
 
+    input = None #optional - symbolic variable of input
+    f_list = None # optional - list of filter shared variables
+    filter_l1 = None # optional - l1 of filters
+    filter_l2_sqr = None # optional - l2**2 of filters
+
+    exp_l1 = None
+    exp_l2_sqr = None
+
+    w_l1 = None
+    w_l2_sqr = None
+
+    p_unbounded = None
+    q_unbounded = None
+    r_unbounded = None
+    k_unbounded = None
+
+    p_range_default=(1.0, 3.0)
+    q_range_default=(1.0, 3.0)
+    r_range_default=(0.333, 1.0)
+    k_range_default=(0.0, 1.0)
+    x_range_default=(0.01, 1.0)
+
     def __init__(self, w_list, x_list, p, q, r, k, params, updates, eps=1.0e-6):
         """Transcription of equation 2.1 from paper (page 1434).
         """
         if len(w_list) != len(x_list):
             raise ValueError('w_list must have same len as x_list')
-        output = (sum(w * tensor.pow(x, p) for (w,x) in zip(w_list, x_list)))\
-                / (numpy.asarray(eps, dtype=k.type.dtype) + k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))
+        numerator = sum(w_i * tensor.pow(x_i, p) for (w_i,x_i) in zip(w_list, x_list))
+        denominator = k + tensor.pow(sum(tensor.pow(x_i, q) for x_i in x_list), r)
+        output = numerator / (numpy.asarray(eps, dtype=k.type.dtype) + denominator)
 
         assert output.type.ndim == 2
         update_locals(self, locals())
         _logger.debug('output dtype %s' % output.dtype)
 
     @classmethod
-    def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], exponent_range=(1.0, 3.0)):
+    def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], 
+            p_range=p_range_default,
+            q_range=q_range_default,
+            r_range=r_range_default,
+            k_range=k_range_default,
+            ):
         """
         """
         if dtype is None:
             dtype = x_list[0].dtype
         n_terms = len(x_list)
 
+        new_params = []
+
         def shared_uniform(low, high, size, name): 
             return _shared_uniform(rng, low, high, size, dtype, name)
 
@@ -81,40 +108,32 @@
             w_list = [w_sm[:,i] for i in xrange(n_terms)]
             w_l1 = abs(w).sum()
             w_l2_sqr = (w**2).sum()
+            new_params.append(w)
         else:
-            w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i'%i)
+            w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='Kouh2008::w_%i'%i)
                     for i in xrange(n_terms)]
             w_l1 = sum(abs(wi).sum() for wi in w_list)
             w_l2_sqr = sum((wi**2).sum() for wi in w_list)
-
-        e_range_low, e_range_high = exponent_range
-        e_range_low = numpy.asarray(e_range_low, dtype=dtype)
-        e_range_high = numpy.asarray(e_range_high, dtype=dtype)
-        e_range_mag = e_range_high - e_range_low
-        if e_range_mag < 0:
-            raise ValueError('exponent range must have low <= high')
+            new_params.extend(w_list)
 
         p_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='p')
         q_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='q') 
         r_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='r')
-        k_unbounded = shared_uniform(low=-0.2, high=0.2, size=(n_out,), name='k') # biases
+        k_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='k') # biases
+        new_params.extend([p_unbounded, q_unbounded, r_unbounded, k_unbounded])
 
-        p = tensor.nnet.sigmoid(p_unbounded) * e_range_mag + e_range_low
-        q = tensor.nnet.sigmoid(q_unbounded) * e_range_mag + e_range_low
-        r = tensor.nnet.sigmoid(r_unbounded) * \
-                numpy.asarray(1.0/e_range_low - 1.0/e_range_high, dtype=dtype) \
-                + numpy.asarray(1.0/e_range_high, dtype=dtype)
+        def d(a):
+            return numpy.asarray(a, dtype=dtype)
 
-        k = softsign(k_unbounded)
+        p = softsign(p_unbounded) * d(p_range[1] - p_range[0]) + d(p_range[0])
+        q = softsign(q_unbounded) * d(q_range[1] - q_range[0]) + d(q_range[0])
+        r = softsign(r_unbounded) * d(r_range[1] - r_range[0]) + d(r_range[0])
+        k = softsign(k_unbounded) * d(k_range[1] - k_range[0]) + d(k_range[0])
 
-        if use_softmax_w:
-            rval = cls(w_list, x_list, p, q, r, k,
-                    params = [p_unbounded, q_unbounded, r_unbounded, k, w] + params,
-                    updates=updates)
-        else:
-            rval = cls(w_list, x_list, p, q, r, k,
-                    params = [p_unbounded, q_unbounded, r_unbounded, k_unbounded] + w_list + params,
-                    updates=updates)
+        rval = cls(w_list, x_list, p, q, r, k,
+                params = params + new_params,
+                updates=updates)
+
         rval.p_unbounded = p_unbounded
         rval.q_unbounded = q_unbounded
         rval.r_unbounded = r_unbounded
@@ -126,9 +145,14 @@
         return rval
 
     @classmethod
-    def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None, eps=1e-1,
-            exponent_range=(1.0, 3.0), filter_range=1.0):
-        """Return a KouhLayer instance with random parameters
+    def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None,
+            p_range=p_range_default,
+            q_range=q_range_default,
+            r_range=r_range_default,
+            k_range=k_range_default,
+            x_range=x_range_default,
+            ):
+        """Return a Kouh2008 instance with random parameters
 
         The parameters are drawn on a range [typically] suitable for fine-tuning by gradient
         descent. 
@@ -145,10 +169,27 @@
         :param nterms: each (of n_out) complex-cell firing rate will be determined from this
         many 'simple cell' responses.
 
-        :param eps: this amount is added to the softplus of filter responses as a baseline
-        firing rate (that prevents a subsequent error from ``pow(0, p)``) 
+        :param eps: (stale: not in this method's signature) a baseline formerly added to the
+        filter responses to prevent an error from ``pow(0, p)``. The lower bound x_range[0]
+        now fills that role; x_range[0]**p_range[1] must not underflow.
+
+        :param p_range: See `new_expbounds`.
+        :type p_range: tuple([low, high])
+
+        :param q_range: See `new_expbounds`.
+        :type q_range: tuple([low, high])
 
-        :returns: KouhLayer instance with freshly-allocated random weights.
+        :param r_range: See `new_expbounds`.
+        :type r_range: tuple([low, high])
+
+        :param k_range: See `new_expbounds`.
+        :type k_range: tuple([low, high])
+
+        :param x_range: Filter responses are affine-transformed softsigns lying between these
+        values.
+        :type x_range: tuple([low, high])
+
+        :returns: Kouh2008 instance with freshly-allocated random weights.
 
         """
         if input.type.ndim != 2:
@@ -161,19 +202,30 @@
         def shared_uniform(low, high, size, name): 
             return _shared_uniform(rng, low, high, size, dtype, name)
 
-        f_list = [shared_uniform(low=-2.0/numpy.sqrt(n_in), high=2.0/numpy.sqrt(n_in), size=(n_in, n_out), name='f_%i'%i)
+        f_list = [shared_uniform(low=-2.0/numpy.sqrt(n_in), high=2.0/numpy.sqrt(n_in), 
+            size=(n_in, n_out), name='Kouh2008::f_%i'%i)
+                for i in xrange(n_terms)]
+
+        b_list = [shared_uniform(low=0, high=.01,
+            size=(n_out,), name='Kouh::2008::b_%i'%i)
                 for i in xrange(n_terms)]
 
-        b_list = [shared_uniform(low=0, high=.01, size=(n_out,), name='b_%i'%i)
-                for i in xrange(n_terms)]
-        #x_list = [numpy.asarray(eps, dtype=dtype)+softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)]
-        filter_range = numpy.asarray(filter_range, dtype=dtype)
-        half_filter_range = numpy.asarray(filter_range/2, dtype=dtype)
-        x_list = [numpy.asarray(filter_range + eps, dtype=dtype)+half_filter_range *softsign(tensor.dot(input, f_list[i]) +
-            b_list[i]) for i in xrange(n_terms)]
+        def d(a):
+            return numpy.asarray(a, dtype=dtype)
+
+        x_low = d(x_range[0])
+        x_high = d(x_range[1])
+
+        #softsign's range is (-1, 1)
+        # we want filter responses to span (x_low, x_high)
+        x_list = [x_low + (x_high-x_low)*(d(0.5) + d(0.5)*softsign(tensor.dot(input, f_list[i])+b_list[i]))
+                    for i in xrange(n_terms)]
 
         rval = cls.new_expbounds(rng, x_list, n_out, dtype=dtype, params=f_list + b_list,
-                exponent_range=exponent_range)
+                p_range=p_range,
+                q_range=q_range,
+                r_range=r_range,
+                k_range=k_range)
         rval.f_list = f_list
         rval.input = input #add the input to the returned object
         rval.filter_l1 = sum(abs(fi).sum() for fi in f_list)
@@ -182,6 +234,8 @@
 
     def img_from_weights(self, rows=None, cols=None, row_gap=1, col_gap=1, eps=1e-4):
         """ Return an image that visualizes all the weights in the layer.
+
+        WRITEME: how does the image relate to the weights
         """
 
         n_in, n_out = self.f_list[0].value.shape