""" 
Paper: 

This layer implements a model of complex cell firing rate responses.

Canonical neural circuit (Kouh and Poggio, 2008)

This layer is in a sense a 2-layer neural network, with a strange activation function
in the middle.   It is introduced in "A Canonical Neural Circuit for Cortical Nonlinear
Operations", NECO 2008.  It includes various complex-cell models and approximates neural
network activation functions as special cases.

"""

# Optimizing this model may be difficult: the paper uses exponents p and q in
# the range 1-3, but gradient descent may overstep that range.

# TODO: use updates() to clamp the exponents p and q to a sensible range
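
# A minimal sketch of such a clamp, assuming the caller applies the updates
# list after each gradient step (illustrative only, not part of this module):
#
#     updates = [(p, tensor.clip(p, 1.0, 3.0)),
#                (q, tensor.clip(q, 1.0, 3.0))]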

import numpy
import theano
from theano import tensor
from theano.tensor.nnet import softplus
from theano.compile.sandbox import shared
from .util import add_logging, update_locals

def _shared_uniform(rng, low, high, size, dtype, name=None):
    """Return a shared variable initialized from U(low, high) draws of `rng`."""
    return shared(
            numpy.asarray(
                rng.uniform(low=low, high=high, size=size),
                dtype=dtype), name=name)

class Kouh2008(object):
    """WRITEME

    :param x: a list of N non-negative tensors of shape (n_examples, n_out)
    :param w: a list of N output weights of shape (n_out, )
    :param p: a tensor of exponents of shape (n_out,)
    :param q: a tensor of exponents of shape (n_out,)
    :param k: a tensor of biases of shape (n_out,)

    output - a tensor of activations of shape (n_examples, n_out)
    """

    def __init__(self, w_list, x_list, p, q, r, k, params, updates):
        """Transcription of equation 2.1 of the paper (page 1434)."""
        if len(w_list) != len(x_list):
            raise ValueError('w_list must have same len as x_list')
        # y = sum_j(w_j * x_j**p) / (k + (sum_j x_j**q)**r), elementwise over the n_out units
        output = (sum(w * tensor.pow(x, p) for (w, x) in zip(w_list, x_list)))\
                / (k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))

        assert output.type.ndim == 2
        # stash the constructor arguments (and `output`) as attributes of self
        update_locals(self, locals())

    @classmethod
    def new(cls, rng, x_list, n_out, dtype=None, params=[], updates=[]):
        """
        """
        if dtype is None:
            dtype = x_list[0].dtype
        n_terms = len(x_list)

        def shared_uniform(low, high, size, name): 
            return _shared_uniform(rng, low, high, size, dtype, name)

        w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i'%i)
                for i in xrange(n_terms)]
        p = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='p')
        q = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='q')
        r = shared_uniform(low=0.3, high=0.8, size=(n_out,), name='r')
        k = shared_uniform(low=-0.3, high=0.3, size=(n_out,), name='k')
        return cls(w_list, x_list, p, q, r, k,
                params = [p, q, r, k] + w_list + params,
                updates=updates)

    @classmethod
    def new_filters(cls, rng, input, n_in, n_out, n_terms, dtype=None):
        """Return a KouhLayer instance with random parameters

        The parameters are drawn on a range [typically] suitable for fine-tuning by gradient
        descent. 


        :param input: a tensor of shape (n_examples, n_in)

        :type n_in: positive int
        :param n_in: number of input dimensions

        :type n_out: positive int
        :param n_out: number of dimensions in rval.output

        :param nterms: each (of n_out) complex-cell firing rate will be determined from this
        many 'simple cell' responses.

        :returns: KouhLayer instance with freshly-allocated random weights.

        """
        if input.type.ndim != 2:
            raise TypeError('matrix expected for input')

        if dtype is None:
            dtype = input.dtype

        def shared_uniform(low, high, size, name): 
            return _shared_uniform(rng, low, high, size, dtype, name)

        # simple-cell filter banks: one (n_in, n_out) weight matrix per term
        f_list = [shared_uniform(low=-2.0/n_in, high=2.0/n_in, size=(n_in, n_out), name='f_%i'%i)
                for i in xrange(n_terms)]

        # softplus keeps the 'simple cell' responses non-negative, as the model requires
        x_list = [softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)]

        rval = cls.new(rng, x_list, n_out, dtype=dtype, params=f_list)
        rval.input = input  # add the input to the returned object
        rval.l1 = sum(abs(fi).sum() for fi in f_list)      # L1 penalty on the filters
        rval.l2_sqr = sum((fi**2).sum() for fi in f_list)  # squared L2 penalty on the filters
        return rval
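
# Usage sketch (illustrative only: the shapes, the stand-in cost, and the
# compilation/training loop are hypothetical, not part of this module):
#
#     rng = numpy.random.RandomState(23455)
#     x = tensor.matrix('x')
#     layer = Kouh2008.new_filters(rng, x, n_in=64, n_out=10, n_terms=4)
#     cost = layer.output.sum() + 0.001 * layer.l1  # stand-in for a real loss
#     grads = tensor.grad(cost, layer.params)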