pylearn/shared/layers/kouh2008.py @ 834:580087712f69
changeset: added shared.layers
author:    James Bergstra <bergstrj@iro.umontreal.ca>
date:      Fri, 16 Oct 2009 12:14:43 -0400
children:  c19085585464
""" Paper: This layer implements a model of complex cell firing rate responses. Canonical neural circuit (Kouh and Poggio, 2008) This layer is in a sense a 2-layer neural network, with a strange activation function in the middle. It is introduced in "A Canonical Neural Circuit for Cortical Nonlinear Operations", NECO 2008. It includes various complex-cell models and approximates neural network activation functions as special cases. """ ## optimizing this model may be difficult-- paper talks about using exponents p and q in # in the range 1-3, but gradient descent may overstep that range. # TODO: Use updates() to clamp exponents p and q to sensible range import numpy import theano from theano import tensor from theano.tensor.nnet import softplus from theano.compile.sandbox import shared from .util import add_logging, update_locals def _shared_uniform(rng, low, high, size, dtype, name=None): return shared( numpy.asarray( rng.uniform(low=low, high=high, size=size), dtype=dtype), name) class Kouh2008(object): """WRITEME :param x: a list of N non-negative tensors of shape (n_examples, n_out) :param w: a list of N output weights of shape (n_out, ) :param p: a tensor of exponents of shape (n_out,) :param q: a tensor of exponents of shape (n_out,) :param k: a tensor of biases of shape (n_out,) output - a tensor of activations of shape (n_examples, n_out) """ def __init__(self, w_list, x_list, p, q, r, k, params, updates): """Transcription of equation 2.1 from paper that appears on page 1434. """ if len(w_list) != len(x_list): raise ValueError('w_list must have same len as x_list') output = (sum(w * tensor.pow(x, p) for (w,x) in zip(w_list, x_list)))\ / (k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r)) assert output.type.ndim == 2 update_locals(self, locals()) @classmethod def new(cls, rng, x_list, n_out, dtype=None, params=[], updates=[]): """ """ if dtype is None: dtype = x_list[0].dtype n_terms = len(x_list) def shared_uniform(low, high, size, name): return _shared_uniform(rng, low, high, size, dtype, name) w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i'%i) for i in xrange(n_terms)] p = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='p') q = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='q') r = shared_uniform(low=0.3, high=0.8, size=(n_out,), name='r') k = shared_uniform(low=-0.3, high=0.3, size=(n_out,), name='k') return cls(w_list, x_list, p, q, r, k, params = [p, q, r, k] + w_list + params, updates=updates) @classmethod def new_filters(cls, rng, input, n_in, n_out, n_terms, dtype=None): """Return a KouhLayer instance with random parameters The parameters are drawn on a range [typically] suitable for fine-tuning by gradient descent. :param input: a tensor of shape (n_examples, n_in) :type n_in: positive int :param n_in: number of input dimensions :type n_out: positive int :param n_out: number of dimensions in rval.output :param nterms: each (of n_out) complex-cell firing rate will be determined from this many 'simple cell' responses. :returns: KouhLayer instance with freshly-allocated random weights. 
""" if input.type.ndim != 2: raise TypeError('matrix expected for input') if dtype is None: dtype = input.dtype def shared_uniform(low, high, size, name): return _shared_uniform(rng, low, high, size, dtype, name) f_list = [shared_uniform(low=-2.0/n_in, high=2.0/n_in, size=(n_in, n_out), name='f_%i'%i) for i in xrange(n_terms)] x_list = [softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)] rval = cls.new(rng, x_list, n_out, dtype=dtype, params=f_list) rval.input = input #add the input to the returned object rval.l1 = sum(abs(fi).sum() for fi in f_list) rval.l2_sqr = sum((fi**2).sum() for fi in f_list) return rval