changeset 843:c19085585464
many changes to kouh2008

author      James Bergstra <bergstrj@iro.umontreal.ca>
date        Thu, 22 Oct 2009 18:53:50 -0400
parents     3c1fb6f14a14
children    b2948ae5087c
files       pylearn/shared/layers/kouh2008.py
diffstat    1 files changed, 136 insertions(+), 20 deletions(-)
--- a/pylearn/shared/layers/kouh2008.py	Thu Oct 22 18:53:16 2009 -0400
+++ b/pylearn/shared/layers/kouh2008.py	Thu Oct 22 18:53:50 2009 -0400
@@ -16,14 +16,21 @@
 # in the range 1-3, but gradient descent may overstep that range.
 # TODO: Use updates() to clamp exponents p and q to sensible range
-
+import logging
+_logger = logging.getLogger('pylearn.shared.layers.kouh2008')
 import numpy
 import theano
 from theano import tensor
 from theano.tensor.nnet import softplus
+from theano.sandbox.softsign import softsign
 from theano.compile.sandbox import shared
 
 from .util import add_logging, update_locals
 
+try:
+    from PIL import Image
+except ImportError:
+    pass
+
 def _shared_uniform(rng, low, high, size, dtype, name=None):
     return shared(
             numpy.asarray(
@@ -37,24 +44,26 @@
     :param w: a list of N output weights of shape (n_out,)
     :param p: a tensor of exponents of shape (n_out,)
     :param q: a tensor of exponents of shape (n_out,)
+    :param r: a tensor of exponents of shape (n_out,)
     :param k: a tensor of biases of shape (n_out,)
 
     output - a tensor of activations of shape (n_examples, n_out)
     """
-    def __init__(self, w_list, x_list, p, q, r, k, params, updates):
-        """Transcription of equation 2.1 from paper that appears on page 1434.
+    def __init__(self, w_list, x_list, p, q, r, k, params, updates, eps=1.0e-6):
+        """Transcription of equation 2.1 from paper (page 1434).
         """
         if len(w_list) != len(x_list):
             raise ValueError('w_list must have same len as x_list')
         output = (sum(w * tensor.pow(x, p) for (w, x) in zip(w_list, x_list))) \
-                / (k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))
+                / (numpy.asarray(eps, dtype=k.type.dtype) + k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))
 
         assert output.type.ndim == 2
         update_locals(self, locals())
+        _logger.debug('output dtype %s' % output.dtype)
 
     @classmethod
-    def new(cls, rng, x_list, n_out, dtype=None, params=[], updates=[]):
+    def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], exponent_range=(1.0, 3.0)):
         """
         """
         if dtype is None:
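The `eps` guard added to `__init__` above keeps the normalizing denominator of equation 2.1 away from zero: each output unit computes y = sum_i(w_i * x_i**p) / (eps + k + (sum_i x_i**q)**r). What follows is a minimal NumPy sketch of that computation, illustrative only and not part of the patch (`kouh_output` is a made-up name; shapes follow the class docstring):

    # NumPy sketch of the symbolic graph that __init__ builds (eq. 2.1
    # plus the new eps guard); each x in x_list is (n_examples, n_out),
    # each w and the exponents broadcast along the n_out axis.
    import numpy

    def kouh_output(w_list, x_list, p, q, r, k, eps=1.0e-6):
        num = sum(w * x ** p for (w, x) in zip(w_list, x_list))
        den = eps + k + sum(x ** q for x in x_list) ** r
        return num / den   # eps keeps den nonzero even when k + (...)**r == 0

    rng = numpy.random.RandomState(23)
    xs = [rng.rand(5, 4) + 0.5 for _ in range(3)]   # positive 'simple cell' responses
    ws = [numpy.ones(4) / 3.0 for _ in range(3)]
    print(kouh_output(ws, xs, p=2.0, q=2.0, r=0.5, k=0.0).shape)   # (5, 4)

With strictly positive responses x and k bounded away from -eps, the denominator stays positive, which the bounded filter responses introduced further down arrange.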
@@ -64,18 +73,61 @@
         def shared_uniform(low, high, size, name):
             return _shared_uniform(rng, low, high, size, dtype, name)
 
-        w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i' % i)
-                for i in xrange(n_terms)]
-        p = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='p')
-        q = shared_uniform(low=1.0, high=3.0, size=(n_out,), name='q')
-        r = shared_uniform(low=0.3, high=0.8, size=(n_out,), name='r')
-        k = shared_uniform(low=-0.3, high=0.3, size=(n_out,), name='k')
-        return cls(w_list, x_list, p, q, r, k,
-                params=[p, q, r, k] + w_list + params,
-                updates=updates)
+        use_softmax_w = True
+
+        if use_softmax_w:
+            w = shared_uniform(low=-.1, high=.1, size=(n_out, n_terms), name='Kouh2008::w')
+            w_sm = theano.tensor.nnet.softmax(w)
+            w_list = [w_sm[:, i] for i in xrange(n_terms)]
+            w_l1 = abs(w).sum()
+            w_l2_sqr = (w ** 2).sum()
+        else:
+            w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i' % i)
+                    for i in xrange(n_terms)]
+            w_l1 = sum(abs(wi).sum() for wi in w_list)
+            w_l2_sqr = sum((wi ** 2).sum() for wi in w_list)
+
+        e_range_low, e_range_high = exponent_range
+        e_range_low = numpy.asarray(e_range_low, dtype=dtype)
+        e_range_high = numpy.asarray(e_range_high, dtype=dtype)
+        e_range_mag = e_range_high - e_range_low
+        if e_range_mag < 0:
+            raise ValueError('exponent range must have low <= high')
+
+        p_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='p')
+        q_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='q')
+        r_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='r')
+        k_unbounded = shared_uniform(low=-0.2, high=0.2, size=(n_out,), name='k')  # biases
+
+        p = tensor.nnet.sigmoid(p_unbounded) * e_range_mag + e_range_low
+        q = tensor.nnet.sigmoid(q_unbounded) * e_range_mag + e_range_low
+        r = tensor.nnet.sigmoid(r_unbounded) \
+                * numpy.asarray(1.0/e_range_low - 1.0/e_range_high, dtype=dtype) \
+                + numpy.asarray(1.0/e_range_high, dtype=dtype)
+
+        k = softsign(k_unbounded)
+
+        if use_softmax_w:
+            rval = cls(w_list, x_list, p, q, r, k,
+                    params=[p_unbounded, q_unbounded, r_unbounded, k_unbounded, w] + params,
+                    updates=updates)
+        else:
+            rval = cls(w_list, x_list, p, q, r, k,
+                    params=[p_unbounded, q_unbounded, r_unbounded, k_unbounded] + w_list + params,
+                    updates=updates)
+        rval.p_unbounded = p_unbounded
+        rval.q_unbounded = q_unbounded
+        rval.r_unbounded = r_unbounded
+        rval.k_unbounded = k_unbounded
+        rval.exp_l1 = abs(p_unbounded).sum() + abs(q_unbounded).sum() + abs(r_unbounded).sum()
+        rval.exp_l2_sqr = (p_unbounded ** 2).sum() + (q_unbounded ** 2).sum() + (r_unbounded ** 2).sum()
+        rval.w_l1 = w_l1
+        rval.w_l2_sqr = w_l2_sqr
+        return rval
 
     @classmethod
-    def new_filters(cls, rng, input, n_in, n_out, n_terms, dtype=None):
+    def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None, eps=1e-1,
+            exponent_range=(1.0, 3.0), filter_range=1.0):
         """Return a KouhLayer instance with random parameters
 
         The parameters are drawn on a range [typically] suitable for fine-tuning by gradient
@@ -93,6 +145,9 @@
         :param n_terms: each (of n_out) complex-cell firing rate will be determined from this
         many 'simple cell' responses.
 
+        :param eps: this amount is added to the (bounded) filter responses as a baseline
+        firing rate, preventing a subsequent error from ``pow(0, p)``
+
         :returns: KouhLayer instance with freshly-allocated random weights.
         """
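`new_expbounds` above replaces the old direct draws of p, q, r, and k with unconstrained shared variables squashed through sigmoid and softsign, addressing the module's TODO about gradient descent overstepping the 1-3 exponent range. A standalone sketch of the mapping, illustrative and not part of the patch (`bounded_params` is a made-up name):

    # Sketch of the reparameterization: the optimizer updates the raw
    # (unbounded) values, while the effective parameters stay in range.
    import numpy

    def sigmoid(v):
        return 1.0 / (1.0 + numpy.exp(-v))

    def softsign(v):
        return v / (1.0 + abs(v))

    def bounded_params(p_u, q_u, r_u, k_u, low=1.0, high=3.0):
        p = sigmoid(p_u) * (high - low) + low                  # p in (low, high)
        q = sigmoid(q_u) * (high - low) + low                  # q in (low, high)
        r = sigmoid(r_u) * (1.0/low - 1.0/high) + 1.0/high     # r in (1/high, 1/low)
        k = softsign(k_u)                                      # k in (-1, 1)
        return p, q, r, k

    # the bounds hold no matter how far gradient descent wanders:
    print(bounded_params(-50.0, 0.0, 50.0, 1e6))

Mapping r into the reciprocal interval (1/high, 1/low), rather than into the exponent range itself, keeps the composite exponent q*r of the denominator in a moderate range around one.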
+ """ + n_in, n_out = self.f_list[0].value.shape + + if rows is None and cols is None: + rows = int(numpy.sqrt(n_out)) + if cols is None: + cols = n_out // rows + (1 if n_out % rows else 0) + if rows is None: + rows = n_out // cols + (1 if n_out % cols else 0) + + filter_shape = self.filter_shape + height = rows * (row_gap + filter_shape[0]) - row_gap + width = cols * (col_gap + filter_shape[1]) - col_gap + + out_array = numpy.zeros((height, width, 3), dtype='uint8') + + w = self.w.value + w_col = 0 + def pixel_range(x): + return 255 * (x - x.min()) / (x.max() - x.min() + eps) + + for r in xrange(rows): + out_r_low = r*(row_gap + filter_shape[0]) + out_r_high = out_r_low + filter_shape[0] + for c in xrange(cols): + out_c_low = c*(col_gap + filter_shape[1]) + out_c_high = out_c_low + filter_shape[1] + out_tile = out_array[out_r_low:out_r_high, out_c_low:out_c_high,:] + + if c % 3 == 0: # linear filter + if w_col < w.shape[1]: + out_tile[...] = pixel_range(w[:,w_col]).reshape(filter_shape+(1,)) + w_col += 1 + if c % 3 == 1: # E filters + if w_col < w.shape[1]: + #filters after the 3rd do not get rendered, but are skipped over. + # there are only 3 colour channels. + for i in xrange(min(self.n_E_quadratic,3)): + out_tile[:,:,i] = pixel_range(w[:,w_col+i]).reshape(filter_shape) + w_col += self.n_E_quadratic + if c % 3 == 2: # S filters + if w_col < w.shape[1]: + #filters after the 3rd do not get rendered, but are skipped over. + # there are only 3 colour channels. + for i in xrange(min(self.n_S_quadratic,3)): + out_tile[:,:,2-i] = pixel_range(w[:,w_col+i]).reshape(filter_shape) + w_col += self.n_S_quadratic + return Image.fromarray(out_array, 'RGB') + +add_logging(Kouh2008)