# HG changeset patch # User James Bergstra # Date 1257279908 18000 # Node ID 07a06c2f9408406e3291aad179216d65c30d5d9d # Parent bf2f71084d59c1e7dec5a6dd0d6813f4f853ca9b reparametrization of kouh2008 diff -r bf2f71084d59 -r 07a06c2f9408 pylearn/shared/layers/kouh2008.py --- a/pylearn/shared/layers/kouh2008.py Tue Nov 03 15:24:27 2009 -0500 +++ b/pylearn/shared/layers/kouh2008.py Tue Nov 03 15:25:08 2009 -0500 @@ -12,12 +12,9 @@ """ -## optimizing this model may be difficult-- paper talks about using exponents p and q in -# in the range 1-3, but gradient descent may overstep that range. - -# TODO: Use updates() to clamp exponents p and q to sensible range import logging _logger = logging.getLogger('pylearn.shared.layers.kouh2008') + import numpy import theano from theano import tensor @@ -50,26 +47,56 @@ output - a tensor of activations of shape (n_examples, n_out) """ + input = None #optional - symbolic variable of input + f_list = None # optional - list of filter shared variables + filter_l1 = None # optional - l1 of filters + filter_l2_sqr = None # optional - l2**2 of filters + + exp_l1 = None + exp_l2_sqr = None + + w_l1 = None + w_l2_sqr = None + + p_unbounded = None + q_unbounded = None + r_unbounded = None + k_unbounded = None + + p_range_default=(1.0, 3.0) + q_range_default=(1.0, 3.0) + r_range_default=(0.333, 1.0) + k_range_default=(0.0, 1.0) + x_range_default=(0.01, 1.0) + def __init__(self, w_list, x_list, p, q, r, k, params, updates, eps=1.0e-6): """Transcription of equation 2.1 from paper (page 1434). """ if len(w_list) != len(x_list): raise ValueError('w_list must have same len as x_list') - output = (sum(w * tensor.pow(x, p) for (w,x) in zip(w_list, x_list)))\ - / (numpy.asarray(eps, dtype=k.type.dtype) + k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r)) + numerator = sum(w_i * tensor.pow(x_i, p) for (w_i,x_i) in zip(w_list, x_list)) + denominator = k + tensor.pow(sum(tensor.pow(x_i, q) for x_i in x_list), r) + output = numerator / (numpy.asarray(eps, dtype=k.type.dtype) + denominator) assert output.type.ndim == 2 update_locals(self, locals()) _logger.debug('output dtype %s' % output.dtype) @classmethod - def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], exponent_range=(1.0, 3.0)): + def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], + p_range=p_range_default, + q_range=q_range_default, + r_range=r_range_default, + k_range=k_range_default, + ): """ """ if dtype is None: dtype = x_list[0].dtype n_terms = len(x_list) + new_params = [] + def shared_uniform(low, high, size, name): return _shared_uniform(rng, low, high, size, dtype, name) @@ -81,40 +108,32 @@ w_list = [w_sm[:,i] for i in xrange(n_terms)] w_l1 = abs(w).sum() w_l2_sqr = (w**2).sum() + new_params.append(w) else: - w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i'%i) + w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='Kouh2008::w_%i'%i) for i in xrange(n_terms)] w_l1 = sum(abs(wi).sum() for wi in w_list) w_l2_sqr = sum((wi**2).sum() for wi in w_list) - - e_range_low, e_range_high = exponent_range - e_range_low = numpy.asarray(e_range_low, dtype=dtype) - e_range_high = numpy.asarray(e_range_high, dtype=dtype) - e_range_mag = e_range_high - e_range_low - if e_range_mag < 0: - raise ValueError('exponent range must have low <= high') + new_params.extend(w_list) p_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='p') q_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='q') r_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='r') - k_unbounded = shared_uniform(low=-0.2, high=0.2, size=(n_out,), name='k') # biases + k_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='k') # biases + new_params.extend([p_unbounded, q_unbounded, r_unbounded, k_unbounded]) - p = tensor.nnet.sigmoid(p_unbounded) * e_range_mag + e_range_low - q = tensor.nnet.sigmoid(q_unbounded) * e_range_mag + e_range_low - r = tensor.nnet.sigmoid(r_unbounded) * \ - numpy.asarray(1.0/e_range_low - 1.0/e_range_high, dtype=dtype) \ - + numpy.asarray(1.0/e_range_high, dtype=dtype) + def d(a): + return numpy.asarray(a, dtype=dtype) - k = softsign(k_unbounded) + p = softsign(p_unbounded) * d(p_range[1] - p_range[0]) + d(p_range[0]) + q = softsign(q_unbounded) * d(q_range[1] - q_range[0]) + d(q_range[0]) + r = softsign(r_unbounded) * d(r_range[1] - r_range[0]) + d(r_range[0]) + k = softsign(k_unbounded) * d(k_range[1] - k_range[0]) + d(k_range[0]) - if use_softmax_w: - rval = cls(w_list, x_list, p, q, r, k, - params = [p_unbounded, q_unbounded, r_unbounded, k, w] + params, - updates=updates) - else: - rval = cls(w_list, x_list, p, q, r, k, - params = [p_unbounded, q_unbounded, r_unbounded, k_unbounded] + w_list + params, - updates=updates) + rval = cls(w_list, x_list, p, q, r, k, + params = params + new_params, + updates=updates) + rval.p_unbounded = p_unbounded rval.q_unbounded = q_unbounded rval.r_unbounded = r_unbounded @@ -126,9 +145,14 @@ return rval @classmethod - def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None, eps=1e-1, - exponent_range=(1.0, 3.0), filter_range=1.0): - """Return a KouhLayer instance with random parameters + def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None, + p_range=p_range_default, + q_range=q_range_default, + r_range=r_range_default, + k_range=k_range_default, + x_range=x_range_default, + ): + """Return a Kouh2008 instance with random parameters The parameters are drawn on a range [typically] suitable for fine-tuning by gradient descent. @@ -145,10 +169,27 @@ :param nterms: each (of n_out) complex-cell firing rate will be determined from this many 'simple cell' responses. - :param eps: this amount is added to the softplus of filter responses as a baseline - firing rate (that prevents a subsequent error from ``pow(0, p)``) + :param eps: this amount is added to the filter responses as a baseline + firing rate (that prevents a subsequent error from ``pow(0, p)``) + The eps must be large enough so that eps**p_range[1] does not underflow. + + :param p_range: See `new_expbounds`. + :type p_range: tuple([low, high]) + + :param q_range: See `new_expbounds`. + :type q_range: tuple([low, high]) - :returns: KouhLayer instance with freshly-allocated random weights. + :param r_range: See `new_expbounds`. + :type r_range: tuple([low, high]) + + :param k_range: See `new_expbounds`. + :type k_range: tuple([low, high]) + + :param x_range: Filter responses are affine-transformed softsigns lying between these + values. + :type x_range: tuple([low, high]) + + :returns: Kouh2008 instance with freshly-allocated random weights. """ if input.type.ndim != 2: @@ -161,19 +202,30 @@ def shared_uniform(low, high, size, name): return _shared_uniform(rng, low, high, size, dtype, name) - f_list = [shared_uniform(low=-2.0/numpy.sqrt(n_in), high=2.0/numpy.sqrt(n_in), size=(n_in, n_out), name='f_%i'%i) + f_list = [shared_uniform(low=-2.0/numpy.sqrt(n_in), high=2.0/numpy.sqrt(n_in), + size=(n_in, n_out), name='Kouh2008::f_%i'%i) + for i in xrange(n_terms)] + + b_list = [shared_uniform(low=0, high=.01, + size=(n_out,), name='Kouh::2008::b_%i'%i) for i in xrange(n_terms)] - b_list = [shared_uniform(low=0, high=.01, size=(n_out,), name='b_%i'%i) - for i in xrange(n_terms)] - #x_list = [numpy.asarray(eps, dtype=dtype)+softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)] - filter_range = numpy.asarray(filter_range, dtype=dtype) - half_filter_range = numpy.asarray(filter_range/2, dtype=dtype) - x_list = [numpy.asarray(filter_range + eps, dtype=dtype)+half_filter_range *softsign(tensor.dot(input, f_list[i]) + - b_list[i]) for i in xrange(n_terms)] + def d(a): + return numpy.asarray(a, dtype=dtype) + + x_low = d(x_range[0]) + x_high = d(x_range[1]) + + #softsign's range is (-1, 1) + # we want filter responses to span (x_low, x_high) + x_list = [x_low + (x_high-x_low)*(d(0.5) + d(0.5)*softsign(tensor.dot(input, f_list[i])+b_list[i])) + for i in xrange(n_terms)] rval = cls.new_expbounds(rng, x_list, n_out, dtype=dtype, params=f_list + b_list, - exponent_range=exponent_range) + p_range=p_range, + q_range=q_range, + r_range=r_range, + k_range=k_range) rval.f_list = f_list rval.input = input #add the input to the returned object rval.filter_l1 = sum(abs(fi).sum() for fi in f_list) @@ -182,6 +234,8 @@ def img_from_weights(self, rows=None, cols=None, row_gap=1, col_gap=1, eps=1e-4): """ Return an image that visualizes all the weights in the layer. + + WRITEME: how does the image relate to the weights """ n_in, n_out = self.f_list[0].value.shape