pylearn/shared/layers/lecun1998.py @ 1496:93b8373c6735

Prefix loggers with 'pylearn.' to ensure there is no conflict when using Pylearn code within another library
author Olivier Delalleau <delallea@iro>
date Mon, 22 Aug 2011 11:28:48 -0400
parents 6be337c30928

""" Provide the convolution and pooling layers described in LeCun 98

"""

import numpy

import theano
from theano import tensor
from theano.compile import shared, pfunc

from theano.tensor.nnet.conv import ConvOp
from theano.tensor.signal.downsample import DownsampleFactorMax

from pylearn.shared.layers.util import update_locals
from pylearn.shared.layers.squash import squash

class LeNetConvPool(object):
    """A convolution layer followed by a downsampling (max-pooling) layer,
    after the LeNet architecture of LeCun 1998.
    """

    #TODO: implement biases & scales properly. There are supposed to be more parameters.
    #    - one bias & scale per filter
    #    - one bias & scale per downsample feature location (a 2d bias)
    #    - more?

    def __init__(self, input, w, b, conv_op, ds_op, squash_op, params):
        """
        :param input: symbolic images.  Shape: (n_examples, n_images, n_rows, n_cols)
        :param w: symbolic kernels. Shape: (n_kernels, n_images, filter_height, filter_width)
        :param b: symbolic biases Shape (n_kernels)
        :param conv_op: Typically, an instantiation of ConvOp.
        :param ds_op: A downsampling op instance (such as of DownsampleFactorMax)
        :param squash_op: an elemwise squashing function (typically tanh)
        :param params: a list of shared variables that parametrize this layer (typically w and
        b)
        """
        if input.ndim != 4:
            raise TypeError('input must be a 4-d tensor', input)
        if w.ndim != 4:
            raise TypeError('w must be a 4-d tensor', w)
        if b.ndim != 1:
            raise TypeError('b must be a 1-d tensor', b)

        conv_out = conv_op(input, w)
        output = squash_op(ds_op(conv_out) + b.dimshuffle('x', 0, 'x', 'x'))
        update_locals(self, locals())
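
    # Shape note (illustrative, for the default 'valid' convolution with
    # ignore_border=True): given img_shape (r, c), filter_shape (fr, fc) and
    # poolsize (pr, pc), self.output has shape
    #   (n_examples, n_kernels, (r - fr + 1) // pr, (c - fc + 1) // pc)
    # e.g. 28x28 images, 5x5 filters and 2x2 pooling yield 12x12 feature maps.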

    @classmethod
    def new(cls, rng, input, n_examples, n_imgs, img_shape, n_filters, filter_shape, poolsize,
            ignore_border=True, conv_subsample=(1,1), dtype=None, conv_mode='valid',
            pool_type='max', squash_fn=tensor.tanh):
        """
        Allocate a LeNetConvPool layer with shared variable internal parameters.

        :param rng: a random number generator used to initialize weights
        :param input: symbolic images.  Shape: (n_examples, n_imgs, img_shape[0], img_shape[1])
        :param n_examples: input's shape[0] at runtime
        :param n_imgs: input's shape[1] at runtime
        :param img_shape: input's shape[2:4] at runtime
        :param n_filters: the number of filters to apply to the image.
        :param filter_shape: the size of the filters to apply
        :type filter_shape: pair (rows, cols)
        :param poolsize: the downsampling factor
        :type poolsize: pair (rows, cols)
        :param ignore_border: True means the downsampling should skip the scrap around the
        edges if there is any.
        :param conv_subsample: by how much should the convolution subsample the image?
        :type  conv_subsample: pair (rows, cols)
        :param dtype: the dtype for the internally allocated parameters.  This defaults to the
            input's dtype.
        :param conv_mode: The convolution mode ('full' or 'valid')
        :param pool_type: Must be 'max' for now (reserved for different kinds of pooling)
        :param squash_fn: The activation function for this layer
        :type  squash_fn: A one-to-one elemwise function such as tanh or logistic sigmoid.
        """
        if pool_type != 'max':
            # LeNet5 actually used averaging filters; consider implementing
            # 'mean' pooling, and perhaps 'min' or 'prod' pooling (or some
            # kind of geometric mean, 'gmean'?).
            raise NotImplementedError('pool_type', pool_type)

        if conv_subsample != (1, 1):
            # supporting this would require adjusting the calculation of the
            # bias size
            raise NotImplementedError('conv_subsample', conv_subsample)

        if dtype is None:
            dtype = input.dtype

        if len(filter_shape) != 2:
            raise TypeError('filter_shape must be a pair (rows, cols)', filter_shape)

        conv_op = ConvOp((n_imgs,)+img_shape, filter_shape, n_filters, n_examples,
                dx=conv_subsample[0], dy=conv_subsample[1], output_mode=conv_mode)
        ds_op = DownsampleFactorMax(poolsize, ignore_border=ignore_border)

        w_shp = (n_filters, n_imgs) + filter_shape
        b_shp = (n_filters,)

        #TODO: make w_range a parameter to new as well?
        # initialize weights uniformly within +/- 1/sqrt(fan-in)
        fan_in = filter_shape[0] * filter_shape[1] * n_imgs
        w_range = (-1.0 / numpy.sqrt(fan_in), 1.0 / numpy.sqrt(fan_in))

        w = shared(numpy.asarray(
            rng.uniform(low=w_range[0], high=w_range[1], size=w_shp),
            dtype=dtype))
        b = shared(numpy.zeros(b_shp, dtype=dtype))  # biases start at zero

        if isinstance(squash_fn, str):
            squash_fn = squash(squash_fn)

        return cls(input, w, b, conv_op, ds_op, squash_fn, [w, b])
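

if __name__ == '__main__':
    # Minimal usage sketch: allocate a single conv+pool layer on 28x28
    # single-channel inputs and compile its output with pfunc. The batch
    # size, image size, filter shape and pool size below are illustrative
    # assumptions, not values mandated by the module.
    rng = numpy.random.RandomState(23455)
    x = tensor.tensor4('x')
    layer = LeNetConvPool.new(rng, x,
            n_examples=8, n_imgs=1, img_shape=(28, 28),
            n_filters=6, filter_shape=(5, 5), poolsize=(2, 2))
    f = pfunc([x], layer.output)
    imgs = numpy.random.rand(8, 1, 28, 28).astype(x.dtype)
    # valid 5x5 convolution: 24x24 maps; 2x2 max-pooling: 12x12 maps
    print(f(imgs).shape)  # expected: (8, 6, 12, 12)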