changeset 888:5c7374bd127c

merge
author James Bergstra <bergstrj@iro.umontreal.ca>
date Tue, 17 Nov 2009 13:33:04 -0500
parents d7766e52e773 (diff) 3a68b6936303 (current diff)
children 57ac1af9c17c
files pylearn/dataset_ops/memo.py
diffstat 11 files changed, 374 insertions(+), 67 deletions(-)
--- a/pylearn/dataset_ops/MNIST.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/dataset_ops/MNIST.py	Tue Nov 17 13:33:04 2009 -0500
@@ -52,7 +52,6 @@
 def get_test_img_f64_rasterized():
     return get_test_img_u8_rasterized() / numpy.asarray(255, dtype='float64')
 
-#@constructor
 def mnist(s_idx, split, dtype='float64', rasterized=False):
     """
     :param s_idx:
--- a/pylearn/dataset_ops/cifar10.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/dataset_ops/cifar10.py	Tue Nov 17 13:33:04 2009 -0500
@@ -13,7 +13,7 @@
 import theano
 
 from protocol import TensorFnDataset # protocol.py __init__.py
-from .memo import memo
+from .memo import memo # memo.py
 
 def _unpickle(filename, dtype):
     #implements loading as well as dtype-conversion and dtype-scaling
--- a/pylearn/dataset_ops/glviewer.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/dataset_ops/glviewer.py	Tue Nov 17 13:33:04 2009 -0500
@@ -41,7 +41,6 @@
 
 import logging
 _logger = logging.getLogger('glviewer')
-_logger.setLevel(logging.INFO)
 def debug(*msg): _logger.debug(' '.join(str(m) for m in msg))
 def info(*msg): _logger.info(' '.join(str(m) for m in msg))
 def warn(*msg): _logger.warn(' '.join(str(m) for m in msg))
@@ -58,7 +57,16 @@
             glPixelStorei(GL_UNPACK_ALIGNMENT,1)
             return glTexImage2D(GL_TEXTURE_2D, 0, 3, cols, rows, 0, GL_RGBA, GL_UNSIGNED_BYTE,
                     buf[::-1].flatten())
+        elif str(x.dtype).startswith('float'):
+            rows, cols = x.shape
+            buf = numpy.zeros((rows, cols, 4), dtype='uint8')
+            buf += x.reshape( (rows, cols, 1)) * 255
+            glPixelStorei(GL_UNPACK_ALIGNMENT,1)
+            debug( 'HHUH? %f'% buf.sum())
+            return glTexImage2D(GL_TEXTURE_2D, 0, 3, cols, rows, 0, GL_RGBA, GL_UNSIGNED_BYTE,
+                    buf[::-1].flatten())
         else:
+            debug('NOOO....')
             raise NotImplementedError()
     elif x.ndim == 3:
         rows, cols, channels = x.shape
--- a/pylearn/dataset_ops/memo.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/dataset_ops/memo.py	Tue Nov 17 13:33:04 2009 -0500
@@ -20,6 +20,7 @@
     def forget():
         for k in cache.keys():
             del cache[k]
+    rval.cache = cache
     rval.forget = forget
     rval.__name__ = 'memo@%s'%f.__name__
     rval.cache = cache
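
The memo helper above wraps a loader function with an in-memory cache (presumably keyed on the call arguments), plus a forget() method and a .cache attribute. A hedged usage sketch with a hypothetical loader; the changeset itself only shows memo being imported in cifar10.py:

    @memo
    def load_train_images(dtype):
        return numpy.ones((100, 1024), dtype=dtype)  # stand-in for an expensive disk read

    a = load_train_images('float64')   # computed and stored in load_train_images.cache
    b = load_train_images('float64')   # served from the cache
    load_train_images.forget()         # empties the cache
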
--- a/pylearn/dataset_ops/protocol.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/dataset_ops/protocol.py	Tue Nov 17 13:33:04 2009 -0500
@@ -59,6 +59,12 @@
         super(TensorDataset, self).__init__(single_type, batch_type)
 
 class TensorFnDataset(TensorDataset):
+    """A good base class for TensorDatasets that can be read from disk and cached in memory
+
+    The dataset is accessed via a function call to make this Op pickle-able.  If the function
+    is a normal module-level function, then this Op will be picklable.  If the dataset were a
+    property of this Op, then pickling the Op would require pickling the entire dataset.
+    """
     def __init__(self, dtype, bcast, fn, single_shape=None, batch_size=None):
         super(TensorFnDataset, self).__init__(dtype, bcast, single_shape, batch_size)
         try:
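
The new TensorFnDataset docstring explains why the dataset is reached through a function call: pickling the Op then stores only a reference to a module-level function, not the data itself. A minimal sketch of that pattern (hypothetical loader name, not part of this changeset):

    _cache = {}
    def load_images(dtype='float64'):
        # module-level, so a pickled Op keeps only a reference to this function
        if dtype not in _cache:
            _cache[dtype] = numpy.ones((100, 1024), dtype=dtype)  # stand-in for a real load
        return _cache[dtype]

    s_idx = theano.tensor.iscalar()
    x = TensorFnDataset(dtype='float64', bcast=(False,), fn=(load_images, ('float64',)),
            single_shape=(1024,))(s_idx)
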
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/dataset_ops/shapeset1.py	Tue Nov 17 13:33:04 2009 -0500
@@ -0,0 +1,97 @@
+"""A Theano Op to load/access Shapeset1
+"""
+import theano, numpy
+
+from .protocol import TensorFnDataset
+
+from ..datasets.shapeset1 import head_train, head_valid, head_test
+
+#make global functions so Op can be pickled.
+_train_cache = {}
+def train_img(dtype):
+    if dtype not in _train_cache:
+        x, y = head_train()
+        if dtype.startswith('uint') or dtype.startswith('int'):
+            x *= 255
+        _train_cache[dtype] = numpy.asarray(x, dtype=dtype)
+        _train_cache['lbl'] = y
+    return _train_cache[dtype]
+def train_lbl():
+    if 'lbl' not in _train_cache:
+        x, y = head_train()
+        # cache x in some format now that it's read (it isn't that big).
+        _train_cache[x.dtype] = x 
+        _train_cache['lbl'] = y
+    return _train_cache['lbl']
+_valid_cache = {}
+def valid_img(dtype):
+    if dtype not in _valid_cache:
+        x, y = head_valid()
+        if dtype.startswith('uint') or dtype.startswith('int'):
+            x *= 255
+        _valid_cache[dtype] = numpy.asarray(x, dtype=dtype)
+        _valid_cache['lbl'] = y
+    return _valid_cache[dtype]
+def valid_lbl():
+    if 'lbl' not in _valid_cache:
+        x, y = head_valid()
+        # cache x in some format now that it's read (it isn't that big).
+        _valid_cache[x.dtype] = x 
+        _valid_cache['lbl'] = y
+    return _valid_cache['lbl']
+_test_cache = {}
+def test_img(dtype):
+    if dtype not in _test_cache:
+        x, y = head_test()
+        if dtype.startswith('uint') or dtype.startswith('int'):
+            x *= 255
+        _test_cache[dtype] = numpy.asarray(x, dtype=dtype)
+        _test_cache['lbl'] = y
+    return _test_cache[dtype]
+def test_lbl():
+    if 'lbl' not in _test_cache:
+        x, y = head_test()
+        # cache x in some format now that it's read (it isn't that big).
+        _test_cache[x.dtype] = x 
+        _test_cache['lbl'] = y
+    return _test_cache['lbl']
+
+_split_fns = dict(
+        train=(train_img, train_lbl),
+        valid=(valid_img, valid_lbl),
+        test=(test_img, test_lbl))
+
+def shapeset1(s_idx, split, dtype='float64', rasterized=False):
+    """
+    :param s_idx:
+
+    :param split:
+
+    :param dtype:
+
+    :param rasterized: return examples as vectors (True) or 28x28 matrices (False)
+
+    """
+
+    x_fn, y_fn = _split_fns[split]
+
+    x = TensorFnDataset(dtype=dtype, bcast=(False,), fn=(x_fn, (dtype,)),
+            single_shape=(1024,))(s_idx)
+    y = TensorFnDataset(dtype='int64', bcast=(), fn=y_fn)(s_idx)
+    if x.ndim == 1:
+        if not rasterized:
+            x = x.reshape((32,32))
+    elif x.ndim == 2:
+        if not rasterized:
+            x = x.reshape((x.shape[0], 32,32))
+    else:
+        assert False, 'what happened?'
+    return x, y
+nclasses = 10
+
+def glviewer(split='train'):
+    from glviewer import GlViewer
+    i = theano.tensor.iscalar()
+    f = theano.function([i], shapeset1(i, split, dtype='uint8', rasterized=False)[0])
+    GlViewer(f).main()
+
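
A quick usage sketch for the new shapeset1 Op (it mirrors the glviewer helper above; not part of the changeset):

    i = theano.tensor.iscalar()
    x_i, y_i = shapeset1(i, 'train', dtype='float64', rasterized=False)
    f = theano.function([i], [x_i, y_i])
    img, lbl = f(0)   # a 32x32 float64 image and its int64 label
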
--- a/pylearn/shared/layers/__init__.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/shared/layers/__init__.py	Tue Nov 17 13:33:04 2009 -0500
@@ -14,7 +14,7 @@
 from kording2004 import Kording2004
 
 # rust2005.py
-from rust2005 import Rust2005
+from rust2005 import Rust2005, Rust2005Conv
 
 # lecun1998.py
 from lecun1998 import LeNetConvPool
--- a/pylearn/shared/layers/kouh2008.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/shared/layers/kouh2008.py	Tue Nov 17 13:33:04 2009 -0500
@@ -12,12 +12,9 @@
 
 """
 
-## optimizing this model may be difficult-- paper talks about using exponents p and q in
-# in the range 1-3, but gradient descent may overstep that range.
-
-# TODO: Use updates() to clamp exponents p and q to sensible range
 import logging
 _logger = logging.getLogger('pylearn.shared.layers.kouh2008')
+
 import numpy
 import theano
 from theano import tensor
@@ -50,26 +47,56 @@
     output - a tensor of activations of shape (n_examples, n_out)
     """
 
+    input = None #optional - symbolic variable of input
+    f_list = None # optional - list of filter shared variables
+    filter_l1 = None # optional - l1 of filters
+    filter_l2_sqr = None # optional - l2**2 of filters
+
+    exp_l1 = None
+    exp_l2_sqr = None
+
+    w_l1 = None
+    w_l2_sqr = None
+
+    p_unbounded = None
+    q_unbounded = None
+    r_unbounded = None
+    k_unbounded = None
+
+    p_range_default=(1.0, 3.0)
+    q_range_default=(1.0, 3.0)
+    r_range_default=(0.333, 1.0)
+    k_range_default=(0.0, 1.0)
+    x_range_default=(0.01, 1.0)
+
     def __init__(self, w_list, x_list, p, q, r, k, params, updates, eps=1.0e-6):
         """Transcription of equation 2.1 from paper (page 1434).
         """
         if len(w_list) != len(x_list):
             raise ValueError('w_list must have same len as x_list')
-        output = (sum(w * tensor.pow(x, p) for (w,x) in zip(w_list, x_list)))\
-                / (numpy.asarray(eps, dtype=k.type.dtype) + k + tensor.pow(sum(tensor.pow(x, q) for x in x_list), r))
+        numerator = sum(w_i * tensor.pow(x_i, p) for (w_i,x_i) in zip(w_list, x_list))
+        denominator = k + tensor.pow(sum(tensor.pow(x_i, q) for x_i in x_list), r)
+        output = numerator / (numpy.asarray(eps, dtype=k.type.dtype) + denominator)
 
         assert output.type.ndim == 2
         update_locals(self, locals())
         _logger.debug('output dtype %s' % output.dtype)
 
     @classmethod
-    def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], exponent_range=(1.0, 3.0)):
+    def new_expbounds(cls, rng, x_list, n_out, dtype=None, params=[], updates=[], 
+            p_range=p_range_default,
+            q_range=q_range_default,
+            r_range=r_range_default,
+            k_range=k_range_default,
+            ):
         """
         """
         if dtype is None:
             dtype = x_list[0].dtype
         n_terms = len(x_list)
 
+        new_params = []
+
         def shared_uniform(low, high, size, name): 
             return _shared_uniform(rng, low, high, size, dtype, name)
 
@@ -81,40 +108,32 @@
             w_list = [w_sm[:,i] for i in xrange(n_terms)]
             w_l1 = abs(w).sum()
             w_l2_sqr = (w**2).sum()
+            new_params.append(w)
         else:
-            w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='w_%i'%i)
+            w_list = [shared_uniform(low=-2.0/n_terms, high=2.0/n_terms, size=(n_out,), name='Kouh2008::w_%i'%i)
                     for i in xrange(n_terms)]
             w_l1 = sum(abs(wi).sum() for wi in w_list)
             w_l2_sqr = sum((wi**2).sum() for wi in w_list)
-
-        e_range_low, e_range_high = exponent_range
-        e_range_low = numpy.asarray(e_range_low, dtype=dtype)
-        e_range_high = numpy.asarray(e_range_high, dtype=dtype)
-        e_range_mag = e_range_high - e_range_low
-        if e_range_mag < 0:
-            raise ValueError('exponent range must have low <= high')
+            new_params.extend(w_list)
 
         p_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='p')
         q_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='q') 
         r_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='r')
-        k_unbounded = shared_uniform(low=-0.2, high=0.2, size=(n_out,), name='k') # biases
+        k_unbounded = shared_uniform(low=-0.1, high=0.1, size=(n_out,), name='k') # biases
+        new_params.extend([p_unbounded, q_unbounded, r_unbounded, k_unbounded])
 
-        p = tensor.nnet.sigmoid(p_unbounded) * e_range_mag + e_range_low
-        q = tensor.nnet.sigmoid(q_unbounded) * e_range_mag + e_range_low
-        r = tensor.nnet.sigmoid(r_unbounded) * \
-                numpy.asarray(1.0/e_range_low - 1.0/e_range_high, dtype=dtype) \
-                + numpy.asarray(1.0/e_range_high, dtype=dtype)
+        def d(a):
+            return numpy.asarray(a, dtype=dtype)
 
-        k = softsign(k_unbounded)
+        p = softsign(p_unbounded) * d(p_range[1] - p_range[0]) + d(p_range[0])
+        q = softsign(q_unbounded) * d(q_range[1] - q_range[0]) + d(q_range[0])
+        r = softsign(r_unbounded) * d(r_range[1] - r_range[0]) + d(r_range[0])
+        k = softsign(k_unbounded) * d(k_range[1] - k_range[0]) + d(k_range[0])
 
-        if use_softmax_w:
-            rval = cls(w_list, x_list, p, q, r, k,
-                    params = [p_unbounded, q_unbounded, r_unbounded, k, w] + params,
-                    updates=updates)
-        else:
-            rval = cls(w_list, x_list, p, q, r, k,
-                    params = [p_unbounded, q_unbounded, r_unbounded, k_unbounded] + w_list + params,
-                    updates=updates)
+        rval = cls(w_list, x_list, p, q, r, k,
+                params = params + new_params,
+                updates=updates)
+
         rval.p_unbounded = p_unbounded
         rval.q_unbounded = q_unbounded
         rval.r_unbounded = r_unbounded
@@ -126,9 +145,14 @@
         return rval
 
     @classmethod
-    def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None, eps=1e-1,
-            exponent_range=(1.0, 3.0), filter_range=1.0):
-        """Return a KouhLayer instance with random parameters
+    def new_filters_expbounds(cls, rng, input, n_in, n_out, n_terms, dtype=None,
+            p_range=p_range_default,
+            q_range=q_range_default,
+            r_range=r_range_default,
+            k_range=k_range_default,
+            x_range=x_range_default,
+            ):
+        """Return a Kouh2008 instance with random parameters
 
         The parameters are drawn on a range [typically] suitable for fine-tuning by gradient
         descent. 
@@ -145,10 +169,27 @@
         :param nterms: each (of n_out) complex-cell firing rate will be determined from this
         many 'simple cell' responses.
 
-        :param eps: this amount is added to the softplus of filter responses as a baseline
-        firing rate (that prevents a subsequent error from ``pow(0, p)``) 
+        :param eps: this amount is added to the filter responses as a baseline
+        firing rate (that prevents a subsequent error from ``pow(0, p)``)
+        The eps must be large enough so that eps**p_range[1] does not underflow.
+
+        :param p_range: See `new_expbounds`.
+        :type p_range: tuple([low, high])
+
+        :param q_range: See `new_expbounds`.
+        :type q_range: tuple([low, high])
 
-        :returns: KouhLayer instance with freshly-allocated random weights.
+        :param r_range: See `new_expbounds`.
+        :type r_range: tuple([low, high])
+
+        :param k_range: See `new_expbounds`.
+        :type k_range: tuple([low, high])
+
+        :param x_range: Filter responses are affine-transformed softsigns lying between these
+        values.
+        :type x_range: tuple([low, high])
+
+        :returns: Kouh2008 instance with freshly-allocated random weights.
 
         """
         if input.type.ndim != 2:
@@ -161,19 +202,30 @@
         def shared_uniform(low, high, size, name): 
             return _shared_uniform(rng, low, high, size, dtype, name)
 
-        f_list = [shared_uniform(low=-2.0/numpy.sqrt(n_in), high=2.0/numpy.sqrt(n_in), size=(n_in, n_out), name='f_%i'%i)
+        f_list = [shared_uniform(low=-2.0/numpy.sqrt(n_in), high=2.0/numpy.sqrt(n_in), 
+            size=(n_in, n_out), name='Kouh2008::f_%i'%i)
+                for i in xrange(n_terms)]
+
+        b_list = [shared_uniform(low=0, high=.01,
+            size=(n_out,), name='Kouh::2008::b_%i'%i)
                 for i in xrange(n_terms)]
 
-        b_list = [shared_uniform(low=0, high=.01, size=(n_out,), name='b_%i'%i)
-                for i in xrange(n_terms)]
-        #x_list = [numpy.asarray(eps, dtype=dtype)+softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)]
-        filter_range = numpy.asarray(filter_range, dtype=dtype)
-        half_filter_range = numpy.asarray(filter_range/2, dtype=dtype)
-        x_list = [numpy.asarray(filter_range + eps, dtype=dtype)+half_filter_range *softsign(tensor.dot(input, f_list[i]) +
-            b_list[i]) for i in xrange(n_terms)]
+        def d(a):
+            return numpy.asarray(a, dtype=dtype)
+
+        x_low = d(x_range[0])
+        x_high = d(x_range[1])
+
+        #softsign's range is (-1, 1)
+        # we want filter responses to span (x_low, x_high)
+        x_list = [x_low + (x_high-x_low)*(d(0.5) + d(0.5)*softsign(tensor.dot(input, f_list[i])+b_list[i]))
+                    for i in xrange(n_terms)]
 
         rval = cls.new_expbounds(rng, x_list, n_out, dtype=dtype, params=f_list + b_list,
-                exponent_range=exponent_range)
+                p_range=p_range,
+                q_range=q_range,
+                r_range=r_range,
+                k_range=k_range)
         rval.f_list = f_list
         rval.input = input #add the input to the returned object
         rval.filter_l1 = sum(abs(fi).sum() for fi in f_list)
@@ -182,6 +234,8 @@
 
     def img_from_weights(self, rows=None, cols=None, row_gap=1, col_gap=1, eps=1e-4):
         """ Return an image that visualizes all the weights in the layer.
+
+        WRITEME: how does the image relate to the weights
         """
 
         n_in, n_out = self.f_list[0].value.shape
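
The x_range bounding introduced above maps softsign's (-1, 1) output affinely onto (x_low, x_high). A small numeric check of that map (a standalone sketch, not part of the changeset):

    import numpy
    def softsign(u):
        return u / (1.0 + abs(u))
    x_low, x_high = 0.01, 1.0   # x_range_default
    for u in (-100.0, 0.0, 100.0):
        x = x_low + (x_high - x_low) * (0.5 + 0.5 * softsign(u))
        print(x)   # ~0.015, 0.505, ~0.995 -- always inside (0.01, 1.0)
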
--- a/pylearn/shared/layers/lecun1998.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/shared/layers/lecun1998.py	Tue Nov 17 13:33:04 2009 -0500
@@ -96,8 +96,12 @@
         w_shp = (n_filters, n_imgs) + filter_shape
         b_shp = (n_filters,)
 
-        w = shared(numpy.asarray(rng.uniform(low=-.05, high=.05, size=w_shp), dtype=dtype))
-        b = shared(numpy.asarray(rng.uniform(low=-.05, high=.05, size=b_shp), dtype=dtype))
+        #TODO: make w_range a parameter to new as well?
+        w_range = (-1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs),
+                   1.0 / numpy.sqrt(filter_shape[0] * filter_shape[1] * n_imgs))
+
+        w = shared(numpy.asarray(rng.uniform(low=w_range[0], high=w_range[1], size=w_shp), dtype=dtype))
+        b = shared(numpy.asarray(rng.uniform(low=-.0, high=0., size=b_shp), dtype=dtype))
 
         if isinstance(squash_fn, str):
             squash_fn = squash(squash_fn)
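
The new LeNetConvPool weight range scales with fan-in: each output unit of the convolution sees filter_shape[0] * filter_shape[1] pixels in each of the n_imgs input channels. A sketch of the bound, using the names from the hunk above:

    fan_in = n_imgs * filter_shape[0] * filter_shape[1]
    w_bound = 1.0 / numpy.sqrt(fan_in)
    # w ~ Uniform(-w_bound, +w_bound); b is effectively initialized to zero
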
--- a/pylearn/shared/layers/rust2005.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/shared/layers/rust2005.py	Tue Nov 17 13:33:04 2009 -0500
@@ -4,6 +4,16 @@
 
 This layer implements a model of simple and complex cell firing rate responses.
 
+
+:TODO: implement full model with variable exponents.  The current implementation fixes internal
+exponents to 2 and the external exponent to 1/2.
+
+:TODO: add the weights on the quadratic filter
+responses.  E and S are supposed to be the "square root of a *weighted* sum of squares".
+The simplifications here are probably useful, but make them optional, or a different class or something.
+- The current implementation can be interpreted as including the weights inside the filters.
+  The filters are not constrained to have a unit norm, for example.
+
 """
 
 import numpy
@@ -18,25 +28,31 @@
 from theano.compile.sandbox import shared
 from theano.sandbox.softsign import softsign
 from theano.tensor.nnet import softplus
+from theano.sandbox.conv import ConvOp
 
 from .util import update_locals, add_logging
 
-def rust2005_act_from_filters(linpart, E_quad, S_quad):
+def rust2005_act_from_filters(linpart, E_quad, S_quad, eps):
+    """Return rust2005 activation from linear filter responses, as well as E and S terms
+
+    :param linpart: a single tensor of linear filter responses
+    :param E_quad: a list of tensors of linear filter responses
+    :param S_quad: a list of tensors of linear filter responses
+    :param eps: a scalar to add to the sum of squares before the sqrt
+
+    """
+    if isinstance(E_quad, theano.Variable):
+        raise TypeError('E_quad should be a list of Variables, not a Variable itself')
+    if isinstance(S_quad, theano.Variable):
+        raise TypeError('E_quad should be a list of Variables, not a Variable itself')
     sqrt = theano.tensor.sqrt
     softlin = theano.tensor.nnet.softplus(linpart)
-    E = sqrt(sum([E_quad_i**2 for E_quad_i in E_quad] + [1e-8, softlin**2]))
-    S = sqrt(sum([S_quad_i**2 for S_quad_i in S_quad] + [1e-8]))
-    return (E-S) / (1+E+S)
+    E = sqrt(sum([E_quad_i**2 for E_quad_i in E_quad] + [softlin**2], eps))
+    S = sqrt(sum([S_quad_i**2 for S_quad_i in S_quad], eps))
+    return (E-S) / (1+E+S), E, S
 
 class Rust2005(object):
-    """
-    shared variable version.
-
-    w is 3-tensor n_in x n_out x (1+n_E_quadratic + n_S_quadratic)
-
-    w 
-
-    """
+    """Energy-like complex cell activation function described in Rust et al. 2005 """
     #logging methods come from the add_logging() call below
     # _info, _debug, _warn, _error, _fatal
 
@@ -99,7 +115,7 @@
         b = shared(numpy.zeros((n_out,), dtype=dtype))
         return cls(input, w, b, n_out, n_E, n_S, epsilon, filter_shape, [w,b])
 
-    def img_from_weights(self, rows=12, cols=24, row_gap=1, col_gap=1, eps=1e-4):
+    def img_from_weights(self, rows=12, cols=25, row_gap=1, col_gap=1, eps=1e-4, triplegap=0):
         """ Return an image that visualizes all the weights in the layer.
 
         The current implentation returns a tiling in which every triple of columns is a logical
@@ -110,9 +126,12 @@
         """
         if cols % 3: #because there are three kinds of filters: linear, excitatory, inhibitory
             raise ValueError("cols must be multiple of 3")
+
+        n_triples = cols / 3
+
         filter_shape = self.filter_shape
         height = rows * (row_gap + filter_shape[0]) - row_gap
-        width = cols * (col_gap + filter_shape[1]) - col_gap
+        width = cols * (col_gap + filter_shape[1]) - col_gap + (n_triples-1) * triplegap
 
         out_array = numpy.zeros((height, width, 3), dtype='uint8')
 
@@ -123,10 +142,14 @@
         for r in xrange(rows):
             out_r_low = r*(row_gap + filter_shape[0])
             out_r_high = out_r_low + filter_shape[0]
+            extra_col_gap = 0 # a counter we'll use for the triplegap
             for c in xrange(cols):
-                out_c_low = c*(col_gap + filter_shape[1])
+                if c and (c%3==0):
+                    extra_col_gap += triplegap
+                out_c_low = c*(col_gap + filter_shape[1]) + extra_col_gap
                 out_c_high = out_c_low + filter_shape[1]
                 out_tile = out_array[out_r_low:out_r_high, out_c_low:out_c_high,:]
+                assert out_tile.shape[1] == filter_shape[1]
 
                 if c % 3 == 0: # linear filter
                     if w_col < w.shape[1]:
@@ -148,3 +171,119 @@
                         w_col += self.n_S_quadratic
         return Image.fromarray(out_array, 'RGB')
 add_logging(Rust2005)
+
+class Rust2005Conv(object):
+    """Convolutional version of `Rust2005`
+
+    :note:
+    The layer doesn't contain an option for downsampling. It makes sense to downsample the output
+    using DownsampleMaxPool, but downsampling is orthogonal to the behaviour of this
+    layer so it is not included.
+    
+    """
+
+    l1 = 0.0
+    l2_sqr = 0.0
+
+    eps=1e-6 # default epsilon to prevent sqrt(0) in quadratic filters
+
+    image_shape = None
+    filter_shape = None
+    output_shape = None
+    output_channels = None
+    output_examples = None
+
+    def __init__(self, linpart, Es, Ss, params, eps=eps):
+        """
+        """
+        eps = numpy.asarray(self.eps, linpart.dtype)
+        output, E, S = rust2005_act_from_filters(linpart, Es, Ss, eps)
+
+        update_locals(self, locals())
+
+
+    @classmethod
+    def new(cls, rng, input, image_shape, filter_shape, n_examples, n_filters, n_E, n_S,
+            n_channels=1,
+            eps=1.0e-6, dtype=None, conv_mode='valid',
+            w_range=None,
+            q_range=None
+            ):
+        """ Return Rust2005Conv layer
+
+        layer.output will be 4D tensor with shape (n_examples, n_filters, R, C) where R and C
+        depend on the image_shape, the filter_shape and the convolution mode.
+
+
+        :param rng: generator for randomized initial filters
+        :param input: symbolic input (4D tensor)
+        :type input: 4D tensor with shape (n_examples, n_channels, image_shape[0], image_shape[1])
+
+        :param image_shape:  rows, cols of every channel of every image
+        :param filter_shape: rows, cols of every filter
+        :param bsize: number of images to be treated
+        :param n_filters: number of filters (output will have this many channels)
+        :param n_channels: number of channels in each image and filter
+        :param n_E: number of squared exciting terms
+        :param n_S: number of squared inhibition terms
+        :param eps: epsilon to add to sum-of-squares in sqrt
+        :param dtype: dtype to use for new variables (Default: input.dtype)
+        :param conv_mode: convolution mode
+        :param w_range: linear weights will be drawn uniformly from this range
+        :type w_range: pair (lower_bound, upper_bound
+        :param q_range: quadratic weights will be drawn uniformly from this range
+        :type q_range: pair (lower_bound, upper_bound
+        """
+        if dtype is None:
+            dtype = input.dtype
+
+        irows, icols = image_shape
+        krows, kcols = filter_shape
+
+        conv = ConvOp((n_channels,irows, icols), (krows, kcols), n_filters, n_examples,
+                dx=1, dy=1, output_mode=conv_mode)
+
+        w_shp = (n_filters, n_channels, krows, kcols)
+        b_shp = (n_filters,)
+
+        if w_range is None:
+            w_low = -2.0/numpy.sqrt(image_shape[0] * image_shape[1] * n_channels)
+            w_high = 2.0/numpy.sqrt(image_shape[0] * image_shape[1] * n_channels)
+        else:
+            w_low, w_high = w_range
+
+        if q_range is None:
+            q_low, q_high = w_low, w_high
+        else:
+            q_low, q_high = w_range
+
+        w = shared(numpy.asarray(rng.uniform(low=w_low, high=w_high, size=w_shp), dtype=dtype))
+        b = shared(numpy.asarray(rng.uniform(low=w_low, high=w_low, size=b_shp), dtype=dtype))
+
+        E_w = [
+                shared(numpy.asarray(rng.uniform(low=q_low, high=q_high, size=w_shp), dtype=dtype))
+                for i in xrange(n_E)
+                ]
+        S_w = [
+                shared(numpy.asarray(rng.uniform(low=q_low, high=q_high, size=w_shp), dtype=dtype))
+                for i in xrange(n_S)
+                ]
+
+        rval = cls(
+                linpart=conv(input, w) + b.dimshuffle(0,'x','x'),
+                Es=[conv(input, e) for e in E_w],
+                Ss=[conv(input, s) for s in S_w],
+                params=[w,b]+E_w + S_w,
+                eps=eps)
+
+        # ignore bias in l1 (Yoshua's habit)
+        rval.l1 = sum(abs(p) for p in ([w]+E_w+S_w))
+        rval.l2_sqr = sum(p**2 for p in ([w]+E_w+S_w))
+        rval.image_shape = image_shape
+        rval.filter_shape = filter_shape
+        rval.output_shape = conv.outshp
+        rval.output_channels = n_filters # how many channels of *output*
+        rval.output_examples = n_examples
+
+        return rval
+add_logging(Rust2005Conv) # _debug, _info, _warn, _error, _fatal
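
For reference, the activation computed by rust2005_act_from_filters reduces, for plain scalar filter responses, to the following numpy sketch (not part of the changeset):

    import numpy
    def softplus(x):
        return numpy.log1p(numpy.exp(x))
    def rust2005_act(lin, E_quad, S_quad, eps=1e-6):
        E = numpy.sqrt(eps + sum(e**2 for e in E_quad) + softplus(lin)**2)
        S = numpy.sqrt(eps + sum(s**2 for s in S_quad))
        return (E - S) / (1.0 + E + S)   # bounded in (-1, 1)
    print(rust2005_act(0.5, [1.0, -0.3], [0.2]))
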
--- a/pylearn/shared/layers/util.py	Thu Nov 12 12:24:36 2009 -0500
+++ b/pylearn/shared/layers/util.py	Tue Nov 17 13:33:04 2009 -0500
@@ -36,7 +36,6 @@
             pass
         cls._logger.setLevel(level)
 
-    print 'adding loggers to ', cls
     cls._debug = LogFn(cls._logger.debug)
     cls._info = LogFn(cls._logger.info)
     cls._warn = cls._warning = LogFn(cls._logger.warn)