changeset 681:4497619e2955

better scan
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 08 Apr 2009 19:53:18 -0400
parents ade894b06471
children be6639fccecc
files pylearn/lib/scan.py pylearn/lib/test_scan.py
diffstat 2 files changed, 131 insertions(+), 83 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/lib/scan.py	Tue Apr 07 18:00:24 2009 -0400
+++ b/pylearn/lib/scan.py	Wed Apr 08 19:53:18 2009 -0400
@@ -16,6 +16,26 @@
         raise NotImplementedError()
 
 
+def scan1(*args):
+    """Build a Scan1 loop from positional arguments of the form
+
+    x, u, <other variables>, (lambda x_i, y_{i-1}, <other variables> : y_i)
+
+    Raises TypeError when the lambda's result type differs from u.type."""
+    assert len(args) >= 3  # at minimum: x, u and the lambda
+    x, u = args[:2]
+    lmb = args[-1]  # the step function is always the last positional argument
+    other_inputs = list(args[2:-1])  # everything between u and the lambda
+
+    x_this = x[0].type()  # new variable with the type of one element of x
+    y_this = u.type()  # new variable standing in for the previous y
+    y_next = lmb(x_this, y_this, *other_inputs)  # apply the lambda once to these variables
+    if y_next.type != u.type:
+        raise TypeError('type of lambda recursion must match type of y_prev')
+    env = theano.Env([x_this, y_this] + other_inputs, [y_next])
+    env_var = theano.Constant(data=env, type=theano.generic)  # wrap the Env so it can be passed as an input
+    return _scan1(*([env_var] + list(args[:-1])))  # inputs: env, x, u, <other variables> (lambda dropped)
+
 class Scan1(theano.Op):
     """A Theano loop over one variable
 
@@ -63,81 +83,123 @@
     
     destroy_map = {}
     view_map = {}
+    mode=None
+    default_output = 0
 
-    def __init__(self, x_i, y_this, y_next):
-        if y_this.type != y_next.type:
-            raise TypeError('y_this and y_next must match', (y_this.type, y_next.type))
-        
-        #the Env is necessary to create the gradient Scan later.
-        self.env = theano.gof.Env(inputs=[x_i, y_this], outputs=[y_next])
+    def make_node(self, env, *inputs):
+        """
+        :note:
+        make_node must take all the same inputs as Apply holds,
+        so we use __call__ as the syntactic device that inserts self.extra_variables.
+        """
+        x, u = inputs[:2]
 
-        #this is the function that we use in recursion in perform
-        self.fn = theano.function(self.env.inputs, self.env.outputs[0])
-
-
-    def make_node(self, x, u):
         out_type = theano.tensor.Tensor(dtype=u.dtype, 
                 broadcastable=[False] + list(u.broadcastable))
-        return theano.Apply(self, [x,u], [out_type()])
+        return theano.Apply(self, [env]+list(inputs), [out_type(), theano.generic()])
+
+    def grad(self, inputs, (g_y, g_fn)):
+        assert g_fn is None
+            
+        y = self(*inputs)
+        grads = scan1_grad(g_y, y, *inputs) 
+        return [None] + grads[:-1]
+
+    def perform(self, node, args, (y_out, fn_out)):
 
-    def __call__(self, *args, **kwargs):
-        node = self.make_node(*args, **kwargs)
-        node.tag.trace = traceback.extract_stack()[:-1]
-        all_y = node.outputs[0]
-        return all_y[1:] #cut out the leading copy of u
+        env, x, u = args[:3]
+        other_args = args[3:]
 
-
+        if fn_out[0] is None:
+            assert len(env.outputs) == 1
+            fn_out[0] = theano.function(
+                    inputs=env.inputs,
+                    outputs=env.outputs[0],
+                    mode=self.mode)
+        fn = fn_out[0]
 
-    def perform(self, node, (x,u), (y_out,)):
         y_shape = (x.shape[0]+1,) + u.shape
         y = numpy.empty(y_shape, dtype=u.dtype)
 
+        #print 'x', x
+        #print 'y', y
+        #print 'u', u
+        #print 'other', other_args
+
         y[0] = u
         for i, x_i in enumerate(x):
-            y[i+1] = self.fn(x_i, y[i])
+            something = fn(x_i, y[i], *other_args)
+            #print 'something', something
+            y[i+1] = something
         y_out[0] = y
-
-    def grad(self, (x,u), (g_y,)):
-        if not hasattr(self, 'grad_op'):
-            self.grad_op = Scan1Grad(self)
-            
-        return self.grad_op(x, u, g_y)
+_scan1 = Scan1()
 
 
 class Scan1Grad(theano.Op):
-    def __init__(self, scan1, inplace=False):
-        self.scan = scan1
+    def __init__(self, inplace=False):
         self.inplace = inplace
         if inplace:
             self.destroy_map = {1: [3]}
 
-        xi, y_this = self.scan.env.inputs
-        y_next = self.scan.env.outputs[0]
-        gy_next = y_next.type()
-        gxi, gy_this = theano.tensor.grad(
-                y_next,
-                [xi, y_this],
-                g_cost=gy_next)
+    def make_node(self, g_y, y, scan_env, x, u, *other_inputs):
+        return theano.Apply(self,
+                [g_y, y, scan_env, x, u] + list(other_inputs), 
+                [x.type(), u.type()] + [oi.type() for oi in other_inputs] + [theano.generic()])
+
+    def get_fn(self, scan_env, grad_storage):
+        fn_storage = grad_storage[-1]
+        assert isinstance(scan_env, theano.gof.Env)
+        if fn_storage[0] is None:
+            y_next = scan_env.outputs[0]
+            gy_next = y_next.type()
+            inputs = scan_env.inputs # x_this, y_this, *rest
+            g_inputs = theano.tensor.grad(y_next, inputs, g_cost=gy_next)
 
-        self.fn = theano.function([xi, y_this, gy_next], [gxi, gy_this])
+            fn_storage[0] = theano.function(
+                    inputs=[gy_next] + inputs,
+                    outputs=g_inputs)
+        return fn_storage[0]
+
+    def perform(self, node, args, grad_storage):
+
+        #retrieve (or compute) the gradient function
+        fn = self.get_fn(args[2], grad_storage)
 
-    def make_node(self, x, u, g_y):
-        y = self.scan(x,u)
-        return theano.Apply(self, [x, y, g_y], [x.type(), u.type()])
+        #unpack the args
+        (g_y, y) = args[0:2]
+        (x, u) = args[3:5]
+        other_args = args[5:]
 
-    def perform(self, node, (x, y, g_y), (gx_out, gu_out)):
+        #unpack grad_storage (outputs)
+        gx_out, gu_out = grad_storage[0:2]
+        g_other_storage = grad_storage[2:-1]
+
+        assert len(other_args) == len(g_other_storage)
+
         if not self.inplace:
             g_y = g_y.copy()
 
         gx = numpy.zeros_like(x)
 
+        g_other = [numpy.zeros_like(other) for other in other_args]
+
         for i in xrange(len(x)-1, -1, -1):
             #print 'x y gy_next', x[i], y[i], g_y[i+1]
-            gx[i], gy_i= self.fn(x[i], y[i], g_y[i+1])
+            grads = fn(g_y[i+1], x[i], y[i], *other_args) 
+            gx[i], gy_i = grads[0:2]
             #print 'gx gy', gx[i], gy_i
             g_y[i] += gy_i
-            
+
+            #now increment the other-input gradient buffers
+            assert len(g_other) == (len(grads)-2)
+            for g_arg_buffer, g_arg in zip(g_other, grads[2:]):
+                g_arg_buffer += g_arg
+
+        #write results into storage locations
         gx_out[0] = gx
         gu_out[0] = g_y[0]
+        assert len(g_other_storage) == len(g_other)
+        for grad_storage, grad in zip(g_other_storage, g_other):
+            grad_storage[0] = grad
 
-
+scan1_grad = Scan1Grad()
--- a/pylearn/lib/test_scan.py	Tue Apr 07 18:00:24 2009 -0400
+++ b/pylearn/lib/test_scan.py	Wed Apr 08 19:53:18 2009 -0400
@@ -1,68 +1,54 @@
 import numpy
 import theano
-from theano.tensor import dscalar, dvector
-from scan import Scan1
-
-def test_0():
-    x_i = dscalar()
-    u = dscalar()
+from theano.tensor import dscalar, dvector, dmatrix
+from scan import scan1
 
 
-    scan_add = Scan1(x_i, u, x_i + u)
-
-    x = dvector()
+def test_extra_inputs():
+    u = dscalar('u')
+    c = dscalar('c')
+    x = dvector('x')
 
-    y = scan_add(x, u)
+    y = scan1(x, u, c, lambda x_i, y_prev, c: (x_i + y_prev) * c)
 
-    f = theano.function([x,u], y)
+    sum_y = theano.tensor.sum(y)
+
+    f = theano.function([x,u, c], y)
 
     xval = numpy.asarray([1., 1, 1. , 1, 1])
     uval = numpy.asarray(2.)
 
-    yval = f(xval, uval)
-    print yval
+    yval = f(xval, uval, 2.0)
+    assert numpy.all(yval == [2.,    6.,   14.,   30.,   62.,  126.])
 
 
-def test_grad():
-    x_i = dscalar()
-    u = dscalar()
-
-    scan_add = Scan1(x_i, u, x_i + u)
-
-    x = dvector()
-
-    y = scan_add(x, u)
-
-    sum_y = theano.tensor.sum(y)
 
     g_x = theano.tensor.grad(sum_y, x)
     g_u = theano.tensor.grad(sum_y, u)
 
-    f = theano.function([x,u], y)
-    gf = theano.function([x, u], [g_x, g_u])
+    gf = theano.function([x, u, c], [g_x, g_u])
 
-    xval = numpy.asarray([1., 1, 1. , 1, 1])
-    uval = numpy.asarray(2.)
+    gxval, guval = gf(xval, uval, 2.0)
 
-    yval = f(xval, uval)
-    print 'yval', yval
-
-    gxval, guval = gf(xval, uval)
-
-    print 'gxval', gxval
-    print 'guval', guval
+    #print gxval
+    #print guval
+    assert numpy.all(gxval == [ 62.,  30.,  14.,   6.,   2.])
+    assert numpy.all(guval == 63)
 
 
 def test_verify_scan_grad():
-    x_i = dvector()
-    y_prev = dvector()
-    scan_add = Scan1(x_i, y_prev, x_i + y_prev)
+    def scanxx(x, u, c):
+        # u = dvector('u')
+        # c = dvector('c')
+        # x = dmatrix('x')
+        y = scan1(x, u, c, lambda x_i, y_prev, c: (x_i + y_prev) * c)
+        return y
 
     rng = numpy.random.RandomState(456)
 
     xval = rng.rand(4, 3)
     uval = rng.rand(3)
+    cval = rng.rand(3)
 
-    print theano.tensor.verify_grad(scan_add, (xval, uval), rng=rng)
+    theano.tensor.verify_grad(scanxx, (xval, uval, cval), rng=rng)
 
-