# HG changeset patch
# User James Bergstra
# Date 1239234798 14400
# Node ID 4497619e29558c4900ecb90dfa74e248e8a4ed8d
# Parent  ade894b06471ad399d65c4f984ed02dfd2493f68
better scan

diff -r ade894b06471 -r 4497619e2955 pylearn/lib/scan.py
--- a/pylearn/lib/scan.py	Tue Apr 07 18:00:24 2009 -0400
+++ b/pylearn/lib/scan.py	Wed Apr 08 19:53:18 2009 -0400
@@ -16,6 +16,26 @@
     raise NotImplementedError()
 
+def scan1(*args):
+    """args should have the form
+
+    x, u, other_0, other_1, ..., (lambda x_i, y_{i-1}, other_0, other_1, ...: y_i)
+
+    """
+    assert len(args) >= 3
+    x, u = args[:2]
+    lmb = args[-1]
+    other_inputs = list(args[2:-1])
+
+    x_this = x[0].type()
+    y_this = u.type()
+    y_next = lmb(x_this, y_this, *other_inputs)
+    if y_next.type != u.type:
+        raise TypeError('type of lambda recursion must match type of y_prev')
+    env = theano.Env([x_this, y_this] + other_inputs, [y_next])
+    env_var = theano.Constant(data=env, type=theano.generic)
+    return _scan1(*([env_var] + list(args[:-1])))
+
 class Scan1(theano.Op):
     """A Theano loop over one variable
@@ -63,81 +83,123 @@
     destroy_map = {}
     view_map = {}
+    mode=None
+    default_output = 0
 
-    def __init__(self, x_i, y_this, y_next):
-        if y_this.type != y_next.type:
-            raise TypeError('y_this and y_next must match', (y_this.type, y_next.type))
-
-        #the Env is necessary to create the gradient Scan later.
-        self.env = theano.gof.Env(inputs=[x_i, y_this], outputs=[y_next])
+    def make_node(self, env, *inputs):
+        """
+        :note:
+        make_node must take all the same inputs as Apply holds,
+        so we use __call__ as the syntactic device that inserts self.extra_variables.
+        """
+        x, u = inputs[:2]
 
-        #this is the function that we use in recursion in perform
-        self.fn = theano.function(self.env.inputs, self.env.outputs[0])
-
-
-    def make_node(self, x, u):
         out_type = theano.tensor.Tensor(dtype=u.dtype,
                 broadcastable=[False] + list(u.broadcastable))
-        return theano.Apply(self, [x,u], [out_type()])
+        return theano.Apply(self, [env]+list(inputs), [out_type(), theano.generic()])
+
+    def grad(self, inputs, (g_y, g_fn)):
+        assert g_fn is None
+
+        y = self(*inputs)
+        grads = scan1_grad(g_y, y, *inputs)
+        return [None] + grads[:-1]
+
+    def perform(self, node, args, (y_out, fn_out)):
-    def __call__(self, *args, **kwargs):
-        node = self.make_node(*args, **kwargs)
-        node.tag.trace = traceback.extract_stack()[:-1]
-        all_y = node.outputs[0]
-        return all_y[1:] #cut out the leading copy of u
+        env, x, u = args[:3]
+        other_args = args[3:]
-
+        if fn_out[0] is None:
+            assert len(env.outputs) == 1
+            fn_out[0] = theano.function(
+                    inputs=env.inputs,
+                    outputs=env.outputs[0],
+                    mode=self.mode)
+        fn = fn_out[0]
 
-    def perform(self, node, (x,u), (y_out,)):
         y_shape = (x.shape[0]+1,) + u.shape
         y = numpy.empty(y_shape, dtype=u.dtype)
+        #print 'x', x
+        #print 'y', y
+        #print 'u', u
+        #print 'other', other_args
+
         y[0] = u
         for i, x_i in enumerate(x):
-            y[i+1] = self.fn(x_i, y[i])
+            something = fn(x_i, y[i], *other_args)
+            #print 'something', something
+            y[i+1] = something
         y_out[0] = y
-
-    def grad(self, (x,u), (g_y,)):
-        if not hasattr(self, 'grad_op'):
-            self.grad_op = Scan1Grad(self)
-
-        return self.grad_op(x, u, g_y)
+_scan1 = Scan1()
 
 class Scan1Grad(theano.Op):
-    def __init__(self, scan1, inplace=False):
-        self.scan = scan1
+    def __init__(self, inplace=False):
         self.inplace = inplace
         if inplace:
             self.destroy_map = {1: [3]}
 
-        xi, y_this = self.scan.env.inputs
-        y_next = self.scan.env.outputs[0]
-        gy_next = y_next.type()
-
-        gxi, gy_this = theano.tensor.grad(
-                y_next,
-                [xi, y_this],
-                g_cost=gy_next)
+    def make_node(self, g_y, y, scan_env, x, u, *other_inputs):
+        return theano.Apply(self,
+                [g_y, y, scan_env, x, u] + list(other_inputs),
+                [x.type(), u.type()] + [oi.type() for oi in other_inputs] + [theano.generic()])
+
+    def get_fn(self, scan_env, grad_storage):
+        fn_storage = grad_storage[-1]
+        assert isinstance(scan_env, theano.gof.Env)
+        if fn_storage[0] is None:
+            y_next = scan_env.outputs[0]
+            gy_next = y_next.type()
+            inputs = scan_env.inputs # x_this, y_this, *rest
+            g_inputs = theano.tensor.grad(y_next, inputs, g_cost=gy_next)
 
-        self.fn = theano.function([xi, y_this, gy_next], [gxi, gy_this])
+            fn_storage[0] = theano.function(
+                    inputs=[gy_next] + inputs,
+                    outputs=g_inputs)
+        return fn_storage[0]
+
+    def perform(self, node, args, grad_storage):
+
+        #retrieve (or compute) the gradient function
+        fn = self.get_fn(args[2], grad_storage)
 
-    def make_node(self, x, u, g_y):
-        y = self.scan(x,u)
-        return theano.Apply(self, [x, y, g_y], [x.type(), u.type()])
+        #unpack the args
+        (g_y, y) = args[0:2]
+        (x, u) = args[3:5]
+        other_args = args[5:]
 
-    def perform(self, node, (x, y, g_y), (gx_out, gu_out)):
+        #unpack grad_storage (outputs)
+        gx_out, gu_out = grad_storage[0:2]
+        g_other_storage = grad_storage[2:-1]
+
+        assert len(other_args) == len(g_other_storage)
+
         if not self.inplace:
             g_y = g_y.copy()
 
         gx = numpy.zeros_like(x)
+        g_other = [numpy.zeros_like(other) for other in other_args]
+
         for i in xrange(len(x)-1, -1, -1):
            #print 'x y gy_next', x[i], y[i], g_y[i+1]
-            gx[i], gy_i = self.fn(x[i], y[i], g_y[i+1])
+            grads = fn(g_y[i+1], x[i], y[i], *other_args)
+            gx[i], gy_i = grads[0:2]
            #print 'gx gy', gx[i], gy_i
            g_y[i] += gy_i
-
+
+            #now increment the other-input gradient buffers
+            assert len(g_other) == (len(grads)-2)
+            for g_arg_buffer, g_arg in zip(g_other, grads[2:]):
+                g_arg_buffer += g_arg
+
+        #write results into storage locations
        gx_out[0] = gx
        gu_out[0] = g_y[0]
+        assert len(g_other_storage) == len(g_other)
+        for grad_storage, grad in zip(g_other_storage, g_other):
+            grad_storage[0] = grad
 
-
+scan1_grad = Scan1Grad()
diff -r ade894b06471 -r 4497619e2955 pylearn/lib/test_scan.py
--- a/pylearn/lib/test_scan.py	Tue Apr 07 18:00:24 2009 -0400
+++ b/pylearn/lib/test_scan.py	Wed Apr 08 19:53:18 2009 -0400
@@ -1,68 +1,54 @@
 import numpy
 import theano
-from theano.tensor import dscalar, dvector
-from scan import Scan1
-
-def test_0():
-    x_i = dscalar()
-    u = dscalar()
+from theano.tensor import dscalar, dvector, dmatrix
+from scan import scan1
 
-    scan_add = Scan1(x_i, u, x_i + u)
-
-    x = dvector()
+def test_extra_inputs():
+    u = dscalar('u')
+    c = dscalar('c')
+    x = dvector('x')
 
-    y = scan_add(x, u)
+    y = scan1(x, u, c, lambda x_i, y_prev, c: (x_i + y_prev) * c)
 
-    f = theano.function([x,u], y)
+    sum_y = theano.tensor.sum(y)
+
+    f = theano.function([x,u, c], y)
 
     xval = numpy.asarray([1., 1, 1., 1, 1])
     uval = numpy.asarray(2.)
 
-    yval = f(xval, uval)
-    print yval
+    yval = f(xval, uval, 2.0)
+    assert numpy.all(yval == [2., 6., 14., 30., 62., 126.])
 
-def test_grad():
-    x_i = dscalar()
-    u = dscalar()
-
-    scan_add = Scan1(x_i, u, x_i + u)
-
-    x = dvector()
-
-    y = scan_add(x, u)
-
-    sum_y = theano.tensor.sum(y)
     g_x = theano.tensor.grad(sum_y, x)
     g_u = theano.tensor.grad(sum_y, u)
 
-    f = theano.function([x,u], y)
-    gf = theano.function([x, u], [g_x, g_u])
+    gf = theano.function([x, u, c], [g_x, g_u])
 
-    xval = numpy.asarray([1., 1, 1., 1, 1])
-    uval = numpy.asarray(2.)
+    gxval, guval = gf(xval, uval, 2.0)
 
-    yval = f(xval, uval)
-    print 'yval', yval
-
-    gxval, guval = gf(xval, uval)
-
-    print 'gxval', gxval
-    print 'guval', guval
+    #print gxval
+    #print guval
+    assert numpy.all(gxval == [62., 30., 14., 6., 2.])
+    assert numpy.all(guval == 63)
 
 def test_verify_scan_grad():
-    x_i = dvector()
-    y_prev = dvector()
-    scan_add = Scan1(x_i, y_prev, x_i + y_prev)
+    def scanxx(x, u, c):
+        # u = dvector('u')
+        # c = dvector('c')
+        # x = dmatrix('x')
+        y = scan1(x, u, c, lambda x_i, y_prev, c: (x_i + y_prev) * c)
+        return y
 
     rng = numpy.random.RandomState(456)
     xval = rng.rand(4, 3)
     uval = rng.rand(3)
+    cval = rng.rand(3)
 
-    print theano.tensor.verify_grad(scan_add, (xval, uval), rng=rng)
+    theano.tensor.verify_grad(scanxx, (xval, uval, cval), rng=rng)
-
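For orientation, the recurrence and its backward pass can be sketched in plain numpy, mirroring what Scan1.perform and Scan1Grad.perform do at runtime. The helper names below (scan1_forward, scan1_backward, grad_fn) are illustrative only and not part of the patch; the expected values are the ones asserted in test_extra_inputs.

import numpy

def scan1_forward(x, u, fn, *other):
    # Mirrors Scan1.perform: y[0] = u, then y[i+1] = fn(x[i], y[i], *other).
    y = numpy.empty((x.shape[0] + 1,) + u.shape, dtype=u.dtype)
    y[0] = u
    for i, x_i in enumerate(x):
        y[i + 1] = fn(x_i, y[i], *other)
    return y

def scan1_backward(x, y, g_y, grad_fn, *other):
    # Mirrors Scan1Grad.perform: walk the recurrence in reverse,
    # chaining the recurrent gradient and summing the gradients of the
    # extra inputs, which are reused at every step.
    g_y = g_y.copy()
    gx = numpy.zeros_like(x)
    g_other = [numpy.zeros_like(o) for o in other]
    for i in range(len(x) - 1, -1, -1):
        grads = grad_fn(g_y[i + 1], x[i], y[i], *other)
        gx[i], gy_i = grads[0:2]
        g_y[i] += gy_i
        for buf, g in zip(g_other, grads[2:]):
            buf += g
    return gx, g_y[0], g_other   # g_y[0] is the gradient w.r.t. u

# The step function from test_extra_inputs: y_i = (x_i + y_{i-1}) * c.
fn = lambda x_i, y_prev, c: (x_i + y_prev) * c
# Its hand-written gradients w.r.t. (x_i, y_prev, c), given g = dcost/dy_i.
grad_fn = lambda g, x_i, y_prev, c: (c * g, c * g, (x_i + y_prev) * g)

x = numpy.ones(5)
u = numpy.asarray(2.)
y = scan1_forward(x, u, fn, 2.0)
assert numpy.all(y == [2., 6., 14., 30., 62., 126.])

# cost = sum(y), so g_y is all ones; values match the test's asserts.
gx, gu, _ = scan1_backward(x, y, numpy.ones_like(y), grad_fn, 2.0)
assert numpy.all(gx == [62., 30., 14., 6., 2.])
assert gu == 63.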