Mercurial > pylearn
changeset 836:788c2c8558eb
removed scan (has been moved to theano)
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Fri, 16 Oct 2009 12:19:12 -0400 |
parents | 79c482ec4ccf |
children | 28ceb345ab78 |
files | pylearn/lib/scan.py pylearn/lib/test_scan.py |
diffstat | 2 files changed, 0 insertions(+), 259 deletions(-) [+] |
line wrap: on
line diff
--- a/pylearn/lib/scan.py Fri Oct 16 12:17:23 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,205 +0,0 @@ -"""Provide Scan and related functions""" -__docformat__ = 'restructedtext en' - -import traceback -import numpy -import theano - -class Scan: - """A Theano loop - - :todo: Implement this, and rewrite `Scan1` to use `Scan` - - - """ - def __init__(self): - raise NotImplementedError() - - -def scan1(*args): - """args should have the form - - x, u, <other variables>, (lambda x_i, y_{i-1}, <other variables> : y_i) - - """ - assert len(args) >= 3 - x, u = args[:2] - lmb = args[-1] - other_inputs = list(args[2:-1]) - - x_this = x[0].type() - y_this = u.type() - y_next = lmb(x_this, y_this, *other_inputs) - if y_next.type != u.type: - raise TypeError('type of lambda recursion must match type of y_prev') - env = theano.Env([x_this, y_this] + other_inputs, [y_next]) - env_var = theano.Constant(data=env, type=theano.generic) - return _scan1(*([env_var] + list(args[:-1]))) - -class Scan1(theano.Op): - """A Theano loop over one variable - - Scan1 is less general than `Scan` because it permits looping only over one tensor. - - Scan1 is defined mathematically like this: - - input - iterable x - input - y-element-like u - input - function x[i], y[i-1] -> y[i] - output - iterable y - - .. code-block:: python - - #inputs - x #a tensor with ndim >= 1 - u #a tensor that is like a row of y - f #the function to scan over x - - for i in xrange(len(x)): - if i > 0: - y[i] = f(x[i], y[i-1]) - else: - y[0] = f(x[0], u) - - #outputs - y # a tensor with the same number of elements as x, - # each element of which is like u (in terms of shape and dtype) - - The Scan1 Op works by representing `f` by an `Env`. - - :note: - Internally, the representation is off-by-one wrt the documentation above. This Op creates - a tensor y whose len is greater by one than x, whose first element is a copy of u. - The `Scan1.__call__()` returns a subtensor view of this internal vector `y` that views only - the len-1 last elements, so the copy of `u` is not visible. - - - :todo: - Optimize for the case where y_this is not required to compute y_next. - This makes all the updates possible in parallel, it also makes the `u` argument to - make_node un-necessary. - - """ - - destroy_map = {} - view_map = {} - mode=None - default_output = 0 - - def make_node(self, env, *inputs): - """ - :note: - make_node must take all the same inputs as Apply holds, - so we use __call__ as the syntactic device that inserts self.extra_variables. - """ - x, u = inputs[:2] - - out_type = theano.tensor.Tensor(dtype=u.dtype, - broadcastable=[False] + list(u.broadcastable)) - return theano.Apply(self, [env]+list(inputs), [out_type(), theano.generic()]) - - def grad(self, inputs, (g_y, g_fn)): - assert g_fn is None - - y = self(*inputs) - grads = scan1_grad(g_y, y, *inputs) - return [None] + grads[:-1] - - def perform(self, node, args, (y_out, fn_out)): - - env, x, u = args[:3] - other_args = args[3:] - - if fn_out[0] is None: - assert len(env.outputs) == 1 - fn_out[0] = theano.function( - inputs=env.inputs, - outputs=env.outputs[0], - mode=self.mode) - fn = fn_out[0] - - y_shape = (x.shape[0]+1,) + u.shape - y = numpy.empty(y_shape, dtype=u.dtype) - - #print 'x', x - #print 'y', y - #print 'u', u - #print 'other', other_args - - y[0] = u - for i, x_i in enumerate(x): - something = fn(x_i, y[i], *other_args) - #print 'something', something - y[i+1] = something - y_out[0] = y -_scan1 = Scan1() - - -class Scan1Grad(theano.Op): - def __init__(self, inplace=False): - self.inplace = inplace - if inplace: - self.destroy_map = {1: [3]} - - def make_node(self, g_y, y, scan_env, x, u, *other_inputs): - return theano.Apply(self, - [g_y, y, scan_env, x, u] + list(other_inputs), - [x.type(), u.type()] + [oi.type() for oi in other_inputs] + [theano.generic()]) - - def get_fn(self, scan_env, grad_storage): - fn_storage = grad_storage[-1] - assert isinstance(scan_env, theano.gof.Env) - if fn_storage[0] is None: - y_next = scan_env.outputs[0] - gy_next = y_next.type() - inputs = scan_env.inputs # x_this, y_this, *rest - g_inputs = theano.tensor.grad(y_next, inputs, g_cost=gy_next) - - fn_storage[0] = theano.function( - inputs=[gy_next] + inputs, - outputs=g_inputs) - return fn_storage[0] - - def perform(self, node, args, grad_storage): - - #retrieve (or compute) the gradient function - fn = self.get_fn(args[2], grad_storage) - - #unpack the args - (g_y, y) = args[0:2] - (x, u) = args[3:5] - other_args = args[5:] - - #unpack grad_storage (outputs) - gx_out, gu_out = grad_storage[0:2] - g_other_storage = grad_storage[2:-1] - - assert len(other_args) == len(g_other_storage) - - if not self.inplace: - g_y = g_y.copy() - - gx = numpy.zeros_like(x) - - g_other = [numpy.zeros_like(other) for other in other_args] - - for i in xrange(len(x)-1, -1, -1): - #print 'x y gy_next', x[i], y[i], g_y[i+1] - grads = fn(g_y[i+1], x[i], y[i], *other_args) - gx[i], gy_i = grads[0:2] - #print 'gx gy', gx[i], gy_i - g_y[i] += gy_i - - #now increment the other-input gradient buffers - assert len(g_other) == (len(grads)-2) - for g_arg_buffer, g_arg in zip(g_other, grads[2:]): - g_arg_buffer += g_arg - - #write results into storage locations - gx_out[0] = gx - gu_out[0] = g_y[0] - assert len(g_other_storage) == len(g_other) - for grad_storage, grad in zip(g_other_storage, g_other): - grad_storage[0] = grad - -scan1_grad = Scan1Grad()
--- a/pylearn/lib/test_scan.py Fri Oct 16 12:17:23 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -import numpy -import theano -from theano.tensor import dscalar, dvector, dmatrix -from scan import scan1 - - -def test_extra_inputs(): - u = dscalar('u') - c = dscalar('c') - x = dvector('x') - - y = scan1(x, u, c, lambda x_i, y_prev, c: (x_i + y_prev) * c) - - sum_y = theano.tensor.sum(y) - - f = theano.function([x,u, c], y) - - xval = numpy.asarray([1., 1, 1. , 1, 1]) - uval = numpy.asarray(2.) - - yval = f(xval, uval, 2.0) - assert numpy.all(yval == [2., 6., 14., 30., 62., 126.]) - - - - g_x = theano.tensor.grad(sum_y, x) - g_u = theano.tensor.grad(sum_y, u) - - gf = theano.function([x, u, c], [g_x, g_u]) - - gxval, guval = gf(xval, uval, 2.0) - - #print gxval - #print guval - assert numpy.all(gxval == [ 62., 30., 14., 6., 2.]) - assert numpy.all(guval == 63) - - -def test_verify_scan_grad(): - def scanxx(x, u, c): - # u = dvector('u') - # c = dvector('c') - # x = dmatrix('x') - y = scan1(x, u, c, lambda x_i, y_prev, c: (x_i + y_prev) * c) - return y - - rng = numpy.random.RandomState(456) - - xval = rng.rand(4, 3) - uval = rng.rand(3) - cval = rng.rand(3) - - theano.tensor.verify_grad(scanxx, (xval, uval, cval), rng=rng) -