Mercurial > pylearn
diff algorithms/stacker.py @ 480:1babf35fcef5
merged
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Mon, 27 Oct 2008 17:29:03 -0400 |
parents | 8fcd0f3d9a17 |
children | bb6bdd3b7ff3 |
line wrap: on
line diff
import theano
from theano import tensor as T
import sys
import numpy as N

class Stacker(T.RModule):
    """A deep model built by stacking sub-modules (e.g. autoencoder layers).

    Each entry of `submodules` is a pair ``(submodule, outname)``: the
    submodule is instantiated on the previous layer's output (looked up via
    ``layer[outname]``), chaining the layers into one model.  For every layer
    the constructor builds:

    - ``local_update[i]``: a theano.Method running that layer's own update,
      but fed from the whole model's input instead of the layer's input;
    - ``global_update[i]``: a theano.Method that optimizes layer i's cost
      w.r.t. the parameters of layer i AND all earlier layers.

    ``self.update`` is the last global update (fine-tunes the whole stack)
    and ``self.compute`` maps the model input to the final output.
    """

    def __init__(self, submodules, input = None, regularize = False):
        super(Stacker, self).__init__()

        # Chain the submodules: each one consumes the previous layer's
        # output (selected by its `outname`).
        current = input
        layers = []
        for i, (submodule, outname) in enumerate(submodules):
            layer = submodule(current, regularize = regularize)
            layers.append(layer)
            current = layer[outname]
        self.layers = layers

        self.input = self.layers[0].input
        self.output = current

        local_update = []
        global_update = []
        to_update = []   # grows layer by layer: params of this + all previous layers
        all_kits = []
        for layer in layers:
            u = layer.update
            u.resolve_all()
            to_update += u.updates.keys()
            all_kits += u.kits
            # the input is the whole deep model's input instead of the layer's own
            # input (which is previous_layer[outname])
            inputs = [self.input] + u.inputs[1:]
            method = theano.Method(inputs, u.outputs, u.updates, u.kits)
            local_update.append(method)
            global_update.append(
                theano.Method(inputs,
                              u.outputs,
                              # we update the params of the previous layers too but wrt
                              # this layer's cost
                              dict((param, param - layer.lr * T.grad(layer.cost, param))
                                   for param in to_update),
                              list(all_kits)))

        self.local_update = local_update
        self.global_update = global_update
        # by default, `update` fine-tunes the whole stack wrt the last cost
        self.update = self.global_update[-1]
        self.compute = theano.Method(self.input, self.output)

        # Re-export the last layer's Methods (e.g. reconstruction, error)
        # on the stack itself, rewired to take the whole model's input.
        ll = self.layers[-1]
        for name, method in ll.components_map():
            if isinstance(method, theano.Method) and not hasattr(self, name):
                m = method.dup()
                m.resolve_all()
                m.inputs = [self.input if x is ll.input else x for x in m.inputs]
                setattr(self, name, m)

    def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs):
        """Initialize a module instance.

        :param nunits: optional list of unit counts; must have exactly one
            more entry than there are layers (input size of layer i is
            ``nunits[i]``, its output size ``nunits[i+1]``).
        :param lr: default learning rate for layers that have none set.
        :param seed: optional RNG seed; when given, each layer gets a
            derived seed and the module itself is reseeded.
        :raises ValueError: if ``len(nunits) != len(layers) + 1``.
        """
        super(Stacker, self)._instance_initialize(obj, **kwargs)
        if seed is not None:
            R = N.random.RandomState(seed)
        else:
            R = N.random
        for layer in obj.layers:
            if layer.lr is None:
                layer.lr = lr
        if nunits:
            if len(nunits) != len(obj.layers) + 1:
                # Fixed wording: the old message was ungrammatical and
                # obscured the actual contract.
                raise ValueError('You should give exactly one more unit number than there are layers.')
            for ni, no, layer in zip(nunits[:-1], nunits[1:], obj.layers):
                if seed is not None:
                    # NOTE(review): sys.maxint is Python 2 only, consistent
                    # with the rest of this file's vintage.
                    layer.initialize(ni, no, seed = R.random_integers(sys.maxint - 1))
                else:
                    layer.initialize(ni, no)
        if seed is not None:
            obj.seed(seed)

    def _instance_flops_approx(self, obj):
        """Return the sum of the layers' approximate flop counts."""
        rval = 0
        for layer in obj.layers:
            rval += layer.flops_approx()
        return rval