diff algorithms/stacker.py @ 480:1babf35fcef5

merged
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 27 Oct 2008 17:29:03 -0400
parents 8fcd0f3d9a17
children bb6bdd3b7ff3
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/algorithms/stacker.py	Mon Oct 27 17:29:03 2008 -0400
@@ -0,0 +1,83 @@
+
+import theano
+from theano import tensor as T
+import sys
+import numpy as N
+
+class Stacker(T.RModule):
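+    """Stack a sequence of (submodule, output_name) pairs into one deep model.
+
+    Each submodule is built on the previous layer's named output.  For every
+    layer the module exposes a `local_update` Method, which trains that layer
+    alone, and a `global_update` Method, which also updates all earlier
+    layers' parameters by gradient descent on that layer's cost.  `update` is
+    the last global_update, and `compute` maps the model's input to the last
+    layer's output.
+    """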
+
+    def __init__(self, submodules, input = None, regularize = False):
+        super(Stacker, self).__init__()
+
+        current = input
+        layers = []
+        for i, (submodule, outname) in enumerate(submodules):
+            layer = submodule(current, regularize = regularize)
+            layers.append(layer)
+            current = layer[outname]
+        self.layers = layers
+
+        self.input = self.layers[0].input
+        self.output = current
+
+        local_update = []
+        global_update = []
+        to_update = []
+        all_kits = []
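+        # Build two training Methods per layer: local_update[i] trains layer i
+        # by itself, while global_update[i] also re-trains every earlier layer
+        # using gradient descent on layer i's cost.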
+        for layer in layers:
+            u = layer.update
+            u.resolve_all()
+            to_update += u.updates.keys()
+            all_kits += u.kits
+            # the first input is the whole deep model's input rather than this
+            # layer's own input (which is previous_layer[outname])
+            inputs = [self.input] + u.inputs[1:]
+            method = theano.Method(inputs, u.outputs, u.updates, u.kits)
+            local_update.append(method)
+            global_update.append(
+                theano.Method(inputs,
+                              u.outputs,
+                              # we update the params of the previous layers too but wrt
+                              # this layer's cost
+                              dict((param, param - layer.lr * T.grad(layer.cost, param))
+                                   for param in to_update),
+                              list(all_kits)))
+
+        self.local_update = local_update
+        self.global_update = global_update
+        self.update = self.global_update[-1]
+        self.compute = theano.Method(self.input, self.output)
+        ll = self.layers[-1]
+        for name, method in ll.components_map():
+            if isinstance(method, theano.Method) and not hasattr(self, name):
+                m = method.dup()
+                m.resolve_all()
+                m.inputs = [self.input if x is ll.input else x for x in m.inputs]
+                setattr(self, name, m)
+
+    def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs):
+        super(Stacker, self)._instance_initialize(obj, **kwargs)
+        if seed is not None:
+            R = N.random.RandomState(seed)
+        else:
+            R = N.random
+        for layer in obj.layers:
+            if layer.lr is None:
+                layer.lr = lr
+        if nunits:
+            if len(nunits) != len(obj.layers) + 1:
+                raise ValueError('You should give exactly one more unit count than there are layers.')
+            for ni, no, layer in zip(nunits[:-1], nunits[1:], obj.layers):
+                if seed is not None:
+                    layer.initialize(ni, no, seed = R.random_integers(sys.maxint - 1))
+                else:
+                    layer.initialize(ni, no)
+        if seed is not None:
+            obj.seed(seed)
+
+    def _instance_flops_approx(self, obj):
+        rval = 0
+        for layer in obj.layers:
+            rval += layer.flops_approx()
+        return rval
+
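
For context, here is a minimal usage sketch of the Stacker module added above. It is illustrative only: Autoencoder, train_batches, and the unit sizes are hypothetical, it relies on the pre-1.0 Theano Module/Method API this file is written against, and it assumes that make() forwards its keyword arguments to _instance_initialize and that the list of Methods in local_update is compiled element-wise.

# Usage sketch only; Autoencoder and train_batches are hypothetical, and the
# Module/Method machinery below is the old (pre-1.0) Theano module system.
from algorithms.stacker import Stacker

model = Stacker([(Autoencoder, 'hidden'), (Autoencoder, 'hidden')],
                regularize=False)

# nunits needs exactly one more entry than there are layers
stack = model.make(nunits=[784, 500, 250], lr=0.01, seed=42)

# greedy layer-wise pre-training: local_update[i] trains layer i on its own cost
for train_layer in stack.local_update:
    for x in train_batches:
        train_layer(x)

# fine-tuning: update is the last global_update, so it adjusts every layer's
# parameters w.r.t. the top layer's cost
for x in train_batches:
    stack.update(x)

top_code = stack.compute(x)    # forward pass through the whole stack

The split between local_update and global_update mirrors the usual recipe for deep models of this period: greedy layer-wise pre-training followed by fine-tuning of the whole stack.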