# pylearn/algorithms/stacker.py


# Intended greedy, layer-wise training loop:
#
# for example in examples:
#     repr = example
#     for layer in stacked.layers:
#         layer.update(repr)
#         repr = layer.representation(repr)

import sys

import numpy as N

import theano
from theano import tensor as T
from theano.tensor.deprecated import rmodule

class Stacker(rmodule.RModule):
    """
    @note: Assumes some names in the layers: input, cost, lr, and update
    @todo: Maybe compile functions on demand, rather than immediately.
    """

    def __init__(self, submodules, input = None, regularize = False):
        super(Stacker, self).__init__()

        current = input
        layers = []
        for submodule, outname in submodules:
            layer = submodule(current, regularize = regularize)
            layers.append(layer)
            current = getattr(layer, outname)
        self.layers = layers

        self.input = self.layers[0].input
        self.output = current

        representation = []
        local_update = []
        global_update = []
        to_update = []
        for layer, (submodule, outname) in zip(layers, submodules):
            u = layer.update
            u.resolve_all()
            # accumulate the params of every layer seen so far, so this
            # layer's global_update also trains all earlier layers
            to_update += u.updates.keys()
            # the first input is the whole deep model's input rather than this
            # layer's own input (which is the previous layer's `outname` output)
            inputs = [self.input] + u.inputs[1:]
            method = theano.Method(inputs, u.outputs, u.updates)
            local_update.append(method)
            global_update.append(
                theano.Method(inputs,
                              u.outputs,
                              # we update the params of the previous layers too but wrt
                              # this layer's cost
                              dict((param, param - layer.lr * T.grad(layer.cost, param))
                                   for param in to_update)))
            # map the whole model's input all the way to this layer's output
            representation.append(theano.Method(self.input, getattr(layer, outname)))

            # @todo: Add diagnostics
            # self.diagnose_from_input = theano.Method([self.input],
            #     self.layers[0].diagnose.outputs + self.layers[1].diagnose.outputs + ...)

        self.local_update = local_update
        self.global_update = global_update
        self.representation = representation
        # by default, updating the model runs the deepest global update,
        # which trains every layer wrt the final cost
        self.update = self.global_update[-1]
        self.compute = theano.Method(self.input, self.output)

        # Re-expose each theano.Method of the last layer (e.g. ll.classify) on
        # self, rewriting its inputs so it takes the global model input instead
        # of the last layer's own input.
        ll = self.layers[-1]
        for name, method in ll.__dict__['local_attr'].iteritems():
            if isinstance(method, theano.Method) and not hasattr(self, name):
                if not isinstance(method.inputs, (list, dict)):
                    method.inputs = [method.inputs]
                inputs = []
                for x in method.inputs:
                    if x is ll.input:
                        inputs += [self.input]
                    else:
                        inputs += [x]
                # backport of:
                # inputs = [self.input if x is ll.input else x for x in method.inputs]
                m = theano.Method(inputs, method.outputs, method.updates)
                setattr(self, name, m)

    def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs):
        super(Stacker, self)._instance_initialize(obj, **kwargs)
        if seed is not None:
            R = N.random.RandomState(seed)
        else:
            R = N.random
        for layer in obj.layers:
            if layer.lr is None:
                layer.lr = lr
        if nunits:
            if len(nunits) != len(obj.layers) + 1:
                raise ValueError('You should give exactly one more unit count than there are layers.')
            obj.input_dimension = nunits[0]
            obj.output_dimension = nunits[-1]
            for ni, no, layer in zip(nunits[:-1], nunits[1:], obj.layers):
                if seed is not None:
                    layer.initialize(ni, no, seed = R.random_integers(sys.maxint - 1))
                else:
                    layer.initialize(ni, no)
        if seed is not None:
            obj.seed(seed)

    def _instance_flops_approx(self, obj):
        return sum(layer.flops_approx() for layer in obj.layers)
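

# A hedged usage sketch (not part of the original file). `daa.DAA` and
# `logistic.Module` below are hypothetical submodule classes; any modules
# exposing the names input, cost, lr and update, plus the named output
# attribute, would do. Extra arguments to model.update follow the last
# layer's own update signature (e.g. a target for a supervised layer).
#
#     stacked = Stacker([(daa.DAA, 'hidden'), (logistic.Module, 'output')])
#     model = stacked.make(nunits = [784, 500, 10], lr = 0.01, seed = 42)
#     for x in unlabelled_examples:
#         model.local_update[0](x)    # greedy pre-training of the first layer
#     for x, y in labelled_examples:
#         model.update(x, y)          # fine-tune every layer wrt the last cost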