view algorithms/daa.py @ 517:716c04512dbe

init
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 12 Nov 2008 10:54:38 -0500
parents dc2d93590da0
children
line wrap: on
line source


import theano
from theano import tensor as T
from theano.tensor import nnet as NN
import numpy as N

from pylearn import cost as cost

class DenoisingAA(T.RModule):
    """De-noising Auto-encoder

    The constructor builds two symbolic passes over the same parameters
    (w1, w2, b1, b2):

    - a noiseless pass: input -> hidden -> output -> cost
    - a noisy pass: corrupted_input -> nhidden -> noutput -> ncost

    The parameter updates exposed through `update` are computed from the
    gradient of the noisy cost (`ncost`).

    Abstract base class. Requires subclass with functions:

    - build_corrupted_input()
    - hid_activation_function(activation)
    - out_activation_function(activation)
    - build_reconstruction_costs(output)

    Introductory article about this model WRITEME.


    """

    def __init__(self, input = None, regularize = True, tie_weights = True,
            activation_function=NN.sigmoid, reconstruction_cost_function=cost.cross_entropy):
        """
        :param input: symbolic input of the model. When None, a fresh
        T.matrix named 'input' is created.

        :param regularize: when true, the value returned by
        build_regularization() is added to both `cost` and `ncost`.

        :param tie_weights: when true, w2 is the transpose of w1 and only
        (w1, b1, b2) are updated; otherwise w2 is an independent parameter.

        :param activation_function: stored as self.activation_function. The
        base class does not call it directly; it is available to the
        subclass-provided hid_activation_function / out_activation_function.

        :param reconstruction_cost_function: stored as
        self.reconstruction_cost_function for use by the subclass-provided
        build_reconstruction_costs. Should return one cost per example (row)

        :todo: Default noise level for all daa levels

        """
        super(DenoisingAA, self).__init__()

        # MODEL CONFIGURATION
        self.regularize = regularize
        self.tie_weights = tie_weights
        self.activation_function = activation_function
        self.reconstruction_cost_function = reconstruction_cost_function

        # ACQUIRE/MAKE INPUT
        # Compare against None explicitly: the question is "was an input
        # supplied?", not "is the supplied symbolic variable truthy?".
        if input is None:
            input = T.matrix('input')
        self.input = theano.External(input)

        # HYPER-PARAMETERS
        # Learning rate: scales the gradient in the updates built below.
        self.lr = theano.Member(T.scalar())

        # PARAMETERS
        self.w1 = theano.Member(T.matrix())
        if not tie_weights:
            self.w2 = theano.Member(T.matrix())
        else:
            # Tied weights: the decoder reuses the encoder matrix, transposed.
            self.w2 = self.w1.T
        self.b1 = theano.Member(T.vector())
        self.b2 = theano.Member(T.vector())


        # REGULARIZATION COST
        self.regularization = self.build_regularization()


        ### NOISELESS ###

        # HIDDEN LAYER
        self.hidden_activation = T.dot(self.input, self.w1) + self.b1
        self.hidden = self.hid_activation_function(self.hidden_activation)

        # RECONSTRUCTION LAYER
        self.output_activation = T.dot(self.hidden, self.w2) + self.b2
        self.output = self.out_activation_function(self.output_activation)

        # RECONSTRUCTION COST
        self.reconstruction_costs = self.build_reconstruction_costs(self.output)
        self.reconstruction_cost = T.mean(self.reconstruction_costs)

        # TOTAL COST
        self.cost = self.reconstruction_cost
        if self.regularize:
            self.cost = self.cost + self.regularization


        ### WITH NOISE ###
        # Same pipeline as above, but starting from the corrupted input; the
        # resulting attributes carry an 'n' prefix.
        self.corrupted_input = self.build_corrupted_input()

        # HIDDEN LAYER
        self.nhidden_activation = T.dot(self.corrupted_input, self.w1) + self.b1
        self.nhidden = self.hid_activation_function(self.nhidden_activation)

        # RECONSTRUCTION LAYER
        self.noutput_activation = T.dot(self.nhidden, self.w2) + self.b2
        self.noutput = self.out_activation_function(self.noutput_activation)

        # RECONSTRUCTION COST
        self.nreconstruction_costs = self.build_reconstruction_costs(self.noutput)
        self.nreconstruction_cost = T.mean(self.nreconstruction_costs)

        # TOTAL COST
        self.ncost = self.nreconstruction_cost
        if self.regularize:
            self.ncost = self.ncost + self.regularization


        # GRADIENTS AND UPDATES
        # With tied weights w2 is derived from w1, so it is not a separate
        # parameter to differentiate against.
        if self.tie_weights:
            self.params = self.w1, self.b1, self.b2
        else:
            self.params = self.w1, self.w2, self.b1, self.b2
        gradients = T.grad(self.ncost, self.params)
        # One gradient-descent step per parameter: p <- p - lr * dncost/dp
        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))

        # INTERFACE METHODS
        # Each method takes self.input; `update` is the only one carrying
        # parameter updates.
        self.update = theano.Method(self.input, self.ncost, updates)
        self.compute_cost = theano.Method(self.input, self.cost)
        self.noisify = theano.Method(self.input, self.corrupted_input)
        self.reconstruction = theano.Method(self.input, self.output)
        self.representation = theano.Method(self.input, self.hidden)
        self.reconstruction_through_noise = theano.Method(self.input, [self.corrupted_input, self.noutput])

        self.validate = theano.Method(self.input, [self.cost, self.output])

    def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init):
        """Give initial values to the members of the module instance `obj`.

        :param obj: the instance being initialized.

        :param input_size: number of input units; must be given together
        with hidden_size, or both left as None.

        :param hidden_size: number of hidden units.

        :param seed: when not None, used to build the numpy RandomState that
        draws the initial weights, and also passed to obj.seed(). When None,
        the global numpy.random generator draws the weights.

        :param init: extra keyword arguments forwarded to the parent class's
        _instance_initialize.

        When sizes are given, w1 is drawn uniformly from
        [-1/sqrt(input_size), 1/sqrt(input_size)) with shape
        (input_size, hidden_size); if the weights are not tied, w2 is drawn
        uniformly from [-1/sqrt(hidden_size), 1/sqrt(hidden_size)) with shape
        (hidden_size, input_size); b1 and b2 start at zero.

        :raises ValueError: if exactly one of input_size / hidden_size is
        given.
        """
        if (input_size is None) ^ (hidden_size is None):
            raise ValueError("Must specify input_size and hidden_size or neither.")
        super(DenoisingAA, self)._instance_initialize(obj, **init)
        if seed is not None:
            R = N.random.RandomState(seed)
        else:
            R = N.random
        if input_size is not None:
            # Bounds shrink with the fan-in of the corresponding layer.
            w1_bound = 1/N.sqrt(input_size)
            w2_bound = 1/N.sqrt(hidden_size)
            # Draw order (w1 first, then w2) is kept fixed so that a given
            # seed always yields the same initial weights.
            obj.w1 = R.uniform(size = (input_size, hidden_size), low = -w1_bound, high = w1_bound)
            if not self.tie_weights:
                obj.w2 = R.uniform(size = (hidden_size, input_size), low = -w2_bound, high = w2_bound)
            obj.b1 = N.zeros(hidden_size)
            obj.b2 = N.zeros(input_size)
        if seed is not None:
            # NOTE(review): presumably seeds the module's own random streams
            # (RModule) -- confirm against the RModule implementation.
            obj.seed(seed)
        obj.__hide__ = ['params']

    def build_regularization(self):
        """
        Return the symbolic regularization term; this default contributes
        nothing. Subclasses may override it.

        @todo: Why do we need this function?
        """
        return T.zero() # no regularization!


class SigmoidXEDenoisingAA(DenoisingAA):
    """
    Concrete DenoisingAA: supplies the corruption, activation,
    reconstruction-cost and regularization hooks that the base class
    constructor calls.

    - corruption: the input is multiplied element-wise by a binomial mask
      drawn with parameters (1, 1 - noise_level)
    - activations: both layers apply self.activation_function
    - reconstruction cost: self.reconstruction_cost_function(input, output)
    - regularization: l2_coef times the sum of squared weights

    @todo: Merge this into the above.
    @todo: Default noise level for all daa levels
    """

    def build_corrupted_input(self):
        """Register the `noise_level` member and return the masked input."""
        self.noise_level = theano.Member(T.scalar())
        input_shape = T.shape(self.input)
        keep_probability = 1 - self.noise_level
        mask = self.random.binomial(input_shape, 1, keep_probability)
        return mask * self.input

    def hid_activation_function(self, activation):
        """Apply the configured activation function to the hidden layer."""
        hidden = self.activation_function(activation)
        return hidden

    def out_activation_function(self, activation):
        """Apply the configured activation function to the output layer."""
        output = self.activation_function(activation)
        return output

    def build_reconstruction_costs(self, output):
        """Return the configured cost between the clean input and `output`."""
        costs = self.reconstruction_cost_function(self.input, output)
        return costs

    def build_regularization(self):
        """Register the `l2_coef` member and return the weighted L2 penalty.

        The penalty covers w1 only when the weights are tied, and w1 plus w2
        otherwise.
        """
        self.l2_coef = theano.Member(T.scalar())
        squared_weights = T.sum(self.w1 * self.w1)
        if not self.tie_weights:
            squared_weights = squared_weights + T.sum(self.w2 * self.w2)
        return self.l2_coef * squared_weights

    def _instance_initialize(self, obj, input_size = None, hidden_size = None, seed = None, **init):
        """Default `noise_level` and `l2_coef` to 0, then defer to the parent."""
        for member_name in ('noise_level', 'l2_coef'):
            init.setdefault(member_name, 0)
        parent = super(SigmoidXEDenoisingAA, self)
        parent._instance_initialize(obj, input_size, hidden_size, seed, **init)