view pylearn/algorithms/sgd.py @ 549:16894d38ce48

moving stuff in algorithms, added rnn
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 27 Nov 2008 21:42:02 -0500
parents 85d3300c9a9c
children 7de7fa19fb9b
line wrap: on
line source

"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.)
"""

from theano.compile import module
from theano import tensor as T

from .minimizer import minimizer_factory

class StochasticGradientDescent(module.FancyModule):
    """Fixed-stepsize stochastic gradient descent.

    Compiles two Methods over the given inputs:

    - ``step``: applies one in-place update ``p <- p - stepsize * g`` to every
      parameter and returns nothing.
    - ``step_cost``: applies the same update and also returns the cost.
    """
    def __init__(self, args, cost, params, gradients=None, stepsize=None):
        """
        :param args: input variables for the compiled ``step``/``step_cost``
            Methods
        :param cost: scalar expression to be minimized
        :param params: list of parameter variables to update
        :param gradients: optional precomputed gradients of ``cost`` w.r.t.
            ``params``; computed via ``T.grad`` when None
        :param stepsize: the step to take in (negative) gradient direction
        :type stepsize: None, scalar value, or scalar TensorResult
        """
        super(StochasticGradientDescent, self).__init__()

        # Numeric value installed by _instance_initialize; stays None unless a
        # plain scalar stepsize was supplied below.
        self.stepsize_init = None

        if stepsize is None:
            # Caller will supply the stepsize at instance-initialization time.
            self.stepsize = module.Member(T.dscalar())
        elif isinstance(stepsize, T.TensorResult):
            # Use the caller's symbolic scalar directly.
            self.stepsize = stepsize
        else:
            # Plain numeric stepsize: hold a symbolic scalar and remember the
            # numeric value for instance initialization.
            # TODO: module.Member(T.value(stepsize)) ought to work here but did
            # not at the time of writing -- investigate why.
            self.stepsize = module.Member(T.dscalar())
            self.stepsize_init = stepsize

        if self.stepsize.ndim != 0:
            raise ValueError('stepsize must be a scalar', stepsize)

        self.params = params
        self.gparams = T.grad(cost, self.params) if gradients is None else gradients

        # One SGD step per parameter: p <- p - stepsize * grad(cost, p)
        self.updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))

        self.step = module.Method(
                args, [],
                updates=self.updates)
        self.step_cost = module.Method(
                args, cost,
                updates=self.updates)

    def _instance_initialize(self, obj):
        # Install the numeric stepsize; this is None when the stepsize was
        # given symbolically or is expected to be set later by the caller.
        obj.stepsize = self.stepsize_init


@minimizer_factory('sgd')
def sgd_minimizer(stepsize=None):
    """Return a minimizer-building function with *stepsize* curried in.

    The returned callable has the standard minimizer-factory signature
    ``(inputs, cost, params, gradients=None)`` and builds a
    StochasticGradientDescent module.

    :param stepsize: fixed step to take in the (negative) gradient direction;
        None, a scalar value, or a scalar TensorResult
    """
    def m(i, c, p, g=None):
        # Forward precomputed gradients (previously silently dropped); with
        # g=None the gradients are derived from the cost as before.
        return StochasticGradientDescent(i, c, p, gradients=g, stepsize=stepsize)
    return m