view pylearn/algorithms/sgd.py @ 573:5d1228f99caa

fix import.
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Thu, 04 Dec 2008 10:56:13 -0500
parents b58e71878bb5
children 01e04bf878e2
line wrap: on
line source

"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.)
"""

from theano.compile import module
from theano import tensor as T

from minimizer import minimizer_factory

class StochasticGradientDescent(module.FancyModule):
    """Gradient descent with a single, fixed stepsize.

    Each parameter ``p`` paired with gradient ``g`` is given the update
    ``p - stepsize * g``.  Two methods are declared with those updates:
    ``step`` (built with an empty output list) and ``step_cost`` (built with
    ``cost`` as its output argument).
    """

    def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True):
        """
        :param args: first argument given to `module.Method` for both
            ``step`` and ``step_cost``
        :param cost: expression differentiated with `T.grad` when
            ``gradients`` is None; also the output argument of ``step_cost``
        :param params: the parameters to update
        :param gradients: gradients paired with ``params`` by position (via
            ``zip``); when None they are computed as ``T.grad(cost, params)``
        :param stepsize: the step to take in (negative) gradient direction
        :type stepsize: None, scalar value, or scalar TensorResult
        :param WEIRD_STUFF: only consulted when ``stepsize`` is a plain
            value.  If true, the stepsize member is a bare ``T.dscalar()`` and
            the value is kept in ``stepsize_init`` to be assigned in
            `_instance_initialize`; if false, the value is wrapped with
            ``T.value`` instead.

        :raises ValueError: if the resulting stepsize has ``ndim != 0``
        """
        super(StochasticGradientDescent, self).__init__()
        self.WEIRD_STUFF = WEIRD_STUFF
        self.stepsize_init = None

        if stepsize is None:
            # Nothing supplied: declare a symbolic double scalar member.
            self.stepsize = module.Member(T.dscalar())
        elif isinstance(stepsize, T.TensorResult):
            # Already symbolic: use it as given.
            self.stepsize = stepsize
        elif self.WEIRD_STUFF:
            # TODO: why is this necessary? why does the T.value branch below
            # not work?
            self.stepsize = module.Member(T.dscalar())
            self.stepsize_init = stepsize
        else:
            self.stepsize = module.Member(T.value(stepsize))

        if self.stepsize.ndim != 0:
            raise ValueError('stepsize must be a scalar', stepsize)

        self.params = params
        if gradients is None:
            self.gparams = T.grad(cost, self.params)
        else:
            self.gparams = gradients

        # One update expression per (parameter, gradient) pair.
        sgd_updates = {}
        for param, grad in zip(self.params, self.gparams):
            sgd_updates[param] = param - self.stepsize * grad
        self.updates = sgd_updates

        # Both methods share the same updates; only the output differs.
        self.step = module.Method(args, [], updates=self.updates)
        self.step_cost = module.Method(args, cost, updates=self.updates)

    def _instance_initialize(self, obj):
        """Assign ``stepsize_init`` to ``obj.stepsize`` when WEIRD_STUFF is set.

        Does nothing when WEIRD_STUFF is false.
        """
        if not self.WEIRD_STUFF:
            return
        obj.stepsize = self.stepsize_init


@minimizer_factory('sgd')
def sgd_minimizer(stepsize=None, **args):
    """Return a function that builds `StochasticGradientDescent` minimizers.

    :param stepsize: forwarded as the ``stepsize`` argument of
        `StochasticGradientDescent` (None, a scalar value, or a scalar
        TensorResult)
    :param args: extra keyword arguments forwarded to the
        `StochasticGradientDescent` constructor

    :returns: a function ``m(i, c, p, g=None)`` that passes ``i``, ``c`` and
        ``p`` positionally (the constructor's ``args``, ``cost`` and
        ``params``) and, when ``g`` is not None, passes ``g`` as
        ``gradients``
    """
    def m(i, c, p, g=None):
        # Copy so the keyword dict captured by the closure is never mutated
        # across calls.
        kwargs = dict(args)
        if g is not None:
            # Forward caller-supplied gradients; they used to be accepted
            # here and then silently dropped.  A non-None `g` takes
            # precedence over any 'gradients' entry given to the factory.
            kwargs['gradients'] = g
        return StochasticGradientDescent(i, c, p, stepsize=stepsize, **kwargs)
    return m