view pylearn/algorithms/sgd.py @ 537:b054271b2504

new file structure layout, factories, etc.
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 12 Nov 2008 21:57:54 -0500

"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.)
"""

from theano.compile import module
from theano import tensor as T

from .minimizer import minimizer_factory

class StochasticGradientDescent(module.FancyModule):
    def __init__(self, args, cost, params, gradients=None, lr=None):
        super(StochasticGradientDescent, self).__init__()

        # Learning rate: use the expression the caller provides, or default to
        # an uninitialized scalar Member (see the initialization note below).
        self.lr = lr if lr is not None else module.Member(T.dscalar())
        self.params = params
        # Use the gradients the caller provides, or derive them symbolically
        # from the cost.
        self.gparams = T.grad(cost, self.params) if gradients is None else gradients

        # One SGD update per parameter: p <- p - lr * dcost/dp
        self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gparams))
        self.args = args

        # `step` applies one round of updates to the parameters; `step_cost`
        # does the same but also returns the cost (computed before the updates
        # take effect).
        self.step = module.Method(
                self.args, None,
                updates=self.updates)
        self.step_cost = module.Method(
                self.args, cost,
                updates=self.updates)

    # No initialization is done here.
    # Rationale: the only parameter is lr.
    # - If the user wants a constant lr, they pass the constant to the constructor.
    # - If the user wants a computed lr, they pass that expression to the constructor.
    # - If the user wants a dynamic lr, they pass a tensor.value() for lr.
    # - If the default T.dscalar() is used, then lr must be initialized
    #   explicitly elsewhere (see the sketch just below).
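    #
    # A hedged usage sketch of the default-lr case, assuming the old
    # theano.compile.module API (a module is compiled with .make(), and Members
    # are then set by attribute assignment); the names `x`, `w`, `cost`, and
    # `x_value` are hypothetical:
    #
    #   sgd = StochasticGradientDescent([x], cost, [w])
    #   inst = sgd.make()
    #   inst.lr = 0.01          # initialize the default T.dscalar() Member
    #   inst.step(x_value)      # apply one update: w <- w - lr * dcost/dw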

@minimizer_factory('sgd')
def sgd_minimizer(lr):
    """Curry the learning rate into a StochasticGradientDescent constructor."""
    def m(i, c, p, g=None):
        return StochasticGradientDescent(i, c, p, gradients=g, lr=lr)
    return m
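
# A hedged sketch of the factory path, under the same assumptions as the
# sketch in the class above (`x`, `w`, `cost`, and `x_value` are hypothetical):
#
#   make_sgd = sgd_minimizer(lr=0.01)   # registered under the name 'sgd'
#   sgd = make_sgd([x], cost, [w])      # builds the module with lr fixed
#   inst = sgd.make()
#   c = inst.step_cost(x_value)         # one update; returns the cost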