# HG changeset patch
# User James Bergstra
# Date 1282310879 14400
# Node ID 8e78afa74313c8a6be452c6db2590f587c26b44d
# Parent  a0a6cc21dc4fccff00922e9365379599aaf85bf4
Moved sgd to gd/ module

diff -r a0a6cc21dc4f -r 8e78afa74313 pylearn/algorithms/sgd.py
--- a/pylearn/algorithms/sgd.py  Thu Aug 19 14:44:15 2010 -0400
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.)
-"""
-
-import theano
-
-class StochasticGradientDescent(theano.Module):
-    """Fixed-stepsize gradient descent
-
-    Methods for gradient descent are:
-    - step(arg_vals), which returns None and updates the params
-    - step_cost(arg_vals), which returns the cost value and updates the params
-
-    """
-    def __init__(self, args, cost, params,
-            gradients=None, stepsize=None,
-            updates=None, auxout=None, methods=True):
-        """
-        :param stepsize: the step to take in the (negative) gradient direction
-        :type stepsize: None, scalar value, or scalar TensorVariable
-
-        :param updates: extra symbolic updates to make when evaluating either step or step_cost
-        (these override the gradients if necessary)
-        :type updates: dict Variable -> Variable
-        :param auxout: auxiliary outputs, a list of output symbols to
-        compute at the same time as cost (for efficiency)
-        :param methods: Should this module define the step and step_cost methods?
-        """
-        super(StochasticGradientDescent, self).__init__()
-        self.stepsize_init = None
-
-        if stepsize is None:
-            self.stepsize = theano.tensor.dscalar()
-        elif isinstance(stepsize, theano.tensor.TensorVariable):
-            self.stepsize = stepsize
-        else:
-            self.stepsize = theano.tensor.as_tensor_variable(stepsize)
-
-        if self.stepsize.ndim != 0:
-            raise TypeError('stepsize must be a scalar', stepsize)
-
-        self.params = params
-        self.gparams = theano.tensor.grad(cost, self.params) if gradients is None else gradients
-
-        self._updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))
-        if updates is not None:
-            self._updates.update(updates)
-
-        if methods:
-            if auxout is None:
-                self.step = theano.Method(args, [], updates=self._updates)
-                self.step_cost = theano.Method(args, cost, updates=self._updates)
-            else:
-                # step_cost always returns a list when auxout is given
-                self.step = theano.Method(
-                        args, [] + auxout,
-                        updates=self._updates)
-                self.step_cost = theano.Method(
-                        args, [cost] + auxout,
-                        updates=self._updates)
-
-
-    updates = property(lambda self: self._updates.copy())
-
-    def _instance_initialize(self, obj):
-        pass
-
-def sgd_minimizer(stepsize=None):
-    """Curry the stepsize argument to StochasticGradientDescent, providing the standard minimizer interface
-
-    :returns: standard minimizer constructor f(args, cost, params, gradients=None)
-    """
-    def f(args, cost, params, gradients=None, updates=None, auxout=None):
-        return StochasticGradientDescent(args, cost, params, gradients=gradients, stepsize=stepsize,
-                updates=updates, auxout=auxout)
-    return f
diff -r a0a6cc21dc4f -r 8e78afa74313 pylearn/algorithms/tests/test_sgd.py
--- a/pylearn/algorithms/tests/test_sgd.py  Thu Aug 19 14:44:15 2010 -0400
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-import theano
-from theano.compile.debugmode import DebugMode
-from pylearn.algorithms import sgd
-
-mode = theano.compile.mode.get_default_mode()
-if isinstance(mode, DebugMode):
-    mode = 'FAST_RUN'
-
-def test_sgd0():
-
-    x = theano.tensor.dscalar('x')
-    y = theano.tensor.dscalar('y')
-
-    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01)
-    M.y = y
-    m = M.make(mode=mode)
-    m.y = 5.0
-    for i in xrange(100):
-        c = m.step_cost(3.0)
-        #print c, m.y
-
-    assert c < 1.0e-5
-    assert abs(m.y - (1.0 / 3)) < 1.0e-4
-
-def test_sgd_stepsize_variable():
-
-    x = theano.tensor.dscalar('x')
-    y = theano.tensor.dscalar('y')
-    lr = theano.tensor.dscalar('lr')
-
-    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=lr)
-    M.y = y
-    M.lr = lr
-    m = M.make(mode=mode)
-    m.y = 5.0
-    m.lr = 0.01
-    for i in xrange(100):
-        c = m.step_cost(3.0)
-        # print c, m.y
-
-    assert c < 1.0e-5
-    assert abs(m.y - (1.0 / 3)) < 1.0e-4
-
-
-    # test that changing the lr has an impact
-
-    m.y = 5.0
-    m.lr = 0.0
-    for i in xrange(10):
-        c = m.step_cost(3.0)
-        # print c, m.y
-
-    assert m.y == 5.0
-
-def test_sgd_stepsize_none():
-
-    x = theano.tensor.dscalar('x')
-    y = theano.tensor.dscalar('y')
-
-    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y])
-    M.y = y
-    m = M.make(mode=mode)
-    m.y = 5.0
-    # no stepsize was given, so the instance attribute defaults to None until set
-    assert m.stepsize is None
-    m.stepsize = 0.01
-    for i in xrange(100):
-        c = m.step_cost(3.0)
-        # print c, m.y
-
-    assert c < 1.0e-5
-    assert abs(m.y - (1.0 / 3)) < 1.0e-4
-
-if __name__ == '__main__':
-    test_sgd0()
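
Note: this patch shows only the removal side of the move. Per the commit
message the module presumably reappears under the gd/ package; the import path
pylearn.gd.sgd used below is an assumption, not something shown in these
hunks. A minimal usage sketch of the relocated class, mirroring test_sgd0
(Python 2 and the old theano.Module API, as in the deleted tests):

    import theano
    from pylearn.gd import sgd   # assumed post-move import path

    x = theano.tensor.dscalar('x')
    y = theano.tensor.dscalar('y')

    # Minimize (1 - x*y)**2 over y with a fixed stepsize of 0.01.
    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01)
    M.y = y                   # expose the parameter on the module
    m = M.make()              # compile an instance of the module
    m.y = 5.0                 # initialize the parameter value
    for i in xrange(100):
        c = m.step_cost(3.0)  # one update at x=3.0; returns the cost

    assert abs(m.y - (1.0 / 3)) < 1.0e-4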
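The sgd_minimizer wrapper in the removed file curries the stepsize so that
callers receive a constructor with the standard minimizer signature
f(args, cost, params, gradients=None, ...). A sketch of that usage, under the
same assumed import path as above:

    # Fix the stepsize once; the result is a reusable minimizer factory.
    minimizer = sgd.sgd_minimizer(stepsize=0.01)

    # Build a StochasticGradientDescent module without repeating the stepsize.
    M = minimizer([x], (1.0 - x * y)**2, [y])
    m = M.make()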