changeset 952:5f80351bc762
Moving sgd to a new 'gd' pylearn module, where it should be joined by TONGA
and Hessian-Free.
author:    James Bergstra <bergstrj@iro.umontreal.ca>
date:      Thu, 19 Aug 2010 11:53:19 -0400
parents:   5d70dfc70ec0
children:  2eb98a740823
files:     pylearn/gd/README.txt pylearn/gd/__init__.py pylearn/gd/sgd.py pylearn/gd/stopper.py pylearn/gd/tests/test_sgd.py
diffstat:  5 files changed, 294 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/README.txt	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,2 @@
+
+see __init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/__init__.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,11 @@
+"""Gradient Descent
+
+This module should contain tools and algorithms related to [stochastic] gradient descent. For
+example:
+
+ - SGD with/without momentum
+ - Hessian Free GD
+ - TONGA
+ - Stopping criteria (incl. for use in theano functions)
+
+"""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/sgd.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,75 @@
+"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.)
+"""
+
+import theano
+
+class StochasticGradientDescent(theano.Module):
+    """Fixed stepsize gradient descent
+
+    Methods for gradient descent are:
+    - step(arg_vals), which returns None and updates the params
+    - step_cost(arg_vals), which returns the cost value and updates the params
+
+    """
+    def __init__(self, args, cost, params,
+                 gradients=None, stepsize=None,
+                 updates=None, auxout=None, methods=True):
+        """
+        :param stepsize: the step to take in the (negative) gradient direction
+        :type stepsize: None, scalar value, or scalar TensorVariable
+
+        :param updates: extra symbolic updates to make when evaluating either step or step_cost
+            (these override the gradients if necessary)
+        :type updates: dict Variable -> Variable
+        :param auxout: auxiliary outputs, list containing output symbols to
+            compute at the same time as cost (for efficiency)
+        :param methods: Should this module define the step and step_cost methods?
+        """
+        super(StochasticGradientDescent, self).__init__()
+        self.stepsize_init = None
+
+        if stepsize is None:
+            self.stepsize = theano.tensor.dscalar()
+        elif isinstance(stepsize, theano.tensor.TensorVariable):
+            self.stepsize = stepsize
+        else:
+            self.stepsize = theano.tensor.as_tensor_variable(stepsize)
+
+        if self.stepsize.ndim != 0:
+            raise TypeError('stepsize must be a scalar', stepsize)
+
+        self.params = params
+        self.gparams = theano.tensor.grad(cost, self.params) if gradients is None else gradients
+
+        self._updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))
+        if updates is not None:
+            self._updates.update(updates)
+
+        if methods:
+            if auxout is None:
+                self.step = theano.Method(args, [], updates=self._updates)
+                self.step_cost = theano.Method(args, cost, updates=self._updates)
+            else:
+                # step_cost always returns a list when auxout is given
+                self.step = theano.Method(
+                        args, [] + auxout,
+                        updates=self._updates)
+                self.step_cost = theano.Method(
+                        args, [cost] + auxout,
+                        updates=self._updates)
+
+
+    updates = property(lambda self: self._updates.copy())
+
+    def _instance_initialize(self, obj):
+        pass
+
+def sgd_minimizer(stepsize=None):
+    """Curry the stepsize argument to StochasticGradientDescent, providing the standard minimizer interface
+
+    :returns: standard minimizer constructor f(args, cost, params, gradients=None)
+    """
+    def f(args, cost, params, gradients=None, updates=None, auxout=None):
+        return StochasticGradientDescent(args, cost, params, gradients=gradients, stepsize=stepsize,
+                updates=updates, auxout=auxout)
+    return f
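For context, a minimal usage sketch of this class, adapted from the accompanying tests in pylearn/gd/tests/test_sgd.py; it assumes a Theano version old enough to still provide theano.Module and theano.Method:

    import theano
    from pylearn.gd import sgd

    x = theano.tensor.dscalar('x')
    y = theano.tensor.dscalar('y')

    # minimize (1 - x*y)**2 with respect to y, using a fixed stepsize
    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01)
    M.y = y                # expose y so the compiled module can read and write it
    m = M.make()
    m.y = 5.0              # initial parameter value
    for i in xrange(100):
        cost = m.step_cost(3.0)   # one gradient step at x=3.0; returns the cost
    # after the loop, m.y should be close to 1/3

step(...) performs the same parameter update without returning the cost.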
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/stopper.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,131 @@
+import time
+"""Early stopping iterators
+
+The idea here is to supply early-stopping heuristics that can be used in the
+form:
+
+    stopper = SomeEarlyStopper()
+
+    for i in stopper():
+        # train from data
+        if i.set_score:
+            i.score = validation_score
+
+
+So far I only have one heuristic, so maybe this won't scale.
+"""
+
+class Stopper(object):
+
+    def train(self, data, update_rows_fn, update, validate, save=None):
+        """Return the best model trained on data
+
+        Parameters:
+        data - a thing that accepts getitem(<list of int64>), or a tuple of such things
+        update_rows_fn - fn : int --> <list or tensor of int>
+        update - fn: update an internal model from elements of data
+        validate - fn: evaluate an internal model based on elements of data
+        save - fn: return a copy of the internal model
+
+        The body of this function exhausts the <self> iterator, and trains a
+        model using early stopping in the process.
+        """
+
+        best = None
+        for stp in self:
+            i = stp.iter
+
+            # call update on some training set rows
+            t_rows = update_rows_fn(i)
+            if isinstance(data, (tuple, list)):
+                update(*[d[t_rows] for d in data])
+            else:
+                update(data[t_rows])
+
+            if stp.set_score:
+                stp.score = validate()
+                if (stp.score < stp.best_score) and save:
+                    best = save()
+        return best
+
+    def find_min(self, step, check, save):
+        best = None
+        for stp in self:
+            step()
+            if stp.set_score:
+                stp.score = check()
+                if (stp.score < stp.best_score) and save:
+                    best = (save(), stp.iter, stp.score)
+        return best
+
+class ICML08Stopper(Stopper):
+    @staticmethod
+    def icml08(ntrain, batchsize):
+        """Some setting similar to what I used for ICML08 submission"""
+        #TODO: what did I actually use? put that in here.
+        return ICML08Stopper(30*ntrain/batchsize,
+                ntrain/batchsize, 0.96, 2.0, 100000000)
+
+    def __init__(self, i_wait, v_int, min_improvement, patience, hard_limit, hard_time_limit=None):
+        self.initial_wait = i_wait
+        self.set_score_interval = v_int
+        self.min_improvement = min_improvement
+        self.patience = patience
+        self.hard_limit = hard_limit
+        self.hard_limit_seconds = hard_time_limit
+        self.start_time = time.time()
+
+        self.best_score = float('inf')
+        self.best_iter = -1
+        self.iter = -1
+
+        self.set_score = False
+        self.score = None
+
+    def __iter__(self):
+        return self
+
+    E_set_score = 'when iter.set_score is True, caller must assign a score to iter.score'
+    def next(self):
+
+        #print 'ICML08 stopper, were doing a next'
+
+        if self.set_score: #left over from last time
+            if self.score is None:
+                raise Exception(ICML08Stopper.E_set_score)
+            if self.score < (self.best_score * self.min_improvement):
+                (self.best_score, self.best_iter) = (self.score, self.iter)
+            self.score = None #un-set it
+
+
+        starting = self.iter < self.initial_wait
+        waiting = self.iter < (self.patience * self.best_iter)
+        if self.hard_limit_seconds != None:
+            times_up = (time.time() - self.start_time) > self.hard_limit_seconds
+        else: times_up = False
+        if (starting or waiting) and not times_up:
+            # continue to iterate
+            self.iter += 1
+            if self.iter == self.hard_limit:
+                raise StopIteration
+            self.set_score = (self.iter % self.set_score_interval == 0)
+            return self
+
+        raise StopIteration
+
+class NStages(ICML08Stopper):
+    """Run for a fixed number of steps, checking validation set every so
+    often."""
+    def __init__(self, hard_limit, v_int):
+        ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit)
+
+    #TODO: could optimize next() function. Most of what's in ICML08Stopper.next()
+    #is not necessary
+
+def geometric_patience(i_wait, v_int, min_improvement, patience, hard_limit):
+    return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit)
+
+def nstages(hard_limit, v_int):
+    return ICML08Stopper(hard_limit, v_int, 1.0, 1.0, hard_limit)
+
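For context, a minimal sketch of the iteration protocol described in the module docstring, iterating the stopper object directly the way Stopper.train and Stopper.find_min do. The helpers train_one_batch, compute_validation_error, and snapshot_model are placeholders, not part of this changeset:

    from pylearn.gd import stopper

    # wait at least 30 iterations before considering stopping, validate every
    # iteration, count a new score as an improvement only if it is below
    # 0.96 * best_score, keep running while iter < 2.0 * best_iter, and never
    # exceed 10**8 iterations
    stp = stopper.ICML08Stopper(i_wait=30, v_int=1, min_improvement=0.96,
                                patience=2.0, hard_limit=100000000)

    best = None
    for it in stp:
        train_one_batch()                            # placeholder: one model update
        if it.set_score:
            it.score = compute_validation_error()    # placeholder: validation score
            if it.score < it.best_score:
                best = snapshot_model()              # placeholder: copy of the model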
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/tests/test_sgd.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,75 @@
+import theano
+from theano.compile.debugmode import DebugMode
+from pylearn.gd import sgd
+
+mode = theano.compile.mode.get_default_mode()
+if isinstance(mode, DebugMode):
+    mode = 'FAST_RUN'
+
+def test_sgd0():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01)
+    M.y = y
+    m = M.make(mode=mode)
+    m.y = 5.0
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        #print c[0], m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+def test_sgd_stepsize_variable():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+    lr = theano.tensor.dscalar('lr')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=lr)
+    M.y = y
+    M.lr = lr
+    m = M.make(mode=mode)
+    m.y = 5.0
+    m.lr = 0.01
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+
+    #test that changing the lr has impact
+
+    m.y = 5.0
+    m.lr = 0.0
+    for i in xrange(10):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert m.y == 5.0
+
+def test_sgd_stepsize_none():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y])
+    M.y = y
+    m = M.make(mode=mode)
+    m.y = 5.0
+    #there should be a learning rate here by default
+    assert m.stepsize is None
+    m.stepsize = 0.01
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+if __name__ == '__main__':
+    test_sgd0()