changeset 952:5f80351bc762

Moving sgd to a new 'gd' pylearn module, where it should be joined by TONGA and Hessian-Free.
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 19 Aug 2010 11:53:19 -0400
parents 5d70dfc70ec0
children 2eb98a740823
files pylearn/gd/README.txt pylearn/gd/__init__.py pylearn/gd/sgd.py pylearn/gd/stopper.py pylearn/gd/tests/test_sgd.py
diffstat 5 files changed, 294 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/README.txt	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,2 @@
+
+see __init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/__init__.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,11 @@
+"""Gradient Descent
+
+This module should contain tools and algorithms related to [stochastic] gradient descent.  For
+example:
+
+ - SGD with/without momentum
+ - Hessian Free GD
+ - TONGA
+ - Stopping criteria (incl. for use in theano functions)
+
+"""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/sgd.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,75 @@
+"""A stochastic gradient descent minimizer. (Possibly the simplest minimizer.)
+"""
+
+import theano
+
+class StochasticGradientDescent(theano.Module):
+    """Fixed stepsize gradient descent
+
+    Methods for gradient descent are:
+    - step(arg_vals), which updates the params (returning auxout values when given)
+    - step_cost(arg_vals), which updates the params and returns the cost value (plus auxout values, if any)
+    
+    """
+    def __init__(self, args, cost, params, 
+                 gradients=None, stepsize=None, 
+                 updates=None, auxout=None, methods=True):
+        """
+        :param stepsize: the step to take in (negative) gradient direction
+        :type stepsize: None, scalar value, or scalar TensorVariable
+
+        :param updates: extra symbolic updates to make when evaluating either step or step_cost
+                        (these override the gradients if necessary)
+        :type updates: dict Variable -> Variable
+        :param auxout: auxiliary outputs, list containing output symbols to 
+                      compute at the same time as cost (for efficiency)
+        :param methods: Should this module define the step and step_cost methods?
+        """
+        super(StochasticGradientDescent, self).__init__()
+        self.stepsize_init = None
+
+        if stepsize is None:
+            self.stepsize = theano.tensor.dscalar()
+        elif isinstance(stepsize, theano.tensor.TensorVariable):
+            self.stepsize = stepsize
+        else:
+            self.stepsize = (theano.tensor.as_tensor_variable(stepsize))
+
+        if self.stepsize.ndim != 0:
+            raise TypeError('stepsize must be a scalar', stepsize)
+
+        self.params = params
+        self.gparams = theano.tensor.grad(cost, self.params) if gradients is None else gradients
+
+        self._updates = (dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams)))
+        if updates is not None:
+            self._updates.update(updates)
+
+        if methods:
+            if auxout is None:
+                self.step = theano.Method(args, [], updates=self._updates)
+                self.step_cost = theano.Method(args, cost, updates=self._updates)
+            else:
+                # when auxout is given, step and step_cost both return lists
+                self.step = theano.Method(
+                        args, [] + auxout,
+                        updates=self._updates)
+                self.step_cost = theano.Method(
+                        args, [cost]+auxout,
+                        updates=self._updates)
+
+
+    updates = property(lambda self: self._updates.copy())
+
+    def _instance_initialize(self, obj):
+        pass
+
+def sgd_minimizer(stepsize=None):
+    """Curry the stepsize argument to StochasticGradientDescent, providing standard minimizer interface
+    
+    :returns: standard minimizer constructor f(args, cost, params, gradient=None)
+    """
+    def f(args, cost, params, gradients=None, updates=None, auxout=None):
+        return StochasticGradientDescent(args, cost, params, gradients=gradients, stepsize=stepsize,
+                updates=updates, auxout=auxout)
+    return f
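Editor's note (not part of the changeset): a minimal sketch of how the module above is meant to be driven. It follows the pattern used by the tests further down and assumes the old theano.Module / theano.Method API that sgd.py is written against:

    import theano
    from pylearn.gd import sgd

    x = theano.tensor.dscalar('x')
    y = theano.tensor.dscalar('y')

    # minimize (1 - x*y)**2 with respect to y, for a given input x
    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01)
    M.y = y                      # expose the parameter as a module member
    m = M.make()
    m.y = 5.0                    # initial parameter value
    for i in xrange(100):
        cost = m.step_cost(3.0)  # one SGD step with x = 3.0
    # m.y should now be close to 1/3

    # the same module built through the curried minimizer interface
    minimizer = sgd.sgd_minimizer(stepsize=0.01)
    M2 = minimizer([x], (1.0 - x * y)**2, [y])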
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/stopper.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,131 @@
+"""Early stopping iterators
+
+The idea here is to supply early-stopping heuristics that can be used in the
+form:
+
+    stopper = SomeEarlyStopper()
+
+    for i in stopper:
+        # train from data
+        if i.set_score:
+            i.score = validation_score
+
+So far I only have one heuristic, so maybe this won't scale.
+"""
+
+import time
+
+class Stopper(object):
+
+    def train(self, data, update_rows_fn, update, validate, save=None):
+        """Return the best model trained on data
+
+        Parameters:
+        data - an object that supports __getitem__(<list of int64>), or a tuple of such objects
+        update_rows_fn - fn: int -> <list or tensor of int>, selecting the rows to train on at a given iteration
+        update - fn: update the internal model from the selected elements of data
+        validate - fn: evaluate the internal model and return a validation score
+        save - fn: return a copy of the internal model
+
+        The body of this function exhausts the <self> iterator, and trains a
+        model using early stopping in the process.
+        """
+
+        best = None
+        for stp in self:
+            i = stp.iter
+
+            # call update on some training set rows
+            t_rows = update_rows_fn(i)
+            if isinstance(data, (tuple, list)):
+                update(*[d[t_rows] for d in data])
+            else:
+                update(data[t_rows])
+
+            if stp.set_score:
+                stp.score = validate()
+                if (stp.score < stp.best_score) and save:
+                    best = save()
+        return best
+
+    def find_min(self, step, check, save):
+        best = None
+        for stp in self:
+            step()
+            if stp.set_score:
+                stp.score = check()
+                if (stp.score < stp.best_score) and save:
+                    best = (save(), stp.iter, stp.score)
+        return best
+
+class ICML08Stopper(Stopper):
+    @staticmethod
+    def icml08(ntrain, batchsize):
+        """Some setting similar to what I used for ICML08 submission"""
+        #TODO: what did I actually use? put that in here.
+        return ICML08Stopper(30*ntrain/batchsize,
+                ntrain/batchsize, 0.96, 2.0, 100000000)
+
+    def __init__(self, i_wait, v_int, min_improvement, patience, hard_limit, hard_time_limit=None):
+        self.initial_wait = i_wait
+        self.set_score_interval = v_int
+        self.min_improvement = min_improvement
+        self.patience = patience
+        self.hard_limit = hard_limit
+        self.hard_limit_seconds = hard_time_limit
+        self.start_time = time.time()
+
+        self.best_score = float('inf')
+        self.best_iter = -1
+        self.iter = -1
+
+        self.set_score = False
+        self.score = None
+
+    def __iter__(self):
+        return self
+
+    E_set_score = 'when iter.set_score is True, caller must assign a score to iter.score'
+    def next(self):
+
+        #print 'ICML08 stopper, were doing a next'
+
+        if self.set_score: #left over from last time
+            if self.score is None:
+                raise Exception(ICML08Stopper.E_set_score)
+            if self.score < (self.best_score * self.min_improvement):
+                (self.best_score, self.best_iter) = (self.score, self.iter)
+            self.score = None #un-set it
+
+
+        starting = self.iter < self.initial_wait
+        waiting = self.iter < (self.patience * self.best_iter)
+        if self.hard_limit_seconds is not None:
+            times_up = (time.time() - self.start_time) > self.hard_limit_seconds
+        else: times_up = False
+        if (starting or waiting) and not times_up:
+            # continue to iterate
+            self.iter += 1
+            if self.iter == self.hard_limit:
+                raise StopIteration
+            self.set_score = (self.iter % self.set_score_interval == 0)
+            return self
+
+        raise StopIteration
+
+class NStages(ICML08Stopper):
+    """Run for a fixed number of steps, checking validation set every so
+    often."""
+    def __init__(self, hard_limit, v_int):
+        ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit)
+
+    #TODO: could optimize next() function. Most of what's in ICML08Stopper.next()
+    #is not necessary
+
+def geometric_patience(i_wait, v_int, min_improvement, patience, hard_limit):
+    return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit)
+
+def nstages(hard_limit, v_int):
+    return ICML08Stopper(hard_limit, v_int, 1.0, 1.0, hard_limit)
+
+
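Editor's note (not part of the changeset): a minimal sketch of driving ICML08Stopper by hand, following the loop in Stopper.find_min above; train_one_batch and validation_error are hypothetical callables standing in for real training code:

    from pylearn.gd import stopper

    stop = stopper.ICML08Stopper(i_wait=300, v_int=30, min_improvement=0.96,
                                 patience=2.0, hard_limit=100000)
    best = None
    for stp in stop:
        train_one_batch(stp.iter)            # one model update per iteration
        if stp.set_score:
            stp.score = validation_error()   # caller must set the score when asked
            if stp.score < stp.best_score:
                best = (stp.iter, stp.score)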
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/gd/tests/test_sgd.py	Thu Aug 19 11:53:19 2010 -0400
@@ -0,0 +1,75 @@
+import theano
+from theano.compile.debugmode import DebugMode
+from pylearn.gd import sgd
+
+mode = theano.compile.mode.get_default_mode()
+if isinstance(mode,DebugMode):
+    mode = 'FAST_RUN'
+
+def test_sgd0():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01)
+    M.y = y
+    m = M.make(mode=mode)
+    m.y = 5.0
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        #print c[0], m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+def test_sgd_stepsize_variable():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+    lr = theano.tensor.dscalar('lr')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=lr)
+    M.y = y
+    M.lr = lr
+    m = M.make(mode=mode)
+    m.y = 5.0
+    m.lr = 0.01
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+
+    #test that changing the lr has impact
+
+    m.y = 5.0
+    m.lr = 0.0
+    for i in xrange(10):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert m.y == 5.0
+
+def test_sgd_stepsize_none():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y])
+    M.y = y
+    m = M.make(mode=mode)
+    m.y = 5.0
+    # stepsize was left as None, so it becomes an uninitialized module member
+    assert m.stepsize is None
+    m.stepsize = 0.01
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+if __name__ == '__main__':
+    test_sgd0()
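Editor's note (not part of the changeset): the tests above exercise StochasticGradientDescent directly; a hypothetical companion test for the curried sgd_minimizer interface, written in the same style, might look like:

    def test_sgd_minimizer():
        x = theano.tensor.dscalar('x')
        y = theano.tensor.dscalar('y')

        # build the module through the standard minimizer constructor
        M = sgd.sgd_minimizer(stepsize=0.01)([x], (1.0 - x * y)**2, [y])
        M.y = y
        m = M.make(mode=mode)
        m.y = 5.0
        for i in xrange(100):
            c = m.step_cost(3.0)

        assert c < 1.0e-5
        assert abs(m.y - (1.0 / 3)) < 1.0e-4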