changeset 666:d69e668ab904

updating minimizer, sgd to new theano. added sgd tests
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 30 Mar 2009 12:25:42 -0400
parents 2704c8688ced
children 719194960d18
files pylearn/algorithms/minimizer.py pylearn/algorithms/sgd.py pylearn/algorithms/tests/test_sgd.py
diffstat 3 files changed, 96 insertions(+), 40 deletions(-)
--- a/pylearn/algorithms/minimizer.py	Wed Feb 11 01:43:14 2009 -0500
+++ b/pylearn/algorithms/minimizer.py	Mon Mar 30 12:25:42 2009 -0400
@@ -1,10 +1,8 @@
 """Define the interface and factory for gradient-based minimizers.
 """
-from theano.compile import module
+import theano
 
-_minimizers = {}
-
-class DummyMinimizer(module.FancyModule):
+class DummyMinimizer(theano.Module):
     """ The idea of a minimizer is that it provides an `step` function that will
     eventually converge toward (maybe realize?) the minimum of a cost function.
 
@@ -15,29 +13,24 @@
     """
     def __init__(self, args, cost, parameters, gradients=None):
         super(DummyMinimizer, self).__init__()
-        #gradients = T.grad(cost, parameters) if gradients is None else gradients
-        #self.step = module.Method(args, None)
-        #self.step_cost = module.Method(args, cost)
+
     def _instance_step(self, obj, *args):
-        pass
-    def _instance_step_cost(self, obj, *args):
+        """Move the parameters toward the minimum of a cost
+
+        :param args: The arguments here should be values for the Variables that were in the
+        `args` argument to the constructor.
+
+        :Return:  None
+        """
         pass
 
-def minimizer_factory(algo):
-    def decorator(fn):
-        if algo in _minimizers:
-            raise Exception('algo in use', algo)
-        else:
-            _minimizers[algo] = fn
-        return fn
-    return decorator
+    def _instance_step_cost(self, obj, *args):
+        """Move the parameters toward the minimum of a cost, and compute the cost
 
-@minimizer_factory('dummy')
-def dummy_minimizer():
-    def m(args, cost, parameters, gradients=None):
-        return DummyMinimizer(args, cost, parameters, gradients)
-    return m
+        :param args: The arguments here should be values for the Variables that were in the
+        `args` argument to the constructor.
 
-def make_minimizer(algo, **kwargs):
-    return _minimizers[algo](**kwargs)
+        :Return:  The current cost value.
+        """
+        pass
 
--- a/pylearn/algorithms/sgd.py	Wed Feb 11 01:43:14 2009 -0500
+++ b/pylearn/algorithms/sgd.py	Mon Mar 30 12:25:42 2009 -0400
@@ -1,45 +1,40 @@
 """A stochastic gradient descent minimizer. (Possibly the simplest minimizer.)
 """
 
-from theano.compile import module
-from theano import tensor as T
+import theano
 
-class StochasticGradientDescent(module.FancyModule):
+class StochasticGradientDescent(theano.Module):
     """Fixed stepsize gradient descent"""
     def __init__(self, args, cost, params, gradients=None, stepsize=None):
         """
         :param stepsize: the step to take in (negative) gradient direction
-        :type stepsize: None, scalar value, or scalar TensorResult
+        :type stepsize: None, scalar value, or scalar TensorVariable
         """
         super(StochasticGradientDescent, self).__init__()
         self.stepsize_init = None
 
         if stepsize is None:
-            self.stepsize = module.Member(T.dscalar())
-        elif isinstance(stepsize, T.TensorResult):
+            self.stepsize = theano.tensor.dscalar()
+        elif isinstance(stepsize, theano.tensor.TensorVariable):
             self.stepsize = stepsize
         else:
-            self.stepsize = module.Member(T.value(stepsize))
+            self.stepsize = theano.tensor.as_tensor_variable(stepsize)
 
         if self.stepsize.ndim != 0:
-            raise ValueError('stepsize must be a scalar', stepsize)
+            raise TypeError('stepsize must be a scalar', stepsize)
 
         self.params = params
-        self.gparams = T.grad(cost, self.params) if gradients is None else gradients
+        self.gparams = theano.tensor.grad(cost, self.params) if gradients is None else gradients
 
         self.updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))
 
-        self.step = module.Method(
+        self.step = theano.Method(
                 args, [],
                 updates=self.updates)
-        self.step_cost = module.Method(
+        self.step_cost = theano.Method(
                 args, cost,
                 updates=self.updates)
+
     def _instance_initialize(self, obj):
         pass
 
-def sgd_minimizer(stepsize=None, **args):
-    def m(i,c,p,g=None):
-        return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args)
-    return m
-
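
The heart of StochasticGradientDescent is the `updates` dictionary: each parameter p is replaced by p - stepsize * g, where g is the matching gradient, and theano.Method applies the whole dictionary on every call. A pure-Python sketch of that rule, with one step worked through on the test cost below (illustrative only, not part of the changeset):

    def sgd_update(params, grads, stepsize=0.01):
        # one fixed-stepsize SGD step: p <- p - stepsize * g
        return [p - stepsize * g for p, g in zip(params, grads)]

    # For cost (1.0 - x * y)**2 at x = 3.0, y = 5.0:
    #   dcost/dy = 2 * (1.0 - x * y) * (-x) = 2 * (1 - 15) * (-3) = 84.0
    # so one step moves y from 5.0 to 5.0 - 0.01 * 84.0 = 4.16
    (y_new,) = sgd_update([5.0], [84.0])
    assert abs(y_new - 4.16) < 1e-12
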
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/tests/test_sgd.py	Mon Mar 30 12:25:42 2009 -0400
@@ -0,0 +1,68 @@
+import theano
+from pylearn.algorithms import sgd
+
+def test_sgd0():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=0.01)
+    M.y = y
+    m = M.make()
+    m.y = 5.0
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+def test_sgd_stepsize_variable():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+    lr = theano.tensor.dscalar('lr')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y], stepsize=lr)
+    M.y = y
+    M.lr = lr
+    m = M.make()
+    m.y = 5.0
+    m.lr = 0.01
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
+
+    # test that the learning rate has an effect: with lr = 0.0, y must not move
+
+    m.y = 5.0
+    m.lr = 0.0
+    for i in xrange(10):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert m.y == 5.0
+
+def test_sgd_stepsize_none():
+
+    x = theano.tensor.dscalar('x')
+    y = theano.tensor.dscalar('y')
+
+    M = sgd.StochasticGradientDescent([x], (1.0 - x * y)**2, [y])
+    M.y = y
+    m = M.make()
+    m.y = 5.0
+    # no stepsize was passed to the constructor, so it starts out unset (None)
+    assert m.stepsize is None
+    m.stepsize = 0.01
+    for i in xrange(100):
+        c = m.step_cost(3.0)
+        # print c, m.y
+
+    assert c < 1.0e-5
+    assert abs(m.y - (1.0 / 3)) < 1.0e-4
+
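
All three tests minimize the same cost, (1.0 - x * y)**2, over y with x held at 3.0. The unique minimum is y = 1/x = 1/3, where both the cost and its gradient vanish, which is exactly what the closing assertions verify. A standalone check of that claim, independent of Theano (illustrative only):

    def cost(x, y):
        return (1.0 - x * y) ** 2

    def dcost_dy(x, y):
        # d/dy (1 - x*y)**2 = 2 * (1 - x*y) * (-x)
        return 2.0 * (1.0 - x * y) * (-x)

    # at y = 1/x the residual (1 - x*y) is zero, so cost and gradient are both zero
    assert abs(cost(3.0, 1.0 / 3.0)) < 1e-12
    assert abs(dcost_dy(3.0, 1.0 / 3.0)) < 1e-12
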