changeset 549:16894d38ce48

moving stuff in algorithms, added rnn
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 27 Nov 2008 21:42:02 -0500
parents d3791c59f36e
children b52c1a8811a6
files pylearn/algorithms/_test_linear_regression.py pylearn/algorithms/_test_logistic_regression.py pylearn/algorithms/_test_onehotop.py pylearn/algorithms/onehotop.py pylearn/algorithms/sandbox/_test_onehotop.py pylearn/algorithms/sandbox/onehotop.py pylearn/algorithms/sandbox/stat_ops.py pylearn/algorithms/sgd.py pylearn/algorithms/stat_ops.py pylearn/algorithms/tests/test_linear_regression.py pylearn/algorithms/tests/test_logistic_regression.py
diffstat 11 files changed, 286 insertions(+), 267 deletions(-)
--- a/pylearn/algorithms/_test_linear_regression.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-
-import unittest
-from linear_regression import *
-from make_test_datasets import *
-import numpy
-
-class test_linear_regression(unittest.TestCase):
-
-    def test1(self):
-        trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3,
-                                                                      n_targets=2,
-                                                                      n_examples=100,
-                                                                      f=linear_predictor)
-        
-        assert trainset.fields()['input'].shape==(50,3)
-        assert testset.fields()['target'].shape==(50,2)
-        regressor = LinearRegression(L2_regularizer=0.1)
-        predictor = regressor(trainset)
-        test_data = testset.fields()
-        mse = predictor.compute_mse(test_data['input'],test_data['target'])
-        print 'mse = ',mse
-        
-if __name__ == '__main__':
-    unittest.main()
-        
--- a/pylearn/algorithms/_test_logistic_regression.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-from logistic_regression import *
-import sys, time
-
-if __name__ == '__main__':
-    pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
-    pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
-    if 1:
-        lrc = Module_Nclass()
-
-        print '================'
-        print lrc.update.pretty()
-        print '================'
-        print lrc.update.pretty(mode = theano.Mode('py', 'fast_run'))
-        print '================'
-#         print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace'))
-#         print '================'
-
-#        sys.exit(0)
-
-        lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'))
-        #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'))
-        #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN')
-
-        data_x = N.random.randn(5, 10)
-        data_y = (N.random.randn(5) > 0)
-
-        t = time.time()
-        for i in xrange(10000):
-            lr.lr = 0.02
-            xe = lr.update(data_x, data_y) 
-            #if i % 100 == 0:
-            #    print i, xe
-
-        print 'training time:', time.time() - t
-        print 'final error', xe
-
-        #print
-        #print 'TRAINED MODEL:'
-        #print lr
-
-    if 0:
-        lrc = Module()
-
-        lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN')
-
-        data_x = N.random.randn(5, 10)
-        data_y = (N.random.randn(5, 1) > 0)
-
-        for i in xrange(10000):
-            xe = lr.update(data_x, data_y)
-            if i % 100 == 0:
-                print i, xe
-
-        print
-        print 'TRAINED MODEL:'
-        print lr
-
-
-
-
--- a/pylearn/algorithms/_test_onehotop.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-from onehotop import one_hot
-
-import unittest
-from theano import compile
-from theano import gradient
-
-from theano.tensor import as_tensor
-
-import random
-import numpy.random
-
-class T_OneHot(unittest.TestCase):
-    def test0(self):
-        x = as_tensor([3, 2, 1])
-        y = as_tensor(5)
-        o = one_hot(x, y)
-        y = compile.eval_outputs([o])
-        self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
-
-if __name__ == '__main__':
-    unittest.main()
--- a/pylearn/algorithms/onehotop.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-"""
-One hot Op
-"""
-
-#from theano import tensor
-from theano.tensor import as_tensor, Tensor
-from theano.gof import op
-from theano.gof.graph import Apply
-
-import numpy
-
-class OneHot(op.Op):
-    """
-    Construct a one-hot vector, x out of y.
-
-    @todo: Document inputs and outputs
-    @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64!
-    @todo: Use 'bool' as output dtype, not 'int64' ?
-    @todo: Allow this to operate on column vectors (Tensor)
-    @todo: Describe better.
-    """
-
-    def make_node(self, x, y):
-        """
-        @type x: Vector L{Tensor} of integers
-        @param x: The entries of the one-hot vector to be one.
-        @type y: Integer scalar L{Tensor}
-        @param y: The length (#columns) of the one-hot vectors.
-        @return: A L{Tensor} of one-hot vectors
-
-        @precondition: x < y for all entries of x
-        @todo: Check that x and y are int types
-        """
-        x = as_tensor(x)
-        y = as_tensor(y)
-        #assert x.dtype[0:3] == "int"
-        #assert y.dtype[0:3] == "int"
-        inputs = [x, y]
-        ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])]
-        #outputs = [tensor.Tensor("float64", broadcastable=[False, False])]
-        #outputs = [Tensor("int64", broadcastable=[False, False])]
-        outputs = [Tensor("float64", broadcastable=[False, False]).make_result()]
-        node = Apply(op = self, inputs = inputs, outputs = outputs)
-        return node
-
-    def perform(self, node, (x, y), (out, )):
-        assert x.dtype == "int64" or x.dtype == "int32"
-        assert x.ndim == 1
-        assert y.dtype == "int64" or x.dtype == "int32"
-        assert y.ndim == 0
-        out[0] = numpy.zeros((x.shape[0], y), dtype="float64")
-        for c in range(x.shape[0]):
-            assert x[c] < y
-            out[0][c, x[c]] = 1
-
-    def grad(self, (x, y), (out_gradient, )):
-        return None, None
-one_hot = OneHot()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/_test_onehotop.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,21 @@
+from onehotop import one_hot
+
+import unittest
+from theano import compile
+from theano import gradient
+
+from theano.tensor import as_tensor
+
+import random
+import numpy.random
+
+class T_OneHot(unittest.TestCase):
+    def test0(self):
+        x = as_tensor([3, 2, 1])
+        y = as_tensor(5)
+        o = one_hot(x, y)
+        y = compile.eval_outputs([o])
+        self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
+
+if __name__ == '__main__':
+    unittest.main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/onehotop.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,58 @@
+"""
+One hot Op
+"""
+
+#from theano import tensor
+from theano.tensor import as_tensor, Tensor
+from theano.gof import op
+from theano.gof.graph import Apply
+
+import numpy
+
+class OneHot(op.Op):
+    """
+    Construct a matrix of one-hot rows: one row per entry of x, with y columns.
+
+    @todo: Document inputs and outputs
+    @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64!
+    @todo: Use 'bool' as output dtype, not 'int64' ?
+    @todo: Allow this to operate on column vectors (Tensor)
+    @todo: Describe better.
+    """
+
+    def make_node(self, x, y):
+        """
+        @type x: Vector L{Tensor} of integers
+        @param x: The entries of the one-hot vector to be one.
+        @type y: Integer scalar L{Tensor}
+        @param y: The length (#columns) of the one-hot vectors.
+        @return: A L{Tensor} of one-hot vectors
+
+        @precondition: x < y for all entries of x
+        @todo: Check that x and y are int types
+        """
+        x = as_tensor(x)
+        y = as_tensor(y)
+        #assert x.dtype[0:3] == "int"
+        #assert y.dtype[0:3] == "int"
+        inputs = [x, y]
+        ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])]
+        #outputs = [tensor.Tensor("float64", broadcastable=[False, False])]
+        #outputs = [Tensor("int64", broadcastable=[False, False])]
+        outputs = [Tensor("float64", broadcastable=[False, False]).make_result()]
+        node = Apply(op = self, inputs = inputs, outputs = outputs)
+        return node
+
+    def perform(self, node, (x, y), (out, )):
+        assert x.dtype == "int64" or x.dtype == "int32"
+        assert x.ndim == 1
+        assert y.dtype == "int64" or y.dtype == "int32"
+        assert y.ndim == 0
+        out[0] = numpy.zeros((x.shape[0], y), dtype="float64")
+        for c in range(x.shape[0]):
+            assert x[c] < y
+            out[0][c, x[c]] = 1
+
+    def grad(self, (x, y), (out_gradient, )):
+        return None, None
+one_hot = OneHot()
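
A NumPy-only sketch of what OneHot.perform computes, for quick reference while
reading the diff; the helper name is illustrative and not part of the changeset:

    import numpy

    def one_hot_reference(x, y):
        # One row per entry of x; row c gets a 1 in column x[c], zeros elsewhere.
        out = numpy.zeros((len(x), y), dtype="float64")
        for c, idx in enumerate(x):
            assert idx < y  # precondition stated in OneHot.make_node
            out[c, idx] = 1
        return out

    # one_hot_reference([3, 2, 1], 5) reproduces the expected array in
    # T_OneHot.test0 above.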
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/stat_ops.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,92 @@
+
+import theano
+from theano import gof
+from theano import tensor
+import numpy
+
+
+class ExampleWiseMean(gof.Op):
+    
+    def __init__(self):
+        self.destroy_map = {0: [1, 2]}
+
+    def make_node(self, x):
+        return gof.Apply(self,
+                         [x, tensor.value(float('nan')), tensor.value(0)],
+                         [tensor.Tensor(dtype = 'float64',
+                                        broadcastable = x.type.broadcastable)()])
+
+    def perform(self, node, (x, sum, n), (out,)):
+        if numpy.isnan(sum).any():
+            sum.resize(x.shape, refcheck=0)
+            sum[:] = x
+        else:
+            sum += x
+        n += 1
+        out[0] = sum / n
+
+    def c_code(self, name, node, (x, sum, n), (out, ), sub):
+        return """
+        PyObject* multi;
+        int nelems;
+        if (isnan(((double*)(%(sum)s->data))[0])) {
+            PyArray_Dims dims;
+            dims.len = %(x)s->nd;
+            dims.ptr = %(x)s->dimensions;
+            PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER);
+            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
+            nelems = PyArray_SIZE(%(sum)s);
+            while (nelems--) {
+                // Copy %(x)s in %(sum)s
+                *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1);
+                PyArray_MultiIter_NEXT(multi);
+            }
+        }
+        else {
+            // Add some error checking on the size of x
+            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
+            nelems = PyArray_SIZE(%(sum)s);
+            while (nelems--) {
+                // Add %(x)s to %(sum)s
+                *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1);
+                PyArray_MultiIter_NEXT(multi);
+            }
+        }
+        ((npy_int64*)(%(n)s->data))[0]++;
+        int n = ((npy_int64*)(%(n)s->data))[0];
+        if (%(out)s == NULL) {
+            %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0);
+        }
+        multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s);
+        nelems = PyArray_SIZE(%(sum)s);
+        while (nelems--) {
+            // %(out)s <- %(sum)s / %(n)s
+            *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n;
+            PyArray_MultiIter_NEXT(multi);
+        }        
+        """ % dict(locals(), **sub)
+
+
+
+if __name__ == '__main__':
+    
+    vectors = numpy.random.RandomState(666).rand(10, 2)
+
+    x = tensor.dvector()
+    e = ExampleWiseMean()(x)
+
+    # f = theano.function([x], [e], linker = 'py')
+
+    # for i, v in enumerate(vectors):
+    #     print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
+
+    # print
+
+    f = theano.function([x], [e], linker = 'c|py')
+
+    for i, v in enumerate(vectors):
+        print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
+
+
+
+
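The __main__ block above checks ExampleWiseMean against numpy.mean over a
growing prefix of rows. A pure-NumPy sketch of the same running-mean semantics
(the class name is hypothetical, not part of the changeset):

    import numpy

    class RunningMean(object):
        def __init__(self):
            self.sum = None  # the Op uses a NaN-filled value as its "empty" sentinel
            self.n = 0
        def __call__(self, x):
            x = numpy.asarray(x, dtype="float64")
            if self.sum is None:
                self.sum = x.copy()
            else:
                self.sum += x  # accumulated in place, as declared by destroy_map
            self.n += 1
            return self.sum / self.n

    m = RunningMean()
    assert numpy.allclose(m([1.0, 3.0]), [1.0, 3.0])
    assert numpy.allclose(m([3.0, 5.0]), [2.0, 4.0])  # mean of the two rows so far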
--- a/pylearn/algorithms/sgd.py	Thu Nov 20 12:18:16 2008 -0500
+++ b/pylearn/algorithms/sgd.py	Thu Nov 27 21:42:02 2008 -0500
@@ -7,14 +7,34 @@
 from .minimizer import minimizer_factory
 
 class StochasticGradientDescent(module.FancyModule):
-    def __init__(self, args, cost, params, gradients=None, lr=None):
+    """Fixed stepsize gradient descent"""
+    def __init__(self, args, cost, params, gradients=None, stepsize=None):
+        """
+        :param stepsize: the step to take in (negative) gradient direction
+        :type stepsize: None, scalar value, or scalar TensorResult
+        """
         super(StochasticGradientDescent, self).__init__()
 
-        self.lr = lr if lr is not None else module.Member(T.dscalar())
+        self.stepsize_init = None
+
+        if stepsize is None:
+            self.stepsize = module.Member(T.dscalar())
+        elif isinstance(stepsize, T.TensorResult):
+            self.stepsize = stepsize
+        else:
+            if 1: #TODO: why is this necessary? why does the else clause not work?
+                self.stepsize = module.Member(T.dscalar())
+                self.stepsize_init = stepsize
+            else:
+                self.stepsize = module.Member(T.value(stepsize))
+
+        if self.stepsize.ndim != 0:
+            raise ValueError('stepsize must be a scalar', stepsize)
+
         self.params = params
         self.gparams = T.grad(cost, self.params) if gradients is None else gradients
 
-        self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gparams))
+        self.updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))
 
         self.step = module.Method(
                 args, [],
@@ -22,17 +42,16 @@
         self.step_cost = module.Method(
                 args, cost,
                 updates=self.updates)
+    def _instance_initialize(self, obj):
+        obj.stepsize = self.stepsize_init
 
-    #no initialization is done here.
-    # rationale: the only parameter is lr.  
-    # If the user wanted lr constant, he would pass the constant to the constructor
-    # If the user wanted lr a computed value, he would pass that to the constructor.
-    # If the user wanted a dynamic lr, he would pass a tensor.value() for lr.
-    # If the default of a T.dscalar() is used, then it must be initialized elsewhere explicitly.
 
 @minimizer_factory('sgd')
-def sgd_minimizer(lr):
+def sgd_minimizer(stepsize=None):
     def m(i,c,p,g=None):
-        return StochasticGradientDescent(i, c, p,lr=lr)
+        return StochasticGradientDescent(i, c, p, stepsize=stepsize)
     return m
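
The update rule registered in self.updates is plain fixed-stepsize gradient
descent. A minimal NumPy sketch of one such step (the helper name is
hypothetical):

    import numpy

    def sgd_step(params, grads, stepsize):
        # p <- p - stepsize * g for each parameter, mirroring self.updates.
        return [p - stepsize * g for p, g in zip(params, grads)]

    w = numpy.array([1.0, -2.0])
    g = numpy.array([0.5, -0.5])
    w, = sgd_step([w], [g], stepsize=0.1)  # -> array([ 0.95, -1.95])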
 
--- a/pylearn/algorithms/stat_ops.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-
-import theano
-from theano import gof
-from theano import tensor
-import numpy
-
-
-class ExampleWiseMean(gof.Op):
-    
-    def __init__(self):
-        self.destroy_map = {0: [1, 2]}
-
-    def make_node(self, x):
-        return gof.Apply(self,
-                         [x, tensor.value(float('nan')), tensor.value(0)],
-                         [tensor.Tensor(dtype = 'float64',
-                                        broadcastable = x.type.broadcastable)()])
-
-    def perform(self, node, (x, sum, n), (out,)):
-        if numpy.isnan(sum).any():
-            sum.resize(x.shape, refcheck=0)
-            sum[:] = x
-        else:
-            sum += x
-        n += 1
-        out[0] = sum / n
-
-    def c_code(self, name, node, (x, sum, n), (out, ), sub):
-        return """
-        PyObject* multi;
-        int nelems;
-        if (isnan(((double*)(%(sum)s->data))[0])) {
-            PyArray_Dims dims;
-            dims.len = %(x)s->nd;
-            dims.ptr = %(x)s->dimensions;
-            PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER);
-            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
-            nelems = PyArray_SIZE(%(sum)s);
-            while (nelems--) {
-                // Copy %(x)s in %(sum)s
-                *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1);
-                PyArray_MultiIter_NEXT(multi);
-            }
-        }
-        else {
-            // Add some error checking on the size of x
-            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
-            nelems = PyArray_SIZE(%(sum)s);
-            while (nelems--) {
-                // Add %(x)s to %(sum)s
-                *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1);
-                PyArray_MultiIter_NEXT(multi);
-            }
-        }
-        ((npy_int64*)(%(n)s->data))[0]++;
-        int n = ((npy_int64*)(%(n)s->data))[0];
-        if (%(out)s == NULL) {
-            %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0);
-        }
-        multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s);
-        nelems = PyArray_SIZE(%(sum)s);
-        while (nelems--) {
-            // %(out)s <- %(sum)s / %(n)s
-            *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n;
-            PyArray_MultiIter_NEXT(multi);
-        }        
-        """ % dict(locals(), **sub)
-
-
-
-if __name__ == '__main__':
-    
-    vectors = numpy.random.RandomState(666).rand(10, 2)
-
-    x = tensor.dvector()
-    e = ExampleWiseMean()(x)
-
-    # f = theano.function([x], [e], linker = 'py')
-
-    # for i, v in enumerate(vectors):
-    #     print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
-
-    # print
-
-    f = theano.function([x], [e], linker = 'c|py')
-
-    for i, v in enumerate(vectors):
-        print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
-
-
-
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/tests/test_linear_regression.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,25 @@
+
+import unittest
+from linear_regression import *
+from make_test_datasets import *
+import numpy
+
+class test_linear_regression(unittest.TestCase):
+
+    def test1(self):
+        trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3,
+                                                                      n_targets=2,
+                                                                      n_examples=100,
+                                                                      f=linear_predictor)
+        
+        assert trainset.fields()['input'].shape==(50,3)
+        assert testset.fields()['target'].shape==(50,2)
+        regressor = LinearRegression(L2_regularizer=0.1)
+        predictor = regressor(trainset)
+        test_data = testset.fields()
+        mse = predictor.compute_mse(test_data['input'],test_data['target'])
+        print 'mse = ',mse
+        
+if __name__ == '__main__':
+    unittest.main()
+        
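compute_mse is exercised here but not defined in this changeset; a sketch of
the quantity it is presumably expected to report for an affine predictor
(W and b are illustrative names for the learned parameters):

    import numpy

    def compute_mse_reference(X, Y, W, b):
        # Mean squared error of the prediction X.dot(W) + b against targets Y.
        pred = numpy.dot(X, W) + b
        return numpy.mean((pred - Y) ** 2)

    # With the shapes asserted in test1: X is (50, 3), W would be (3, 2),
    # b has length 2, and Y is (50, 2).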
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/tests/test_logistic_regression.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,60 @@
+from logistic_regression import *
+import sys, time
+
+if __name__ == '__main__':
+    pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
+    pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
+    if 1:
+        lrc = Module_Nclass()
+
+        print '================'
+        print lrc.update.pretty()
+        print '================'
+        print lrc.update.pretty(mode = theano.Mode('py', 'fast_run'))
+        print '================'
+#         print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace'))
+#         print '================'
+
+#        sys.exit(0)
+
+        lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'))
+        #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'))
+        #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN')
+
+        data_x = N.random.randn(5, 10)
+        data_y = (N.random.randn(5) > 0)
+
+        t = time.time()
+        for i in xrange(10000):
+            lr.lr = 0.02
+            xe = lr.update(data_x, data_y) 
+            #if i % 100 == 0:
+            #    print i, xe
+
+        print 'training time:', time.time() - t
+        print 'final error', xe
+
+        #print
+        #print 'TRAINED MODEL:'
+        #print lr
+
+    if 0:
+        lrc = Module()
+
+        lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN')
+
+        data_x = N.random.randn(5, 10)
+        data_y = (N.random.randn(5, 1) > 0)
+
+        for i in xrange(10000):
+            xe = lr.update(data_x, data_y)
+            if i % 100 == 0:
+                print i, xe
+
+        print
+        print 'TRAINED MODEL:'
+        print lr
+
+
+
+