# HG changeset patch
# User James Bergstra
# Date 1227840122 18000
# Node ID 16894d38ce48b8b16f245c04c5f2076cd3121601
# Parent  d3791c59f36edcb985e7c46d915a32fac67e3a09
moving stuff in algorithms, added rnn

diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/_test_linear_regression.py
--- a/pylearn/algorithms/_test_linear_regression.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-
-import unittest
-from linear_regression import *
-from make_test_datasets import *
-import numpy
-
-class test_linear_regression(unittest.TestCase):
-
-    def test1(self):
-        trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3,
-                                                                      n_targets=2,
-                                                                      n_examples=100,
-                                                                      f=linear_predictor)
-
-        assert trainset.fields()['input'].shape==(50,3)
-        assert testset.fields()['target'].shape==(50,2)
-        regressor = LinearRegression(L2_regularizer=0.1)
-        predictor = regressor(trainset)
-        test_data = testset.fields()
-        mse = predictor.compute_mse(test_data['input'],test_data['target'])
-        print 'mse = ',mse
-
-if __name__ == '__main__':
-    unittest.main()
-
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/_test_logistic_regression.py
--- a/pylearn/algorithms/_test_logistic_regression.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-from logistic_regression import *
-import sys, time
-
-if __name__ == '__main__':
-    pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
-    pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
-    if 1:
-        lrc = Module_Nclass()
-
-        print '================'
-        print lrc.update.pretty()
-        print '================'
-        print lrc.update.pretty(mode = theano.Mode('py', 'fast_run'))
-        print '================'
-#        print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace'))
-#        print '================'
-
-#        sys.exit(0)
-
-        lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'))
-        #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'))
-        #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN')
-
-        data_x = N.random.randn(5, 10)
-        data_y = (N.random.randn(5) > 0)
-
-        t = time.time()
-        for i in xrange(10000):
-            lr.lr = 0.02
-            xe = lr.update(data_x, data_y)
-            #if i % 100 == 0:
-            #    print i, xe
-
-        print 'training time:', time.time() - t
-        print 'final error', xe
-
-        #print
-        #print 'TRAINED MODEL:'
-        #print lr
-
-    if 0:
-        lrc = Module()
-
-        lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN')
-
-        data_x = N.random.randn(5, 10)
-        data_y = (N.random.randn(5, 1) > 0)
-
-        for i in xrange(10000):
-            xe = lr.update(data_x, data_y)
-            if i % 100 == 0:
-                print i, xe
-
-        print
-        print 'TRAINED MODEL:'
-        print lr
-
-
-
-
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/_test_onehotop.py
--- a/pylearn/algorithms/_test_onehotop.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-from onehotop import one_hot
-
-import unittest
-from theano import compile
-from theano import gradient
-
-from theano.tensor import as_tensor
-
-import random
-import numpy.random
-
-class T_OneHot(unittest.TestCase):
-    def test0(self):
-        x = as_tensor([3, 2, 1])
-        y = as_tensor(5)
-        o = one_hot(x, y)
-        y = compile.eval_outputs([o])
-        self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
-
-if __name__ == '__main__':
-    unittest.main()
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/onehotop.py
--- a/pylearn/algorithms/onehotop.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-"""
-One hot Op
-"""
-
-#from theano import tensor
-from theano.tensor import as_tensor, Tensor
-from theano.gof import op
-from theano.gof.graph import Apply
-
-import numpy
-
-class OneHot(op.Op):
-    """
-    Construct a one-hot vector, x out of y.
-
-    @todo: Document inputs and outputs
-    @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64!
-    @todo: Use 'bool' as output dtype, not 'int64' ?
-    @todo: Allow this to operate on column vectors (Tensor)
-    @todo: Describe better.
-    """
-
-    def make_node(self, x, y):
-        """
-        @type x: Vector L{Tensor} of integers
-        @param x: The entries of the one-hot vector to be one.
-        @type y: Integer scalar L{Tensor}
-        @param y: The length (#columns) of the one-hot vectors.
-        @return: A L{Tensor} of one-hot vectors
-
-        @precondition: x < y for all entries of x
-        @todo: Check that x and y are int types
-        """
-        x = as_tensor(x)
-        y = as_tensor(y)
-        #assert x.dtype[0:3] == "int"
-        #assert y.dtype[0:3] == "int"
-        inputs = [x, y]
-        ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])]
-        #outputs = [tensor.Tensor("float64", broadcastable=[False, False])]
-        #outputs = [Tensor("int64", broadcastable=[False, False])]
-        outputs = [Tensor("float64", broadcastable=[False, False]).make_result()]
-        node = Apply(op = self, inputs = inputs, outputs = outputs)
-        return node
-
-    def perform(self, node, (x, y), (out, )):
-        assert x.dtype == "int64" or x.dtype == "int32"
-        assert x.ndim == 1
-        assert y.dtype == "int64" or x.dtype == "int32"
-        assert y.ndim == 0
-        out[0] = numpy.zeros((x.shape[0], y), dtype="float64")
-        for c in range(x.shape[0]):
-            assert x[c] < y
-            out[0][c, x[c]] = 1
-
-    def grad(self, (x, y), (out_gradient, )):
-        return None, None
-one_hot = OneHot()
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/sandbox/_test_onehotop.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/_test_onehotop.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,21 @@
+from onehotop import one_hot
+
+import unittest
+from theano import compile
+from theano import gradient
+
+from theano.tensor import as_tensor
+
+import random
+import numpy.random
+
+class T_OneHot(unittest.TestCase):
+    def test0(self):
+        x = as_tensor([3, 2, 1])
+        y = as_tensor(5)
+        o = one_hot(x, y)
+        y = compile.eval_outputs([o])
+        self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
+
+if __name__ == '__main__':
+    unittest.main()
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/sandbox/onehotop.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/onehotop.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,58 @@
+"""
+One hot Op
+"""
+
+#from theano import tensor
+from theano.tensor import as_tensor, Tensor
+from theano.gof import op
+from theano.gof.graph import Apply
+
+import numpy
+
+class OneHot(op.Op):
+    """
+    Construct a matrix of one-hot rows: row c has a single 1 at column x[c].
+
+    @todo: Document inputs and outputs
+    @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64!
+    @todo: Use 'bool' as output dtype, not 'int64' ?
+    @todo: Allow this to operate on column vectors (Tensor)
+    @todo: Describe better.
+    """
+
+    def make_node(self, x, y):
+        """
+        @type x: Vector L{Tensor} of integers
+        @param x: For each row of the result, the column index that is set to one.
+        @type y: Integer scalar L{Tensor}
+        @param y: The length (#columns) of the one-hot vectors.
+        @return: A L{Tensor} of one-hot vectors
+
+        @precondition: x < y for all entries of x
+        @todo: Check that x and y are int types
+        """
+        x = as_tensor(x)
+        y = as_tensor(y)
+        #assert x.dtype[0:3] == "int"
+        #assert y.dtype[0:3] == "int"
+        inputs = [x, y]
+        ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])]
+        #outputs = [tensor.Tensor("float64", broadcastable=[False, False])]
+        #outputs = [Tensor("int64", broadcastable=[False, False])]
+        outputs = [Tensor("float64", broadcastable=[False, False]).make_result()]
+        node = Apply(op = self, inputs = inputs, outputs = outputs)
+        return node
+
+    def perform(self, node, (x, y), (out, )):
+        assert x.dtype == "int64" or x.dtype == "int32"
+        assert x.ndim == 1
+        assert y.dtype == "int64" or y.dtype == "int32"
+        assert y.ndim == 0
+        out[0] = numpy.zeros((x.shape[0], y), dtype="float64")
+        for c in range(x.shape[0]):
+            assert x[c] < y
+            out[0][c, x[c]] = 1
+
+    def grad(self, (x, y), (out_gradient, )):
+        return None, None
+one_hot = OneHot()
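The sandbox test above pins down the intended semantics of `one_hot`. As a plain-numpy cross-check of what `OneHot.perform` computes — a sketch for the reader, not part of the patch; the name `one_hot_reference` is hypothetical:

```python
import numpy

def one_hot_reference(x, y):
    # Row c of the result has a single 1 at column x[c]; y is the row width.
    x = numpy.asarray(x)
    out = numpy.zeros((x.shape[0], y), dtype="float64")
    out[numpy.arange(x.shape[0]), x] = 1.0
    return out

print one_hot_reference([3, 2, 1], 5)
# [[ 0.  0.  0.  1.  0.]
#  [ 0.  0.  1.  0.  0.]
#  [ 0.  1.  0.  0.  0.]]
```

This is the same matrix that `T_OneHot.test0` asserts against.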
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/sandbox/stat_ops.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/stat_ops.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,92 @@
+
+import theano
+from theano import gof
+from theano import tensor
+import numpy
+
+
+class ExampleWiseMean(gof.Op):
+
+    def __init__(self):
+        self.destroy_map = {0: [1, 2]}
+
+    def make_node(self, x):
+        return gof.Apply(self,
+                         [x, tensor.value(float('nan')), tensor.value(0)],
+                         [tensor.Tensor(dtype = 'float64',
+                                        broadcastable = x.type.broadcastable)()])
+
+    def perform(self, node, (x, sum, n), (out,)):
+        if numpy.isnan(sum).any():
+            sum.resize(x.shape, refcheck=0)
+            sum[:] = x
+        else:
+            sum += x
+        n += 1
+        out[0] = sum / n
+
+    def c_code(self, name, node, (x, sum, n), (out, ), sub):
+        return """
+        PyObject* multi;
+        int nelems;
+        if (isnan(((double*)(%(sum)s->data))[0])) {
+            PyArray_Dims dims;
+            dims.len = %(x)s->nd;
+            dims.ptr = %(x)s->dimensions;
+            PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER);
+            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
+            nelems = PyArray_SIZE(%(sum)s);
+            while (nelems--) {
+                // Copy %(x)s in %(sum)s
+                *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1);
+                PyArray_MultiIter_NEXT(multi);
+            }
+        }
+        else {
+            // Add some error checking on the size of x
+            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
+            nelems = PyArray_SIZE(%(sum)s);
+            while (nelems--) {
+                // Add %(x)s to %(sum)s
+                *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1);
+                PyArray_MultiIter_NEXT(multi);
+            }
+        }
+        ((npy_int64*)(%(n)s->data))[0]++;
+        int n = ((npy_int64*)(%(n)s->data))[0];
+        if (%(out)s == NULL) {
+            %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0);
+        }
+        multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s);
+        nelems = PyArray_SIZE(%(sum)s);
+        while (nelems--) {
+            // %(out)s <- %(sum)s / %(n)s
+            *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n;
+            PyArray_MultiIter_NEXT(multi);
+        }
+        """ % dict(locals(), **sub)
+
+
+
+if __name__ == '__main__':
+
+    vectors = numpy.random.RandomState(666).rand(10, 2)
+
+    x = tensor.dvector()
+    e = ExampleWiseMean()(x)
+
+    # f = theano.function([x], [e], linker = 'py')
+
+    # for i, v in enumerate(vectors):
+    #     print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
+
+    # print
+
+    f = theano.function([x], [e], linker = 'c|py')
+
+    for i, v in enumerate(vectors):
+        print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
+
+
+
+
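`ExampleWiseMean` carries two hidden state inputs (a nan-initialized running `sum` and a counter `n`, updated in place via `destroy_map`) and outputs their ratio: the per-dimension mean of all examples seen so far. A plain-numpy restatement of that recurrence, mean_n = (x_1 + ... + x_n) / n — a sketch only; the `RunningMean` class is illustrative and not part of the patch:

```python
import numpy

class RunningMean(object):
    """Track mean_n = (x_1 + ... + x_n) / n across successive calls."""
    def __init__(self):
        self.sum = None   # plays the role of the op's nan-initialized sum input
        self.n = 0        # plays the role of the op's counter input

    def __call__(self, x):
        x = numpy.asarray(x, dtype='float64')
        if self.sum is None:
            self.sum = x.copy()   # first call: adopt x's shape and value
        else:
            self.sum += x
        self.n += 1
        return self.sum / self.n

m = RunningMean()
for i, v in enumerate(numpy.random.RandomState(666).rand(10, 2)):
    print v, "->", m(v)   # matches numpy.mean(vectors[:i+1], axis=0)
```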
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/sgd.py
--- a/pylearn/algorithms/sgd.py	Thu Nov 20 12:18:16 2008 -0500
+++ b/pylearn/algorithms/sgd.py	Thu Nov 27 21:42:02 2008 -0500
@@ -7,14 +7,34 @@
 from .minimizer import minimizer_factory
 
 
 class StochasticGradientDescent(module.FancyModule):
-    def __init__(self, args, cost, params, gradients=None, lr=None):
+    """Fixed stepsize gradient descent"""
+    def __init__(self, args, cost, params, gradients=None, stepsize=None):
+        """
+        :param stepsize: the step to take in (negative) gradient direction
+        :type stepsize: None, scalar value, or scalar TensorResult
+        """
         super(StochasticGradientDescent, self).__init__()
-        self.lr = lr if lr is not None else module.Member(T.dscalar())
+        self.stepsize_init = None
+
+        if stepsize is None:
+            self.stepsize = module.Member(T.dscalar())
+        elif isinstance(stepsize, T.TensorResult):
+            self.stepsize = stepsize
+        else:
+            if 1: #TODO: why is this necessary? why does the else clause not work?
+                self.stepsize = module.Member(T.dscalar())
+                self.stepsize_init = stepsize
+            else:
+                self.stepsize = module.Member(T.value(stepsize))
+
+        if self.stepsize.ndim != 0:
+            raise ValueError('stepsize must be a scalar', stepsize)
+
         self.params = params
         self.gparams = T.grad(cost, self.params) if gradients is None else gradients
 
-        self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gparams))
+        self.updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))
 
         self.step = module.Method(
             args, [],
@@ -22,17 +42,16 @@
         self.step_cost = module.Method(
             args, cost,
             updates=self.updates)
 
+    def _instance_initialize(self, obj):
+        if 1:
+            obj.stepsize = self.stepsize_init
+        else:
+            pass
+
-
-    #no initialization is done here.
-    # rationale: the only parameter is lr.
-    #    If the user wanted lr constant, he would pass the constant to the constructor
-    #    If the user wanted lr a computed value, he would pass that to the constructor.
-    #    If the user wanted a dynamic lr, he would pass a tensor.value() for lr.
-    #    If the default of a T.dscalar() is used, then it must be initialized elsewhere explicitly.
 
 @minimizer_factory('sgd')
-def sgd_minimizer(lr):
+def sgd_minimizer(stepsize=None):
     def m(i,c,p,g=None):
-        return StochasticGradientDescent(i, c, p, stepsize=stepsize)
+        return StochasticGradientDescent(i, c, p, stepsize=stepsize)
     return m
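With this change a constant stepsize can be baked in at construction time and is applied to the compiled instance by `_instance_initialize`; passing `stepsize=None` leaves a free `dscalar` that must be set on the instance before stepping. A rough usage sketch under the module/minimizer API of this revision — `inputs`, `cost`, `params`, and `input_values` are illustrative placeholders, and the no-argument `make()` call is an assumption, not something this patch shows:

```python
# Constant stepsize, stored by _instance_initialize when the module is made:
sgd = StochasticGradientDescent(inputs, cost, params, stepsize=0.01)

# Deferred stepsize: a dscalar member to be filled in on the instance.
sgd = StochasticGradientDescent(inputs, cost, params)
inst = sgd.make()                 # assumption: compiles the module
inst.stepsize = 0.01              # set before the first step
inst.step(*input_values)          # one update: p <- p - stepsize * grad
inst.step_cost(*input_values)     # same update, but also returns the cost
```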
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/stat_ops.py
--- a/pylearn/algorithms/stat_ops.py	Thu Nov 20 12:18:16 2008 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-
-import theano
-from theano import gof
-from theano import tensor
-import numpy
-
-
-class ExampleWiseMean(gof.Op):
-
-    def __init__(self):
-        self.destroy_map = {0: [1, 2]}
-
-    def make_node(self, x):
-        return gof.Apply(self,
-                         [x, tensor.value(float('nan')), tensor.value(0)],
-                         [tensor.Tensor(dtype = 'float64',
-                                        broadcastable = x.type.broadcastable)()])
-
-    def perform(self, node, (x, sum, n), (out,)):
-        if numpy.isnan(sum).any():
-            sum.resize(x.shape, refcheck=0)
-            sum[:] = x
-        else:
-            sum += x
-        n += 1
-        out[0] = sum / n
-
-    def c_code(self, name, node, (x, sum, n), (out, ), sub):
-        return """
-        PyObject* multi;
-        int nelems;
-        if (isnan(((double*)(%(sum)s->data))[0])) {
-            PyArray_Dims dims;
-            dims.len = %(x)s->nd;
-            dims.ptr = %(x)s->dimensions;
-            PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER);
-            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
-            nelems = PyArray_SIZE(%(sum)s);
-            while (nelems--) {
-                // Copy %(x)s in %(sum)s
-                *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1);
-                PyArray_MultiIter_NEXT(multi);
-            }
-        }
-        else {
-            // Add some error checking on the size of x
-            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
-            nelems = PyArray_SIZE(%(sum)s);
-            while (nelems--) {
-                // Add %(x)s to %(sum)s
-                *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1);
-                PyArray_MultiIter_NEXT(multi);
-            }
-        }
-        ((npy_int64*)(%(n)s->data))[0]++;
-        int n = ((npy_int64*)(%(n)s->data))[0];
-        if (%(out)s == NULL) {
-            %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0);
-        }
-        multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s);
-        nelems = PyArray_SIZE(%(sum)s);
-        while (nelems--) {
-            // %(out)s <- %(sum)s / %(n)s
-            *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n;
-            PyArray_MultiIter_NEXT(multi);
-        }
-        """ % dict(locals(), **sub)
-
-
-
-if __name__ == '__main__':
-
-    vectors = numpy.random.RandomState(666).rand(10, 2)
-
-    x = tensor.dvector()
-    e = ExampleWiseMean()(x)
-
-    # f = theano.function([x], [e], linker = 'py')
-
-    # for i, v in enumerate(vectors):
-    #     print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
-
-    # print
-
-    f = theano.function([x], [e], linker = 'c|py')
-
-    for i, v in enumerate(vectors):
-        print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
-
-
-
-
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/tests/test_linear_regression.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/tests/test_linear_regression.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,25 @@
+
+import unittest
+from linear_regression import *
+from make_test_datasets import *
+import numpy
+
+class test_linear_regression(unittest.TestCase):
+
+    def test1(self):
+        trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3,
+                                                                      n_targets=2,
+                                                                      n_examples=100,
+                                                                      f=linear_predictor)
+
+        assert trainset.fields()['input'].shape==(50,3)
+        assert testset.fields()['target'].shape==(50,2)
+        regressor = LinearRegression(L2_regularizer=0.1)
+        predictor = regressor(trainset)
+        test_data = testset.fields()
+        mse = predictor.compute_mse(test_data['input'],test_data['target'])
+        print 'mse = ',mse
+
+if __name__ == '__main__':
+    unittest.main()
+
diff -r d3791c59f36e -r 16894d38ce48 pylearn/algorithms/tests/test_logistic_regression.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/tests/test_logistic_regression.py	Thu Nov 27 21:42:02 2008 -0500
@@ -0,0 +1,60 @@
+from logistic_regression import *
+import sys, time
+
+if __name__ == '__main__':
+    pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
+    pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
+    if 1:
+        lrc = Module_Nclass()
+
+        print '================'
+        print lrc.update.pretty()
+        print '================'
+        print lrc.update.pretty(mode = theano.Mode('py', 'fast_run'))
+        print '================'
+#        print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace'))
+#        print '================'
+
+#        sys.exit(0)
+
+        lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'))
+        #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'))
+        #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN')
+
+        data_x = N.random.randn(5, 10)
+        data_y = (N.random.randn(5) > 0)
+
+        t = time.time()
+        for i in xrange(10000):
+            lr.lr = 0.02
+            xe = lr.update(data_x, data_y)
+            #if i % 100 == 0:
+            #    print i, xe
+
+        print 'training time:', time.time() - t
+        print 'final error', xe
+
+        #print
+        #print 'TRAINED MODEL:'
+        #print lr
+
+    if 0:
+        lrc = Module()
+
+        lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN')
+
+        data_x = N.random.randn(5, 10)
+        data_y = (N.random.randn(5, 1) > 0)
+
+        for i in xrange(10000):
+            xe = lr.update(data_x, data_y)
+            if i % 100 == 0:
+                print i, xe
+
+        print
+        print 'TRAINED MODEL:'
+        print lr
+
+
+
+