changeset 570:0a27ba2157b6
merge
author    Olivier Breuleux <breuleuo@iro.umontreal.ca>
date      Wed, 03 Dec 2008 22:28:25 -0500
parents   eaf4cbd20017 (current diff)  e878003c3009 (diff)
children  13bc6620ad95
files     pylearn/algorithms/_test_linear_regression.py pylearn/algorithms/_test_logistic_regression.py pylearn/algorithms/_test_onehotop.py pylearn/algorithms/onehotop.py pylearn/algorithms/stat_ops.py pylearn/algorithms/tests/test_aa.py
diffstat  29 files changed, 710 insertions(+), 317 deletions(-)
--- a/pylearn/algorithms/__init__.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/__init__.py Wed Dec 03 22:28:25 2008 -0500
@@ -1,4 +1,5 @@
 from .minimizer import make_minimizer, minimizer_factory
 from .stopper import make_stopper, stopper_factory
-
+from .stacker import Stacker
+from .regressor import BinRegressor
--- a/pylearn/algorithms/_test_linear_regression.py Wed Dec 03 22:28:17 2008 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-
-import unittest
-from linear_regression import *
-from make_test_datasets import *
-import numpy
-
-class test_linear_regression(unittest.TestCase):
-
-    def test1(self):
-        trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3,
-                                                                      n_targets=2,
-                                                                      n_examples=100,
-                                                                      f=linear_predictor)
-
-        assert trainset.fields()['input'].shape==(50,3)
-        assert testset.fields()['target'].shape==(50,2)
-        regressor = LinearRegression(L2_regularizer=0.1)
-        predictor = regressor(trainset)
-        test_data = testset.fields()
-        mse = predictor.compute_mse(test_data['input'],test_data['target'])
-        print 'mse = ',mse
-
-if __name__ == '__main__':
-    unittest.main()
-
--- a/pylearn/algorithms/_test_logistic_regression.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,60 +0,0 @@ -from logistic_regression import * -import sys, time - -if __name__ == '__main__': - pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx')) - pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax')) - if 1: - lrc = Module_Nclass() - - print '================' - print lrc.update.pretty() - print '================' - print lrc.update.pretty(mode = theano.Mode('py', 'fast_run')) - print '================' -# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace')) -# print '================' - -# sys.exit(0) - - lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run')) - #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run')) - #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN') - - data_x = N.random.randn(5, 10) - data_y = (N.random.randn(5) > 0) - - t = time.time() - for i in xrange(10000): - lr.lr = 0.02 - xe = lr.update(data_x, data_y) - #if i % 100 == 0: - # print i, xe - - print 'training time:', time.time() - t - print 'final error', xe - - #print - #print 'TRAINED MODEL:' - #print lr - - if 0: - lrc = Module() - - lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN') - - data_x = N.random.randn(5, 10) - data_y = (N.random.randn(5, 1) > 0) - - for i in xrange(10000): - xe = lr.update(data_x, data_y) - if i % 100 == 0: - print i, xe - - print - print 'TRAINED MODEL:' - print lr - - - -
--- a/pylearn/algorithms/_test_onehotop.py Wed Dec 03 22:28:17 2008 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-from onehotop import one_hot
-
-import unittest
-from theano import compile
-from theano import gradient
-
-from theano.tensor import as_tensor
-
-import random
-import numpy.random
-
-class T_OneHot(unittest.TestCase):
-    def test0(self):
-        x = as_tensor([3, 2, 1])
-        y = as_tensor(5)
-        o = one_hot(x, y)
-        y = compile.eval_outputs([o])
-        self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
-
-if __name__ == '__main__':
-    unittest.main()
--- a/pylearn/algorithms/kernel_regression.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/kernel_regression.py Wed Dec 03 22:28:25 2008 -0500
@@ -4,16 +4,16 @@
 from pylearn.learner import OfflineLearningAlgorithm
 from theano import tensor as T
-from nnet_ops import prepend_1_to_each_row
+from theano.tensor.nnet import prepend_1_to_each_row
 from theano.scalar import as_scalar
 from common.autoname import AutoName
 import theano
 import numpy
 
 # map a N-vector to a 1xN matrix
-row_vector = theano.elemwise.DimShuffle((False,),['x',0])
+row_vector = theano.tensor.DimShuffle((False,),['x',0])
 # map a N-vector to a Nx1 matrix
-col_vector = theano.elemwise.DimShuffle((False,),[0,'x'])
+col_vector = theano.tensor.DimShuffle((False,),[0,'x'])
 
 class KernelRegression(OfflineLearningAlgorithm):
     """
--- a/pylearn/algorithms/linear_regression.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/linear_regression.py Wed Dec 03 22:28:25 2008 -0500
@@ -6,7 +6,7 @@
 from pylearn.learner import OfflineLearningAlgorithm,OnlineLearningAlgorithm
 from theano import tensor as T
-from nnet_ops import prepend_1_to_each_row
+from theano.tensor.nnet import prepend_1_to_each_row
 from theano.scalar import as_scalar
 from common.autoname import AutoName
 import theano
@@ -96,14 +96,14 @@
     __compiled = False
 
     @classmethod
-    def compile(cls,linker='c|py'):
+    def compile(cls, mode = "FAST_RUN"):
         if cls.__compiled:
             return
         def fn(input_vars,output_vars):
-            return staticmethod(theano.function(input_vars,output_vars, linker=linker))
+            return staticmethod(theano.function(input_vars, output_vars, mode=mode))
 
-        cls.compute_outputs = fn([cls.inputs,cls.theta],[cls.outputs])
-        cls.compute_errors = fn([cls.outputs,cls.targets],[cls.squared_errors])
+        cls.compute_outputs = fn([cls.inputs,cls.theta],cls.outputs)
+        cls.compute_errors = fn([cls.outputs,cls.targets],cls.squared_errors)
 
         cls.__compiled = True
 
@@ -115,17 +115,17 @@
     XtX = T.matrix() # (n_inputs+1) x (n_inputs+1)
     XtY = T.matrix() # (n_inputs+1) x n_outputs
     extended_input = prepend_1_to_each_row(P.inputs)
-    new_XtX = T.add_inplace(XtX,T.dot(extended_input.T,extended_input))
-    new_XtY = T.add_inplace(XtY,T.dot(extended_input.T,P.targets))
+    new_XtX = T.add(XtX,T.dot(extended_input.T,extended_input))
+    new_XtY = T.add(XtY,T.dot(extended_input.T,P.targets))
 
     __compiled = False
 
     @classmethod
-    def compile(cls,linker='c|py'):
+    def compile(cls, mode="FAST_RUN"):
         if cls.__compiled:
             return
         def fn(input_vars,output_vars):
-            return staticmethod(theano.function(input_vars,output_vars, linker=linker))
+            return staticmethod(theano.function(input_vars, output_vars, mode=mode))
 
         cls.update = fn([cls.XtX,cls.XtY,cls.P.inputs,cls.P.targets],[cls.new_XtX,cls.new_XtY])
--- a/pylearn/algorithms/logistic_regression.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/logistic_regression.py Wed Dec 03 22:28:25 2008 -0500
@@ -40,11 +40,15 @@
         #here we actually build the model
         self.linear_output = T.dot(self.input, self.w) + self.b
         if 0:
+            # TODO: pending support for target being a sparse matrix
             self.softmax = nnet.softmax(self.linear_output)
 
             self._max_pr, self.argmax = T.max_and_argmax(self.linear_output)
             self._xent = self.target * T.log(self.softmax)
         else:
+            # TODO: when above is fixed, remove this hack (need an argmax
+            # which is independent of targets)
+            self.argmax_standalone = T.argmax(self.linear_output);
             (self._xent, self.softmax, self._max_pr, self.argmax) =\
                     nnet.crossentropy_softmax_max_and_argmax_1hot(
                             self.linear_output, self.target)
@@ -149,12 +153,12 @@
     def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False):
         super(LogReg2, self).__init__() #boilerplate
 
-        self.input = input if input is not None else T.matrix('input')
-        self.targ = targ if targ is not None else T.lcol()
+        self.input = module.Member(input) if input is not None else T.matrix('input')
+        self.targ = module.Member(targ) if targ is not None else T.lcol()
 
-        self.w = w if w is not None else module.Member(T.dmatrix())
-        self.b = b if b is not None else module.Member(T.dvector())
-        self.lr = lr if lr is not None else module.Member(T.dscalar())
+        self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
+        self.b = module.Member(b) if b is not None else module.Member(T.dvector())
+        self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
 
         self.params = [p for p in [self.w, self.b] if p.owner is None]
--- a/pylearn/algorithms/onehotop.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -""" -One hot Op -""" - -#from theano import tensor -from theano.tensor import as_tensor, Tensor -from theano.gof import op -from theano.gof.graph import Apply - -import numpy - -class OneHot(op.Op): - """ - Construct a one-hot vector, x out of y. - - @todo: Document inputs and outputs - @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! - @todo: Use 'bool' as output dtype, not 'int64' ? - @todo: Allow this to operate on column vectors (Tensor) - @todo: Describe better. - """ - - def make_node(self, x, y): - """ - @type x: Vector L{Tensor} of integers - @param x: The entries of the one-hot vector to be one. - @type y: Integer scalar L{Tensor} - @param y: The length (#columns) of the one-hot vectors. - @return: A L{Tensor} of one-hot vectors - - @precondition: x < y for all entries of x - @todo: Check that x and y are int types - """ - x = as_tensor(x) - y = as_tensor(y) - #assert x.dtype[0:3] == "int" - #assert y.dtype[0:3] == "int" - inputs = [x, y] - ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] - #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] - #outputs = [Tensor("int64", broadcastable=[False, False])] - outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] - node = Apply(op = self, inputs = inputs, outputs = outputs) - return node - - def perform(self, node, (x, y), (out, )): - assert x.dtype == "int64" or x.dtype == "int32" - assert x.ndim == 1 - assert y.dtype == "int64" or x.dtype == "int32" - assert y.ndim == 0 - out[0] = numpy.zeros((x.shape[0], y), dtype="float64") - for c in range(x.shape[0]): - assert x[c] < y - out[0][c, x[c]] = 1 - - def grad(self, (x, y), (out_gradient, )): - return None, None -one_hot = OneHot()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/rnn.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,236 @@ + +import numpy as N +from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile +from theano.gof import OpSub, TopoOptimizer + +from .minimizer import make_minimizer # minimizer +from theano.printing import Print +import sgd #until Olivier's module-import thing works better + +#################### +# Library-type stuff +#################### + +class TanhRnn(Op): + """ + This class implements the recurrent part of a recurrent neural network. + + There is not a neat way to include this in a more fine-grained way in Theano at the moment, + so to get something working, I'm implementing a relatively complicated Op that could be + broken down later into constituents. + + Anyway, this Op implements recursive computation of the form: + + .. latex-eqn: + z_t &= \tanh( z_{t-1} A + x_{t-1}) + + For z0 a vector, and x a TxM matrix, it returns a matrix z of shape (T+1, M), + in which z[0] = z0. + + """ + + def make_node(self, x, z0, A): + """ + :type x: matrix (each row is an x_t) (shape: (T, M)) + :type z0: vector (the first row of output) (shape: M) + :type A: matrix (M by M) + + """ + x = T.as_tensor(x) + z0 = T.as_tensor(z0) + A = T.as_tensor(A) + z = x.type() #make a new symbolic result with the same type as x + return Apply(self, [x, z0, A], [z]) + + def perform(self, node, (x,z0,A), out): + T,M = x.shape + z = N.zeros((T+1, M)) + z[0] = z0 + for i in xrange(T): + z[i+1] = N.tanh(N.dot(z[i], A) + x[i]) + out[0][0] = z + + def grad(self, (x, z0, A), (gz,)): + z = tanh_rnn(x, z0, A) + gz_incl_rnn, gx = tanh_rnn_grad(A, z, gz) + return [gx, gz_incl_rnn[0], (T.dot(z[:-1].T, gx))] +tanh_rnn = TanhRnn() + +class TanhRnnGrad(Op): + """Gradient calculation for TanhRnn""" + + def __init__(self, inplace): + self.inplace = inplace + + if self.inplace: + self.destroy_map = {0: [2]} + + def __eq__(self, other): + return (type(self) == type(other)) and (self.inplace == other.inplace) + + def __hash__(self, other): + return hash(type(self)) ^ hash(self.inplace) + + def make_node(self, A, z, gz): + return Apply(self, [A,z,gz], (z.type(), gz.type())) + + def perform(self, node, (A, z, gz), out): + Tp1,M = z.shape + T = Tp1 - 1 + gx = N.zeros((T, M)) + + if not self.inplace: + gz = gz.copy() + + for i in xrange(T-1, -1, -1): + #back through the tanh + gx[i] = gz[i+1] * (1.0 - z[i+1] * z[i+1]) + gz[i] += N.dot(A, gx[i]) + + out[0][0] = gz + out[1][0] = gx + + def __str__(self): + if self.inplace: + return 'Inplace' + super(TanhRnnGrad, self).__str__() + else: + return super(TanhRnnGrad, self).__str__() + +tanh_rnn_grad = TanhRnnGrad(inplace=False) +tanh_rnn_grad_inplace = TanhRnnGrad(inplace=True) + +compile.optdb.register('inplace_rnngrad', TopoOptimizer(OpSub(tanh_rnn_grad, tanh_rnn_grad_inplace)), 60, 'fast_run', 'inplace') + + +####################### +# Experiment-type stuff +####################### + + + +class ExampleRNN(Module): + + def __init__(self, n_vis, n_hid, n_out, minimizer): + super(ExampleRNN, self).__init__() + + def affine(weight, bias): + return (lambda a : T.dot(a, weight) + bias) + + self.n_vis = n_vis + self.n_hid = n_hid + self.n_out = n_out + + #affine transformatoin x -> latent space + self.v, self.b = Member(T.dmatrix()), Member(T.dvector()) + input_transform = affine(self.v, self.b) + + #recurrent weight matrix in latent space + self.z0 = Member(T.dvector()) + self.w = Member(T.dmatrix()) + + #affine transformation latent -> output space + 
self.u, self.c = Member(T.dmatrix()), Member(T.dvector()) + output_transform = affine(self.u, self.c) + + self.params = [self.v, self.b, self.w, self.u, self.c] + + #input and target + x, y = T.dmatrix(), T.dmatrix() + + z = tanh_rnn(input_transform(x), self.z0, self.w) + yhat = output_transform(z[1:]) + self.cost = T.sum((y - yhat)**2) + + self.blah = Method([x,y], self.cost) + + # using the make_minimizer protocol + self.minimizer = minimizer([x, y], self.cost, self.params) + + def _instance_initialize(self, obj): + n_vis = self.n_vis + n_hid = self.n_hid + n_out = self.n_out + + rng = N.random.RandomState(2342) + + obj.z0 = N.zeros(n_hid) + obj.v = rng.randn(n_vis, n_hid) * 0.01 + obj.b = N.zeros(n_hid) + obj.w = rng.randn(n_hid, n_hid) * 0.01 + obj.u = rng.randn(n_hid, n_out) * 0.01 + obj.c = N.zeros(n_out) + obj.minimizer.initialize() + def __eq__(self, other): + if not isinstance(other.component, ExampleRNN): + raise NotImplemented + #we compare the member. + if self.n_vis != other.n_vis or slef.n_hid != other.n_hid or self.n_out != other.n_out: + return False + if (N.abs(self.z0-other.z0)<1e-8).all() and (N.abs(self.v-other.v)<1e-8).all() and (N.abs(self.b-other.b)<1e-8).all() and (N.abs(self.w-other.w)<1e-8).all() and (N.abs(self.u-other.u)<1e-8).all() and (N.abs(self.c-other.c)<1e-8).all() and (N.abs(self.z0-other.z0)<1e-8).all(): + return True + return False + + def __hash__(self): + raise NotImplemented + +def test_example_rnn(): + minimizer_fn = make_minimizer('sgd', stepsize = 0.001) + + n_vis = 5 + n_out = 3 + n_hid = 4 + rnn_module = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn) + + rnn = rnn_module.make(mode='FAST_RUN') + + rng = N.random.RandomState(7722342) + x = rng.randn(10,n_vis) + y = rng.randn(10,n_out) + + #set y to be like x with a lag of LAG + LAG = 4 + y[LAG:] = x[:-LAG, 0:n_out] + + if 1: + for i, node in enumerate(rnn.minimizer.step_cost.maker.env.toposort()): + print i, node + + niter=1500 + for i in xrange(niter): + if i % 100 == 0: + print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize + else: + rnn.minimizer.step_cost(x, y) + +def test_WEIRD_STUFF(): + n_vis = 5 + n_out = 3 + n_hid = 4 + rng = N.random.RandomState(7722342) + x = rng.randn(10,n_vis) + y = rng.randn(10,n_out) + + #set y to be like x with a lag of LAG + LAG = 4 + y[LAG:] = x[:-LAG, 0:n_out] + + minimizer_fn = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = False) + rnn_module = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn) + + rnn1 = rnn_module.make(mode='FAST_RUN') + + rng1 = N.random.RandomState(7722342) + + niter=15 + for i in xrange(niter): + rnn1.minimizer.step_cost(x, y) + + minimizer_fn = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = True) + + rnn_module = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn) + rnn2 = rnn_module.make(mode='FAST_RUN') + + for i in xrange(niter): + rnn2.minimizer.step_cost(x, y) + + assert rnn1 == rnn2
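The TanhRnn Op added above computes the recursion z[t+1] = tanh(z[t] . A + x[t]) with z[0] = z0. As a reading aid for perform(), here is a rough NumPy sketch of that forward pass; it is not part of the changeset, and tanh_rnn_forward is a hypothetical helper name:

import numpy as N

def tanh_rnn_forward(x, z0, A):
    # x: (T, M) inputs, z0: (M,) initial state, A: (M, M) recurrent weights.
    # Returns z of shape (T+1, M) with z[0] = z0, mirroring TanhRnn.perform.
    T, M = x.shape
    z = N.zeros((T + 1, M))
    z[0] = z0
    for i in range(T):
        z[i + 1] = N.tanh(N.dot(z[i], A) + x[i])
    return z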
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/_test_onehotop.py Wed Dec 03 22:28:25 2008 -0500
@@ -0,0 +1,21 @@
+from onehotop import one_hot
+
+import unittest
+from theano import compile
+from theano import gradient
+from theano import function
+from theano.tensor import as_tensor
+
+import random
+import numpy.random
+
+class T_OneHot(unittest.TestCase):
+    def test0(self):
+        x = as_tensor([3, 2, 1])
+        y = as_tensor(5)
+        o = one_hot(x, y)
+        f = function([],o)
+        self.failUnless(numpy.all(f() == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
+
+if __name__ == '__main__':
+    unittest.main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/kalman.py Wed Dec 03 22:28:25 2008 -0500
@@ -0,0 +1,57 @@
+
+"""
+Modules and misc. code related to the Kalman Filter.
+
+
+Kalman filter algorithm as presented in "Probabilistic Robotics"
+
+x_t is the state
+
+u_t is a control vector
+
+z_t is the observation vector
+
+\epsilon_t is a random noise term with zero mean and covariance R_t.
+
+\delta_t is a random noise term with zero mean and covariance Q_t.
+
+state (x_t) evolves according to
+
+    x_t = A_t x_{t-1} + B_t u_t + \epsilon_t
+
+Observation z_t is made according to
+
+    z_t = C_t x_t + \delta_t
+
+Assume that the distribution over initial states is a Gaussian.
+
+With these linear/Gaussian assumptions, the belief about the state all times t is Gaussian, so
+we can represent it compactly by the mean (mu) and the covariance (sigma).
+
+"""
+
+class KalmanModule(Module):
+    """
+    """
+    def __init__(self):
+
+        self.mu = Member()
+        self.sigma = Member()
+
+        u, z = vector(), vector()
+
+        # the formulas here work for A, B, R, C matrix or sparse matrix.
+        # ... anything that supports dot, +, -, dotinv, and transpose.
+
+        A, B, C= matrix(), matrix(), matrix()
+        R, Q = matrix(), matrix()
+
+        #algo from Probabilistic Robotics pg. 42
+        mu_bar = dot(A, self.mu) + dot(B, u)
+        sigma_bar = dot(A, self.sigma, A.T) + R
+        K = dot(sigma_bar, C.T, dotinv(dot(C, sigma_bar, C.T) + Q))
+        mu_t = mu_bar + dot(K, z - dot(C,mu_bar))
+        sigma_t = dot(ident - dot(K,C), sigma_bar)
+
+        self.update = Method([u, z, A, B, C, R, Q], [], updates = {self.mu:mu_t, self.sigma:sigma_t})
+
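For reference, the update built in KalmanModule.__init__ above follows the standard Kalman filter predict/correct equations (as in Probabilistic Robotics, p. 42), written here in LaTeX:

\bar{\mu}_t    = A_t \mu_{t-1} + B_t u_t
\bar{\Sigma}_t = A_t \Sigma_{t-1} A_t^T + R_t
K_t            = \bar{\Sigma}_t C_t^T \left( C_t \bar{\Sigma}_t C_t^T + Q_t \right)^{-1}
\mu_t          = \bar{\mu}_t + K_t \left( z_t - C_t \bar{\mu}_t \right)
\Sigma_t       = \left( I - K_t C_t \right) \bar{\Sigma}_t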
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/onehotop.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,58 @@ +""" +One hot Op +""" + +#from theano import tensor +from theano.tensor import as_tensor, Tensor +from theano.gof import op +from theano.gof.graph import Apply + +import numpy + +class OneHot(op.Op): + """ + Construct a one-hot vector, x out of y. + + @todo: Document inputs and outputs + @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! + @todo: Use 'bool' as output dtype, not 'int64' ? + @todo: Allow this to operate on column vectors (Tensor) + @todo: Describe better. + """ + + def make_node(self, x, y): + """ + @type x: Vector L{Tensor} of integers + @param x: The entries of the one-hot vector to be one. + @type y: Integer scalar L{Tensor} + @param y: The length (#columns) of the one-hot vectors. + @return: A L{Tensor} of one-hot vectors + + @precondition: x < y for all entries of x + @todo: Check that x and y are int types + """ + x = as_tensor(x) + y = as_tensor(y) + #assert x.dtype[0:3] == "int" + #assert y.dtype[0:3] == "int" + inputs = [x, y] + ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] + #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] + #outputs = [Tensor("int64", broadcastable=[False, False])] + outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] + node = Apply(op = self, inputs = inputs, outputs = outputs) + return node + + def perform(self, node, (x, y), (out, )): + assert x.dtype == "int64" or x.dtype == "int32" + assert x.ndim == 1 + assert y.dtype == "int64" or x.dtype == "int32" + assert y.ndim == 0 + out[0] = numpy.zeros((x.shape[0], y), dtype="float64") + for c in range(x.shape[0]): + assert x[c] < y + out[0][c, x[c]] = 1 + + def grad(self, (x, y), (out_gradient, )): + return None, None +one_hot = OneHot()
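As a reading aid for OneHot.perform above (not part of the changeset; one_hot_np is a hypothetical name), an equivalent NumPy computation and the value checked by the sandbox test:

import numpy

def one_hot_np(x, y):
    # Row c of the result is all zeros except a 1.0 at column x[c];
    # output shape is (len(x), y), dtype float64, as in OneHot.perform.
    out = numpy.zeros((len(x), y), dtype="float64")
    for c, idx in enumerate(x):
        assert idx < y
        out[c, idx] = 1.0
    return out

# one_hot_np([3, 2, 1], 5) == [[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]]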
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/stat_ops.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,92 @@ + +import theano +from theano import gof +from theano import tensor +import numpy + + +class ExampleWiseMean(gof.Op): + + def __init__(self): + self.destroy_map = {0: [1, 2]} + + def make_node(self, x): + return gof.Apply(self, + [x, tensor.value(float('nan')), tensor.value(0)], + [tensor.Tensor(dtype = 'float64', + broadcastable = x.type.broadcastable)()]) + + def perform(self, node, (x, sum, n), (out,)): + if numpy.isnan(sum).any(): + sum.resize(x.shape, refcheck=0) + sum[:] = x + else: + sum += x + n += 1 + out[0] = sum / n + + def c_code(self, name, node, (x, sum, n), (out, ), sub): + return """ + PyObject* multi; + int nelems; + if (isnan(((double*)(%(sum)s->data))[0])) { + PyArray_Dims dims; + dims.len = %(x)s->nd; + dims.ptr = %(x)s->dimensions; + PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER); + multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // Copy %(x)s in %(sum)s + *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1); + PyArray_MultiIter_NEXT(multi); + } + } + else { + // Add some error checking on the size of x + multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // Add %(x)s to %(sum)s + *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1); + PyArray_MultiIter_NEXT(multi); + } + } + ((npy_int64*)(%(n)s->data))[0]++; + int n = ((npy_int64*)(%(n)s->data))[0]; + if (%(out)s == NULL) { + %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0); + } + multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // %(out)s <- %(sum)s / %(n)s + *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n; + PyArray_MultiIter_NEXT(multi); + } + """ % dict(locals(), **sub) + + + +if __name__ == '__main__': + + vectors = numpy.random.RandomState(666).rand(10, 2) + + x = tensor.dvector() + e = ExampleWiseMean()(x) + + # f = theano.function([x], [e], linker = 'py') + + # for i, v in enumerate(vectors): + # print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) + + # print + + f = theano.function([x], [e], linker = 'c|py') + + for i, v in enumerate(vectors): + print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) + + + +
--- a/pylearn/algorithms/sgd.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/sgd.py Wed Dec 03 22:28:25 2008 -0500
@@ -7,14 +7,35 @@
 from .minimizer import minimizer_factory
 
 class StochasticGradientDescent(module.FancyModule):
-    def __init__(self, args, cost, params, gradients=None, lr=None):
+    """Fixed stepsize gradient descent"""
+    def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True):
+        """
+        :param stepsize: the step to take in (negative) gradient direction
+        :type stepsize: None, scalar value, or scalar TensorResult
+        """
         super(StochasticGradientDescent, self).__init__()
+        self.WEIRD_STUFF = WEIRD_STUFF
+        self.stepsize_init = None
 
-        self.lr = lr if lr is not None else module.Member(T.dscalar())
+        if stepsize is None:
+            self.stepsize = module.Member(T.dscalar())
+        elif isinstance(stepsize, T.TensorResult):
+            self.stepsize = stepsize
+        else:
+            if self.WEIRD_STUFF:
+                #TODO: why is this necessary? why does the else clause not work?
+                self.stepsize = module.Member(T.dscalar())
+                self.stepsize_init = stepsize
+            else:
+                self.stepsize = module.Member(T.value(stepsize))
+
+        if self.stepsize.ndim != 0:
+            raise ValueError('stepsize must be a scalar', stepsize)
+
         self.params = params
         self.gparams = T.grad(cost, self.params) if gradients is None else gradients
 
-        self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gparams))
+        self.updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams))
 
         self.step = module.Method(
                 args, [],
@@ -22,17 +43,16 @@
         self.step_cost = module.Method(
                 args, cost,
                 updates=self.updates)
+    def _instance_initialize(self, obj):
+        if self.WEIRD_STUFF:
+            obj.stepsize = self.stepsize_init
+        else:
+            pass
 
-        #no initialization is done here.
-        # rationale: the only parameter is lr.
-        # If the user wanted lr constant, he would pass the constant to the constructor
-        # If the user wanted lr a computed value, he would pass that to the constructor.
-        # If the user wanted a dynamic lr, he would pass a tensor.value() for lr.
-        # If the default of a T.dscalar() is used, then it must be initialized elsewhere explicitly.
 
 @minimizer_factory('sgd')
-def sgd_minimizer(lr):
+def sgd_minimizer(stepsize=None, **args):
     def m(i,c,p,g=None):
-        return StochasticGradientDescent(i, c, p,lr=lr)
+        return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args)
     return m
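With this change the 'sgd' minimizer factory takes stepsize rather than lr. A sketch of the updated calling convention, matching the new rnn.py tests above (assuming the package import path pylearn.algorithms.minimizer used there):

from pylearn.algorithms.minimizer import make_minimizer

# Build a minimizer factory with a fixed step in the negative gradient
# direction; extra keyword arguments (e.g. WEIRD_STUFF) are forwarded to
# StochasticGradientDescent.
minimizer_fn = make_minimizer('sgd', stepsize=0.001)
# minimizer_fn(args, cost, params) then returns a StochasticGradientDescent
# module whose updates are p <- p - stepsize * grad(cost, p).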
--- a/pylearn/algorithms/stat_ops.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ - -import theano -from theano import gof -from theano import tensor -import numpy - - -class ExampleWiseMean(gof.Op): - - def __init__(self): - self.destroy_map = {0: [1, 2]} - - def make_node(self, x): - return gof.Apply(self, - [x, tensor.value(float('nan')), tensor.value(0)], - [tensor.Tensor(dtype = 'float64', - broadcastable = x.type.broadcastable)()]) - - def perform(self, node, (x, sum, n), (out,)): - if numpy.isnan(sum).any(): - sum.resize(x.shape, refcheck=0) - sum[:] = x - else: - sum += x - n += 1 - out[0] = sum / n - - def c_code(self, name, node, (x, sum, n), (out, ), sub): - return """ - PyObject* multi; - int nelems; - if (isnan(((double*)(%(sum)s->data))[0])) { - PyArray_Dims dims; - dims.len = %(x)s->nd; - dims.ptr = %(x)s->dimensions; - PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER); - multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // Copy %(x)s in %(sum)s - *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1); - PyArray_MultiIter_NEXT(multi); - } - } - else { - // Add some error checking on the size of x - multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // Add %(x)s to %(sum)s - *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1); - PyArray_MultiIter_NEXT(multi); - } - } - ((npy_int64*)(%(n)s->data))[0]++; - int n = ((npy_int64*)(%(n)s->data))[0]; - if (%(out)s == NULL) { - %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0); - } - multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // %(out)s <- %(sum)s / %(n)s - *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n; - PyArray_MultiIter_NEXT(multi); - } - """ % dict(locals(), **sub) - - - -if __name__ == '__main__': - - vectors = numpy.random.RandomState(666).rand(10, 2) - - x = tensor.dvector() - e = ExampleWiseMean()(x) - - # f = theano.function([x], [e], linker = 'py') - - # for i, v in enumerate(vectors): - # print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) - - # print - - f = theano.function([x], [e], linker = 'c|py') - - for i, v in enumerate(vectors): - print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) - - - -
--- a/pylearn/algorithms/stopper.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/stopper.py Wed Dec 03 22:28:25 2008 -0500
@@ -122,6 +122,16 @@
         raise StopIteration
 
 
+class NStages(ICML08Stopper):
+    """Run for a fixed number of steps, checking validation set every so
+    often."""
+    def __init__(self, hard_limit, v_int):
+        ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit)
+
+    #TODO: could optimize next() function. Most of what's in ICML08Stopper.next()
+    #is not necessary
+
+
 @stopper_factory('icml08')
 def icml08_stopper(i_wait, v_int, min_improvement, patience, hard_limit):
     return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit)
--- a/pylearn/algorithms/tests/test_daa.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/tests/test_daa.py Wed Dec 03 22:28:25 2008 -0500
@@ -28,7 +28,7 @@
             model.local_update[l]([[0, 1, 0, 1]])
             model.local_update[l]([[1, 0, 1, 0]])
 
-    for i in range(1):
+    for i in range(10):
         model.update([[0, 1, 0, 1]], [[1]])
         model.update([[1, 0, 1, 0]], [[0]])
     print model.classify([[0, 1, 0, 1]])
@@ -41,23 +41,31 @@
     daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(pylearn.algorithms.logistic_regression.Module_Nclass, 'pred')],
                          regularize = False)
 
-    model = daa.make([4, 20, 20, 20, 10],
+    model = daa.make([4] + [20] * ndaa + [10],
                      lr = 0.01,
                      mode = mode,
                      seed = 10)
 
-    model.layers[0].noise_level = 0.3
-    model.layers[1].noise_level = 0.3
-    model.layers[2].noise_level = 0.3
+    for l in range(ndaa): model.layers[l].noise_level = 0.3
 
-    for l in range(3):
+    instances = [([[0, 1, 0, 1]], [1]), ([[1, 0, 1, 0]], [0])]
+
+    for l in range(ndaa):
         for i in range(10):
-            model.local_update[l]([[0, 1, 0, 1]])
-            model.local_update[l]([[1, 0, 1, 0]])
+            for (input, output) in instances:
+                model.local_update[l](input)
 
-    for i in range(1):
-        model.update([[0, 1, 0, 1]], [1])
-        model.update([[1, 0, 1, 0]], [0])
+    for i in range(10):
+        for (input, output) in instances:
+#            model.update(input, output)
+            print "OLD:",
+            print model.validate(input, output)
+            oldloss = model.update(input, output)
+            print oldloss
+            print "NEW:"
+            print model.validate(input, output)
+            print
+
     print model.apply([[0, 1, 0, 1]])
     print model.apply([[1, 0, 1, 0]])
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/tests/test_linear_regression.py Wed Dec 03 22:28:25 2008 -0500
@@ -0,0 +1,38 @@
+
+import unittest
+from pylearn.algorithms.linear_regression import *
+from make_test_datasets import *
+import numpy
+
+class test_linear_regression(unittest.TestCase):
+
+    def test1(self):
+        trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3,
+                                                                      n_targets=2,
+                                                                      n_examples=100,
+                                                                      f=linear_predictor)
+
+        assert trainset.fields()['input'].shape==(50,3)
+        assert testset.fields()['target'].shape==(50,2)
+        regressor = LinearRegression(L2_regularizer=0.1)
+        predictor = regressor(trainset)
+        test_data = testset.fields()
+        mse = predictor.compute_mse(test_data['input'],test_data['target'])
+        print 'mse = ',mse
+
+if __name__ == '__main__':
+    import sys
+
+    if len(sys.argv)==1:
+        unittest.main()
+    else:
+        assert sys.argv[1]=="--debug"
+        tests = []
+        for arg in sys.argv[2:]:
+            tests.append(arg)
+        if tests:
+            unittest.TestSuite(map(T_DataSet, tests)).debug()
+        else:
+            module = __import__("_test_linear_regression")
+            tests = unittest.TestLoader().loadTestsFromModule(module)
+            tests.debug()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_logistic_regression.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,60 @@ +from logistic_regression import * +import sys, time + +if __name__ == '__main__': + pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx')) + pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax')) + if 1: + lrc = Module_Nclass() + + print '================' + print lrc.update.pretty() + print '================' + print lrc.update.pretty(mode = theano.Mode('py', 'fast_run')) + print '================' +# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace')) +# print '================' + +# sys.exit(0) + + lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run')) + #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run')) + #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN') + + data_x = N.random.randn(5, 10) + data_y = (N.random.randn(5) > 0) + + t = time.time() + for i in xrange(10000): + lr.lr = 0.02 + xe = lr.update(data_x, data_y) + #if i % 100 == 0: + # print i, xe + + print 'training time:', time.time() - t + print 'final error', xe + + #print + #print 'TRAINED MODEL:' + #print lr + + if 0: + lrc = Module() + + lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN') + + data_x = N.random.randn(5, 10) + data_y = (N.random.randn(5, 1) > 0) + + for i in xrange(10000): + xe = lr.update(data_x, data_y) + if i % 100 == 0: + print i, xe + + print + print 'TRAINED MODEL:' + print lr + + + +
--- a/pylearn/algorithms/tests/test_regressor.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/tests/test_regressor.py Wed Dec 03 22:28:25 2008 -0500
@@ -1,6 +1,6 @@
-import models
+import pylearn.algorithms as models
 import theano
 import numpy
 import time
--- a/pylearn/algorithms/tests/test_stacker.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/algorithms/tests/test_stacker.py Wed Dec 03 22:28:25 2008 -0500
@@ -1,5 +1,5 @@
-import models
+import pylearn.algorithms as models
 import theano
 import numpy
 import time
--- a/pylearn/datasets/MNIST.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/datasets/MNIST.py Wed Dec 03 22:28:25 2008 -0500
@@ -46,6 +46,7 @@
             y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest])
 
     rval.n_classes = 10
+    rval.img_shape = (28,28)
     return rval
--- a/pylearn/datasets/embeddings/parameters.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/datasets/embeddings/parameters.py Wed Dec 03 22:28:25 2008 -0500
@@ -1,10 +1,10 @@
 """
 Locations of the embedding data files.
 """
-WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt"
-VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc"
-#WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt"
-#VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc"
+#WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt"
+#VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc"
+WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt"
+VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc"
 NUMBER_OF_WORDS = 30000
 DIMENSIONS = 50
 UNKNOWN = "UNKNOWN"
--- a/pylearn/datasets/embeddings/process.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/datasets/embeddings/process.py Wed Dec 03 22:28:25 2008 -0500
@@ -11,6 +11,12 @@
 __word_to_embedding = None
 __read = False
 
+def length():
+    """
+    @return: The length of embeddings
+    """
+    return len(__word_to_embedding[__words[0]])
+
 def word_to_embedding(w):
     read_embeddings()
     return __word_to_embedding[w]
@@ -39,29 +45,21 @@
             w = __words[i]
             __word_to_embedding[w] = l
     __read = True
+
     for w in __word_to_embedding: assert len(__word_to_embedding[__words[0]]) == len(__word_to_embedding[w])
     sys.stderr.write("...done reading %s\n" % WEIGHTSFILE)
 
 import re
 numberre = re.compile("[0-9]")
-slashre = re.compile("\\\/")
 
-def preprocess_word(origw):
+def preprocess_word(w):
     """
    Convert a word so that it can be embedded directly.
    Returned the preprocessed sequence.
-    @note: Preprocessing is appropriate for Penn Treebank style documents.
+    @note: Perhaps run L{common.penntreebank.preprocess} on the word first.
    """
     read_embeddings()
-    if origw == "-LRB-": w = "("
-    elif origw == "-RRB-": w = ")"
-    elif origw == "-LCB-": w = "{"
-    elif origw == "-RCB-": w = "}"
-    elif origw == "-LSB-": w = "["
-    elif origw == "-RSB-": w = "]"
-    else:
-        w = origw
+    if w not in __word_to_embedding:
         w = string.lower(w)
-        w = slashre.sub("/", w)
         w = numberre.sub("NUMBER", w)
     if w not in __word_to_embedding:
 #        sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw))
--- a/pylearn/datasets/make_test_datasets.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/datasets/make_test_datasets.py Wed Dec 03 22:28:25 2008 -0500
@@ -1,4 +1,4 @@
-import dataset
+from dataset import ArrayDataSet
 from shapeset.dset import Polygons
 from linear_regression import linear_predictor
 from kernel_regression import kernel_predictor
@@ -110,6 +110,7 @@
 #    testset = ArrayDataSet(inputs[n_examples/2:],{'input':slice(0,n_inputs)}) | \
 #              ArrayDataSet(targets[n_examples/2:],{'target':slice(0,n_targets)})
     data = hstack((inputs,targets))
+
     trainset = ArrayDataSet(data[0:n_train],
                             {'input':slice(0,n_inputs),'target':slice(n_inputs,n_inputs+n_targets)})
     testset = ArrayDataSet(data[n_train:],
--- a/pylearn/datasets/shapeset1.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/datasets/shapeset1.py Wed Dec 03 22:28:25 2008 -0500
@@ -7,7 +7,7 @@
 import os
 import numpy
 
-from ..amat import AMat
+from ..io.amat import AMat
 from .config import data_root
 
 def _head(path, n):
--- a/pylearn/datasets/smallNorb.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/datasets/smallNorb.py Wed Dec 03 22:28:25 2008 -0500
@@ -1,6 +1,6 @@
 import os
 import numpy
-from ..filetensor import read
+from ..io.filetensor import read
 from .config import data_root
 
 #Path = '/u/bergstrj/pub/data/smallnorb'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/datasets/testDataset.py Wed Dec 03 22:28:25 2008 -0500
@@ -0,0 +1,43 @@
+"""
+Various routines to load/access MNIST data.
+"""
+from __future__ import absolute_import
+
+import os
+import numpy
+
+from ..io.amat import AMat
+from .config import data_root
+from .dataset import dataset_factory, Dataset
+
+VALSEQ, VALRAND = range(2)
+
+@dataset_factory('DEBUG')
+def mnist_factory(variant='', ntrain=10, nvalid=10, ntest=10, \
+                  nclass=2, ndim=1, dshape=None, valtype=VALSEQ):
+
+    temp = []
+    [temp.append(5) for i in range(ndim)]
+    dshape = temp if dshape is None else dshape
+
+    rval = Dataset()
+    rval.n_classes = nclass
+    rval.img_shape = dshape
+
+    dsize = numpy.prod(dshape);
+
+    print ntrain, nvalid, ntest, nclass, dshape, valtype
+
+    ntot = ntrain + nvalid + ntest
+    xdata = numpy.arange(ntot*numpy.prod(dshape)).reshape((ntot,dsize)) \
+            if valtype is VALSEQ else \
+            numpy.random.random((ntot,dsize));
+    ydata = numpy.round(numpy.random.random(ntot));
+
+    rval.train = Dataset.Obj(x=xdata[0:ntrain],y=ydata[0:ntrain])
+    rval.valid = Dataset.Obj(x=xdata[ntrain:ntrain+nvalid],\
+                             y=ydata[ntrain:ntrain+nvalid])
+    rval.test = Dataset.Obj(x=xdata[ntrain+nvalid:ntrain+nvalid+ntest],
+                            y=ydata[ntrain+nvalid:ntrain+nvalid+ntest])
+
+    return rval
--- a/pylearn/old_dataset/_test_dataset.py Wed Dec 03 22:28:17 2008 -0500
+++ b/pylearn/old_dataset/_test_dataset.py Wed Dec 03 22:28:25 2008 -0500
@@ -315,6 +315,7 @@
     #ds[i] returns the (i+1)-th example of the dataset.
     ds2=ds[5]
     assert isinstance(ds2,Example)
+    test_ds(ds,ds2,[5])
     assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds)  # index not defined
     assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds)
     del ds2