# HG changeset patch # User Olivier Breuleux # Date 1228361305 18000 # Node ID 0a27ba2157b60255527dd04751a0e992a6fa64ff # Parent eaf4cbd20017ae396126cd5282544169b5a87e8a# Parent e878003c300929ef85f5d1620d1c291c0f3224b7 merge diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/__init__.py --- a/pylearn/algorithms/__init__.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/__init__.py Wed Dec 03 22:28:25 2008 -0500 @@ -1,4 +1,5 @@ from .minimizer import make_minimizer, minimizer_factory from .stopper import make_stopper, stopper_factory - +from .stacker import Stacker +from .regressor import BinRegressor diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/_test_linear_regression.py --- a/pylearn/algorithms/_test_linear_regression.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ - -import unittest -from linear_regression import * -from make_test_datasets import * -import numpy - -class test_linear_regression(unittest.TestCase): - - def test1(self): - trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3, - n_targets=2, - n_examples=100, - f=linear_predictor) - - assert trainset.fields()['input'].shape==(50,3) - assert testset.fields()['target'].shape==(50,2) - regressor = LinearRegression(L2_regularizer=0.1) - predictor = regressor(trainset) - test_data = testset.fields() - mse = predictor.compute_mse(test_data['input'],test_data['target']) - print 'mse = ',mse - -if __name__ == '__main__': - unittest.main() - diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/_test_logistic_regression.py --- a/pylearn/algorithms/_test_logistic_regression.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,60 +0,0 @@ -from logistic_regression import * -import sys, time - -if __name__ == '__main__': - pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx')) - pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax')) - if 1: - lrc = Module_Nclass() - - print '================' - print lrc.update.pretty() - print '================' - print lrc.update.pretty(mode = theano.Mode('py', 'fast_run')) - print '================' -# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace')) -# print '================' - -# sys.exit(0) - - lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run')) - #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run')) - #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN') - - data_x = N.random.randn(5, 10) - data_y = (N.random.randn(5) > 0) - - t = time.time() - for i in xrange(10000): - lr.lr = 0.02 - xe = lr.update(data_x, data_y) - #if i % 100 == 0: - # print i, xe - - print 'training time:', time.time() - t - print 'final error', xe - - #print - #print 'TRAINED MODEL:' - #print lr - - if 0: - lrc = Module() - - lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN') - - data_x = N.random.randn(5, 10) - data_y = (N.random.randn(5, 1) > 0) - - for i in xrange(10000): - xe = lr.update(data_x, data_y) - if i % 100 == 0: - print i, xe - - print - print 'TRAINED MODEL:' - print lr - - - - diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/_test_onehotop.py --- a/pylearn/algorithms/_test_onehotop.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -from onehotop import one_hot - -import unittest -from theano import compile -from theano import gradient - -from theano.tensor 
import as_tensor - -import random -import numpy.random - -class T_OneHot(unittest.TestCase): - def test0(self): - x = as_tensor([3, 2, 1]) - y = as_tensor(5) - o = one_hot(x, y) - y = compile.eval_outputs([o]) - self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]]))) - -if __name__ == '__main__': - unittest.main() diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/kernel_regression.py --- a/pylearn/algorithms/kernel_regression.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/kernel_regression.py Wed Dec 03 22:28:25 2008 -0500 @@ -4,16 +4,16 @@ from pylearn.learner import OfflineLearningAlgorithm from theano import tensor as T -from nnet_ops import prepend_1_to_each_row +from theano.tensor.nnet import prepend_1_to_each_row from theano.scalar import as_scalar from common.autoname import AutoName import theano import numpy # map a N-vector to a 1xN matrix -row_vector = theano.elemwise.DimShuffle((False,),['x',0]) +row_vector = theano.tensor.DimShuffle((False,),['x',0]) # map a N-vector to a Nx1 matrix -col_vector = theano.elemwise.DimShuffle((False,),[0,'x']) +col_vector = theano.tensor.DimShuffle((False,),[0,'x']) class KernelRegression(OfflineLearningAlgorithm): """ diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/linear_regression.py --- a/pylearn/algorithms/linear_regression.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/linear_regression.py Wed Dec 03 22:28:25 2008 -0500 @@ -6,7 +6,7 @@ from pylearn.learner import OfflineLearningAlgorithm,OnlineLearningAlgorithm from theano import tensor as T -from nnet_ops import prepend_1_to_each_row +from theano.tensor.nnet import prepend_1_to_each_row from theano.scalar import as_scalar from common.autoname import AutoName import theano @@ -96,14 +96,14 @@ __compiled = False @classmethod - def compile(cls,linker='c|py'): + def compile(cls, mode = "FAST_RUN"): if cls.__compiled: return def fn(input_vars,output_vars): - return staticmethod(theano.function(input_vars,output_vars, linker=linker)) + return staticmethod(theano.function(input_vars, output_vars, mode=mode)) - cls.compute_outputs = fn([cls.inputs,cls.theta],[cls.outputs]) - cls.compute_errors = fn([cls.outputs,cls.targets],[cls.squared_errors]) + cls.compute_outputs = fn([cls.inputs,cls.theta],cls.outputs) + cls.compute_errors = fn([cls.outputs,cls.targets],cls.squared_errors) cls.__compiled = True @@ -115,17 +115,17 @@ XtX = T.matrix() # (n_inputs+1) x (n_inputs+1) XtY = T.matrix() # (n_inputs+1) x n_outputs extended_input = prepend_1_to_each_row(P.inputs) - new_XtX = T.add_inplace(XtX,T.dot(extended_input.T,extended_input)) - new_XtY = T.add_inplace(XtY,T.dot(extended_input.T,P.targets)) + new_XtX = T.add(XtX,T.dot(extended_input.T,extended_input)) + new_XtY = T.add(XtY,T.dot(extended_input.T,P.targets)) __compiled = False @classmethod - def compile(cls,linker='c|py'): + def compile(cls, mode="FAST_RUN"): if cls.__compiled: return def fn(input_vars,output_vars): - return staticmethod(theano.function(input_vars,output_vars, linker=linker)) + return staticmethod(theano.function(input_vars, output_vars, mode=mode)) cls.update = fn([cls.XtX,cls.XtY,cls.P.inputs,cls.P.targets],[cls.new_XtX,cls.new_XtY]) diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/logistic_regression.py --- a/pylearn/algorithms/logistic_regression.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/logistic_regression.py Wed Dec 03 22:28:25 2008 -0500 @@ -40,11 +40,15 @@ #here we actually build the model self.linear_output = 
T.dot(self.input, self.w) + self.b if 0: + # TODO: pending support for target being a sparse matrix self.softmax = nnet.softmax(self.linear_output) self._max_pr, self.argmax = T.max_and_argmax(self.linear_output) self._xent = self.target * T.log(self.softmax) else: + # TODO: when above is fixed, remove this hack (need an argmax + # which is independent of targets) + self.argmax_standalone = T.argmax(self.linear_output); (self._xent, self.softmax, self._max_pr, self.argmax) =\ nnet.crossentropy_softmax_max_and_argmax_1hot( self.linear_output, self.target) @@ -149,12 +153,12 @@ def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False): super(LogReg2, self).__init__() #boilerplate - self.input = input if input is not None else T.matrix('input') - self.targ = targ if targ is not None else T.lcol() + self.input = module.Member(input) if input is not None else T.matrix('input') + self.targ = module.Member(targ) if targ is not None else T.lcol() - self.w = w if w is not None else module.Member(T.dmatrix()) - self.b = b if b is not None else module.Member(T.dvector()) - self.lr = lr if lr is not None else module.Member(T.dscalar()) + self.w = module.Member(w) if w is not None else module.Member(T.dmatrix()) + self.b = module.Member(b) if b is not None else module.Member(T.dvector()) + self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar()) self.params = [p for p in [self.w, self.b] if p.owner is None] diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/onehotop.py --- a/pylearn/algorithms/onehotop.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -""" -One hot Op -""" - -#from theano import tensor -from theano.tensor import as_tensor, Tensor -from theano.gof import op -from theano.gof.graph import Apply - -import numpy - -class OneHot(op.Op): - """ - Construct a one-hot vector, x out of y. - - @todo: Document inputs and outputs - @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! - @todo: Use 'bool' as output dtype, not 'int64' ? - @todo: Allow this to operate on column vectors (Tensor) - @todo: Describe better. - """ - - def make_node(self, x, y): - """ - @type x: Vector L{Tensor} of integers - @param x: The entries of the one-hot vector to be one. - @type y: Integer scalar L{Tensor} - @param y: The length (#columns) of the one-hot vectors. 
- @return: A L{Tensor} of one-hot vectors - - @precondition: x < y for all entries of x - @todo: Check that x and y are int types - """ - x = as_tensor(x) - y = as_tensor(y) - #assert x.dtype[0:3] == "int" - #assert y.dtype[0:3] == "int" - inputs = [x, y] - ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] - #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] - #outputs = [Tensor("int64", broadcastable=[False, False])] - outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] - node = Apply(op = self, inputs = inputs, outputs = outputs) - return node - - def perform(self, node, (x, y), (out, )): - assert x.dtype == "int64" or x.dtype == "int32" - assert x.ndim == 1 - assert y.dtype == "int64" or x.dtype == "int32" - assert y.ndim == 0 - out[0] = numpy.zeros((x.shape[0], y), dtype="float64") - for c in range(x.shape[0]): - assert x[c] < y - out[0][c, x[c]] = 1 - - def grad(self, (x, y), (out_gradient, )): - return None, None -one_hot = OneHot() diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/rnn.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/rnn.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,236 @@ + +import numpy as N +from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile +from theano.gof import OpSub, TopoOptimizer + +from .minimizer import make_minimizer # minimizer +from theano.printing import Print +import sgd #until Olivier's module-import thing works better + +#################### +# Library-type stuff +#################### + +class TanhRnn(Op): + """ + This class implements the recurrent part of a recurrent neural network. + + There is not a neat way to include this in a more fine-grained way in Theano at the moment, + so to get something working, I'm implementing a relatively complicated Op that could be + broken down later into constituents. + + Anyway, this Op implements recursive computation of the form: + + .. latex-eqn: + z_t &= \tanh( z_{t-1} A + x_{t-1}) + + For z0 a vector, and x a TxM matrix, it returns a matrix z of shape (T+1, M), + in which z[0] = z0. 
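# A standalone NumPy sketch of the recurrence described above (illustrative
# only; tanh_rnn_reference is a hypothetical helper, not part of this module).
# Shapes follow the docstring: x is (T, M), z0 is (M,), and the result is
# (T+1, M) with z[0] = z0.
import numpy

def tanh_rnn_reference(x, z0, A):
    T, M = x.shape
    z = numpy.zeros((T + 1, M))
    z[0] = z0
    for t in range(T):
        z[t + 1] = numpy.tanh(numpy.dot(z[t], A) + x[t])
    return z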
+ + """ + + def make_node(self, x, z0, A): + """ + :type x: matrix (each row is an x_t) (shape: (T, M)) + :type z0: vector (the first row of output) (shape: M) + :type A: matrix (M by M) + + """ + x = T.as_tensor(x) + z0 = T.as_tensor(z0) + A = T.as_tensor(A) + z = x.type() #make a new symbolic result with the same type as x + return Apply(self, [x, z0, A], [z]) + + def perform(self, node, (x,z0,A), out): + T,M = x.shape + z = N.zeros((T+1, M)) + z[0] = z0 + for i in xrange(T): + z[i+1] = N.tanh(N.dot(z[i], A) + x[i]) + out[0][0] = z + + def grad(self, (x, z0, A), (gz,)): + z = tanh_rnn(x, z0, A) + gz_incl_rnn, gx = tanh_rnn_grad(A, z, gz) + return [gx, gz_incl_rnn[0], (T.dot(z[:-1].T, gx))] +tanh_rnn = TanhRnn() + +class TanhRnnGrad(Op): + """Gradient calculation for TanhRnn""" + + def __init__(self, inplace): + self.inplace = inplace + + if self.inplace: + self.destroy_map = {0: [2]} + + def __eq__(self, other): + return (type(self) == type(other)) and (self.inplace == other.inplace) + + def __hash__(self, other): + return hash(type(self)) ^ hash(self.inplace) + + def make_node(self, A, z, gz): + return Apply(self, [A,z,gz], (z.type(), gz.type())) + + def perform(self, node, (A, z, gz), out): + Tp1,M = z.shape + T = Tp1 - 1 + gx = N.zeros((T, M)) + + if not self.inplace: + gz = gz.copy() + + for i in xrange(T-1, -1, -1): + #back through the tanh + gx[i] = gz[i+1] * (1.0 - z[i+1] * z[i+1]) + gz[i] += N.dot(A, gx[i]) + + out[0][0] = gz + out[1][0] = gx + + def __str__(self): + if self.inplace: + return 'Inplace' + super(TanhRnnGrad, self).__str__() + else: + return super(TanhRnnGrad, self).__str__() + +tanh_rnn_grad = TanhRnnGrad(inplace=False) +tanh_rnn_grad_inplace = TanhRnnGrad(inplace=True) + +compile.optdb.register('inplace_rnngrad', TopoOptimizer(OpSub(tanh_rnn_grad, tanh_rnn_grad_inplace)), 60, 'fast_run', 'inplace') + + +####################### +# Experiment-type stuff +####################### + + + +class ExampleRNN(Module): + + def __init__(self, n_vis, n_hid, n_out, minimizer): + super(ExampleRNN, self).__init__() + + def affine(weight, bias): + return (lambda a : T.dot(a, weight) + bias) + + self.n_vis = n_vis + self.n_hid = n_hid + self.n_out = n_out + + #affine transformatoin x -> latent space + self.v, self.b = Member(T.dmatrix()), Member(T.dvector()) + input_transform = affine(self.v, self.b) + + #recurrent weight matrix in latent space + self.z0 = Member(T.dvector()) + self.w = Member(T.dmatrix()) + + #affine transformation latent -> output space + self.u, self.c = Member(T.dmatrix()), Member(T.dvector()) + output_transform = affine(self.u, self.c) + + self.params = [self.v, self.b, self.w, self.u, self.c] + + #input and target + x, y = T.dmatrix(), T.dmatrix() + + z = tanh_rnn(input_transform(x), self.z0, self.w) + yhat = output_transform(z[1:]) + self.cost = T.sum((y - yhat)**2) + + self.blah = Method([x,y], self.cost) + + # using the make_minimizer protocol + self.minimizer = minimizer([x, y], self.cost, self.params) + + def _instance_initialize(self, obj): + n_vis = self.n_vis + n_hid = self.n_hid + n_out = self.n_out + + rng = N.random.RandomState(2342) + + obj.z0 = N.zeros(n_hid) + obj.v = rng.randn(n_vis, n_hid) * 0.01 + obj.b = N.zeros(n_hid) + obj.w = rng.randn(n_hid, n_hid) * 0.01 + obj.u = rng.randn(n_hid, n_out) * 0.01 + obj.c = N.zeros(n_out) + obj.minimizer.initialize() + def __eq__(self, other): + if not isinstance(other.component, ExampleRNN): + raise NotImplemented + #we compare the member. 
+ if self.n_vis != other.n_vis or slef.n_hid != other.n_hid or self.n_out != other.n_out: + return False + if (N.abs(self.z0-other.z0)<1e-8).all() and (N.abs(self.v-other.v)<1e-8).all() and (N.abs(self.b-other.b)<1e-8).all() and (N.abs(self.w-other.w)<1e-8).all() and (N.abs(self.u-other.u)<1e-8).all() and (N.abs(self.c-other.c)<1e-8).all() and (N.abs(self.z0-other.z0)<1e-8).all(): + return True + return False + + def __hash__(self): + raise NotImplemented + +def test_example_rnn(): + minimizer_fn = make_minimizer('sgd', stepsize = 0.001) + + n_vis = 5 + n_out = 3 + n_hid = 4 + rnn_module = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn) + + rnn = rnn_module.make(mode='FAST_RUN') + + rng = N.random.RandomState(7722342) + x = rng.randn(10,n_vis) + y = rng.randn(10,n_out) + + #set y to be like x with a lag of LAG + LAG = 4 + y[LAG:] = x[:-LAG, 0:n_out] + + if 1: + for i, node in enumerate(rnn.minimizer.step_cost.maker.env.toposort()): + print i, node + + niter=1500 + for i in xrange(niter): + if i % 100 == 0: + print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize + else: + rnn.minimizer.step_cost(x, y) + +def test_WEIRD_STUFF(): + n_vis = 5 + n_out = 3 + n_hid = 4 + rng = N.random.RandomState(7722342) + x = rng.randn(10,n_vis) + y = rng.randn(10,n_out) + + #set y to be like x with a lag of LAG + LAG = 4 + y[LAG:] = x[:-LAG, 0:n_out] + + minimizer_fn = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = False) + rnn_module = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn) + + rnn1 = rnn_module.make(mode='FAST_RUN') + + rng1 = N.random.RandomState(7722342) + + niter=15 + for i in xrange(niter): + rnn1.minimizer.step_cost(x, y) + + minimizer_fn = make_minimizer('sgd', stepsize = 0.001, WEIRD_STUFF = True) + + rnn_module = ExampleRNN(n_vis, n_hid, n_out, minimizer_fn) + rnn2 = rnn_module.make(mode='FAST_RUN') + + for i in xrange(niter): + rnn2.minimizer.step_cost(x, y) + + assert rnn1 == rnn2 diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/sandbox/_test_onehotop.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/_test_onehotop.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,21 @@ +from onehotop import one_hot + +import unittest +from theano import compile +from theano import gradient +from theano import function +from theano.tensor import as_tensor + +import random +import numpy.random + +class T_OneHot(unittest.TestCase): + def test0(self): + x = as_tensor([3, 2, 1]) + y = as_tensor(5) + o = one_hot(x, y) + f = function([],o) + self.failUnless(numpy.all(f() == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]]))) + +if __name__ == '__main__': + unittest.main() diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/sandbox/kalman.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/kalman.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,57 @@ + +""" +Modules and misc. code related to the Kalman Filter. + + +Kalman filter algorithm as presented in "Probabilistic Robotics" + +x_t is the state + +u_t is a control vector + +z_t is the observation vector + +\epsilon_t is a random noise term with zero mean and covariance R_t. + +\delta_t is a random noise term with zero mean and covariance Q_t. + +state (x_t) evolves according to + + x_t = A_t x_{t-1} + B_t u_t + \epsilon_t + +Observation z_t is made according to + + z_t = C_t x_t + \delta_t + +Assume that the distribution over initial states is a Gaussian. 
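# A plain-NumPy sketch of one predict/update step of this filter (illustrative
# only; kalman_step is a hypothetical helper). It mirrors the symbolic update
# that KalmanModule builds below: predict with (A, B, R), then correct using
# the Kalman gain computed from (C, Q).
import numpy

def kalman_step(mu, sigma, u, z, A, B, C, R, Q):
    # predict
    mu_bar = numpy.dot(A, mu) + numpy.dot(B, u)
    sigma_bar = numpy.dot(numpy.dot(A, sigma), A.T) + R
    # correct
    K = numpy.dot(numpy.dot(sigma_bar, C.T),
                  numpy.linalg.inv(numpy.dot(numpy.dot(C, sigma_bar), C.T) + Q))
    mu_new = mu_bar + numpy.dot(K, z - numpy.dot(C, mu_bar))
    sigma_new = numpy.dot(numpy.eye(len(mu)) - numpy.dot(K, C), sigma_bar)
    return mu_new, sigma_new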
+ +With these linear/Gaussian assumptions, the belief about the state all times t is Gaussian, so +we can represent it compactly by the mean (mu) and the covariance (sigma). + +""" + +class KalmanModule(Module): + """ + """ + def __init__(self): + + self.mu = Member() + self.sigma = Member() + + u, z = vector(), vector() + + # the formulas here work for A, B, R, C matrix or sparse matrix. + # ... anything that supports dot, +, -, dotinv, and transpose. + + A, B, C= matrix(), matrix(), matrix() + R, Q = matrix(), matrix() + + #algo from Probabilistic Robotics pg. 42 + mu_bar = dot(A, self.mu) + dot(B, u) + sigma_bar = dot(A, self.sigma, A.T) + R + K = dot(sigma_bar, C.T, dotinv(dot(C, sigma_bar, C.T) + Q)) + mu_t = mu_bar + dot(K, z - dot(C,mu_bar)) + sigma_t = dot(ident - dot(K,C), sigma_bar) + + self.update = Method([u, z, A, B, C, R, Q], [], updates = {self.mu:mu_t, self.sigma:sigma_t}) + diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/sandbox/onehotop.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/onehotop.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,58 @@ +""" +One hot Op +""" + +#from theano import tensor +from theano.tensor import as_tensor, Tensor +from theano.gof import op +from theano.gof.graph import Apply + +import numpy + +class OneHot(op.Op): + """ + Construct a one-hot vector, x out of y. + + @todo: Document inputs and outputs + @todo: Use 'bool' as output dtype? Or, at least 'int64' ? Not float64! + @todo: Use 'bool' as output dtype, not 'int64' ? + @todo: Allow this to operate on column vectors (Tensor) + @todo: Describe better. + """ + + def make_node(self, x, y): + """ + @type x: Vector L{Tensor} of integers + @param x: The entries of the one-hot vector to be one. + @type y: Integer scalar L{Tensor} + @param y: The length (#columns) of the one-hot vectors. 
+ @return: A L{Tensor} of one-hot vectors + + @precondition: x < y for all entries of x + @todo: Check that x and y are int types + """ + x = as_tensor(x) + y = as_tensor(y) + #assert x.dtype[0:3] == "int" + #assert y.dtype[0:3] == "int" + inputs = [x, y] + ##outputs = [tensor.Tensor("int64", broadcastable=[False, False])] + #outputs = [tensor.Tensor("float64", broadcastable=[False, False])] + #outputs = [Tensor("int64", broadcastable=[False, False])] + outputs = [Tensor("float64", broadcastable=[False, False]).make_result()] + node = Apply(op = self, inputs = inputs, outputs = outputs) + return node + + def perform(self, node, (x, y), (out, )): + assert x.dtype == "int64" or x.dtype == "int32" + assert x.ndim == 1 + assert y.dtype == "int64" or x.dtype == "int32" + assert y.ndim == 0 + out[0] = numpy.zeros((x.shape[0], y), dtype="float64") + for c in range(x.shape[0]): + assert x[c] < y + out[0][c, x[c]] = 1 + + def grad(self, (x, y), (out_gradient, )): + return None, None +one_hot = OneHot() diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/sandbox/stat_ops.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/sandbox/stat_ops.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,92 @@ + +import theano +from theano import gof +from theano import tensor +import numpy + + +class ExampleWiseMean(gof.Op): + + def __init__(self): + self.destroy_map = {0: [1, 2]} + + def make_node(self, x): + return gof.Apply(self, + [x, tensor.value(float('nan')), tensor.value(0)], + [tensor.Tensor(dtype = 'float64', + broadcastable = x.type.broadcastable)()]) + + def perform(self, node, (x, sum, n), (out,)): + if numpy.isnan(sum).any(): + sum.resize(x.shape, refcheck=0) + sum[:] = x + else: + sum += x + n += 1 + out[0] = sum / n + + def c_code(self, name, node, (x, sum, n), (out, ), sub): + return """ + PyObject* multi; + int nelems; + if (isnan(((double*)(%(sum)s->data))[0])) { + PyArray_Dims dims; + dims.len = %(x)s->nd; + dims.ptr = %(x)s->dimensions; + PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER); + multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // Copy %(x)s in %(sum)s + *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1); + PyArray_MultiIter_NEXT(multi); + } + } + else { + // Add some error checking on the size of x + multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // Add %(x)s to %(sum)s + *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1); + PyArray_MultiIter_NEXT(multi); + } + } + ((npy_int64*)(%(n)s->data))[0]++; + int n = ((npy_int64*)(%(n)s->data))[0]; + if (%(out)s == NULL) { + %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0); + } + multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s); + nelems = PyArray_SIZE(%(sum)s); + while (nelems--) { + // %(out)s <- %(sum)s / %(n)s + *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n; + PyArray_MultiIter_NEXT(multi); + } + """ % dict(locals(), **sub) + + + +if __name__ == '__main__': + + vectors = numpy.random.RandomState(666).rand(10, 2) + + x = tensor.dvector() + e = ExampleWiseMean()(x) + + # f = theano.function([x], [e], linker = 'py') + + # for i, v in enumerate(vectors): + # print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) + + # print + + f = theano.function([x], [e], linker = 'c|py') + + for i, v in enumerate(vectors): + print v, "->", f(v), 
numpy.mean(vectors[:i+1], axis=0) + + + + diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/sgd.py --- a/pylearn/algorithms/sgd.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/sgd.py Wed Dec 03 22:28:25 2008 -0500 @@ -7,14 +7,35 @@ from .minimizer import minimizer_factory class StochasticGradientDescent(module.FancyModule): - def __init__(self, args, cost, params, gradients=None, lr=None): + """Fixed stepsize gradient descent""" + def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True): + """ + :param stepsize: the step to take in (negative) gradient direction + :type stepsize: None, scalar value, or scalar TensorResult + """ super(StochasticGradientDescent, self).__init__() + self.WEIRD_STUFF = WEIRD_STUFF + self.stepsize_init = None - self.lr = lr if lr is not None else module.Member(T.dscalar()) + if stepsize is None: + self.stepsize = module.Member(T.dscalar()) + elif isinstance(stepsize, T.TensorResult): + self.stepsize = stepsize + else: + if self.WEIRD_STUFF: + #TODO: why is this necessary? why does the else clause not work? + self.stepsize = module.Member(T.dscalar()) + self.stepsize_init = stepsize + else: + self.stepsize = module.Member(T.value(stepsize)) + + if self.stepsize.ndim != 0: + raise ValueError('stepsize must be a scalar', stepsize) + self.params = params self.gparams = T.grad(cost, self.params) if gradients is None else gradients - self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gparams)) + self.updates = dict((p, p - self.stepsize * g) for p, g in zip(self.params, self.gparams)) self.step = module.Method( args, [], @@ -22,17 +43,16 @@ self.step_cost = module.Method( args, cost, updates=self.updates) + def _instance_initialize(self, obj): + if self.WEIRD_STUFF: + obj.stepsize = self.stepsize_init + else: + pass - #no initialization is done here. - # rationale: the only parameter is lr. - # If the user wanted lr constant, he would pass the constant to the constructor - # If the user wanted lr a computed value, he would pass that to the constructor. - # If the user wanted a dynamic lr, he would pass a tensor.value() for lr. - # If the default of a T.dscalar() is used, then it must be initialized elsewhere explicitly. 
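# The updates dictionary above implements plain fixed-step gradient descent; a
# minimal sketch of what one step()/step_cost() call does to each parameter
# (illustrative only; sgd_step is a hypothetical helper):
def sgd_step(params, grads, stepsize):
    return [p - stepsize * g for p, g in zip(params, grads)]

# Typical construction goes through the minimizer protocol, as in rnn.py:
#   minimizer_fn = make_minimizer('sgd', stepsize=0.001)
#   minimizer = minimizer_fn([x, y], cost, params)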
@minimizer_factory('sgd') -def sgd_minimizer(lr): +def sgd_minimizer(stepsize=None, **args): def m(i,c,p,g=None): - return StochasticGradientDescent(i, c, p,lr=lr) + return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args) return m diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/stat_ops.py --- a/pylearn/algorithms/stat_ops.py Wed Dec 03 22:28:17 2008 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ - -import theano -from theano import gof -from theano import tensor -import numpy - - -class ExampleWiseMean(gof.Op): - - def __init__(self): - self.destroy_map = {0: [1, 2]} - - def make_node(self, x): - return gof.Apply(self, - [x, tensor.value(float('nan')), tensor.value(0)], - [tensor.Tensor(dtype = 'float64', - broadcastable = x.type.broadcastable)()]) - - def perform(self, node, (x, sum, n), (out,)): - if numpy.isnan(sum).any(): - sum.resize(x.shape, refcheck=0) - sum[:] = x - else: - sum += x - n += 1 - out[0] = sum / n - - def c_code(self, name, node, (x, sum, n), (out, ), sub): - return """ - PyObject* multi; - int nelems; - if (isnan(((double*)(%(sum)s->data))[0])) { - PyArray_Dims dims; - dims.len = %(x)s->nd; - dims.ptr = %(x)s->dimensions; - PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER); - multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // Copy %(x)s in %(sum)s - *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1); - PyArray_MultiIter_NEXT(multi); - } - } - else { - // Add some error checking on the size of x - multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // Add %(x)s to %(sum)s - *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1); - PyArray_MultiIter_NEXT(multi); - } - } - ((npy_int64*)(%(n)s->data))[0]++; - int n = ((npy_int64*)(%(n)s->data))[0]; - if (%(out)s == NULL) { - %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0); - } - multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s); - nelems = PyArray_SIZE(%(sum)s); - while (nelems--) { - // %(out)s <- %(sum)s / %(n)s - *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n; - PyArray_MultiIter_NEXT(multi); - } - """ % dict(locals(), **sub) - - - -if __name__ == '__main__': - - vectors = numpy.random.RandomState(666).rand(10, 2) - - x = tensor.dvector() - e = ExampleWiseMean()(x) - - # f = theano.function([x], [e], linker = 'py') - - # for i, v in enumerate(vectors): - # print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) - - # print - - f = theano.function([x], [e], linker = 'c|py') - - for i, v in enumerate(vectors): - print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0) - - - - diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/stopper.py --- a/pylearn/algorithms/stopper.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/stopper.py Wed Dec 03 22:28:25 2008 -0500 @@ -122,6 +122,16 @@ raise StopIteration +class NStages(ICML08Stopper): + """Run for a fixed number of steps, checking validation set every so + often.""" + def __init__(self, hard_limit, v_int): + ICML08Stopper.__init__(self, hard_limit, v_int, 1.0, 1.0, hard_limit) + + #TODO: could optimize next() function. 
Most of what's in ICML08Stopper.next() + #is not necessary + + @stopper_factory('icml08') def icml08_stopper(i_wait, v_int, min_improvement, patience, hard_limit): return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit) diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/tests/test_aa.py diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/tests/test_daa.py --- a/pylearn/algorithms/tests/test_daa.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/tests/test_daa.py Wed Dec 03 22:28:25 2008 -0500 @@ -28,7 +28,7 @@ model.local_update[l]([[0, 1, 0, 1]]) model.local_update[l]([[1, 0, 1, 0]]) - for i in range(1): + for i in range(10): model.update([[0, 1, 0, 1]], [[1]]) model.update([[1, 0, 1, 0]], [[0]]) print model.classify([[0, 1, 0, 1]]) @@ -41,23 +41,31 @@ daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(pylearn.algorithms.logistic_regression.Module_Nclass, 'pred')], regularize = False) - model = daa.make([4, 20, 20, 20, 10], + model = daa.make([4] + [20] * ndaa + [10], lr = 0.01, mode = mode, seed = 10) - model.layers[0].noise_level = 0.3 - model.layers[1].noise_level = 0.3 - model.layers[2].noise_level = 0.3 + for l in range(ndaa): model.layers[l].noise_level = 0.3 - for l in range(3): + instances = [([[0, 1, 0, 1]], [1]), ([[1, 0, 1, 0]], [0])] + + for l in range(ndaa): for i in range(10): - model.local_update[l]([[0, 1, 0, 1]]) - model.local_update[l]([[1, 0, 1, 0]]) + for (input, output) in instances: + model.local_update[l](input) - for i in range(1): - model.update([[0, 1, 0, 1]], [1]) - model.update([[1, 0, 1, 0]], [0]) + for i in range(10): + for (input, output) in instances: +# model.update(input, output) + print "OLD:", + print model.validate(input, output) + oldloss = model.update(input, output) + print oldloss + print "NEW:" + print model.validate(input, output) + print + print model.apply([[0, 1, 0, 1]]) print model.apply([[1, 0, 1, 0]]) diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/tests/test_linear_regression.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_linear_regression.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,38 @@ + +import unittest +from pylearn.algorithms.linear_regression import * +from make_test_datasets import * +import numpy + +class test_linear_regression(unittest.TestCase): + + def test1(self): + trainset,testset,theta=make_artificial_datasets_from_function(n_inputs=3, + n_targets=2, + n_examples=100, + f=linear_predictor) + + assert trainset.fields()['input'].shape==(50,3) + assert testset.fields()['target'].shape==(50,2) + regressor = LinearRegression(L2_regularizer=0.1) + predictor = regressor(trainset) + test_data = testset.fields() + mse = predictor.compute_mse(test_data['input'],test_data['target']) + print 'mse = ',mse + +if __name__ == '__main__': + import sys + + if len(sys.argv)==1: + unittest.main() + else: + assert sys.argv[1]=="--debug" + tests = [] + for arg in sys.argv[2:]: + tests.append(arg) + if tests: + unittest.TestSuite(map(T_DataSet, tests)).debug() + else: + module = __import__("_test_linear_regression") + tests = unittest.TestLoader().loadTestsFromModule(module) + tests.debug() diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/tests/test_logistic_regression.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/algorithms/tests/test_logistic_regression.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,60 @@ +from logistic_regression import * +import sys, time + +if __name__ == '__main__': + 
pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx')) + pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax')) + if 1: + lrc = Module_Nclass() + + print '================' + print lrc.update.pretty() + print '================' + print lrc.update.pretty(mode = theano.Mode('py', 'fast_run')) + print '================' +# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace')) +# print '================' + +# sys.exit(0) + + lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run')) + #lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run')) + #lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN') + + data_x = N.random.randn(5, 10) + data_y = (N.random.randn(5) > 0) + + t = time.time() + for i in xrange(10000): + lr.lr = 0.02 + xe = lr.update(data_x, data_y) + #if i % 100 == 0: + # print i, xe + + print 'training time:', time.time() - t + print 'final error', xe + + #print + #print 'TRAINED MODEL:' + #print lr + + if 0: + lrc = Module() + + lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN') + + data_x = N.random.randn(5, 10) + data_y = (N.random.randn(5, 1) > 0) + + for i in xrange(10000): + xe = lr.update(data_x, data_y) + if i % 100 == 0: + print i, xe + + print + print 'TRAINED MODEL:' + print lr + + + + diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/tests/test_regressor.py --- a/pylearn/algorithms/tests/test_regressor.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/tests/test_regressor.py Wed Dec 03 22:28:25 2008 -0500 @@ -1,6 +1,6 @@ -import models +import pylearn.algorithms as models import theano import numpy import time diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/algorithms/tests/test_stacker.py --- a/pylearn/algorithms/tests/test_stacker.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/algorithms/tests/test_stacker.py Wed Dec 03 22:28:25 2008 -0500 @@ -1,5 +1,5 @@ -import models +import pylearn.algorithms as models import theano import numpy import time diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/datasets/MNIST.py --- a/pylearn/datasets/MNIST.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/datasets/MNIST.py Wed Dec 03 22:28:25 2008 -0500 @@ -46,6 +46,7 @@ y=all_targ[ntrain+nvalid:ntrain+nvalid+ntest]) rval.n_classes = 10 + rval.img_shape = (28,28) return rval diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/datasets/embeddings/parameters.py --- a/pylearn/datasets/embeddings/parameters.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/datasets/embeddings/parameters.py Wed Dec 03 22:28:25 2008 -0500 @@ -1,10 +1,10 @@ """ Locations of the embedding data files. 
""" -WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt" -VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc" -#WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt" -#VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc" +#WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt" +#VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc" +WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt" +VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc" NUMBER_OF_WORDS = 30000 DIMENSIONS = 50 UNKNOWN = "UNKNOWN" diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/datasets/embeddings/process.py --- a/pylearn/datasets/embeddings/process.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/datasets/embeddings/process.py Wed Dec 03 22:28:25 2008 -0500 @@ -11,6 +11,12 @@ __word_to_embedding = None __read = False +def length(): + """ + @return: The length of embeddings + """ + return len(__word_to_embedding[__words[0]]) + def word_to_embedding(w): read_embeddings() return __word_to_embedding[w] @@ -39,29 +45,21 @@ w = __words[i] __word_to_embedding[w] = l __read = True + for w in __word_to_embedding: assert len(__word_to_embedding[__words[0]]) == len(__word_to_embedding[w]) sys.stderr.write("...done reading %s\n" % WEIGHTSFILE) import re numberre = re.compile("[0-9]") -slashre = re.compile("\\\/") -def preprocess_word(origw): +def preprocess_word(w): """ Convert a word so that it can be embedded directly. Returned the preprocessed sequence. - @note: Preprocessing is appropriate for Penn Treebank style documents. + @note: Perhaps run L{common.penntreebank.preprocess} on the word first. 
""" read_embeddings() - if origw == "-LRB-": w = "(" - elif origw == "-RRB-": w = ")" - elif origw == "-LCB-": w = "{" - elif origw == "-RCB-": w = "}" - elif origw == "-LSB-": w = "[" - elif origw == "-RSB-": w = "]" - else: - w = origw + if w not in __word_to_embedding: w = string.lower(w) - w = slashre.sub("/", w) w = numberre.sub("NUMBER", w) if w not in __word_to_embedding: # sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw)) diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/datasets/make_test_datasets.py --- a/pylearn/datasets/make_test_datasets.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/datasets/make_test_datasets.py Wed Dec 03 22:28:25 2008 -0500 @@ -1,4 +1,4 @@ -import dataset +from dataset import ArrayDataSet from shapeset.dset import Polygons from linear_regression import linear_predictor from kernel_regression import kernel_predictor @@ -110,6 +110,7 @@ # testset = ArrayDataSet(inputs[n_examples/2:],{'input':slice(0,n_inputs)}) | \ # ArrayDataSet(targets[n_examples/2:],{'target':slice(0,n_targets)}) data = hstack((inputs,targets)) + trainset = ArrayDataSet(data[0:n_train], {'input':slice(0,n_inputs),'target':slice(n_inputs,n_inputs+n_targets)}) testset = ArrayDataSet(data[n_train:], diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/datasets/shapeset1.py --- a/pylearn/datasets/shapeset1.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/datasets/shapeset1.py Wed Dec 03 22:28:25 2008 -0500 @@ -7,7 +7,7 @@ import os import numpy -from ..amat import AMat +from ..io.amat import AMat from .config import data_root def _head(path, n): diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/datasets/smallNorb.py --- a/pylearn/datasets/smallNorb.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/datasets/smallNorb.py Wed Dec 03 22:28:25 2008 -0500 @@ -1,6 +1,6 @@ import os import numpy -from ..filetensor import read +from ..io.filetensor import read from .config import data_root #Path = '/u/bergstrj/pub/data/smallnorb' diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/datasets/testDataset.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/datasets/testDataset.py Wed Dec 03 22:28:25 2008 -0500 @@ -0,0 +1,43 @@ +""" +Various routines to load/access MNIST data. 
+""" +from __future__ import absolute_import + +import os +import numpy + +from ..io.amat import AMat +from .config import data_root +from .dataset import dataset_factory, Dataset + +VALSEQ, VALRAND = range(2) + +@dataset_factory('DEBUG') +def mnist_factory(variant='', ntrain=10, nvalid=10, ntest=10, \ + nclass=2, ndim=1, dshape=None, valtype=VALSEQ): + + temp = [] + [temp.append(5) for i in range(ndim)] + dshape = temp if dshape is None else dshape + + rval = Dataset() + rval.n_classes = nclass + rval.img_shape = dshape + + dsize = numpy.prod(dshape); + + print ntrain, nvalid, ntest, nclass, dshape, valtype + + ntot = ntrain + nvalid + ntest + xdata = numpy.arange(ntot*numpy.prod(dshape)).reshape((ntot,dsize)) \ + if valtype is VALSEQ else \ + numpy.random.random((ntot,dsize)); + ydata = numpy.round(numpy.random.random(ntot)); + + rval.train = Dataset.Obj(x=xdata[0:ntrain],y=ydata[0:ntrain]) + rval.valid = Dataset.Obj(x=xdata[ntrain:ntrain+nvalid],\ + y=ydata[ntrain:ntrain+nvalid]) + rval.test = Dataset.Obj(x=xdata[ntrain+nvalid:ntrain+nvalid+ntest], + y=ydata[ntrain+nvalid:ntrain+nvalid+ntest]) + + return rval diff -r eaf4cbd20017 -r 0a27ba2157b6 pylearn/old_dataset/_test_dataset.py --- a/pylearn/old_dataset/_test_dataset.py Wed Dec 03 22:28:17 2008 -0500 +++ b/pylearn/old_dataset/_test_dataset.py Wed Dec 03 22:28:25 2008 -0500 @@ -315,6 +315,7 @@ #ds[i] returns the (i+1)-th example of the dataset. ds2=ds[5] assert isinstance(ds2,Example) + test_ds(ds,ds2,[5]) assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds) del ds2