# HG changeset patch # User James Bergstra # Date 1252593050 14400 # Node ID bfc5914642ce8742936b9d82a2a384d997430884 # Parent e53c06901f8f864925ac89f75fa10cb515ed013b# Parent f1a29c772210ba7c35b96c1efbbe950b28fd77f0 merge diff -r e53c06901f8f -r bfc5914642ce .hgignore diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/aa.py --- a/pylearn/algorithms/aa.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/aa.py Thu Sep 10 10:30:50 2009 -0400 @@ -16,19 +16,19 @@ # ACQUIRE/MAKE INPUT if not input: input = T.matrix('input') - self.input = theano.External(input) + self.input = input # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) + self.lr = T.scalar() # PARAMETERS - self.w1 = theano.Member(T.matrix()) + self.w1 = T.matrix() if not tie_weights: - self.w2 = theano.Member(T.matrix()) + self.w2 = T.matrix() else: self.w2 = self.w1.T - self.b1 = theano.Member(T.vector()) - self.b2 = theano.Member(T.vector()) + self.b1 = T.vector() + self.b2 = T.vector() # HIDDEN LAYER self.hidden_activation = T.dot(input, self.w1) + self.b1 @@ -97,7 +97,7 @@ return T.sum(self.reconstruction_costs) def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) + self.l2_coef = T.scalar() if self.tie_weights: return self.l2_coef * T.sum(self.w1 * self.w1) else: diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/daa.py --- a/pylearn/algorithms/daa.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/daa.py Thu Sep 10 10:30:50 2009 -0400 @@ -49,19 +49,19 @@ # ACQUIRE/MAKE INPUT if not input: input = T.matrix('input') - self.input = theano.External(input) + self.input = input # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) + self.lr = T.scalar() # PARAMETERS - self.w1 = theano.Member(T.matrix()) + self.w1 = T.matrix() if not tie_weights: - self.w2 = theano.Member(T.matrix()) + self.w2 = T.matrix() else: self.w2 = self.w1.T - self.b1 = theano.Member(T.vector()) - self.b2 = theano.Member(T.vector()) + self.b1 = T.vector() + self.b2 = T.vector() # REGULARIZATION COST @@ -162,7 +162,7 @@ """ def build_corrupted_input(self): - self.noise_level = theano.Member(T.scalar()) + self.noise_level = T.scalar() return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input def hid_activation_function(self, activation): @@ -175,7 +175,7 @@ return self.reconstruction_cost_function(self.input, output) def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) + self.l2_coef = T.scalar() if self.tie_weights: return self.l2_coef * T.sum(self.w1 * self.w1) else: diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/logistic_regression.py --- a/pylearn/algorithms/logistic_regression.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/logistic_regression.py Thu Sep 10 10:30:50 2009 -0400 @@ -33,18 +33,57 @@ self.n_in = n_in self.n_out = n_out - self.input = input if input is not None else T.matrix() - self.target = target if target is not None else T.lvector() + if input is not None: + self.input = input + else: + self.input = T.matrix() - self.w = w if w is not None else (T.dmatrix()) - self.b = b if b is not None else (T.dvector()) + if target is not None: + self.target = target + else: + self.target = T.lvector() + + #backport + #self.input = input if input is not None else T.matrix() + #self.target = target if target is not None else T.lvector() + + if w is not None: + self.w = w + else: + self.w = (T.dmatrix()) + if b is not None: + self.b = b + else: + self.b = (T.dvector()) + + #backport + #self.w = w if w is not None else 
(T.dmatrix()) + #self.b = b if b is not None else (T.dvector()) + + self.params = [] + for p in [self.w, self.b]: + if p.owner is None: + self.params += [p] + + #backport #the params of the model are the ones we fit to the data - self.params = [p for p in [self.w, self.b] if p.owner is None] + #self.params = [p for p in [self.w, self.b] if p.owner is None] + if l2 is not None: + self.l2 = l2 + else: + self.l2 = (T.dscalar()) + + if l1 is not None: + self.l1 = l1 + else: + self.l1 = (T.dscalar()) + + #backport #the hyper-parameters of the model are not fit to the data - self.l2 = l2 if l2 is not None else (T.dscalar()) - self.l1 = l1 if l1 is not None else (T.dscalar()) + #self.l2 = l2 if l2 is not None else (T.dscalar()) + #self.l1 = l1 if l1 is not None else (T.dscalar()) #here we actually build the model self.linear_output = T.dot(self.input, self.w) + self.b @@ -163,14 +202,46 @@ def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False): super(LogReg2, self).__init__() #boilerplate - self.input = (input) if input is not None else T.matrix('input') - self.targ = (targ) if targ is not None else T.lcol() + if input is not None: + self.input = (input) + else: + self.input = T.matrix('input') + + if targ is not None: + self.targ = (targ) + else: + self.targ = T.lcol() + + #self.input = (input) if input is not None else T.matrix('input') + #self.targ = (targ) if targ is not None else T.lcol() + + if w is not None: + self.w = (w) + else: + self.w = (T.dmatrix()) - self.w = (w) if w is not None else (T.dmatrix()) - self.b = (b) if b is not None else (T.dvector()) - self.lr = (lr) if lr is not None else (T.dscalar()) + if b is not None: + self.b = (b) + else: + self.b = (T.dvector()) + + if lr is not None: + self.lr = (lr) + else: + self.lr = (T.scalar()) - self.params = [p for p in [self.w, self.b] if p.owner is None] + #backport + #self.w = (w) if w is not None else (T.dmatrix()) + #self.b = (b) if b is not None else (T.dvector()) + #self.lr = (lr) if lr is not None else (T.dscalar()) + + self.params = [] + for p in [self.w, self.b]: + if p.owner is None: + self.params += [p] + + #backport + #self.params = [p for p in [self.w, self.b] if p.owner is None] output = nnet.sigmoid(T.dot(self.x, self.w) + self.b) xent = -self.targ * T.log(output) - (1.0 - self.targ) * T.log(1.0 - output) @@ -251,11 +322,23 @@ def __init__(self, n_in=None, n_out=None, w=None, b=None): super(LogRegNew, self).__init__() #boilerplate + if w is not None: + self.w = w + else: + self.w = (T.dmatrix()) + + if b is not None: + self.b = b + else: + self.b = (T.dvector()) + + self.n_in = n_in self.n_out = n_out - self.w = w if w is not None else (T.dmatrix()) - self.b = b if b is not None else (T.dvector()) + #backport + #self.w = w if w is not None else (T.dmatrix()) + #self.b = b if b is not None else (T.dvector()) def _instance_initialize(self, obj): obj.w = N.zeros((self.n_in, self.n_out)) diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/rbm.py --- a/pylearn/algorithms/rbm.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/rbm.py Thu Sep 10 10:30:50 2009 -0400 @@ -29,9 +29,9 @@ # symbolic theano stuff # what about multidimensional inputs/outputs ? do they have to be # flattened or should we used tensors instead ? 
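# The rewrite below follows the same pattern as aa.py and daa.py above: the old
# theano Module framework wrapped symbolic variables in module.Member(...) /
# theano.External(...), while the updated code builds the bare tensor variables
# directly. A minimal before/after sketch (assuming `from theano import tensor as T`):
#     old:  self.w  = module.Member(T.dmatrix())
#     new:  self.w  = T.dmatrix()
#     old:  self.lr = theano.Member(T.scalar())
#     new:  self.lr = T.scalar()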
- self.w = w if w is not None else module.Member(T.dmatrix()) - self.visb = visb if visb is not None else module.Member(T.dvector()) - self.hidb = hidb if hidb is not None else module.Member(T.dvector()) + self.w = w if w is not None else T.dmatrix() + self.visb = visb if visb is not None else T.dvector() + self.hidb = hidb if hidb is not None else T.dvector() self.seed = seed; # 1-step Markov chain diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/regressor.py --- a/pylearn/algorithms/regressor.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/regressor.py Thu Sep 10 10:30:50 2009 -0400 @@ -13,15 +13,25 @@ self.regularize = regularize # ACQUIRE/MAKE INPUT AND TARGET - self.input = theano.External(input) if input else T.matrix('input') - self.target = theano.External(target) if target else T.matrix('target') + if input: + self.input = input + else: + self.target = target + + if target: + self.target = target + else: + self.target = T.dmatrix('target') + #backport + #self.input = input if input else T.matrix('input') + #self.target = target if target else T.matrix('target') # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) + self.lr = T.scalar() # PARAMETERS - self.w = theano.Member(T.matrix()) - self.b = theano.Member(T.vector()) + self.w = T.matrix() + self.b = T.vector() # OUTPUT self.output_activation = T.dot(self.input, self.w) + self.b @@ -96,7 +106,7 @@ return T.mean(self.regression_costs) def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) + self.l2_coef = T.scalar() return self.l2_coef * T.sum(self.w * self.w) def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init): diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/rnn.py --- a/pylearn/algorithms/rnn.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/rnn.py Thu Sep 10 10:30:50 2009 -0400 @@ -1,6 +1,6 @@ #!/usr/bin/env python import numpy as N -from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile +from theano import Op, Apply, tensor as T, Module, Method, Mode, compile from theano.gof import OpSub, TopoOptimizer from minimizer import make_minimizer # minimizer @@ -121,15 +121,15 @@ self.n_out = n_out #affine transformatoin x -> latent space - self.v, self.b = Member(T.dmatrix()), Member(T.dvector()) + self.v, self.b = T.dmatrix(), T.dvector() input_transform = affine(self.v, self.b) #recurrent weight matrix in latent space - self.z0 = Member(T.dvector()) - self.w = Member(T.dmatrix()) + self.z0 = T.dvector() + self.w = T.dmatrix() #affine transformation latent -> output space - self.u, self.c = Member(T.dmatrix()), Member(T.dvector()) + self.u, self.c = T.dmatrix(), T.dvector() output_transform = affine(self.u, self.c) self.params = [self.v, self.b, self.w, self.u, self.c] diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/sandbox/DAA_inputs_groups.py --- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py Thu Sep 10 10:30:50 2009 -0400 @@ -6,20 +6,77 @@ from theano.compile import module from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \ - scanmaskenc,scanmaskdec, FillMissing, mask_gradient + scanmaskenc,scanmaskdec, FillMissing, mask_gradient, blockgrad from pylearn.algorithms.logistic_regression import LogRegN +import pylearn.algorithms.cost -# used to initialize containers -class ScratchPad: +import time + +from pylearn.io import filetensor +import os + +# saving 
loading utils-------------------------------------------- +def save_mat(fname, mat, save_dir=''): + assert isinstance(mat, numpy.ndarray) + print 'save ndarray to file: ', save_dir + fname + file_handle = open(os.path.join(save_dir, fname), 'w') + filetensor.write(file_handle, mat) + writebool = False + while not writebool: + try: + file_handle.close() + writebool = True + except: + print 'save model error' + time.sleep((numpy.random.randint(10)+2)*10) + +def load_mat(fname, save_dir=''): + print 'loading ndarray from file: ', save_dir + fname + file_handle = open(os.path.join(save_dir,fname), 'r') + rval = filetensor.read(file_handle) + file_handle.close() + return rval + +# Weight initialisation utils-------------------------------------- + +# time consuming but just a test (not conclusive) +def orthogonalinit(W,axis=1): + nb = W.shape[axis] + bn = W.shape[0] if axis is 1 else W.shape[1] + if axis == 0: + W=W.T + Worto = copy.copy(W) + offset=0 + tmp=[] + for i in range(nb): + if i==bn: + offset=offset+bn + if i-offset != 0: + for j in xrange(offset,i): + orthoproj = (Worto[:,i]*Worto[:,j]).sum()*Worto[:,j]/(Worto[:,j]*Worto[:,j]).sum() + orthoproj.shape=(bn,1) + Worto[:,i:i+1] = Worto[:,i:i+1] - orthoproj + Worto[:,i:i+1] = Worto[:,i:i+1] / \ + numpy.sqrt((Worto[:,i:i+1]*Worto[:,i:i+1]).sum(0)) * numpy.sqrt((W[:,i:i+1]*W[:,i:i+1]).sum(0)) + return Worto if axis == 1 else Worto.T + +# @todo +def PCAinit(data,nhid): + pass + +#----------------------------------------------------------------- + +# Initialize containers: +class CreateContainer: pass # regularisation utils:------------------------------------------- def lnorm(param, type='l2'): if type == 'l1': - return T.sum(T.abs(param)) + return T.sum(T.abs_(param)) if type == 'l2': - return T.sum(T.pow(param,2)) + return T.sum(param*param) raise NotImplementedError('Only l1 and l2 regularization are currently implemented') def get_reg_cost(params, type): @@ -32,52 +89,83 @@ def sigmoid_act(x): return theano.tensor.nnet.sigmoid(x) +#tanh is scaled by 2 to have the same gradient than sigmoid [sigmoid(x)=(tanh(x/2.0)+1)/2.0] def tanh_act(x): + return theano.tensor.tanh(x/2.0) + +#divide per 2 is a bad idea with many layers... 
we lose the std of U*x +def tanh2_act(x): return theano.tensor.tanh(x) +def softsign_act(x): + return x/(1.0 + T.abs_(x)) + # costs utils:--------------------------------------------------- - # in order to fix numerical instability of the cost and gradient calculation for the cross entropy we calculate it # with the following functions direclty from the activation: +# XS is used to get back the KL divergence, important for doing global updates def sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis): - XE =-target * T.log(1 + T.exp(-output_act)) + (1 - target) * (- T.log(1 + T.exp(output_act))) - return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis) + XE = target * (- T.log(1 + T.exp(-output_act))) + (1 - target) * (- T.log(1 + T.exp(output_act))) + XS = T.xlogx.xlogx(target) + T.xlogx.xlogx(1-target) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) def tanh_cross_entropy(target, output_act, mean_axis, sum_axis): - XE =-(target+1)/2.0 * T.log(1 + T.exp(-2 * output_act)) + \ - (1 - (target+1)/2.0) * (- T.log(1 + T.exp(2 * output_act))) - return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis) + XE = (target+1)/2.0 * (- T.log(1 + T.exp(- output_act))) + \ + (1 - (target+1)/2.0) * (- T.log(1 + T.exp(output_act))) + XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) + +def tanh2_cross_entropy(target, output_act, mean_axis, sum_axis): + XE = (target+1)/2.0 * (- T.log(1 + T.exp(- 2*output_act))) + \ + (1 - (target+1)/2.0) * (- T.log(1 + T.exp( 2*output_act))) + XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) + +def softsign_cross_entropy(target, output_act, mean_axis, sum_axis): + newact = ((output_act/(1.0 + T.abs_(output_act)))+1)/2.0 + XE = (target+1)/2.0 * T.log(newact) + (1 - (target+1)/2.0) * T.log(1 - newact) + XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1): if act == 'sigmoid_act': return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis) if act == 'tanh_act': return tanh_cross_entropy(target, output_act, mean_axis, sum_axis) + if act == 'softsign_act': + return softsign_cross_entropy(target, output_act, mean_axis, sum_axis) + if act == 'tanh2_act': + return tanh2_cross_entropy(target, output_act, mean_axis, sum_axis) assert False -def quadratic(target, output, act, axis = 1): - return pylearn.algorithms.cost.quadratic(target, output, axis) - - +def quadratic(target, output, act, mean_axis = 0): + return T.sum(pylearn.algorithms.cost.quadratic(target, output, mean_axis)) # DAAig module---------------------------------------------------------------- class DAAig(module.Module): - """De-noising Auto-encoder + """De-noising Auto-encoder with inputs groups and missing values """ def __init__(self, input = None, auxinput = None, in_size=None, auxin_size= None, n_hid=1, - regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', - reconstruction_cost_function='cross_entropy', interface = True, - ignore_missing=None, reconstruct_missing=False, - corruption_pattern=None, - **init): + regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act', + rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy', + interface = True, ignore_missing=None, reconstruct_missing=False, + corruption_pattern=None, blockgrad = False, 
**init): """ + :param input: WRITEME + :param auxinput: WRITEME + :param in_size: WRITEME + :param auxin_size: WRITEME + :param n_hid: WRITEME :param regularize: WRITEME :param tie_weights: WRITEME :param hid_fn: WRITEME - :param reconstruction_cost: Should return one cost per example (row) + :param rec_fn: WRITEME + :param reconstruction_cost_function: WRITEME + :param scale_cost: WRITEME + :param interface: WRITEME :param ignore_missing: if not None, the input will be scanned in order to detect missing values, and these values will be replaced. Also, the reconstruction cost's gradient will be computed only on non @@ -92,7 +180,7 @@ in the current implementation, auxiliary inputs cannot be used when this option is True. :param corruption_pattern: if not None, may specify a particular way to - corrupt the input with missing values. Valid choices are: + corrupt the input with missing values. Valid choices are: - 'by_pair': consider that features are given as pairs, and corrupt (or not) the whole pair instead of considering them independently. Elements in a pair are not consecutive, instead they are assumed to @@ -101,13 +189,6 @@ missing inputs will be backpropagated. Otherwise, it will not. :todo: Default noise level for all daa levels """ - print '\t\t**** DAAig.__init__ ****' - print '\t\tinput = ', input - print '\t\tauxinput = ', auxinput - print '\t\tin_size = ', in_size - print '\t\tauxin_size = ', auxin_size - print '\t\tn_hid = ', n_hid - super(DAAig, self).__init__() self.random = T.RandomStreams() @@ -117,20 +198,37 @@ self.n_hid = n_hid self.regularize = regularize self.tie_weights = tie_weights + self.tie_weights_aux = tie_weights_aux if tie_weights_aux is not None else tie_weights self.interface = interface self.ignore_missing = ignore_missing self.reconstruct_missing = reconstruct_missing self.corruption_pattern = corruption_pattern - + self.blockgrad = blockgrad - assert hid_fn in ('sigmoid_act','tanh_act') + assert hid_fn in ('sigmoid_act','tanh_act','softsign_act','tanh2_act') self.hid_fn = eval(hid_fn) - self.hid_name = hid_fn + + assert rec_fn in ('sigmoid_act','tanh_act','softsign_act','tanh2_act') + self.rec_fn = eval(rec_fn) + self.rec_name = rec_fn assert reconstruction_cost_function in ('cross_entropy','quadratic') self.reconstruction_cost_function = eval(reconstruction_cost_function) self.reconstruction_cost_function_name = reconstruction_cost_function + print '\t\t**** DAAig.__init__ ****' + print '\t\tinput = ', input + print '\t\tauxinput = ', auxinput + print '\t\tin_size = ', self.in_size + print '\t\tauxin_size = ', self.auxin_size + print '\t\tn_hid = ', self.n_hid + print '\t\tregularize = ', self.regularize + print '\t\ttie_weights = ', self.tie_weights + print '\t\ttie_weights_aux = ', self.tie_weights_aux + print '\t\thid_fn = ', hid_fn + print '\t\trec_fn = ', rec_fn + print '\t\treconstruction_cost_function = ', reconstruction_cost_function + ### DECLARE MODEL VARIABLES and default self.input = input if self.ignore_missing is not None and self.input is not None: @@ -139,10 +237,11 @@ self.input_missing_mask = no_missing[1] # Missingness pattern. 
else: self.input_missing_mask = None - self.noisy_input = None + self.auxinput = auxinput self.idx_list = T.ivector('idx_list') if self.auxinput is not None else None - self.noisy_idx_list, self.noisy_auxinput = None, None + + self.noisy_input, self.noisy_idx_list, self.noisy_auxinput = None , None, None #parameters self.benc = T.dvector('benc') @@ -153,7 +252,8 @@ if self.auxinput is not None: self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))] - self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))] + self.wauxdec =[ self.wauxenc[i].T if self.tie_weights_aux else T.dmatrix('wauxdec%s'%i) for i in\ + range(len(auxin_size))] self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))] #hyper-parameters @@ -161,8 +261,10 @@ self.lr = T.scalar('lr') self.noise_level = T.scalar('noise_level') self.noise_level_group = T.scalar('noise_level_group') + self.scale_cost_in = T.scalar('scale_cost_in') + self.scale_cost_aux = T.scalar('scale_cost_aux') - # leave the chance for subclasses to initialize + # leave the chance for subclasses to initialize (example convolutionnal to implement) if self.__class__ == DAAig: self.init_behavioural() print '\t\t**** end DAAig.__init__ ****' @@ -173,66 +275,76 @@ self.noisy_input = self.corrupt_input() if self.auxinput is not None: self.noisy_idx_list , self.noisy_auxinput = \ - scannoise(self.idx_list, self.auxinput,self.noise_level, - self.noise_level_group) + scannoise(self.idx_list, self.auxinput,self.noise_level, self.noise_level_group) - self.noise = ScratchPad() - self.clean = ScratchPad() + self.noise = CreateContainer() + self.clean = CreateContainer() self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput) self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput) self.define_regularization() # call before cost - self.define_cost(self.clean) - self.define_cost(self.noise) + self.define_cost(self.noise) # the cost is only needed for the noise (not used for the clean part) self.define_params() if self.interface: self.define_gradients() self.define_interface() - - def define_behavioural(self, container, input, idx_list, auxinput): - self.define_propup(container, input, idx_list , auxinput) - container.hidden = self.hid_fn(container.hidden_activation) - self.define_propdown(container, idx_list , auxinput) - container.rec = self.hid_fn(container.rec_activation) - if (self.ignore_missing is not None and self.input is not None and not - self.reconstruct_missing): - # Apply mask to gradient to ensure we do not backpropagate on the - # cost computed on missing inputs (that were replaced with zeros). 
- container.rec = mask_gradient(container.rec, - self.input_missing_mask) - - def define_propup(self, container, input, idx_list, auxinput): - if self.input is not None: - container.hidden_activation = self.filter_up(input, self.wenc, self.benc) - if self.auxinput is not None: - container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) - else: - if self.auxinput is not None: - container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc - - # DEPENDENCY: define_propup - def define_propdown(self, container, idx_list, auxinput): - if self.input is not None: - rec_activation1 = self.filter_down(container.hidden,self.wdec,self.bdec) - if self.auxinput is not None: - rec_activation2 = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\ - scanbiasdec(idx_list,auxinput,self.bauxdec) - - if (self.input is not None) and (self.auxinput is not None): - container.rec_activation = T.join(1,rec_activation1,rec_activation2) - else: - if self.input is not None: - container.rec_activation = rec_activation1 - else: - container.rec_activation = rec_activation2 - + def filter_up(self, vis, w, b=None): out = T.dot(vis, w) return out + b if b else out filter_down = filter_up - # TODO: fix regularization type (outside parameter ?) + def corrupt_input(self): + if self.corruption_pattern is None: + mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) + elif self.corruption_pattern == 'by_pair': + shape = T.shape(self.input) + # Do not ask me why, but just doing "/ 2" does not work (there is + # a bug in the optimizer). + shape = T.stack(shape[0], (shape[1] * 2) / 4) + mask = self.random.binomial(shape, 1, 1 - self.noise_level) + mask = T.horizontal_stack(mask, mask) + else: + raise ValueError('Unknown value for corruption_pattern: %s' % self.corruption_pattern) + return mask * self.input + + def define_behavioural(self, container, input, idx_list, auxinput): + self.define_propup(container, input, idx_list , auxinput) + container.hidden = self.hid_fn(container.hidden_activation) + + self.define_propdown(container, idx_list , auxinput) + container.rec = self.rec_fn(container.rec_activation) + if self.input is not None: + container.rec_in = self.rec_fn(container.rec_activation_in) + if (self.auxinput is not None): + container.rec_aux = self.rec_fn(container.rec_activation_aux) + + def define_propup(self, container, input, idx_list, auxinput): + container.hidden_activation = self.benc + if self.input is not None: + container.hidden_activation += self.filter_up(input, self.wenc) + if self.auxinput is not None: + container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) + + def define_propdown(self, container, idx_list, auxinput): + if self.input is not None: + container.rec_activation_in = self.filter_down(container.hidden,self.wdec,self.bdec) + if self.auxinput is not None: + container.rec_activation_aux = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\ + scanbiasdec(idx_list,auxinput,self.bauxdec) + + if (self.ignore_missing is not None and self.input is not None and not self.reconstruct_missing): + # Apply mask to gradient to ensure we do not backpropagate on the + # cost computed on missing inputs (that have been imputed). 
+ container.rec_activation_in = mask_gradient(container.rec_activation_in, self.input_missing_mask) + + if (self.input is not None) and (self.auxinput is not None): + container.rec_activation = T.join(1,container.rec_activation_in,container.rec_activation_aux) + else: + container.rec_activation = container.rec_activation_in \ + if self.input is not None else container.rec_activation_aux + def define_regularization(self): self.reg_coef = T.scalar('reg_coef') if self.auxinput is not None: @@ -242,6 +354,7 @@ self.Maskup = [self.Maskup] if type(self.Maskdown) is not list: self.Maskdown = [self.Maskdown] + listweights = [] listweightsenc = [] if self.auxinput is not None: @@ -250,111 +363,97 @@ if self.input is not None: listweights += [self.wenc,self.wdec] listweightsenc += [self.wenc] - self.regularization = self.reg_coef * get_reg_cost(listweights,'l2') - self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2') + + self.regularization = self.reg_coef * get_reg_cost(listweights,'l1') + self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l1') - - # DEPENDENCY: define_behavioural, define_regularization def define_cost(self, container): - if self.reconstruction_cost_function_name == 'cross_entropy': - container.reconstruction_cost = self.reconstruction_costs(container.rec_activation) + tmpbool = (self.reconstruction_cost_function_name == 'cross_entropy') + if (self.input is not None): + container.reconstruction_cost_in = \ + self.reconstruction_cost_function(blockgrad(self.input) if self.blockgrad else self.input,\ + container.rec_activation_in if tmpbool else container.rec_in, self.rec_name) + if (self.auxinput is not None): + container.reconstruction_cost_aux = \ + self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), container.rec_activation_aux \ + if tmpbool else container.rec_aux, self.rec_name) + + # TOTAL COST + if (self.input is not None) and (self.auxinput is not None): + container.reconstruction_cost = self.scale_cost_in * \ + container.reconstruction_cost_in + self.scale_cost_aux*\ + container.reconstruction_cost_aux else: - container.reconstruction_cost = self.reconstruction_costs(container.rec) - # TOTAL COST + if self.input is not None: + container.reconstruction_cost = container.reconstruction_cost_in + if (self.auxinput is not None): + container.reconstruction_cost = container.reconstruction_cost_aux + if self.regularize: #if stacked don't merge regularization and cost here but in the stackeddaaig module - container.cost = container.cost + self.regularization + container.cost = container.reconstruction_cost + self.regularization else: container.cost = container.reconstruction_cost - # DEPENDENCY: define_cost def define_params(self): if not hasattr(self,'params'): self.params = [] + self.params += [self.benc] self.paramsenc = copy.copy(self.params) + if self.input is not None: self.params += [self.wenc] + [self.bdec] self.paramsenc += [self.wenc] if self.auxinput is not None: self.params += self.wauxenc + self.bauxdec self.paramsenc += self.wauxenc + if not(self.tie_weights): if self.input is not None: self.params += [self.wdec] + if not(self.tie_weights_aux): if self.auxinput is not None: self.params += self.wauxdec - # DEPENDENCY: define_cost, define_gradients def define_gradients(self): self.gradients = T.grad(self.noise.cost, self.params) - self.updates = dict((p, p - self.lr * g) for p, g in \ - zip(self.params, self.gradients)) + self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gradients)) 
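# define_gradients above is a plain SGD step: take the gradient of the noisy
# reconstruction cost w.r.t. every parameter and map each parameter to its
# updated value. A minimal standalone sketch of the same pattern (illustrative
# names, outside the Module framework):
#     import theano
#     from theano import tensor as T
#     grads   = T.grad(cost, params)                        # one gradient per parameter
#     updates = dict((p, p - lr * g) for p, g in zip(params, grads))
#     step    = theano.function(inputs, cost, updates=updates)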
- - # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients def define_interface(self): # declare function to interface with module (if not stacked) - if self.input is None: - listin = [self.idx_list, self.auxinput] + listin = [] + listout = [] + if self.input is not None: + listin += [self.input] + listout += [self.noisy_input] if self.auxinput is None: - listin = [self.input] - if (self.input is not None) and (self.auxinput is not None): - listin =[self.input,self.idx_list, self.auxinput] + listin += [self.idx_list, self.auxinput] + listout += [self.noisy_auxinput] + self.update = theano.Method(listin, self.noise.cost, self.updates) self.compute_cost = theano.Method(listin, self.noise.cost) - if self.input is not None: - self.noisify = theano.Method(listin, self.noisy_input) - if self.auxinput is not None: - self.auxnoisify = theano.Method(listin, self.noisy_auxinput) - self.reconstruction = theano.Method(listin, self.clean.rec) + self.noisify = theano.Method(listin, listout) + self.recactivation = theano.Method(listin, self.noise.rec_activation) + self.reconstruction = theano.Method(listin, self.noise.rec) + self.activation = theano.Method(listin, self.clean.hidden_activation) self.representation = theano.Method(listin, self.clean.hidden) - self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec]) - def corrupt_input(self): - if self.corruption_pattern is None: - mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) - elif self.corruption_pattern == 'by_pair': - shape = T.shape(self.input) - scale = numpy.ones(2) - scale[1] = 2 - shape = shape / scale - mask = self.random.binomial(shape, 1, 1 - self.noise_level) - mask = T.hstack((mask, mask)) - else: - raise ValueError('Unknown value for corruption_pattern: %s' - % self.corruption_pattern) - return mask * self.input - - def reconstruction_costs(self, rec): - if (self.input is not None) and (self.auxinput is not None): - return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)),\ - rec, self.hid_name) - if self.input is not None: - return self.reconstruction_cost_function(self.input, rec, self.hid_name) - if self.auxinput is not None: - return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec, self.hid_name) - # All cases should be covered above. If not, something is wrong! - assert False - - def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, - seed=1, alloc=True, **init): + def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, scale_cost_in = 1, + scale_cost_aux = 1 , seed=1, orthoinit = False, tieinit = False, alloc=True, **init): super(DAAig, self)._instance_initialize(obj, **init) obj.reg_coef = reg_coef obj.noise_level = noise_level obj.noise_level_group = noise_level_group - if self. 
interface: - obj.lr = lr # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module) - else: - obj.lr = None + obj.scale_cost_in = scale_cost_in + obj.scale_cost_aux = scale_cost_aux + obj.lr = lr if self.interface else None + # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module) obj.random.initialize() - if seed is not None: - obj.random.seed(seed) + obj.random.seed(seed) self.R = numpy.random.RandomState(seed) - obj.__hide__ = ['params'] - if self.input is not None: self.inf = 1/numpy.sqrt(self.in_size) if self.auxinput is not None: @@ -363,29 +462,36 @@ self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size) self.hif = 1/numpy.sqrt(self.n_hid) - if alloc: if self.input is not None: wencshp = (self.in_size, self.n_hid) wdecshp = tuple(reversed(wencshp)) + obj.bdec = numpy.zeros(self.in_size) + obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) + if not(self.tie_weights): + obj.wdec = copy.copy(obj.wenc.T) if tieinit else \ + self.R.uniform(size=wdecshp,low=-self.hif,high=self.hif) + if orthoinit: + obj.wenc = orthogonalinit(obj.wenc) + if not(self.tie_weights): + obj.wdec = orthogonalinit(obj.wdec,0) print 'wencshp = ', wencshp print 'wdecshp = ', wdecshp - - obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) - if not(self.tie_weights): - obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif) - obj.bdec = numpy.zeros(self.in_size) if self.auxinput is not None: wauxencshp = [(i, self.n_hid) for i in self.auxin_size] wauxdecshp = [tuple(reversed(i)) for i in wauxencshp] + obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size] + obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] + if not(self.tie_weights_aux): + obj.wauxdec = [copy.copy(obj.wauxenc[i].T) for i in range(len(wauxdecshp))] if tieinit else\ + [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] + if orthoinit: + obj.wauxenc = [orthogonalinit(w) for w in obj.wauxenc] + if not(self.tie_weights_aux): + obj.wauxdec = [orthogonalinit(w,0) for w in obj.wauxdec] print 'wauxencshp = ', wauxencshp print 'wauxdecshp = ', wauxdecshp - - obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] - if not(self.tie_weights): - obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] - obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size] print 'self.inf = ', self.inf print 'self.hif = ', self.hif @@ -398,20 +504,21 @@ class StackedDAAig(module.Module): def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None], in_size = None, auxin_size = [None], n_hid = [1], - regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', - reconstruction_cost_function='cross_entropy', + regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act', + rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy', n_out = 2, target = None, debugmethod = False, totalupdatebool=False, ignore_missing=None, reconstruct_missing=False, - corruption_pattern=None, + corruption_pattern=None, blockgrad = False, act_reg = 'sigmoid_act', **init): super(StackedDAAig, self).__init__() - print '\t**** StackedDAAig.__init__ ****' - print '\tinput = ', input - print '\tauxinput = ', auxinput - print '\tin_size = ', in_size - print '\tauxin_size = ', auxin_size - print '\tn_hid = ', n_hid + + # utils + def listify(param,depth): + if type(param) is list: + return param if 
len(param)==depth else [param[0]]*depth + else: + return [param]*depth # save parameters self.depth = depth @@ -419,11 +526,13 @@ self.auxinput = auxinput self.in_size = in_size auxin_size = auxin_size - self.n_hid = n_hid + self.n_hid = listify(n_hid,depth) self.regularize = regularize - self.tie_weights = tie_weights - self.hid_fn = hid_fn - self.reconstruction_cost_function = reconstruction_cost_function + tie_weights = listify(tie_weights,depth) + tie_weights_aux = listify(tie_weights_aux,depth) + hid_fn = listify(hid_fn,depth) + rec_fn = listify(rec_fn,depth) + reconstruction_cost_function = listify(reconstruction_cost_function,depth) self.n_out = n_out self.target = target if target is not None else T.lvector('target') self.debugmethod = debugmethod @@ -431,6 +540,27 @@ self.ignore_missing = ignore_missing self.reconstruct_missing = reconstruct_missing self.corruption_pattern = corruption_pattern + self.blockgrad = blockgrad + + assert act_reg in ('sigmoid_act','tanh_act','softsign_act','tanh2_act') + self.act_reg = eval(act_reg) + + print '\t**** StackedDAAig.__init__ ****' + print '\tdepth = ', self.depth + print '\tinput = ', self.input + print '\tauxinput = ', self.auxinput + print '\tin_size = ', self.in_size + print '\tauxin_size = ', auxin_size + print '\tn_hid = ', self.n_hid + print '\tregularize = ', self.regularize + print '\ttie_weights = ', tie_weights + print '\ttie_weights_aux = ', tie_weights_aux + print '\thid_fn = ', hid_fn + print '\trec_fn = ', rec_fn + print '\tact_reg = ', act_reg + print '\treconstruction_cost_function = ', reconstruction_cost_function + print '\tblockgrad = ', self.blockgrad + print '\tn_out = ', self.n_out # init for model construction inputprec = input @@ -446,15 +576,16 @@ self.globalupdate = [None] * (self.depth+1)#update wrt the layer cost backproped untill the input layer if self.totalupdatebool: self.totalupdate = [None] * (self.depth+1) #update wrt all the layers cost backproped untill the input layer - # - self.classify = None - #others methods + # facultative methods if self.debugmethod: + self.activation = [None] * (self.depth+1) self.representation = [None] * (self.depth) + self.recactivation = [None] * (self.depth) self.reconstruction = [None] * (self.depth) - self.validate = [None] * (self.depth) self.noisyinputs = [None] * (self.depth) + self.compute_localgradients_in = [None] * (self.depth) + self.compute_localgradients_aux = [None] * (self.depth) self.compute_localcost = [None] * (self.depth+1) self.compute_localgradients = [None] * (self.depth+1) self.compute_globalcost = [None] * (self.depth+1) @@ -462,15 +593,16 @@ if self.totalupdatebool: self.compute_totalcost = [None] * (self.depth+1) self.compute_totalgradients = [None] * (self.depth+1) - # # some theano Variables we want to keep track on - if self.regularize: - self.regularizationenccost = [None] * (self.depth) + self.localgradients_in = [None] * (self.depth) + self.localgradients_aux = [None] * (self.depth) self.localcost = [None] * (self.depth+1) self.localgradients = [None] * (self.depth+1) self.globalcost = [None] * (self.depth+1) self.globalgradients = [None] * (self.depth+1) + if self.regularize: + self.regularizationenccost = [None] * (self.depth) if self.totalupdatebool: self.totalcost = [None] * (self.depth+1) self.totalgradients = [None] * (self.depth+1) @@ -479,7 +611,6 @@ paramstot = [] paramsenc = [] self.inputs = [None] * (self.depth+1) - if self.input is not None: self.inputs[0] = [self.input] else: @@ -488,20 +619,21 @@ offset = 0 for i in 
range(self.depth): + dict_params = dict(input = inputprec, in_size = in_sizeprec, auxin_size = auxin_size[i], + n_hid = self.n_hid[i], regularize = False, tie_weights = tie_weights[i], + tie_weights_aux = tie_weights_aux[i], hid_fn = hid_fn[i], + rec_fn = rec_fn[i], reconstruction_cost_function = reconstruction_cost_function[i], + interface = False, ignore_missing = self.ignore_missing, + reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern, + blockgrad=self.blockgrad) if auxin_size[i] is None: offset +=1 - param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\ - False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] + dict_params.update({'auxinput' : None}) else: - param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\ - False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] - - dict_params = dict(ignore_missing = self.ignore_missing, - reconstruct_missing = self.reconstruct_missing, - corruption_pattern = self.corruption_pattern) + dict_params.update({'auxinput' : self.auxinput[i-offset]}) print '\tLayer init= ', i+1 - self.daaig[i] = DAAig(*param, **dict_params) + self.daaig[i] = DAAig(**dict_params) # method input, outputs and parameters update if i: @@ -521,31 +653,27 @@ self.localcost[i] = self.daaig[i].noise.cost self.globalcost[i] = self.daaig[i].noise.cost if self.totalupdatebool: - if i: - self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost - else: - self.totalcost[i] = self.daaig[i].noise.cost + self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost if i else self.daaig[i].noise.cost if self.regularize: - if i: - self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc - else: - self.regularizationenccost[i] = 0 - + self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc if i else 0 self.localcost[i] += self.daaig[i].regularization - self.globalcost[i] += self.regularizationenccost[i] + self.globalcost[i] += self.regularizationenccost[i] + self.daaig[i].regularization if self.totalupdatebool: self.totalcost[i] += self.daaig[i].regularization + self.localgradients_in[i] = T.grad(self.daaig[i].noise.reconstruction_cost_in, self.daaig[i].params) \ + if inputprec is not None else T.constant(0) + self.localgradients_aux[i] = T.grad(self.daaig[i].noise.reconstruction_cost_aux,self.daaig[i].params) \ + if auxin_size[i] is not None else T.constant(0) self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params) - self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc) + self.globalgradients[i] = T.grad(self.globalcost[i], paramsenc + self.daaig[i].params) if self.totalupdatebool: self.totalgradients[i] = T.grad(self.totalcost[i], paramstot) #create the updates dictionnaries - local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) - global_grads = dict((j, j - self.unsup_lr * g)\ - for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i])) + local_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) + global_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(paramsenc+self.daaig[i].params,self.globalgradients[i])) if self.totalupdatebool: total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i])) @@ -554,50 +682,48 @@ self.globalupdate[i] = 
theano.Method(self.inputs[i],self.globalcost[i],global_grads) if self.totalupdatebool: self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads) - # + if self.debugmethod: + self.activation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden_activation) self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden) - self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec) - self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec]) + self.recactivation[i] = theano.Method(self.inputs[i],self.daaig[i].noise.rec_activation) + self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].noise.rec) self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout) self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i]) self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i]) + self.compute_localgradients_in[i] = theano.Method(self.inputs[i],self.localgradients_in[i]) + self.compute_localgradients_aux[i] = theano.Method(self.inputs[i],self.localgradients_aux[i]) self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i]) self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i]) if self.totalupdatebool: self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i]) self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i]) - # paramsenc += self.daaig[i].paramsenc inputprec = self.daaig[i].clean.hidden in_sizeprec = self.n_hid[i] - # supervised layer + # supervised layer------------------------------------------------------------------------ print '\tLayer supervised init' self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target] - self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target) + self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,self.act_reg(self.daaig[-2].clean.hidden_activation),self.target) paramstot += self.daaig[-1].params - if self.regularize: - self.localcost[-1] = self.daaig[-1].regularized_cost - self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] - else: - self.localcost[-1] = self.daaig[-1].unregularized_cost - self.globalcost[-1] = self.daaig[-1].unregularized_cost + self.localcost[-1] = self.daaig[-1].regularized_cost \ + if self.regularize else self.daaig[-1].unregularized_cost + self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] \ + if self.regularize else self.daaig[-1].unregularized_cost if self.totalupdatebool: self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]] self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params) - self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc) + self.globalgradients[-1] = T.grad(self.globalcost[-1], paramsenc + self.daaig[-1].params) if self.totalupdatebool: - self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\ - T.grad(self.globalcost[-1], paramstot) ] + self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) , T.grad(self.globalcost[-1],paramstot) ] - local_grads = dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) - global_grads = dict((j, j - self.sup_lr * g)\ - for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1])) + local_grads = dict((j,j-self.sup_lr*g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) + global_grads = 
dict((j,j-self.sup_lr*g) for j,g in zip(paramsenc + self.daaig[-1].params,self.globalgradients[-1])) if self.totalupdatebool: total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\ for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1])) @@ -606,10 +732,21 @@ self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads) if self.totalupdatebool: self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads) + # total update of each local cost [no global cost backpropagated] + totallocal_grads={} + for k in range(self.depth): + totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in \ + zip(self.daaig[k].params,self.localgradients[k]))) + totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in + zip(self.daaig[-1].params,self.localgradients[-1]))) + self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads) + + # interface for the user self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone) self.NLL = theano.Method(self.inputs[-1],self.daaig[-1]._xent) if self.debugmethod: + self.activation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].linear_output) self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1]) self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1]) self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1]) @@ -619,8 +756,9 @@ self.compute_totalgradients[-1] =\ theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1]) - def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0, - noise_level = 0 , noise_level_group = 0, seed = 1, alloc = True,**init): + def _instance_initialize(self,inst,unsup_lr = 0.01, sup_lr = 0.01, reg_coef = 0, scale_cost_in = 1, scale_cost_aux = 1, + noise_level = 0 , noise_level_group = 0, seed = 1, orthoinit = False, tieinit=False, + alloc = True,**init): super(StackedDAAig, self)._instance_initialize(inst, **init) inst.unsup_lr = unsup_lr @@ -628,9 +766,189 @@ for i in range(self.depth): print '\tLayer = ', i+1 - inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\ - noise_level_group = noise_level_group, seed = seed, alloc = alloc) + inst.daaig[i].initialize(reg_coef = reg_coef[i] if type(reg_coef) is list else reg_coef, \ + noise_level = noise_level[i] if type(noise_level) is list else noise_level, \ + scale_cost_in = scale_cost_in[i] if type(scale_cost_in) is list else scale_cost_in, \ + scale_cost_aux = scale_cost_aux[i] if type(scale_cost_aux) is list else scale_cost_aux, \ + noise_level_group = noise_level_group[i] if type(noise_level_group) is list else noise_level_group, \ + seed = seed + i, orthoinit = orthoinit, tieinit = tieinit, alloc = alloc) + print '\tLayer supervised' inst.daaig[-1].initialize() - inst.daaig[-1].l1 = 0 - inst.daaig[-1].l2 = reg_coef #only l2 norm for regularisation to be consitent with the unsup regularisation + + if alloc: + inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth) + # init the logreg weights + inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\ + low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)) + if orthoinit: + inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].w) + inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef + inst.daaig[-1].l2 = 0 + #only l1 norm for regularisation to be consitent with the unsup 
regularisation + + def _instance_save(self,inst,save_dir=''): + + for i in range(self.depth): + save_mat('benc%s.ft'%(i) ,inst.daaig[i].benc, save_dir) + + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxenc)): + save_mat('wauxenc%s_%s.ft'%(i,j) ,inst.daaig[i].wauxenc[j], save_dir) + save_mat('bauxdec%s_%s.ft'%(i,j) ,inst.daaig[i].bauxdec[j], save_dir) + + if self.daaig[i].input is not None: + save_mat('wenc%s.ft'%(i) ,inst.daaig[i].wenc, save_dir) + save_mat('bdec%s.ft'%(i) ,inst.daaig[i].bdec, save_dir) + + if not self.daaig[i].tie_weights_aux: + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxdec)): + save_mat('wauxdec%s_%s.ft'%(i,j) ,inst.daaig[i].wauxdec[j], save_dir) + + if not self.daaig[i].tie_weights: + if self.daaig[i].input is not None: + save_mat('wdec%s.ft'%(i) ,inst.daaig[i].wdec, save_dir) + i=i+1 + save_mat('wenc%s.ft'%(i) ,inst.daaig[i].w, save_dir) + save_mat('benc%s.ft'%(i) ,inst.daaig[i].b, save_dir) + + def _instance_load(self,inst,save_dir='',coefenc = None, coefdec = None, Sup_layer = None): + + if coefenc is None: + coefenc = [1.]*self.depth + if coefdec is None: + coefdec = [1.]*self.depth + + for i in range(self.depth): + inst.daaig[i].benc = load_mat('benc%s.ft'%(i), save_dir)/coefenc[i] + + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxenc)): + inst.daaig[i].wauxenc[j] = load_mat('wauxenc%s_%s.ft'%(i,j),save_dir)/coefenc[i] + inst.daaig[i].bauxdec[j] = load_mat('bauxdec%s_%s.ft'%(i,j),save_dir)/coefdec[i] + + if self.daaig[i].input is not None: + inst.daaig[i].wenc = load_mat('wenc%s.ft'%(i),save_dir)/coefenc[i] + inst.daaig[i].bdec = load_mat('bdec%s.ft'%(i),save_dir)/coefdec[i] + + if not self.daaig[i].tie_weights_aux: + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxdec)): + if 'wauxdec%s_%s.ft'%(i,j) in os.listdir(save_dir): + inst.daaig[i].wauxdec[j] = load_mat('wauxdec%s_%s.ft'%(i,j),save_dir)/coefdec[i] + else: + print "WARNING: no decoding 'wauxdec%s_%s.ft' file use 'wauxenc%s_%s.ft' instead"%(i,j,i,j) + inst.daaig[i].wauxdec[j] = numpy.transpose(load_mat('wauxenc%s_%s.ft'%(i,j),save_dir)/coefdec[i]) + + if not self.daaig[i].tie_weights: + if self.daaig[i].input is not None: + if 'wdec%s.ft'%(i) in os.listdir(save_dir): + inst.daaig[i].wdec = load_mat('wdec%s.ft'%(i),save_dir)/coefdec[i] + else: + print "WARNING: no decoding 'wdec%s.ft' file use 'wenc%s.ft' instead"%(i,i) + inst.daaig[i].wdec = numpy.transpose(load_mat('wenc%s.ft'%(i),save_dir)/coefdec[i]) + i=i+1 + if Sup_layer is None: + inst.daaig[i].w = load_mat('wenc%s.ft'%(i),save_dir) + inst.daaig[i].b = load_mat('benc%s.ft'%(i),save_dir) + else: + inst.daaig[i].w = load_mat('wenc%s.ft'%(Sup_layer),save_dir) + inst.daaig[i].b = load_mat('benc%s.ft'%(Sup_layer),save_dir) + + def _instance_hidsaturation(self,inst,layer,inputs): + return numpy.mean(numpy.median(abs(inst.activation[layer](*inputs)),1)) + + def _instance_recsaturation(self,inst,layer,inputs): + return numpy.mean(numpy.median(abs(inst.recactivation[layer](*inputs)),1)) + + def _instance_error(self,inst,inputs,target): + return numpy.sum(inst.classify(*inputs) != target) / float(len(target))*100.0 + + def _instance_nll(self,inst,inputs,target): + return numpy.sum(inst.NLL(*(inputs+[target]))) / float(len(target)) + + #try-------------------------------------------------------------------- + def _instance_rescalwsaturation(self,inst,inputs): + sat = [None]*(self.depth+1) + for i in range(self.depth+1): + sat[i] = 
inst.hidsaturation(i,inputs[min(i,self.depth-1)]) + + for i in range(self.depth-1): + if sat[i+1] > max(sat[:i+1]): + inst.daaig[i+1].wenc = inst.daaig[i+1].wenc/sat[i+1]*max(sat[:i+1]) + inst.daaig[i+1].benc = inst.daaig[i+1].benc/sat[i+1]*max(sat[:i+1]) + sat[i+1] = max(sat[:i+1]) + if sat[-1]>max(sat[:-1]): + inst.daaig[-1].w = inst.daaig[-1].w/sat[-1]*max(sat[:-1]) + inst.daaig[-1].b = inst.daaig[-1].b/sat[-1]*max(sat[:-1]) + + #----------------------------------------------------------------------- + + def _instance_unsupgrad(self,inst,inputs,layer,param_name): + inst.noiseseed(0) + gradin = inst.compute_localgradients_in[layer](*inputs) + inst.noiseseed(0) + gradaux = inst.compute_localgradients_aux[layer](*inputs) + inst.noiseseed(0) + gradtot = inst.compute_localgradients[layer](*inputs) + + for j in range(len(gradtot)): + if str(self.daaig[layer].params[j]) is param_name: + tmpin = numpy.sqrt((pow(inst.daaig[layer].scale_cost_in,2)*gradin[j]*gradin[j]).sum()) \ + if type(gradin) is list else 0 + tmpaux= numpy.sqrt((pow(inst.daaig[layer].scale_cost_aux,2)*gradaux[j]*gradaux[j]).sum())\ + if type(gradaux) is list else 0 + tmptot = numpy.sqrt((gradtot[j]*gradtot[j]).sum()) if type(gradtot) is list else 0 + + if type(gradin) is list and type(gradaux) is list and (gradin[j]*gradin[j]).sum() != 0: + projauxin =(inst.daaig[layer].scale_cost_aux*gradaux[j] * \ + inst.daaig[layer].scale_cost_in*gradin[j]).sum()/ \ + (numpy.sqrt((pow(inst.daaig[layer].scale_cost_in,2)*gradin[j]*gradin[j]).sum())) + else: + projauxin = 0 + return tmpin, tmpaux, tmptot, tmpin/(tmpaux+tmpin)*100, projauxin/tmpaux*100 if tmpaux != 0 else 0 + + def _instance_noiseseed(self,inst,seed): + scannoise.R.rand.seed(seed) + for i in range(self.depth): + inst.daaig[i].random.seed(seed+i+1) + + def _instance_unsupupdate(self,inst,data,layer='all',typeup = 'local',printcost = False): + cost = [None]*self.depth + if typeup == 'totallocal': + cost[-1] = inst.totallocalupdate(*data) + else: + if typeup == 'total': + if layer == 'all': + cost[-1] = inst.totalupdate[-2](*data[-1]) + else: + cost[layer] = inst.totalupdate[layer](*data[layer]) + else: + if layer is 'all': + for i in range(self.depth): + if typeup == 'local': + cost[i] = inst.localupdate[i](*data[i]) + if typeup == 'global': + cost[i] = inst.globalupdate[i](*data[i]) + for j in range(i): + dummy = inst.localupdate[j](*data[j]) + else: + if typeup == 'local': + cost[layer] = inst.localupdate[layer](*data[layer]) + if typeup == 'global': + cost[layer] = inst.globalupdate[layer](*data[layer]) + for j in range(layer): + dummy = inst.localupdate[j](*data[j]) + if printcost: + print cost + return cost + + def _instance_supupdate(self,inst,data,typeup = 'global',printcost = False): + if typeup == 'local': + cost = inst.localupdate[-1](*data) + if typeup == 'global': + cost = inst.globalupdate[-1](*data) + if printcost: + print cost + return cost diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/sandbox/test_cost.py --- a/pylearn/algorithms/sandbox/test_cost.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/sandbox/test_cost.py Thu Sep 10 10:30:50 2009 -0400 @@ -14,13 +14,19 @@ def test_float(self): """ - This should fail because we can't use floats in logfactorial + Ensure we cannot use floats in logfactorial. 
""" x = TT.as_tensor([0.5, 2.7]) o = cost.logfactorial(x) - f = T.function([],o) -# print repr(f()) - self.failUnless(numpy.all(f() == numpy.asarray([0., 0., 1.38629436, 3.29583687, 5.54517744, 8.04718956, 10.75055682, 13.62137104, 16.63553233, 19.7750212]))) + f = T.function([], o) + try: + f() + assert False + except TypeError, e: + if str(e).find(", must be int or long") >= 0: + pass + else: + raise class T_nlpoisson(unittest.TestCase): def test(self): diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/sgd.py --- a/pylearn/algorithms/sgd.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/sgd.py Thu Sep 10 10:30:50 2009 -0400 @@ -20,7 +20,7 @@ :param updates: extra symbolic updates to make when evating either step or step_cost (these override the gradients if necessary) - :type updatess: dict Variable -> Variable + :type updates: dict Variable -> Variable :param auxout: auxiliary outputs, list containing output symbols to compute at the same time as cost (for efficiency) :param methods: Should this module define the step and step_cost methods? diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/stacker.py --- a/pylearn/algorithms/stacker.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/stacker.py Thu Sep 10 10:30:50 2009 -0400 @@ -25,7 +25,7 @@ for i, (submodule, outname) in enumerate(submodules): layer = submodule(current, regularize = regularize) layers.append(layer) - current = layer[outname] + current = getattr(layer, outname) self.layers = layers self.input = self.layers[0].input @@ -35,16 +35,14 @@ local_update = [] global_update = [] to_update = [] - all_kits = [] for layer, (submodule, outname) in zip(layers, submodules): u = layer.update u.resolve_all() to_update += u.updates.keys() - all_kits += u.kits # the input is the whole deep model's input instead of the layer's own # input (which is previous_layer[outname]) inputs = [self.input] + u.inputs[1:] - method = theano.Method(inputs, u.outputs, u.updates, u.kits) + method = theano.Method(inputs, u.outputs, u.updates) local_update.append(method) global_update.append( theano.Method(inputs, @@ -52,9 +50,8 @@ # we update the params of the previous layers too but wrt # this layer's cost dict((param, param - layer.lr * T.grad(layer.cost, param)) - for param in to_update), - list(all_kits))) - representation.append(theano.Method(self.input, layer[outname])) + for param in to_update))) + representation.append(theano.Method(self.input, getattr(layer,outname))) # @todo: Add diagnostics # self.diagnose_from_input = Method([self.input], self.layers[0].diagnose.outputs + self.layers[1].diagnose.outputs ... 
@@ -64,12 +61,23 @@ self.representation = representation self.update = self.global_update[-1] self.compute = theano.Method(self.input, self.output) + + # takes method from last layer (usually ll.classify), copies it to self., + # while converting its input to deal with the global "model" input ll = self.layers[-1] - for name, method in ll.components_map(): + for name, method in ll.__dict__['local_attr'].iteritems(): if isinstance(method, theano.Method) and not hasattr(self, name): - m = method.dup() - m.resolve_all() - m.inputs = [self.input if x is ll.input else x for x in m.inputs] + if not isinstance(method.inputs, (list,dict)): + method.inputs = [method.inputs] + inputs = [] + for x in method.inputs: + if x is ll.input: + inputs += [self.input] + else: + inputs += [x] + #backport + #inputs = [self.input if x is ll.input else x for x in method.inputs] + m = theano.Method(inputs, method.outputs, method.updates) setattr(self, name, m) def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs): diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/tests/test_daa.py --- a/pylearn/algorithms/tests/test_daa.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/tests/test_daa.py Thu Sep 10 10:30:50 2009 -0400 @@ -6,8 +6,9 @@ import time import pylearn.algorithms.logistic_regression +from theano.compile.mode import default_mode -def test_train_daa(mode = theano.Mode('c|py', 'fast_run')): +def test_train_daa(mode = default_mode): ndaa = 3 daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(models.BinRegressor, 'output')], diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/tests/test_sgd.py --- a/pylearn/algorithms/tests/test_sgd.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/tests/test_sgd.py Thu Sep 10 10:30:50 2009 -0400 @@ -14,7 +14,7 @@ c = m.step_cost(3.0) #print c[0], m.y - assert c[0] < 1.0e-5 + assert c < 1.0e-5 assert abs(m.y - (1.0 / 3)) < 1.0e-4 def test_sgd_stepsize_variable(): @@ -33,7 +33,7 @@ c = m.step_cost(3.0) # print c, m.y - assert c[0] < 1.0e-5 + assert c < 1.0e-5 assert abs(m.y - (1.0 / 3)) < 1.0e-4 @@ -63,7 +63,7 @@ c = m.step_cost(3.0) # print c, m.y - assert c[0] < 1.0e-5 + assert c < 1.0e-5 assert abs(m.y - (1.0 / 3)) < 1.0e-4 if __name__ == '__main__': diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/tests/test_stacker.py --- a/pylearn/algorithms/tests/test_stacker.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/tests/test_stacker.py Thu Sep 10 10:30:50 2009 -0400 @@ -5,12 +5,15 @@ import numpy import time +class StackBinRegressor(models_reg.BinRegressor): + def __init__(self, input = None, target = None, regularize = True): + super(StackBinRegressor, self).__init__(input, target, regularize) + self.build_extensions() def test_train(mode = theano.Mode('c|py', 'fast_run')): - - reg = models_stacker.Stacker([(models_reg.BinRegressor, 'output'), - (models_reg.BinRegressor, 'output')], - regularize = False) + reg = models_stacker.Stacker([(StackBinRegressor, 'output'), + (StackBinRegressor, 'output')], + regularize = False) #print reg.global_update[1].pretty(mode = mode.excluding('inplace')) model = reg.make([100, 200, 1], diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/MNIST.py --- a/pylearn/datasets/MNIST.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/MNIST.py Thu Sep 10 10:30:50 2009 -0400 @@ -6,9 +6,9 @@ import os import numpy -from ..io.amat import AMat -from .config import data_root # config -from .dataset import Dataset +from pylearn.io.pmat import PMat +from 
pylearn.datasets.config import data_root # config +from pylearn.datasets.dataset import Dataset def head(n=10, path=None): """Load the first MNIST examples. @@ -18,20 +18,20 @@ is the label of the i'th row of x. """ - path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path + if path is None: + path = os.path.join(data_root(), 'mnist','mnist_all.pmat') - dat = AMat(path=path, head=n) + dat = PMat(fname=path) + + rows=dat.getRows(0,n) - try: - assert dat.input.shape[0] == n - assert dat.target.shape[0] == n - except Exception , e: - raise Exception("failed to read MNIST data", (dat, e)) + return rows[:,0:-1], numpy.asarray(rows[:,-1], dtype='int64') + - return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) - -def all(path=None): - return head(n=None, path=path) +#What is the purpose of this fct? +#If still usefull, rename it as it conflict with the python an numpy nake all. +#def all(path=None): +# return head(n=None, path=path) def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): all_x, all_targ = head(ntrain+nvalid+ntest, path=path) diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/config.py --- a/pylearn/datasets/config.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/config.py Thu Sep 10 10:30:50 2009 -0400 @@ -11,7 +11,11 @@ if os.getenv(key) is None: print >> sys.stderr, "WARNING: Environment variable", key, print >> sys.stderr, "is not set. Using default of", default - return default if os.getenv(key) is None else os.getenv(key) + if os.getenv(key) is None: + return default + else: + return os.getenv(key) + #return default if os.getenv(key) is None else os.getenv(key) def data_root(): return env_get('PYLEARN_DATA_ROOT', os.getenv('HOME')+'/data', 'DBPATH') diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/norb_small.py --- a/pylearn/datasets/norb_small.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/norb_small.py Thu Sep 10 10:30:50 2009 -0400 @@ -63,11 +63,14 @@ test = {} train['dat'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat') train['cat'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat') + train['info'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-info.mat') test['dat'] = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat') test['cat'] = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat') + test['info'] = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat') path = Paths() - def __init__(self, ntrain=19440, nvalid=4860, ntest=24300, + def __init__(self, ntrain=19440, nvalid=4860, ntest=24300, + valid_variant=None, downsample_amt=1, seed=1, normalize=False, mode='stereo', dtype='int8'): @@ -78,16 +81,47 @@ self.ntrain = ntrain self.nvalid = nvalid self.ntest = ntest - self.downsample_amt = 1 + self.downsample_amt = downsample_amt self.normalize = normalize self.dtype = dtype rng = numpy.random.RandomState(seed) - self.indices = rng.permutation(self.nsamples) - self.itr = self.indices[0:ntrain] - self.ival = self.indices[ntrain:ntrain+nvalid] + if valid_variant is None: + # The validation set is just a random subset of training + self.indices = rng.permutation(self.nsamples) + self.itr = self.indices[0:ntrain] + self.ival = self.indices[ntrain:ntrain+nvalid] + elif valid_variant in (4,6,7,8,9): + # The validation set consists in an instance of each category + # In order to know which indices correspond to which instance, + # 
we need to load the 'info' files. + train_info = read(open(self.path.train['info'])) + + ordered_itrain = numpy.nonzero(train_info[:,0] != valid_variant)[0] + max_ntrain = ordered_itrain.shape[0] + ordered_ivalid = numpy.nonzero(train_info[:,0] == valid_variant)[0] + max_nvalid = ordered_ivalid.shape[0] + + if self.ntrain > max_ntrain: + print 'WARNING: ntrain is %i, but there are only %i training samples available' % (self.ntrain, max_ntrain) + self.ntrain = max_ntrain + + if self.nvalid > max_nvalid: + print 'WARNING: nvalid is %i, but there are only %i validation samples available' % (self.nvalid, max_nvalid) + self.nvalid = max_nvalid + + # Randomize + print + self.itr = ordered_itrain[rng.permutation(max_ntrain)][0:self.ntrain] + self.ival = ordered_ivalid[rng.permutation(max_nvalid)][0:self.nvalid] + self.current = None - + + def preprocess(self, x): + if not self.normalize: + return numpy.float64(x *1.0 / 255.0) + return x + def load(self, dataset='train'): if dataset == 'train' or dataset=='valid': @@ -99,7 +133,7 @@ print 'need to reload from train file' dat, cat = load_file(self.path.train, self.normalize, self.downsample_amt, self.dtype) - + x = dat[self.itr,...].reshape(self.ntrain,-1) y = cat[self.itr] self.dat1 = Dataset.Obj(x=x, y=y) # training @@ -126,7 +160,7 @@ x = dat.reshape(self.nsamples,-1) y = cat self.dat1 = Dataset.Obj(x=x, y=y) - + del dat, cat, x, y rval = self.dat1 diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/smallNorb.py --- a/pylearn/datasets/smallNorb.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/smallNorb.py Thu Sep 10 10:30:50 2009 -0400 @@ -1,7 +1,7 @@ import os import numpy -from ..io.filetensor import read -from .config import data_root +from pylearn.io.filetensor import read +from pylearn.datasets.config import data_root #Path = '/u/bergstrj/pub/data/smallnorb' #Path = '/home/fringant2/lisa/louradoj/data/smallnorb' diff -r e53c06901f8f -r bfc5914642ce pylearn/io/filetensor.py --- a/pylearn/io/filetensor.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/io/filetensor.py Thu Sep 10 10:30:50 2009 -0400 @@ -129,8 +129,19 @@ self.magic_t, self.elsize, self.ndim, self.dim, self.dim_size = _read_header(f,debug) self.f_start = f.tell() - self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim) - padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim) + if rank <= self.ndim: + self.readshape = tuple(self.dim[self.ndim-rank:]) + else: + self.readshape = tuple(self.dim) + + #self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim) + + if rank <= self.ndim: + padding = tuple() + else: + padding = (1,) * (rank - self.ndim) + + #padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim) self.returnshape = padding + self.readshape self.readsize = _prod(self.readshape) if debug: print 'READ PARAM', self.readshape, self.returnshape, self.readsize diff -r e53c06901f8f -r bfc5914642ce pylearn/sandbox/scan_inputs_groups.py --- a/pylearn/sandbox/scan_inputs_groups.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/sandbox/scan_inputs_groups.py Thu Sep 10 10:30:50 2009 -0400 @@ -71,6 +71,31 @@ if nbias != 1: raise TypeError('not vector', bias_list[i]) return bias_list + +# block grad Op------------------------------------ +class BlockGrad(Op): + """This Op block the gradient of a variable""" + def make_node(self, x): + x = T.as_tensor_variable(x) + if x.ndim == 1: + return Apply(self, [x], [T.dvector()]) + else: + return Apply(self, [x], [T.dmatrix()]) + + def 
perform(self, node , x ,(out,)): + out[0] = x[0].copy() + + def grad(self, x, (gx,)): + return [gx*0] + + def __hash__(self): + return hash(BlockGrad)^77612 + + def __str__(self): + return "BlockGrad" + +blockgrad=BlockGrad() + # Encoding scan dot product------------------------------------ class ScanDotEnc(Op): """This Op takes an index list (as tensor.ivector), a list of matrices representing @@ -104,12 +129,12 @@ raise NotImplementedError('size of index different of inputs list size',idx_list) if max(idx_list) >= (len(args)-2)+1 : raise NotImplementedError('index superior to weight list length',idx_list) - for i in range(len(args[1])): - if (args[1][i].shape)[0] != batchsize: - raise NotImplementedError('different batchsize in the inputs list',args[1][i].shape) - for i in range(len(args)-2): - if (args[2+i].shape)[1] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[2+i].shape) + for a in args[1]: + if (a.shape)[0] != batchsize: + raise NotImplementedError('different batchsize in the inputs list',a.shape) + for a in args[2:]: + if (a.shape)[1] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) for i in range(len(idx_list)): if idx_list[i]>0: @@ -171,12 +196,12 @@ raise NotImplementedError('size of index different of inputs list size',idx_list) if max(idx_list) >= (len(args)-3)+1 : raise NotImplementedError('index superior to weight list length',idx_list) - for i in range(len(args[1])): - if (args[1][i].shape)[0] != batchsize: - raise NotImplementedError('different batchsize in the inputs list',args[1][i].shape) - for i in range(len(args)-3): - if (args[2+i].shape)[1] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[2+i].shape) + for a in args[1]: + if (a.shape)[0] != batchsize: + raise NotImplementedError('different batchsize in the inputs list',a.shape) + for a in args[2:-1]: + if (a.shape)[1] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) zcalc = [False for i in range(len(args)-3)] @@ -237,9 +262,9 @@ raise NotImplementedError('index superior to weight list length',idx_list) if len(idx_list) != len(args[1]) : raise NotImplementedError('size of index different of inputs list size',idx_list) - for i in range(len(args)-3): - if (args[3+i].shape)[0] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[3+i].shape) + for a in args[3:]: + if (a.shape)[0] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) zcalc = [False for i in idx_list] z[0] = [None for i in idx_list] @@ -311,9 +336,9 @@ raise NotImplementedError('index superior to weight list length',idx_list) if len(idx_list) != len(args[1]) : raise NotImplementedError('size of index different of inputs list size',idx_list) - for i in range(len(args)-4): - if (args[3+i].shape)[0] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[3+i].shape) + for a in args[3:-1]: + if a.shape[0] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) zidx=numpy.zeros((len(idx_list)+1)) @@ -538,9 +563,9 @@ if max(idx_list) >= (len(args)-1)+1 : raise NotImplementedError('index superior to weights list length',idx_listdec) - for i in range(len(args)-1): - if args[1+i].shape[dim] != n_hid: - raise NotImplementedError('different length of hidden in the encoding weights list',args[1+i].shape) + for a in args[1:]: + if 
a.shape[dim] != n_hid: + raise NotImplementedError('different length of hidden in the encoding weights list',a.shape) for i in range(len(args[1:])): z[i][0] = numpy.asarray((idx_list == i+1).sum(),dtype='int32') @@ -607,22 +632,250 @@ out[0] = input.copy() out = out[0] mask = output_storage[1] - mask[0] = numpy.ones(input.shape) + + if mask[0] is None or mask[0].shape!=input.shape: + mask[0] = numpy.ones(input.shape) + mask = mask[0] if self.fill_with_is_array: - ignore_k = len(out.shape) - len(self.fill_with.shape) - assert ignore_k >= 0 - for (idx, v) in numpy.ndenumerate(out): - if numpy.isnan(v): - if self.fill_with_is_array: - out[idx] = self.fill_with[idx[ignore_k:]] - else: - out[idx] = self.fill_with - mask[idx] = 0 + #numpy.ndenumerate is slower then a loop + #so we optimise for some number of dimension frequently used + if out.ndim==1: + assert self.fill_with.ndim==1 + for i in range(out.shape[0]): + if numpy.isnan(out[i]): + out[i] = self.fill_with[i] + mask[i] = 0 + elif out.ndim==2 and self.fill_with.ndim==1: + for i in range(out.shape[0]): + for j in range(out.shape[1]): + if numpy.isnan(out[i,j]): + out[i,j] = self.fill_with[j] + mask[i,j] = 0 + else: + ignore_k = out.ndim - self.fill_with.ndim + assert ignore_k >= 0 + for (idx, v) in numpy.ndenumerate(out): + if numpy.isnan(v): + out[idx] = self.fill_with[idx[ignore_k:]] + mask[idx] = 0 + else: + #numpy.ndenumerate is slower then a loop + #so we optimise for some number of dimension frequently used + if out.ndim==1: + for i in range(out.shape[0]): + if numpy.isnan(out[i]): + out[i] = self.fill_with + mask[i] = 0 + elif out.ndim==2: + for i in range(out.shape[0]): + for j in range(out.shape[1]): + if numpy.isnan(out[i,j]): + out[i,j] = self.fill_with + mask[i,j] = 0 + else: + for (idx, v) in numpy.ndenumerate(out): + if numpy.isnan(out[idx]): + out[idx] = self.fill_with + mask[idx] = 0 def grad(self, inputs, (out_grad, mask_grad, )): return [out_grad] +#def c(): + def c_no_compile_args(self): +#-ffast-math and "-ffinite-math-only" SHOULD NOT BE ACTIVATED as they make isnan don't work! Idem for -funsafe-math-optimizations on gcc 4.1(on gcc 4.3 it don't break isnan) + return ["-ffast-math", "-ffinite-math-only", +#for gcc 4.1 we also need '-funsafe-math-optimizations', not need for gcc 4.3. TODO find a way to return the value depending of the compiler used? + "-funsafe-math-optimizations" + ] + + def c_headers(self): + return ['"Python.h"', '"numpy/noprefix.h"', ''] + + def c_support_code(self): + return """ +using namespace std; +""" + + def c_code(self, node, name, (input,), (value, mask), sub): + if self.fill_with==None: + print "OPTIMISATION WARNING: FillMissing don't implement this case in c. We don't support fill_with=None in c. We revert to python version", self.fill_with_is_array, node.inputs[0].ndim + return super(FillMissing,self).c_code(node, name, (input,),(value,mask), sub) + if (self.fill_with_is_array and not node.inputs[0].ndim in [1,2]) or (not node.inputs[0].ndim in [1,2,3]): + print "OPTIMISATION WARNING: FillMissing don't implement this case in c. 
We revert to python version", self.fill_with_is_array, node.inputs[0].ndim + return super(FillMissing,self).c_code(node, name, (input,),(value,mask), sub) + + + d=locals() + d.update(sub) + d["self.fill_with_is_array"] = 1 if self.fill_with_is_array else 0 + d["self.fill_with"] = self.fill_with + if self.fill_with_is_array: + d["self.fill_with_length"]=str(self.fill_with.size) + s="" + for i in self.fill_with.flatten(): + s+=","+str(i) + d["self.fill_with_data"]=s[1:] + d["self.fill_with.ndim"]=str(self.fill_with.ndim) + else: + d["self.fill_with_length"]=str(1) + d["self.fill_with_data"]=str(self.fill_with) + d["self.fill_with.ndim"]=0 + if node.inputs[0].type.dtype=="float32": d["type"]="float" + elif node.inputs[0].type.dtype=="float64": d["type"]="double" + else: raise Exception("Type %s not implemented "%node.inputs[0].type.dtype) + + return """ +//This space was added to for the recompilation as we changed the compiler option. +int typenum; +PyArrayObject* input = %(input)s, *value = %(value)s, *mask = %(mask)s; +%(type)s fill_with[%(self.fill_with_length)s] = {%(self.fill_with_data)s}; + +if(!PyArray_Check(input)){ + PyErr_SetString(PyExc_ValueError, "input must be an ndarray"); + %(fail)s; + +} + +typenum = PyArray_ObjectType((PyObject*)input, 0); +if(!value || !PyArray_SAMESHAPE(value,input)){ + Py_XDECREF(value); + value = (PyArrayObject*) PyArray_ZEROS(input->nd, input->dimensions, typenum,0); + %(value)s = value; +} + +if (!mask || !PyArray_SAMESHAPE(mask,input)){ + Py_XDECREF(mask); + mask = (PyArrayObject*) PyArray_ZEROS(input->nd, input->dimensions, typenum,0); + %(mask)s = mask; +} + +if(!PyArray_ISCONTIGUOUS(input)){ + cout<<"OPTIMISATION WARNING: in FillMissing, the input is not contiguous in memory, so we create a new version that is contiguous. This can be optimized by using directly the data."<nd==value->nd==mask->nd); +#if %(self.fill_with_is_array)s + if(input->nd==1){ + %(type)s* value_ = (%(type)s*)(value->data); + %(type)s* mask_ = (%(type)s*)(mask->data); + %(type)s* input_ = (%(type)s*)(input->data); + for(int i=0;idimensions[0];i++){ + if(isnan(input_[i])){ + value_[i]=fill_with[i]; + mask_[i]=0; + }else{ + value_[i]=input_[i]; + mask_[i]=1; + + } + } + }else if(input->nd==2 && %(self.fill_with.ndim)s==1){ + for(int i=0; idimensions[0];i++){ + %(type)s* value_ = (%(type)s*) PyArray_GETPTR2(value,i,0); + %(type)s* mask_ = (%(type)s*) PyArray_GETPTR2(mask,i,0); + %(type)s* input_ = (%(type)s*) PyArray_GETPTR2(input,i,0); + for(int j=0; jdimensions[1];j++){ + if(isnan(input_[j])){ + value_[j]=fill_with[j]; + mask_[j]=0; + }else{ + value_[j]=input_[j]; + mask_[j]=1; + } + } + } + }else{//not implemented! +//SHOULD not happen as c_code should revert to the python version in that case + std:stringstream temp; + temp << "In FillMissing, we try to fill with an array and the input ndim is implemented only for 1 and 2. This case is not implemented."<
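The FillMissing perform method above (and the C implementation that follows it) is, semantically, a fill-and-mask operation: NaNs are replaced by fill_with, which may be a scalar or an array matched against the trailing dimensions of the input, and a mask records where real data was present. A vectorized NumPy sketch of that behaviour, under the assumption that the hand-unrolled loops exist purely for speed; fill_missing is an illustrative name, not pylearn's API:

import numpy as np

def fill_missing(x, fill_with=0.0):
    # NaNs in x are replaced by fill_with (a scalar, or an array that
    # broadcasts against the trailing dimensions of x, e.g. one fill
    # value per column); mask is 1 where data was present, 0 where filled.
    x = np.asarray(x, dtype=float)
    missing = np.isnan(x)
    value = np.where(missing, fill_with, x)
    mask = (~missing).astype(x.dtype)
    return value, mask

x = np.array([[1.0, np.nan],
              [np.nan, 4.0]])
value, mask = fill_missing(x, fill_with=np.array([10.0, 20.0]))
print(value)   # [[ 1. 20.], [10.  4.]]
print(mask)    # [[1. 0.], [0. 1.]]

This elementwise copy-or-fill view is consistent with the Op's grad above, which simply passes out_grad through for the value output.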