# HG changeset patch
# User Olivier Delalleau
# Date 1243307636 14400
# Node ID df3aef87d8d2adbbe1dc28e66c7ba92f0d8e9012
# Parent  0594cba02fa8a9d57e591856edcb958beb3c5c9c
Fixes to the handling of missing values in input

diff -r 0594cba02fa8 -r df3aef87d8d2 pylearn/algorithms/sandbox/DAA_inputs_groups.py
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon May 25 23:11:38 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon May 25 23:13:56 2009 -0400
@@ -7,7 +7,7 @@
 from theano.tensor.nnet import sigmoid
 
 from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
-        scanmaskenc,scanmaskdec
+        scanmaskenc,scanmaskdec, fill_missing_with_zeros, mask_gradient
 from pylearn.algorithms import cost
 from pylearn.algorithms.logistic_regression import LogRegN
 
@@ -56,16 +56,14 @@
         :param reconstruction_cost: Should return one cost per example (row)
         :param ignore_missing: if True, the input will be scanned in order to
             detect missing values, and these values will be replaced by zeros.
-            Also, the reconstruction cost will be computed only on non missing
-            components.
+            Also, the reconstruction cost's gradient will be computed only on
+            non missing components.
             If False, the presence of missing values may cause crashes or other
             weird and unexpected behavior.
             Please note that this option only affects the permanent input, not
             auxilary ones (that should never contain missing values). In fact,
             in the current implementation, auxiliary inputs cannot be used when
             this option is True.
-            Another side effect of the current crappy way it is implemented is
-            that the reconstruction cost is not properly computed.
         :todo: Default noise level for all daa levels
         """
         print '\t\t**** DAAig.__init__ ****'
@@ -154,6 +152,11 @@
         container.hidden = self.hid_fn(container.hidden_activation)
         self.define_propdown(container, idx_list , auxinput)
         container.rec = self.hid_fn(container.rec_activation)
+        if self.ignore_missing and self.input is not None:
+            # Apply mask to gradient to ensure we do not backpropagate on the
+            # cost computed on missing inputs (that were replaced with zeros).
+            container.rec = mask_gradient(container.rec,
+                    self.input_missing_mask)
 
     def define_propup(self, container, input, idx_list, auxinput):
         if self.input is not None:
@@ -263,18 +266,6 @@
         return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
 
     def reconstruction_costs(self, rec):
-        if self.ignore_missing and self.input is not None:
-            # Note: the following code is very ugly. It is just a hack to
-            # ensure that the gradient w.r.t. missing coordinates is (close to)
-            # zero. It is neither efficient nor elegant.
-            # The idea is to put a very big negative value in the
-            # reconstruction for these missing inputs (whose target is 0), so
-            # that the gradient is 1/(1 - rec) ~= 0.
-            # This will in particular screw up the cost computations.
-            zero = rec * 0
-            rec = (rec * T.neq(self.input_missing_mask, zero) +
-                   (zero - 1e100) * T.eq(self.input_missing_mask, zero))
-
         if (self.input is not None) and (self.auxinput is not None):
             return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
         if self.input is not None:
@@ -348,7 +339,9 @@
                  in_size = None, auxin_size = [None], n_hid = [1],
                  regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
                  reconstruction_cost_function=cost.cross_entropy,
-                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init):
+                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
+                 ignore_missing=False,
+                 **init):
         super(StackedDAAig, self).__init__()
         print '\t**** StackedDAAig.__init__ ****'
 
@@ -373,6 +366,7 @@
         self.target = target if target is not None else T.lvector('target')
         self.debugmethod = debugmethod
         self.totalupdatebool = totalupdatebool
+        self.ignore_missing = ignore_missing
 
         # init for model construction
         inputprec = input
@@ -437,9 +431,11 @@
             else:
                 param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
                         False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
+
+            dict_params = dict(ignore_missing = self.ignore_missing)
 
             print '\tLayer init= ', i+1
-            self.daaig[i] = DAAig(*param)
+            self.daaig[i] = DAAig(*param, **dict_params)
 
             # method input, outputs and parameters update
             if i:
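
For illustration only: the changeset drops the old "huge negative reconstruction" hack and instead relies on fill_missing_with_zeros (to replace missing input values with zeros) and mask_gradient (so those components do not contribute to the gradient of the reconstruction cost). The following minimal NumPy sketch shows the intent with hypothetical stand-in helpers; it is not the pylearn/Theano API, where these are graph operations applied symbolically.

    import numpy as np

    def fill_missing_with_zeros(x):
        # Stand-in: replace missing entries (NaNs) with zeros and keep a mask
        # recording where they were.
        mask = np.isnan(x)
        return np.where(mask, 0.0, x), mask

    def mask_gradient(grad, missing_mask):
        # Stand-in: zero the gradient on components that were missing in the
        # input, so only observed components drive learning.
        return np.where(missing_mask, 0.0, grad)

    x = np.array([[0.2, np.nan, 0.7]])
    filled, mask = fill_missing_with_zeros(x)
    grad = np.ones_like(filled)           # placeholder for d(cost)/d(reconstruction)
    print(mask_gradient(grad, mask))      # -> [[1. 0. 1.]]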