changeset 748:863e34a3d01e

New option 'corruption_pattern' to alter the way inputs are corrupted
author Olivier Delalleau <delallea@iro>
date Tue, 02 Jun 2009 13:50:43 -0400
parents 6117969dd37f
children 84d22b7d835a
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 27 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jun 02 11:31:50 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jun 02 13:50:43 2009 -0400
@@ -48,6 +48,7 @@
                 regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
                 reconstruction_cost_function=cost.cross_entropy, interface = True,
                 ignore_missing=None, reconstruct_missing=False,
+                corruption_pattern=None,
                 **init):
         """
         :param regularize: WRITEME
@@ -67,6 +68,12 @@
             auxilary ones (that should never contain missing values). In fact,
             in the current implementation, auxiliary inputs cannot be used when
             this option is True.
+        :param corruption_pattern: if not None, may specify a particular way to
+        corrupt the input with missing values. Valid choices are:
+            - 'by_pair': consider that features are given as pairs, and corrupt
+            (or not) the whole pair instead of considering them independently.
+            Elements in a pair are not consecutive; instead they are assumed to
+            be at distance (total number of features / 2) from each other.
         :param reconstruct_missing: if True, then the reconstruction cost on
         missing inputs will be backpropagated. Otherwise, it will not.
         :todo: Default noise level for all daa levels
@@ -91,6 +98,7 @@
         self.interface = interface
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
+        self.corruption_pattern = corruption_pattern
         
         assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
         self.hid_fn = eval(hid_fn)
@@ -137,7 +145,8 @@
             self.noisy_input = self.corrupt_input()
         if self.auxinput is not None:
             self.noisy_idx_list , self.noisy_auxinput = \
-                scannoise(self.idx_list,self.auxinput,self.noise_level,self.noise_level_group)
+                scannoise(self.idx_list, self.auxinput,self.noise_level,
+                        self.noise_level_group)
         
         self.noise = ScratchPad()
         self.clean = ScratchPad()
@@ -270,7 +279,19 @@
         self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec])
     
     def corrupt_input(self):
-        return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
+        if self.corruption_pattern is None:
+            mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level)
+        elif self.corruption_pattern == 'by_pair':
+            shape = T.shape(self.input)
+            scale = numpy.ones(2)
+            scale[1] = 2
+            shape = shape / scale
+            mask = self.random.binomial(shape, 1, 1 - self.noise_level)
+            mask = T.hstack((mask, mask))
+        else:
+            raise ValueError('Unknown value for corruption_pattern: %s'
+                    % self.corruption_pattern)
+        return mask * self.input
     
     def reconstruction_costs(self, rec):
         if (self.input is not None) and (self.auxinput is not None):
@@ -348,6 +369,7 @@
                 reconstruction_cost_function=cost.cross_entropy,
                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
                 ignore_missing=None, reconstruct_missing=False,
+                corruption_pattern=None,
                 **init):
         
         super(StackedDAAig, self).__init__()
@@ -375,6 +397,7 @@
         self.totalupdatebool = totalupdatebool
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
+        self.corruption_pattern = corruption_pattern
         
         # init for model construction
         inputprec = input
@@ -441,7 +464,8 @@
                     False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
 
             dict_params = dict(ignore_missing = self.ignore_missing,
-                    reconstruct_missing = self.reconstruct_missing)
+                    reconstruct_missing = self.reconstruct_missing,
+                    corruption_pattern = self.corruption_pattern)
             
             print '\tLayer init= ', i+1
             self.daaig[i] = DAAig(*param, **dict_params)