# HG changeset patch # User Olivier Delalleau # Date 1243965082 14400 # Node ID 84d22b7d835aa2626b9a10f2f780fe62cc41454b # Parent 863e34a3d01e1ded5a17c53d8efec5cc5d94009c# Parent 5818682b5489839c1cddbc5fbeb11a10f7b0320f Merged diff -r 5818682b5489 -r 84d22b7d835a pylearn/algorithms/sandbox/DAA_inputs_groups.py --- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py Tue Jun 02 11:38:23 2009 -0400 +++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py Tue Jun 02 13:51:22 2009 -0400 @@ -71,6 +71,7 @@ regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', reconstruction_cost_function='cross_entropy', interface = True, ignore_missing=None, reconstruct_missing=False, + corruption_pattern=None, **init): """ :param regularize: WRITEME @@ -90,6 +91,12 @@ auxilary ones (that should never contain missing values). In fact, in the current implementation, auxiliary inputs cannot be used when this option is True. + :param corruption_pattern: if not None, may specify a particular way to + corrupt the input with missing values. Valid choices are: + - 'by_pair': consider that features are given as pairs, and corrupt + (or not) the whole pair instead of considering them independently. + Elements in a pair are not consecutive, instead they are assumed to + be at distance (total number of features / 2) of each other. :param reconstruct_missing: if True, then the reconstruction cost on missing inputs will be backpropagated. Otherwise, it will not. :todo: Default noise level for all daa levels @@ -113,6 +120,7 @@ self.interface = interface self.ignore_missing = ignore_missing self.reconstruct_missing = reconstruct_missing + self.corruption_pattern = corruption_pattern assert hid_fn in ('sigmoid_act','tanh_act') @@ -165,7 +173,8 @@ self.noisy_input = self.corrupt_input() if self.auxinput is not None: self.noisy_idx_list , self.noisy_auxinput = \ - scannoise(self.idx_list,self.auxinput,self.noise_level,self.noise_level_group) + scannoise(self.idx_list, self.auxinput,self.noise_level, + self.noise_level_group) self.noise = ScratchPad() self.clean = ScratchPad() @@ -302,7 +311,19 @@ self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec]) def corrupt_input(self): - return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input + if self.corruption_pattern is None: + mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) + elif self.corruption_pattern == 'by_pair': + shape = T.shape(self.input) + scale = numpy.ones(2) + scale[1] = 2 + shape = shape / scale + mask = self.random.binomial(shape, 1, 1 - self.noise_level) + mask = T.hstack((mask, mask)) + else: + raise ValueError('Unknown value for corruption_pattern: %s' + % self.corruption_pattern) + return mask * self.input def reconstruction_costs(self, rec): if (self.input is not None) and (self.auxinput is not None): @@ -381,6 +402,7 @@ reconstruction_cost_function='cross_entropy', n_out = 2, target = None, debugmethod = False, totalupdatebool=False, ignore_missing=None, reconstruct_missing=False, + corruption_pattern=None, **init): super(StackedDAAig, self).__init__() @@ -408,6 +430,7 @@ self.totalupdatebool = totalupdatebool self.ignore_missing = ignore_missing self.reconstruct_missing = reconstruct_missing + self.corruption_pattern = corruption_pattern # init for model construction inputprec = input @@ -474,7 +497,8 @@ False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] dict_params = dict(ignore_missing = self.ignore_missing, - reconstruct_missing = self.reconstruct_missing) + reconstruct_missing = self.reconstruct_missing, + corruption_pattern = self.corruption_pattern) print '\tLayer init= ', i+1 self.daaig[i] = DAAig(*param, **dict_params)