pylearn: changeset 721:df3aef87d8d2
Fixes to the handling of missing values in input
| author   | Olivier Delalleau <delallea@iro> |
|---|---|
| date     | Mon, 25 May 2009 23:13:56 -0400 |
| parents  | 0594cba02fa8 |
| children | e915f5c9bb21 d42b4bcbb582 |
| files    | pylearn/algorithms/sandbox/DAA_inputs_groups.py |
| diffstat | 1 files changed, 15 insertions(+), 19 deletions(-) |
```diff
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon May 25 23:11:38 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon May 25 23:13:56 2009 -0400
@@ -7,7 +7,7 @@
 from theano.tensor.nnet import sigmoid
 from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
-        scanmaskenc,scanmaskdec
+        scanmaskenc,scanmaskdec, fill_missing_with_zeros, mask_gradient
 from pylearn.algorithms import cost
 from pylearn.algorithms.logistic_regression import LogRegN
 
@@ -56,16 +56,14 @@
         :param reconstruction_cost: Should return one cost per example (row)
         :param ignore_missing: if True, the input will be scanned in order to
             detect missing values, and these values will be replaced by zeros.
-            Also, the reconstruction cost will be computed only on non missing
-            components.
+            Also, the reconstruction cost's gradient will be computed only on
+            non missing components.
             If False, the presence of missing values may cause crashes or other
             weird and unexpected behavior.
             Please note that this option only affects the permanent input, not
             auxilary ones (that should never contain missing values). In fact,
             in the current implementation, auxiliary inputs cannot be used when
             this option is True.
-            Another side effect of the current crappy way it is implemented is
-            that the reconstruction cost is not properly computed.
         :todo: Default noise level for all daa levels
         """
         print '\t\t**** DAAig.__init__ ****'
@@ -154,6 +152,11 @@
         container.hidden = self.hid_fn(container.hidden_activation)
         self.define_propdown(container, idx_list , auxinput)
         container.rec = self.hid_fn(container.rec_activation)
+        if self.ignore_missing and self.input is not None:
+            # Apply mask to gradient to ensure we do not backpropagate on the
+            # cost computed on missing inputs (that were replaced with zeros).
+            container.rec = mask_gradient(container.rec,
+                    self.input_missing_mask)
 
     def define_propup(self, container, input, idx_list, auxinput):
         if self.input is not None:
@@ -263,18 +266,6 @@
         return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
 
     def reconstruction_costs(self, rec):
-        if self.ignore_missing and self.input is not None:
-            # Note: the following code is very ugly. It is just a hack to
-            # ensure that the gradient w.r.t. missing coordinates is (close to)
-            # zero. It is neither efficient nor elegant.
-            # The idea is to put a very big negative value in the
-            # reconstruction for these missing inputs (whose target is 0), so
-            # that the gradient is 1/(1 - rec) ~= 0.
-            # This will in particular screw up the cost computations.
-            zero = rec * 0
-            rec = (rec * T.neq(self.input_missing_mask, zero) +
-                    (zero - 1e100) * T.eq(self.input_missing_mask, zero))
-
         if (self.input is not None) and (self.auxinput is not None):
             return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
         if self.input is not None:
@@ -348,7 +339,9 @@
                  in_size = None, auxin_size = [None], n_hid = [1],
                  regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
                  reconstruction_cost_function=cost.cross_entropy,
-                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init):
+                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
+                 ignore_missing=False,
+                 **init):
 
         super(StackedDAAig, self).__init__()
         print '\t**** StackedDAAig.__init__ ****'
@@ -373,6 +366,7 @@
         self.target = target if target is not None else T.lvector('target')
         self.debugmethod = debugmethod
         self.totalupdatebool = totalupdatebool
+        self.ignore_missing = ignore_missing
 
         # init for model construction
         inputprec = input
@@ -437,9 +431,11 @@
         else:
             param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
                 False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
+
+        dict_params = dict(ignore_missing = self.ignore_missing)
 
         print '\tLayer init= ', i+1
-        self.daaig[i] = DAAig(*param)
+        self.daaig[i] = DAAig(*param, **dict_params)
 
         # method input, outputs and parameters update
         if i:
```
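The new `fill_missing_with_zeros` import reflects what the `ignore_missing` docstring describes: the input is scanned for missing values, those coordinates are replaced by zeros, and a binary mask (`self.input_missing_mask`) records which coordinates were actually observed. A minimal NumPy sketch of that idea, assuming missing entries are encoded as NaN (the real Op in `pylearn.sandbox.scan_inputs_groups` may use a different encoding and return convention):

```python
# Hypothetical NumPy sketch of "replace missing values by zeros and keep a
# mask"; the actual fill_missing_with_zeros Op may differ.
import numpy

def fill_missing_with_zeros_np(x):
    missing = numpy.isnan(x)                # assumes NaN encodes "missing"
    filled = numpy.where(missing, 0.0, x)   # zeros where data was missing
    mask = (~missing).astype(x.dtype)       # 1 = observed, 0 = missing
    return filled, mask

filled, mask = fill_missing_with_zeros_np(numpy.array([1.0, numpy.nan, 3.0]))
print filled   # [ 1.  0.  3.]
print mask     # [ 1.  0.  1.]
```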
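The `mask_gradient` call added in `@@ -154,6 +152,11 @@` keeps the forward reconstruction unchanged but stops backpropagation through the coordinates that were filled in, which is why the docstring now promises only that the cost's gradient ignores missing components. A sketch of the same trick written against a later Theano API (`theano.gradient.disconnected_grad`, which did not exist in 2009; the actual `mask_gradient` Op is a separate implementation and may differ):

```python
# Sketch: identity in the forward pass, gradient blocked where mask == 0.
import numpy
import theano
import theano.tensor as T

x = T.dvector('x')
mask = T.dvector('mask')   # 1 = observed coordinate, 0 = missing coordinate

# Gradient flows only through the mask * x term; the disconnected_grad term
# restores the forward value without contributing to backpropagation.
masked = mask * x + (1 - mask) * theano.gradient.disconnected_grad(x)

grad = theano.grad(T.sum(masked ** 2), x)
f = theano.function([x, mask], grad)
print f(numpy.array([1., 2., 3.]), numpy.array([1., 0., 1.]))
# [ 2.  0.  6.] -- no gradient on the missing coordinate
```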
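The block deleted from `reconstruction_costs` shows why the cleanup was needed. Forcing the reconstruction to `-1e100` on missing coordinates (whose target is 0) does make the cross-entropy gradient `1/(1 - rec)` vanish, but it also injects a large spurious term into the cost value itself, the "not properly computed" cost the old docstring warned about. A quick numeric check, assuming the usual binary cross-entropy term for a single coordinate:

```python
# Cross-entropy contribution of one missing coordinate (target t = 0) and its
# derivative with respect to the reconstruction rec.
import numpy

def missing_term(rec):
    cost = -numpy.log(1.0 - rec)      # -(1 - t) * log(1 - rec) with t = 0
    dcost_drec = 1.0 / (1.0 - rec)    # the gradient the old hack suppressed
    return cost, dcost_drec

print missing_term(0.5)      # (0.693..., 2.0): a normal reconstruction
print missing_term(-1e100)   # (-230.25..., 1e-100): gradient ~ 0 as intended,
                             # but the cost picks up a large bogus term
```

Masking the gradient directly, as the new `mask_gradient` path does, achieves the near-zero gradient without corrupting the reported cost.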