# HG changeset patch
# User Xavier Glorot
# Date 1243957103 14400
# Node ID 5818682b5489839c1cddbc5fbeb11a10f7b0320f
# Parent 6117969dd37fd63ae42f9aa81a2a0c6bec8a919f
fixed numerical instability for cross entropy cost calculation of DAAig

diff -r 6117969dd37f -r 5818682b5489 pylearn/algorithms/sandbox/DAA_inputs_groups.py
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jun 02 11:31:50 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jun 02 11:38:23 2009 -0400
@@ -1,18 +1,20 @@
 import numpy
 import theano
-import os, copy
+import copy
 
 from theano import tensor as T
 from theano.compile import module
-from theano.tensor.nnet import sigmoid
 
 from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
         scanmaskenc,scanmaskdec, FillMissing, mask_gradient
 
-from pylearn.algorithms import cost
 from pylearn.algorithms.logistic_regression import LogRegN
 
 
+# used to initialize containers
+class ScratchPad:
+    pass
+# regularisation utils:-------------------------------------------
 def lnorm(param, type='l2'):
     if type == 'l1':
         return T.sum(T.abs(param))
@@ -26,19 +28,40 @@
         rcost += lnorm(param, type)
     return rcost
 
-
+# activations utils:----------------------------------------------
 def sigmoid_act(x):
     return theano.tensor.nnet.sigmoid(x)
 
 def tanh_act(x):
-    return (theano.tensor.tanh((x-0.5)*2))/2.0+0.5
+    return theano.tensor.tanh(x)
+
+# costs utils:---------------------------------------------------
+
+# in order to fix numerical instability of the cost and gradient calculation for the cross entropy we calculate it
+# with the following functions directly from the activation:
+
+def sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis):
+    XE = -target * T.log(1 + T.exp(-output_act)) + (1 - target) * (- T.log(1 + T.exp(output_act)))
+    return -T.mean(T.sum(XE, axis=sum_axis), axis=mean_axis)
 
-def softsign_act(x):
-    return theano.sandbox.softsign.softsign(x)
+def tanh_cross_entropy(target, output_act, mean_axis, sum_axis):
+    XE = -(target+1)/2.0 * T.log(1 + T.exp(-2 * output_act)) + \
+            (1 - (target+1)/2.0) * (- T.log(1 + T.exp(2 * output_act)))
+    return -T.mean(T.sum(XE, axis=sum_axis), axis=mean_axis)
 
-class ScratchPad:
-    pass
+def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1):
+    if act == 'sigmoid_act':
+        return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis)
+    if act == 'tanh_act':
+        return tanh_cross_entropy(target, output_act, mean_axis, sum_axis)
+    assert False
 
+def quadratic(target, output, act, axis = 1):
+    import pylearn.algorithms.cost
+    return pylearn.algorithms.cost.quadratic(target, output, axis)
+
+
+# DAAig module----------------------------------------------------------------
 class DAAig(module.Module):
     """De-noising Auto-encoder
     """
@@ -46,7 +69,7 @@
     def __init__(self, input = None, auxinput = None,
                  in_size=None, auxin_size= None, n_hid=1,
                  regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
-                 reconstruction_cost_function=cost.cross_entropy, interface = True,
+                 reconstruction_cost_function='cross_entropy', interface = True,
                  ignore_missing=None, reconstruct_missing=False,
                  **init):
         """
@@ -87,13 +110,18 @@
         self.n_hid = n_hid
         self.regularize = regularize
         self.tie_weights = tie_weights
-        self.reconstruction_cost_function = reconstruction_cost_function
         self.interface = interface
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
 
-        assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
+
+        assert hid_fn in ('sigmoid_act','tanh_act')
         self.hid_fn = eval(hid_fn)
+        self.hid_name = hid_fn
+
+        assert reconstruction_cost_function in ('cross_entropy','quadratic')
+        self.reconstruction_cost_function = eval(reconstruction_cost_function)
+        self.reconstruction_cost_function_name = reconstruction_cost_function
 
         ### DECLARE MODEL VARIABLES and default
         self.input = input
@@ -219,11 +247,15 @@
 
     # DEPENDENCY: define_behavioural, define_regularization
     def define_cost(self, container):
-        container.reconstruction_cost = self.reconstruction_costs(container.rec)
+        if self.reconstruction_cost_function_name == 'cross_entropy':
+            container.reconstruction_cost = self.reconstruction_costs(container.rec_activation)
+        else:
+            container.reconstruction_cost = self.reconstruction_costs(container.rec)
         # TOTAL COST
-        container.cost = container.reconstruction_cost
         if self.regularize: #if stacked don't merge regularization and cost here but in the stackeddaaig module
-            container.cost = container.cost + self.regularization
+            container.cost = container.reconstruction_cost + self.regularization
+        else:
+            container.cost = container.reconstruction_cost
 
     # DEPENDENCY: define_cost
     def define_params(self):
@@ -274,11 +306,12 @@
 
     def reconstruction_costs(self, rec):
         if (self.input is not None) and (self.auxinput is not None):
-            return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
+            return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)),\
+                    rec, self.hid_name)
         if self.input is not None:
-            return self.reconstruction_cost_function(self.input, rec)
+            return self.reconstruction_cost_function(self.input, rec, self.hid_name)
         if self.auxinput is not None:
-            return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec)
+            return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec, self.hid_name)
         # All cases should be covered above. If not, something is wrong!
         assert False
 
@@ -345,7 +378,7 @@
     def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None],
                  in_size = None, auxin_size = [None], n_hid = [1],
                  regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
-                 reconstruction_cost_function=cost.cross_entropy,
+                 reconstruction_cost_function='cross_entropy',
                  n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
                  ignore_missing=None, reconstruct_missing=False,
                  **init):
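
Why computing the cross entropy directly from the pre-activation helps: once the sigmoid saturates in floating point, log(output) or log(1 - output) becomes log(0) and the cost and its gradient diverge. Below is a minimal NumPy sketch contrasting the two formulations; the sketch and its helper names are illustrative only, not part of pylearn or of this patch. In the patch the expressions are written as T.log(1 + T.exp(x)), relying on Theano's graph optimizer to substitute a numerically stable softplus; the sketch uses numpy.logaddexp to get the same effect explicitly.

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def naive_sigmoid_xe(target, a):
    # old style: cross entropy from the squashed unit output sigmoid(a).
    # For a = 50, sigmoid(a) rounds to exactly 1.0 in float64, so
    # log(1 - sigmoid(a)) is log(0) = -inf and the cost diverges
    # (NumPy also emits a divide-by-zero warning here).
    out = sigmoid(a)
    return -(target * np.log(out) + (1 - target) * np.log(1 - out))

def stable_sigmoid_xe(target, a):
    # patch style: cross entropy directly from the pre-activation, as in
    # sigmoid_cross_entropy above, using log(sigmoid(a)) = -softplus(-a)
    # and log(1 - sigmoid(a)) = -softplus(a), where softplus(x) =
    # log(1 + exp(x)) is evaluated stably via logaddexp(0, x).
    return target * np.logaddexp(0.0, -a) + (1 - target) * np.logaddexp(0.0, a)

a = np.array([-50.0, 0.0, 50.0])   # pre-activations; |a| = 50 saturates the sigmoid
t = np.array([1.0, 0.5, 0.0])      # targets
print(naive_sigmoid_xe(t, a))      # [50.  0.693...  inf] -- blows up where the sigmoid saturates
print(stable_sigmoid_xe(t, a))     # [50.  0.693...  50.] -- finite everywhere

The tanh_cross_entropy variant in the patch is the same identity after mapping targets and outputs from [-1, 1] to [0, 1] (using tanh(a) = 2*sigmoid(2a) - 1), which is why a factor of 2 appears inside its exponentials; the T.mean/T.sum over mean_axis and sum_axis merely aggregate this elementwise quantity over a minibatch.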