changeset 747:5818682b5489

fixed numerical instability in the cross-entropy cost calculation of DAAig
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Tue, 02 Jun 2009 11:38:23 -0400
parents 6117969dd37f
children 84d22b7d835a
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 53 insertions(+), 19 deletions(-)
line diff
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jun 02 11:31:50 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jun 02 11:38:23 2009 -0400
@@ -1,18 +1,20 @@
 import numpy
 import theano
-import os, copy
+import copy
 
 from theano import tensor as T
 from theano.compile import module
-from theano.tensor.nnet import sigmoid
 
 from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
         scanmaskenc,scanmaskdec, FillMissing, mask_gradient
 
-from pylearn.algorithms import cost
 from pylearn.algorithms.logistic_regression import LogRegN
 
+# used to initialize containers
+class ScratchPad:
+    pass
 
+# regularisation utils:-------------------------------------------
 def lnorm(param, type='l2'):
     if type == 'l1':
         return T.sum(T.abs(param))
@@ -26,19 +28,41 @@
         rcost += lnorm(param, type)
     return rcost
 
-
+# activation utils:----------------------------------------------
 def sigmoid_act(x):
     return theano.tensor.nnet.sigmoid(x)
 
 def tanh_act(x):
-    return (theano.tensor.tanh((x-0.5)*2))/2.0+0.5
+    return theano.tensor.tanh(x)
+
+# cost utils:----------------------------------------------------
+
+# In order to fix the numerical instability of the cost and gradient calculations for the
+# cross entropy, we compute it directly from the activation with the following functions:
+
+def sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis):
+    XE = -target * T.log(1 + T.exp(-output_act)) + (1 - target) * (-T.log(1 + T.exp(output_act)))
+    return -T.mean(T.sum(XE, axis=sum_axis), axis=mean_axis)
 
-def softsign_act(x):
-    return theano.sandbox.softsign.softsign(x)
+def tanh_cross_entropy(target, output_act, mean_axis, sum_axis):
+    XE = -(target + 1) / 2.0 * T.log(1 + T.exp(-2 * output_act)) + \
+            (1 - (target + 1) / 2.0) * (-T.log(1 + T.exp(2 * output_act)))
+    return -T.mean(T.sum(XE, axis=sum_axis), axis=mean_axis)
 
-class ScratchPad:
-    pass
+def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1):
+    if act == 'sigmoid_act':
+        return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis)
+    if act == 'tanh_act':
+        return tanh_cross_entropy(target, output_act, mean_axis, sum_axis)
+    assert False
 
+def quadratic(target, output, act, axis=1):
+    # 'act' is accepted but unused so that the call signature matches cross_entropy;
+    # 'cost' must be imported here since the module-level import was removed above
+    from pylearn.algorithms import cost
+    return cost.quadratic(target, output, axis)
+
+
+# DAAig module----------------------------------------------------------------
 class DAAig(module.Module):
     """De-noising Auto-encoder
     """
@@ -46,7 +69,7 @@
     def __init__(self, input = None, auxinput = None,
                 in_size=None, auxin_size= None, n_hid=1,
                 regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
-                reconstruction_cost_function=cost.cross_entropy, interface = True,
+                reconstruction_cost_function='cross_entropy', interface = True,
                 ignore_missing=None, reconstruct_missing=False,
                 **init):
         """
@@ -87,13 +110,18 @@
         self.n_hid = n_hid
         self.regularize = regularize
         self.tie_weights = tie_weights
-        self.reconstruction_cost_function = reconstruction_cost_function
         self.interface = interface
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         
-        assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
+        
+        assert hid_fn in ('sigmoid_act','tanh_act')
         self.hid_fn = eval(hid_fn)
+        self.hid_name = hid_fn
+        
+        assert reconstruction_cost_function in ('cross_entropy','quadratic')
+        self.reconstruction_cost_function = eval(reconstruction_cost_function)
+        self.reconstruction_cost_function_name = reconstruction_cost_function
         
         ### DECLARE MODEL VARIABLES and default
         self.input = input
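Both the activation and the reconstruction cost are now selected by name rather than by passing a function object, which lets define_cost branch on self.reconstruction_cost_function_name below. A hypothetical construction with the new string-valued arguments (the size values and variable name are invented for the example):

    daa = DAAig(input=T.dmatrix('x'), in_size=784, n_hid=100,
                hid_fn='tanh_act', reconstruction_cost_function='quadratic')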
@@ -219,11 +247,15 @@
     
     # DEPENDENCY: define_behavioural, define_regularization
     def define_cost(self, container):
-        container.reconstruction_cost = self.reconstruction_costs(container.rec)
+        if self.reconstruction_cost_function_name == 'cross_entropy':
+            container.reconstruction_cost = self.reconstruction_costs(container.rec_activation)
+        else:
+            container.reconstruction_cost = self.reconstruction_costs(container.rec)
         # TOTAL COST
-        container.cost = container.reconstruction_cost
         if self.regularize: #if stacked don't merge regularization and cost here but in the stackeddaaig module
-            container.cost = container.cost + self.regularization
+            container.cost = container.reconstruction_cost + self.regularization
+        else:
+            container.cost = container.reconstruction_cost
     
     # DEPENDENCY: define_cost
     def define_params(self):
@@ -274,11 +306,12 @@
     
     def reconstruction_costs(self, rec):
         if (self.input is not None) and (self.auxinput is not None):
-            return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
+            return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)),\
+                    rec, self.hid_name)
         if self.input is not None:
-            return self.reconstruction_cost_function(self.input, rec)
+            return self.reconstruction_cost_function(self.input, rec, self.hid_name)
         if self.auxinput is not None:
-            return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec)
+            return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec, self.hid_name)
         # All cases should be covered above. If not, something is wrong!
         assert False
     
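A minimal symbolic sketch of the new call path (variable names invented for the example): the cross entropy is fed the pre-activation, while quadratic still takes the reconstruction itself; the third argument is the activation name stored as self.hid_name.

    x = T.dmatrix('x')                        # reconstruction target
    a = T.dmatrix('a')                        # pre-activation of the reconstruction layer
    r = sigmoid_act(a)                        # reconstruction after the activation

    xe = cross_entropy(x, a, 'sigmoid_act')   # stable form, computed from the pre-activation
    q  = quadratic(x, r, 'sigmoid_act')       # activation name is accepted but unused here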
@@ -345,7 +378,7 @@
     def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None],
                 in_size = None, auxin_size = [None], n_hid = [1],
                 regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
-                reconstruction_cost_function=cost.cross_entropy,
+                reconstruction_cost_function='cross_entropy',
                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
                 ignore_missing=None, reconstruct_missing=False,
                 **init):
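For completeness, a hypothetical two-layer instantiation with the new string arguments (the depth, sizes, and per-layer lists are invented for the example; remaining parameters keep their defaults):

    sdaa = StackedDAAig(depth=2, input=T.dmatrix('input'), auxinput=[None, None],
                        in_size=784, auxin_size=[None, None], n_hid=[500, 100],
                        hid_fn='sigmoid_act', reconstruction_cost_function='cross_entropy',
                        n_out=10)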