changeset 809:a66bef83e1fd

Change the cost function: sum over a quadratic term and a KL divergence instead of cross entropy for the global update of DAA inputs groups
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 12 Aug 2009 18:29:18 -0400
parents 316817114b15
children 3a4bc4a0dbf4
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 17 insertions(+), 13 deletions(-)

--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Aug 11 18:37:22 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Aug 12 18:29:18 2009 -0400
@@ -87,14 +87,18 @@
 # costs utils:---------------------------------------------------
 # in order to fix numerical instability of the cost and gradient calculation for the cross entropy we calculate it
 # with the following functions directly from the activation:
+# XS is used to get back the KL divergence, important for doing global updates
+
 def sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis):
-    XE =-target * T.log(1 + T.exp(-output_act)) + (1 - target) * (- T.log(1 + T.exp(output_act)))
-    return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis)
+    XE = target * (- T.log(1 + T.exp(-output_act))) + (1 - target) * (- T.log(1 + T.exp(output_act)))
+    XS = T.xlogx.xlogx(target) + T.xlogx.xlogx(1-target)
+    return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
 
 def tanh_cross_entropy(target, output_act, mean_axis, sum_axis):
-    XE =-(target+1)/2.0 * T.log(1 + T.exp(- output_act)) + \
+    XE = (target+1)/2.0 * (- T.log(1 + T.exp(- output_act))) + \
             (1 - (target+1)/2.0) * (- T.log(1 + T.exp(output_act)))
-    return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis)
+    XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
+    return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
 
 def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1):
     if act == 'sigmoid_act':
@@ -103,8 +107,8 @@
         return tanh_cross_entropy(target, output_act, mean_axis, sum_axis)
     assert False
 
-def quadratic(target, output, act, axis = 1):
-    return pylearn.algorithms.cost.quadratic(target, output, axis)
+def quadratic(target, output, act, mean_axis = 0):
+    return T.sum(pylearn.algorithms.cost.quadratic(target, output, mean_axis))
 
 # DAAig module----------------------------------------------------------------
 class DAAig(module.Module):
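
Note on the cost change above (not part of the changeset): subtracting the XS term, i.e. the target's own xlogx entropy, from the numerically stable cross entropy turns the summed quantity into the binary KL divergence between the target and the reconstruction, computed directly from the pre-activation. XS does not depend on the model parameters, so the gradient is unchanged, but the cost now reaches zero when the reconstruction matches the target, which makes it meaningful to add it to the (now summed) quadratic cost in a single global update. A minimal numpy sketch checking the identity (names and shapes are illustrative):

    import numpy as np

    def xlogx(x):
        # numpy analogue of T.xlogx.xlogx: x*log(x) with the convention 0*log(0) = 0
        return np.where(x > 0, x * np.log(np.where(x > 0, x, 1.0)), 0.0)

    rng = np.random.RandomState(0)
    target = rng.uniform(0.05, 0.95, size=(5, 3))    # targets in (0, 1)
    a = rng.normal(size=(5, 3))                      # pre-sigmoid activations
    p = 1.0 / (1.0 + np.exp(-a))                     # sigmoid outputs

    # stable form used in the patch, computed from the activation 'a'
    XE = target * (-np.log1p(np.exp(-a))) + (1.0 - target) * (-np.log1p(np.exp(a)))
    XS = xlogx(target) + xlogx(1.0 - target)

    # naive per-unit binary KL divergence KL(target || p)
    KL = target * np.log(target / p) + (1.0 - target) * np.log((1.0 - target) / (1.0 - p))

    assert np.allclose(-(XE - XS), KL)
    # so -mean(sum(XE - XS)) is the mean-over-examples, sum-over-units KL divergence,
    # which is zero when the reconstruction equals the target
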
@@ -854,11 +858,11 @@
     
     def _instance_unsupupdate(self,inst,data,layer='all',typeup = 'local',printcost = False):
         cost = [None]*self.depth
-        if typeup is 'totallocal':
+        if typeup == 'totallocal':
             cost[-1] = inst.totallocalupdate(*data)
         else: 
-            if typeup is 'total':
-                if layer is 'all':
+            if typeup == 'total':
+                if layer == 'all':
                     cost[-1] = inst.totalupdate[-1](*data)
                 else:
                     cost[layer] = inst.totalupdate[layer](*data)
@@ -867,21 +871,21 @@
                     for i in range(self.depth):
                         if typeup == 'local':
                             cost[i] = inst.localupdate[i](*data[i])
-                        else:
+                        if typeup == 'global':
                             cost[i] = inst.globalupdate[i](*data[i])
                 else:
                     if typeup == 'local':
                         cost[layer] = inst.localupdate[layer](*data)
-                    else:
+                    if typeup == 'global':
                         cost[layer] = inst.globalupdate[layer](*data)
         if printcost:
             print cost
         return cost
     
     def _instance_supupdate(self,inst,data,typeup = 'global',printcost = False):
-        if typeup is 'local':
+        if typeup == 'local':
             cost = inst.localupdate[-1](*data)
-        if typeup is 'global':
+        if typeup == 'global':
             cost = inst.globalupdate[-1](*data)
         if printcost:
             print cost
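
The remaining hunks are a robustness fix rather than a cost change: the typeup/layer option strings are now compared with == instead of is, and the 'global' branch is spelled out instead of being the catch-all else. A small standalone illustration (not from the changeset; the dispatch helper below is hypothetical):

    # '==' compares values; 'is' compares object identity and is only
    # reliable for strings when CPython happens to intern both operands.
    a = 'total'
    b = ''.join(['to', 'tal'])     # equal value, built at runtime
    assert a == b                  # value equality: always holds here
    # 'a is b' may be False for these equal strings, so dispatching on it
    # can silently skip the intended branch.

    # Explicit branches, as in the patched _instance_unsupupdate, so an
    # unexpected typeup value no longer falls through to the global update:
    def dispatch(typeup):
        if typeup == 'local':
            return 'localupdate'
        if typeup == 'global':
            return 'globalupdate'
        return None                # anything else is ignored

    assert dispatch('local') == 'localupdate'
    assert dispatch('glboal') is None   # a typo no longer triggers the global update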