changeset 817:db2c26a2c97c

new parameters (blockgrad, act_reg), a softsign activation and a BlockGrad Op for DAA inputs groups
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 02 Sep 2009 13:36:41 -0400
parents d1e6c36df023
children 7dfecf11cbf4
files pylearn/algorithms/sandbox/DAA_inputs_groups.py pylearn/sandbox/scan_inputs_groups.py
diffstat 2 files changed, 54 insertions(+), 9 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon Aug 17 19:54:35 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Sep 02 13:36:41 2009 -0400
@@ -6,7 +6,7 @@
 from theano.compile import module
 
 from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
-        scanmaskenc,scanmaskdec, FillMissing, mask_gradient
+        scanmaskenc,scanmaskdec, FillMissing, mask_gradient, blockgrad
 
 from pylearn.algorithms.logistic_regression import LogRegN
 import pylearn.algorithms.cost
@@ -93,6 +93,9 @@
 def tanh_act(x):
     return theano.tensor.tanh(x/2.0)
 
+def softsign_act(x):
+    return x/(1.0 + theano.tensor.abs_(x))
+
 # costs utils:---------------------------------------------------
 # in order to fix numerical instability of the cost and gradient calculation for the cross entropy, we calculate it
 # with the following functions directly from the activation:
@@ -109,11 +112,19 @@
     XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
     return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
 
+def softsign_cross_entropy(target, output_act, mean_axis, sum_axis):
+    newact = ((output_act/(1.0 + theano.tensor.abs_(output_act)))+1)/2.0
+    XE = (target+1)/2.0 * T.log(newact) + (1 - (target+1)/2.0) * T.log(1 - newact)
+    XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
+    return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
+
 def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1):
     if act == 'sigmoid_act':
         return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis)
     if act == 'tanh_act':
         return tanh_cross_entropy(target, output_act, mean_axis, sum_axis)
+    if act == 'softsign_act':
+        return softsign_cross_entropy(target, output_act, mean_axis, sum_axis)
     assert False
 
 def quadratic(target, output, act, mean_axis = 0):
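For reference, a minimal numpy sketch of what the new softsign path computes (the helper names and the numeric check are illustrative, not part of the module): softsign_act squashes to (-1, 1), and softsign_cross_entropy rescales both the target and the squashed activation from [-1, 1] to [0, 1] before taking the usual binary cross entropy minus the target entropy.

    import numpy as np

    def softsign(x):
        # x / (1 + |x|): squashes to (-1, 1), like tanh but with heavier tails
        return x / (1.0 + np.abs(x))

    def xlogx(p):
        # 0 * log(0) is taken to be 0, as in theano's T.xlogx.xlogx
        return np.where(p > 0.0, p * np.log(np.where(p > 0.0, p, 1.0)), 0.0)

    def softsign_cross_entropy_check(target, output_act):
        # target and reconstruction both live in [-1, 1]; rescale to [0, 1]
        p = (softsign(output_act) + 1.0) / 2.0
        t = (target + 1.0) / 2.0
        XE = t * np.log(p) + (1.0 - t) * np.log(1.0 - p)
        XS = xlogx(t) + xlogx(1.0 - t)
        return -np.mean(np.sum(XE - XS, axis=1), axis=0)

    target = np.array([[1.0, -1.0, 0.0]])
    act = np.array([[4.0, -4.0, 0.1]])
    print softsign_cross_entropy_check(target, act)   # small positive scalar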
@@ -129,7 +140,7 @@
                 regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act',
                 rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
                 interface = True, ignore_missing=None, reconstruct_missing=False,
-                corruption_pattern=None, **init):
+                corruption_pattern=None, blockgrad = False, **init):
         """
         :param input: WRITEME
         :param auxinput: WRITEME
@@ -180,11 +191,12 @@
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
+        self.blockgrad = blockgrad
         
-        assert hid_fn in ('sigmoid_act','tanh_act')
+        assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
         self.hid_fn = eval(hid_fn)
         
-        assert rec_fn in ('sigmoid_act','tanh_act')
+        assert rec_fn in ('sigmoid_act','tanh_act','softsign_act')
         self.rec_fn = eval(rec_fn)
         self.rec_name = rec_fn
         
@@ -347,8 +359,8 @@
         tmpbool = (self.reconstruction_cost_function_name == 'cross_entropy')
         if (self.input is not None):
             container.reconstruction_cost_in = \
-                self.reconstruction_cost_function(self.input, container.rec_activation_in \
-                if tmpbool else container.rec_in, self.rec_name)
+                self.reconstruction_cost_function(blockgrad(self.input) if self.blockgrad else self.input,\
+                        container.rec_activation_in if tmpbool else container.rec_in, self.rec_name)
         if (self.auxinput is not None):
             container.reconstruction_cost_aux = \
                 self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), container.rec_activation_aux \
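When the new blockgrad flag is set, the reconstruction target is wrapped in blockgrad so that the cost keeps the same value (BlockGrad's perform is a plain copy) but no gradient is sent back through the target. A quick value check, assuming the patch is applied and using a plain theano.function with float64 vectors (which is what BlockGrad.make_node produces for 1-D inputs); the variable names are illustrative:

    import theano
    import theano.tensor as T
    from pylearn.sandbox.scan_inputs_groups import blockgrad

    target = T.dvector('target')
    rec = T.dvector('rec')
    plain = T.sum((target - rec) ** 2)               # ordinary quadratic cost
    blocked = T.sum((blockgrad(target) - rec) ** 2)  # same cost, target treated as constant
    f = theano.function([target, rec], [plain, blocked])
    print f([1.0, -1.0], [0.5, -0.5])                # both costs evaluate to the same number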
@@ -484,7 +496,7 @@
                 rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
                 ignore_missing=None, reconstruct_missing=False,
-                corruption_pattern=None,
+                corruption_pattern=None, blockgrad = False, act_reg = 'sigmoid_act',
                 **init):
         
         super(StackedDAAig, self).__init__()
@@ -516,6 +528,10 @@
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
+        self.blockgrad = blockgrad
+        
+        assert act_reg in ('sigmoid_act','tanh_act','softsign_act')
+        self.act_reg = eval(act_reg)
         
         print '\t**** StackedDAAig.__init__ ****'
         print '\tdepth = ', self.depth
@@ -529,7 +545,10 @@
         print '\ttie_weights_aux = ', tie_weights_aux
         print '\thid_fn = ', hid_fn
         print '\trec_fn = ', rec_fn
+        print '\tact_reg = ', act_reg
         print '\treconstruction_cost_function = ', reconstruction_cost_function
+        print '\tblockgrad = ', blockgrad
+        print '\tact_reg = ', act_reg
         print '\tn_out = ', self.n_out
         
         # init for model construction
@@ -594,7 +613,8 @@
                     tie_weights_aux = tie_weights_aux[i], hid_fn = hid_fn[i],
                     rec_fn = rec_fn[i], reconstruction_cost_function = reconstruction_cost_function[i],
                     interface = False, ignore_missing = self.ignore_missing,
-                    reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern)
+                    reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern,
+                    blockgrad=self.blockgrad)
             if auxin_size[i] is None:
                 offset +=1
                 dict_params.update({'auxinput' : None})
@@ -675,7 +695,7 @@
         # supervised layer------------------------------------------------------------------------
         print '\tLayer supervised init'
         self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target]
-        self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,sigmoid_act(self.daaig[-2].clean.hidden_activation),self.target)
+        self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,self.act_reg(self.daaig[-2].clean.hidden_activation),self.target)
         paramstot += self.daaig[-1].params
         
         self.localcost[-1] = self.daaig[-1].regularized_cost \
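The supervised layer used to squash the top hidden activation with sigmoid_act unconditionally; it now applies whichever activation the act_reg string names, resolved with the same assert-then-eval pattern already used for hid_fn and rec_fn. A toy sketch of that pattern (variable names illustrative, sigmoid_act omitted for brevity):

    import theano.tensor as T

    def tanh_act(x):
        return T.tanh(x / 2.0)

    def softsign_act(x):
        return x / (1.0 + T.abs_(x))

    act_reg = 'softsign_act'                          # string chosen by the caller
    assert act_reg in ('tanh_act', 'softsign_act')    # the module also accepts 'sigmoid_act'
    act_fn = eval(act_reg)                            # resolves to one of the functions above

    hidden_activation = T.dmatrix('hidden_activation')
    regressor_input = act_fn(hidden_activation)       # what gets handed to LogRegN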
--- a/pylearn/sandbox/scan_inputs_groups.py	Mon Aug 17 19:54:35 2009 -0400
+++ b/pylearn/sandbox/scan_inputs_groups.py	Wed Sep 02 13:36:41 2009 -0400
@@ -71,6 +71,31 @@
         if nbias != 1: raise TypeError('not vector', bias_list[i])
     return bias_list
 
+
+# block grad Op------------------------------------
+class BlockGrad(Op):
+    """This Op blocks the gradient of its input: the forward pass copies the input, the backward pass returns zeros."""
+    def make_node(self, x):
+        x = T.as_tensor_variable(x)
+        if x.ndim == 1:
+            return Apply(self, [x], [T.dvector()])
+        else:
+            return Apply(self, [x], [T.dmatrix()])
+    
+    def perform(self, node, x, (out,)):
+        out[0] = x[0].copy()
+    
+    def grad(self, x, (gx,)):
+        return [gx*0]
+    
+    def __hash__(self):
+        return hash(BlockGrad)^77612
+    
+    def __str__(self):
+        return "BlockGrad"
+
+blockgrad=BlockGrad()
+
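A minimal gradient check for the new Op (illustrative, assuming the patch is applied): the forward pass copies its input, and grad multiplies the incoming gradient by zero, so T.grad of a cost through blockgrad comes back as zeros while the unwrapped cost keeps its usual gradient.

    import theano
    import theano.tensor as T
    from pylearn.sandbox.scan_inputs_groups import blockgrad

    x = T.dvector('x')
    free_cost = T.sum(x ** 2)                 # gradient flows: d/dx = 2x
    blocked_cost = T.sum(blockgrad(x) ** 2)   # gradient blocked: d/dx = 0
    g = theano.function([x], [T.grad(free_cost, x), T.grad(blocked_cost, x)])
    print g([1.0, -2.0])                      # roughly [array([ 2., -4.]), array([ 0.,  0.])]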
 # Encoding scan dot product------------------------------------
 class ScanDotEnc(Op):
     """This Op takes an index list (as tensor.ivector), a list of matrices representing