changeset 831:0ba62c55d59f

merge
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 14 Oct 2009 10:19:37 -0400
parents 0f66973e4f95 (diff) 3f44379177b2 (current diff)
children 28ceb345ab78
files
diffstat 1 files changed, 7 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Oct 07 11:17:49 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Oct 14 10:19:37 2009 -0400
@@ -91,7 +91,7 @@
 
 #tanh is scaled by 2 to have the same gradient as sigmoid [sigmoid(x)=(tanh(x/2.0)+1)/2.0]
 def tanh_act(x):
-    return theano.tensor.tanh(x/2.0)
+    return 1.7159*theano.tensor.tanh(2.0/3.0*x)
 
 #dividing by 2 is a bad idea with many layers... we lose the std of U*x
 def tanh2_act(x):
@@ -459,12 +459,12 @@
         self.R = numpy.random.RandomState(seed)
         
         if self.input is not None:
-            self.inf = 1/numpy.sqrt(self.in_size)
+            self.inf = numpy.sqrt(3)/numpy.sqrt(self.in_size)
         if self.auxinput is not None:
-            self.inf = 1/numpy.sqrt(sum(self.auxin_size))
+            self.inf = numpy.sqrt(3)/numpy.sqrt(sum(self.auxin_size))
         if (self.auxinput is not None) and (self.input is not None):
-            self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size)
-        self.hif = 1/numpy.sqrt(self.n_hid)
+            self.inf = numpy.sqrt(3)/numpy.sqrt(sum(self.auxin_size)+self.in_size)
+        self.hif = numpy.sqrt(3)/numpy.sqrt(self.n_hid)
         
         if alloc:
             if self.input is not None:
@@ -784,7 +784,8 @@
             inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth)
             # init the logreg weights
             inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
-                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+                    low = -numpy.sqrt(3)/numpy.sqrt(inst.daaig[-2].n_hid),\
+                    high = numpy.sqrt(3)/numpy.sqrt(inst.daaig[-2].n_hid))
             if orthoinit:
                 inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].w)
         inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef