changeset 837:28ceb345ab78

merge
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 16 Oct 2009 12:20:57 -0400
parents 788c2c8558eb (current diff) 0ba62c55d59f (diff)
children 4f7e0edee7d0
files
diffstat 3 files changed, 21 insertions(+), 11 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Fri Oct 16 12:19:12 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Fri Oct 16 12:20:57 2009 -0400
@@ -91,7 +91,7 @@
 
 #tanh is scaled by 2 to have the same gradient as the sigmoid [sigmoid(x)=(tanh(x/2.0)+1)/2.0]
 def tanh_act(x):
-    return theano.tensor.tanh(x/2.0)
+    return 1.7159*theano.tensor.tanh(2.0/3.0*x)
 
 #dividing by 2 is a bad idea with many layers... we lose the std of U*x
 def tanh2_act(x):
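
The new body is the scaled tanh recommended in LeCun et al., "Efficient BackProp": f(x) = 1.7159*tanh(2x/3) gives f(1) ~= 1 and f(-1) ~= -1, so roughly unit-variance inputs keep roughly unit-variance outputs. A quick numpy check of those properties (a sketch, independent of Theano):

    import numpy

    def tanh_act_np(x):
        # 1.7159*tanh(2x/3): f(1) ~= 1, f(-1) ~= -1, slope ~1.14 at the origin
        return 1.7159 * numpy.tanh(2.0 / 3.0 * x)

    assert abs(tanh_act_np(1.0) - 1.0) < 1e-3
    assert abs(tanh_act_np(-1.0) + 1.0) < 1e-3
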
@@ -100,6 +100,9 @@
 def softsign_act(x):
     return x/(1.0 + T.abs_(x))
 
+def arsinh_act(x):
+    return T.log(x+T.sqrt(1+x*x))
+
 # costs utils:---------------------------------------------------
 # to avoid numerical instability in the cross-entropy cost and gradient calculation, we compute it
 # with the following functions directly from the activation:
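
arsinh_act is the inverse hyperbolic sine: log(x + sqrt(1+x^2)) = arcsinh(x). It is symmetric and unbounded, with derivative 1/sqrt(1+x^2), so it saturates only logarithmically. A sketch checking the formula against numpy's built-in:

    import numpy

    def arsinh_act_np(x):
        # log(x + sqrt(1 + x^2)) is exactly arcsinh(x)
        return numpy.log(x + numpy.sqrt(1 + x * x))

    x = numpy.linspace(-5.0, 5.0, 11)
    assert numpy.allclose(arsinh_act_np(x), numpy.arcsinh(x))
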
@@ -128,6 +131,7 @@
     XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
     return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
 
+
 def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1):
     if act == 'sigmoid_act':
         return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis)
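
For the sigmoid case, computing the cost from the pre-sigmoid activation is the standard stability trick: with s = sigmoid(a) and target t in [0,1] (the code above maps +/-1 targets via (target+1)/2.0), -[t*log(s) + (1-t)*log(1-s)] = softplus(a) - t*a, which never evaluates a log near 0. A minimal numpy sketch of that identity:

    import numpy

    def sigmoid_xent_from_act(t, a):
        # softplus(a) - t*a, with softplus(a) = log(1 + exp(a))
        # computed stably as max(a, 0) + log1p(exp(-|a|))
        softplus = numpy.maximum(a, 0.0) + numpy.log1p(numpy.exp(-numpy.abs(a)))
        return softplus - t * a
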
@@ -205,7 +209,7 @@
         self.corruption_pattern = corruption_pattern
         self.blockgrad = blockgrad
         
-        assert hid_fn in ('sigmoid_act','tanh_act','softsign_act','tanh2_act')
+        assert hid_fn in ('sigmoid_act','tanh_act','softsign_act','tanh2_act','arsinh_act')
         self.hid_fn = eval(hid_fn)
         
         assert rec_fn in ('sigmoid_act','tanh_act','softsign_act','tanh2_act')
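
Both asserts whitelist the activation name before eval resolves it to the module-level function. An equivalent, eval-free sketch using an explicit table (hypothetical helper, not in the module):

    ACTIVATIONS = {'sigmoid_act': sigmoid_act, 'tanh_act': tanh_act,
                   'softsign_act': softsign_act, 'tanh2_act': tanh2_act,
                   'arsinh_act': arsinh_act}

    def resolve_activation(name):
        # KeyError plays the role of the assert
        return ACTIVATIONS[name]
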
@@ -455,12 +459,12 @@
         self.R = numpy.random.RandomState(seed)
         
         if self.input is not None:
-            self.inf = 1/numpy.sqrt(self.in_size)
+            self.inf = numpy.sqrt(3)/numpy.sqrt(self.in_size)
         if self.auxinput is not None:
-            self.inf = 1/numpy.sqrt(sum(self.auxin_size))
+            self.inf = numpy.sqrt(3)/numpy.sqrt(sum(self.auxin_size))
         if (self.auxinput is not None) and (self.input is not None):
-            self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size)
-        self.hif = 1/numpy.sqrt(self.n_hid)
+            self.inf = numpy.sqrt(3)/numpy.sqrt(sum(self.auxin_size)+self.in_size)
+        self.hif = numpy.sqrt(3)/numpy.sqrt(self.n_hid)
         
         if alloc:
             if self.input is not None:
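
The bound moves from 1/sqrt(fan_in) to sqrt(3)/sqrt(fan_in): a Uniform(-a, a) draw has standard deviation a/sqrt(3), so the new bound makes the weight standard deviation exactly 1/sqrt(fan_in); the same bound is applied to the logistic-regression weights further down. A numpy sketch of the effect, with an assumed fan-in of 784:

    import numpy

    n_in = 784
    R = numpy.random.RandomState(0)
    a = numpy.sqrt(3) / numpy.sqrt(n_in)
    W = R.uniform(low=-a, high=a, size=(n_in, 500))
    # std of Uniform(-a, a) is a/sqrt(3) = 1/sqrt(n_in)
    assert abs(W.std() - 1.0 / numpy.sqrt(n_in)) < 1e-3
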
@@ -542,7 +546,7 @@
         self.corruption_pattern = corruption_pattern
         self.blockgrad = blockgrad
         
-        assert act_reg in ('sigmoid_act','tanh_act','softsign_act','tanh2_act')
+        assert act_reg in ('sigmoid_act','tanh_act','softsign_act','tanh2_act','arsinh_act')
         self.act_reg = eval(act_reg)
         
         print '\t**** StackedDAAig.__init__ ****'
@@ -780,7 +784,8 @@
             inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth)
             # init the logreg weights
             inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
-                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+                    low = -numpy.sqrt(3)/numpy.sqrt(inst.daaig[-2].n_hid),\
+                    high = numpy.sqrt(3)/numpy.sqrt(inst.daaig[-2].n_hid))
             if orthoinit:
                 inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].w)
         inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef
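
When orthoinit is set, orthogonalinit (defined elsewhere in the module) replaces the freshly drawn matrix with an orthogonalized one. A common way to do this, sketched here via SVD and not necessarily the module's exact implementation:

    import numpy

    def orthogonal_init_sketch(w):
        # project w onto the nearest (semi-)orthogonal matrix
        u, _, vt = numpy.linalg.svd(w, full_matrices=False)
        return numpy.dot(u, vt)
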
--- a/pylearn/datasets/MNIST.py	Fri Oct 16 12:19:12 2009 -0400
+++ b/pylearn/datasets/MNIST.py	Fri Oct 16 12:20:57 2009 -0400
@@ -53,11 +53,11 @@
 def full():
     return train_valid_test()
 
-#usefull for test, keep it
+#useful for test, keep it
 def first_10():
     return train_valid_test(ntrain=10, nvalid=10, ntest=10)
 
-#usefull for test, keep it
+#useful for test, keep it
 def first_100():
     return train_valid_test(ntrain=100, nvalid=100, ntest=100)
 
--- a/pylearn/io/pmat.py	Fri Oct 16 12:19:12 2009 -0400
+++ b/pylearn/io/pmat.py	Fri Oct 16 12:20:57 2009 -0400
@@ -36,7 +36,12 @@
 
 #import numarray, sys, os, os.path
 import numpy.numarray, sys, os, os.path
-import fpconst
+try:
+    import fpconst
+except ImportError:
+    print 'The Python library fpconst is needed but not installed;'
+    print 'please install it using your favorite package manager.'
+    raise
 
 def array_columns( a, cols ):
     indices = None
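
The guard prints a hint and then re-raises the original ImportError; a variant that carries the hint in the exception itself (same Python 2-era idiom as the module):

    try:
        import fpconst
    except ImportError:
        raise ImportError('the fpconst package is required by pylearn.io.pmat; '
                          'please install it with your package manager')
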