Mercurial > pylearn
changeset 830:0f66973e4f95
better initialisation and tanh DAA input groups
author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
---|---|
date | Wed, 14 Oct 2009 10:19:14 -0400 |
parents | 9945cd79fe79 |
children | 0ba62c55d59f |
files | pylearn/algorithms/sandbox/DAA_inputs_groups.py |
diffstat | 1 files changed, 7 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Sep 30 17:15:01 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Oct 14 10:19:14 2009 -0400
@@ -91,7 +91,7 @@
 
 #tanh is scaled by 2 to have the same gradient than sigmoid [sigmoid(x)=(tanh(x/2.0)+1)/2.0]
 def tanh_act(x):
-    return theano.tensor.tanh(x/2.0)
+    return 1.7159*theano.tensor.tanh(2.0/3.0*x)
 
 #divide per 2 is a bad idea with many layers... we lose the std of U*x
 def tanh2_act(x):
@@ -459,12 +459,12 @@
 
         self.R = numpy.random.RandomState(seed)
         if self.input is not None:
-            self.inf = 1/numpy.sqrt(self.in_size)
+            self.inf = numpy.sqrt(3)/numpy.sqrt(self.in_size)
         if self.auxinput is not None:
-            self.inf = 1/numpy.sqrt(sum(self.auxin_size))
+            self.inf = numpy.sqrt(3)/numpy.sqrt(sum(self.auxin_size))
         if (self.auxinput is not None) and (self.input is not None):
-            self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size)
-        self.hif = 1/numpy.sqrt(self.n_hid)
+            self.inf = numpy.sqrt(3)/numpy.sqrt(sum(self.auxin_size)+self.in_size)
+        self.hif = numpy.sqrt(3)/numpy.sqrt(self.n_hid)
 
         if alloc:
             if self.input is not None:
@@ -784,7 +784,8 @@
         inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth)
         # init the logreg weights
         inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
-                low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+                low = -numpy.sqrt(3)/numpy.sqrt(inst.daaig[-2].n_hid),\
+                high = numpy.sqrt(3)/numpy.sqrt(inst.daaig[-2].n_hid))
         if orthoinit:
             inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].w)
         inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef
```