pylearn: changeset 817:db2c26a2c97c
new parameters and Op for DAA inputs groups

author      Xavier Glorot <glorotxa@iro.umontreal.ca>
date        Wed, 02 Sep 2009 13:36:41 -0400
parents     d1e6c36df023
children    7dfecf11cbf4
files       pylearn/algorithms/sandbox/DAA_inputs_groups.py
            pylearn/sandbox/scan_inputs_groups.py
diffstat    2 files changed, 54 insertions(+), 9 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon Aug 17 19:54:35 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Sep 02 13:36:41 2009 -0400
@@ -6,7 +6,7 @@
 from theano.compile import module
 
 from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
-        scanmaskenc,scanmaskdec, FillMissing, mask_gradient
+        scanmaskenc,scanmaskdec, FillMissing, mask_gradient, blockgrad
 
 from pylearn.algorithms.logistic_regression import LogRegN
 import pylearn.algorithms.cost
@@ -93,6 +93,9 @@
 def tanh_act(x):
     return theano.tensor.tanh(x/2.0)
 
+def softsign_act(x):
+    return x/(1.0 + theano.tensor.abs(x))
+
 # costs utils:---------------------------------------------------
 # in order to fix numerical instability of the cost and gradient calculation for the cross entropy we calculate it
 # with the following functions direclty from the activation:
@@ -109,11 +112,19 @@
     XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
     return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
 
+def softsign_cross_entropy(target, output_act, mean_axis, sum_axis):
+    newact = ((output_act/(1.0 + theano.tensor.abs(output_act)))+1)/2.0
+    XE = (target+1)/2.0 * T.log(newact) + (1 - (target+1)/2.0) * T.log(1 - newact)
+    XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
+    return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
+
 def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1):
     if act == 'sigmoid_act':
         return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis)
     if act == 'tanh_act':
         return tanh_cross_entropy(target, output_act, mean_axis, sum_axis)
+    if act == 'softsign_act':
+        return softsign_cross_entropy(target, output_act, mean_axis, sum_axis)
     assert False
 
 def quadratic(target, output, act, mean_axis = 0):
@@ -129,7 +140,7 @@
             regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act',
             rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy', interface = True,
             ignore_missing=None, reconstruct_missing=False,
-            corruption_pattern=None, **init):
+            corruption_pattern=None, blockgrad = False, **init):
         """
         :param input: WRITEME
         :param auxinput: WRITEME
@@ -180,11 +191,12 @@
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
+        self.blockgrad = blockgrad
 
-        assert hid_fn in ('sigmoid_act','tanh_act')
+        assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
         self.hid_fn = eval(hid_fn)
 
-        assert rec_fn in ('sigmoid_act','tanh_act')
+        assert rec_fn in ('sigmoid_act','tanh_act','softsign_act')
        self.rec_fn = eval(rec_fn)
         self.rec_name = rec_fn
 
@@ -347,8 +359,8 @@
         tmpbool = (self.reconstruction_cost_function_name == 'cross_entropy')
         if (self.input is not None):
             container.reconstruction_cost_in = \
-                    self.reconstruction_cost_function(self.input, container.rec_activation_in \
-                    if tmpbool else container.rec_in, self.rec_name)
+                    self.reconstruction_cost_function(blockgrad(self.input) if self.blockgrad else self.input,\
+                    container.rec_activation_in if tmpbool else container.rec_in, self.rec_name)
         if (self.auxinput is not None):
             container.reconstruction_cost_aux = \
                     self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), container.rec_activation_aux \
@@ -484,7 +496,7 @@
             rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
             n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
             ignore_missing=None, reconstruct_missing=False,
-            corruption_pattern=None,
+            corruption_pattern=None, blockgrad = False, act_reg = 'sigmoid_act',
             **init):
         super(StackedDAAig, self).__init__()
 
@@ -516,6 +528,10 @@
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
+        self.blockgrad = blockgrad
+
+        assert act_reg in ('sigmoid_act','tanh_act','softsign_act')
+        self.act_reg = eval(act_reg)
 
         print '\t**** StackedDAAig.__init__ ****'
         print '\tdepth = ', self.depth
@@ -529,7 +545,10 @@
         print '\ttie_weights_aux = ', tie_weights_aux
         print '\thid_fn = ', hid_fn
         print '\trec_fn = ', rec_fn
+        print '\tact_reg = ', act_reg
         print '\treconstruction_cost_function = ', reconstruction_cost_function
+        print '\tblockgrad = ', blockgrad
+        print '\tact_reg = ', act_reg
         print '\tn_out = ', self.n_out
 
         # init for model construction
@@ -594,7 +613,8 @@
                     tie_weights_aux = tie_weights_aux[i], hid_fn = hid_fn[i],
                     rec_fn = rec_fn[i], reconstruction_cost_function = reconstruction_cost_function[i],
                     interface = False, ignore_missing = self.ignore_missing,
-                    reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern)
+                    reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern,
+                    blockgrad=self.blockgrad)
             if auxin_size[i] is None:
                 offset +=1
                 dict_params.update({'auxinput' : None})
@@ -675,7 +695,7 @@
         # supervised layer------------------------------------------------------------------------
         print '\tLayer supervised init'
         self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target]
-        self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,sigmoid_act(self.daaig[-2].clean.hidden_activation),self.target)
+        self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,self.act_reg(self.daaig[-2].clean.hidden_activation),self.target)
         paramstot += self.daaig[-1].params
 
         self.localcost[-1] = self.daaig[-1].regularized_cost \
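[Reader's note, not part of the changeset] The patch above adds a softsign activation and a matching cross-entropy that works directly from the pre-activation. The following standalone numpy sketch (names and values are illustrative only, not the Theano expressions themselves) shows what those two functions compute; the XS term in the patch subtracts the entropy of the target so a perfect reconstruction scores zero, and since it vanishes for hard +/-1 targets it is omitted here.

import numpy as np

# Illustrative numpy stand-ins for the new functions (assumed mirrors, not the patch code).
def softsign_act(x):
    # squashes activations into (-1, 1), like tanh_act but with polynomial tails
    return x / (1.0 + np.abs(x))

def softsign_cross_entropy(target, output_act, mean_axis=0, sum_axis=1):
    # map the (-1, 1) softsign output and the {-1, +1} target into (0, 1)
    p = (softsign_act(output_act) + 1) / 2.0
    t = (target + 1) / 2.0
    XE = t * np.log(p) + (1 - t) * np.log(1 - p)
    return -np.mean(np.sum(XE, axis=sum_axis), axis=mean_axis)

target = np.array([[ 1., -1.],
                   [-1.,  1.]])
pre_activation = np.array([[ 2.0, -1.5],
                           [-0.5,  3.0]])
print(softsign_cross_entropy(target, pre_activation))  # small positive scalar cost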
--- a/pylearn/sandbox/scan_inputs_groups.py	Mon Aug 17 19:54:35 2009 -0400
+++ b/pylearn/sandbox/scan_inputs_groups.py	Wed Sep 02 13:36:41 2009 -0400
@@ -71,6 +71,31 @@
             if nbias != 1: raise TypeError('not vector', bias_list[i])
     return bias_list
 
+
+# block grad Op------------------------------------
+class BlockGrad(Op):
+    """This Op block the gradient of a variable"""
+    def make_node(self, x):
+        x = T.as_tensor_variable(x)
+        if x.ndim == 1:
+            return Apply(self, [x], [T.dvector()])
+        else:
+            return Apply(self, [x], [T.dmatrix()])
+
+    def perform(self, node , x ,(out,)):
+        out[0] = x[0].copy()
+
+    def grad(self, x, (gx,)):
+        return [gx*0]
+
+    def __hash__(self):
+        return hash(BlockGrad)^77612
+
+    def __str__(self):
+        return "BlockGrad"
+
+blockgrad=BlockGrad()
+
 # Encoding scan dot product------------------------------------
 class ScanDotEnc(Op):
     """This Op takes an index list (as tensor.ivector), a list of matrices representing
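[Reader's note, not part of the changeset] The new BlockGrad Op is a stop-gradient: the forward pass is the identity, while the backward pass discards whatever gradient arrives, so wrapping a value in blockgrad(...) keeps it out of the parameter updates. That is how the first file uses it on self.input inside the reconstruction cost when the new blockgrad flag is set. The sketch below is a hypothetical plain-Python stand-in mimicking that contract, not the Theano Op itself.

import numpy as np

# Hypothetical stand-in for the BlockGrad contract (identity forward, zero gradient backward).
def blockgrad_perform(x):
    return x.copy()        # like perform(): out[0] = x[0].copy()

def blockgrad_grad(gx):
    return gx * 0          # like grad(): return [gx*0]

x = np.array([0.5, -1.0, 2.0])
upstream_grad = np.array([0.1, 0.2, 0.3])

print(blockgrad_perform(x))           # prints the input unchanged: values pass through
print(blockgrad_grad(upstream_grad))  # prints an all-zero array: no gradient reaches x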