changeset 798:0b20301ded89
variable scale_cost for DAA_inputs_groups
author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
---|---|
date | Tue, 21 Jul 2009 12:19:30 -0400 |
parents | cc94cdd48d85 |
children | 9dd5af3b26fe |
files | pylearn/algorithms/sandbox/DAA_inputs_groups.py |
diffstat | 1 files changed, 21 insertions(+), 17 deletions(-) |
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon Jul 20 13:07:51 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jul 21 12:19:30 2009 -0400
@@ -107,7 +107,7 @@
                  in_size=None, auxin_size= None, n_hid=1,
                  regularize = False, tie_weights = False, hid_fn = 'tanh_act',
                  rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
-                 scale_cost = False, interface = True, ignore_missing=None, reconstruct_missing=False,
+                 interface = True, ignore_missing=None, reconstruct_missing=False,
                  corruption_pattern=None, **init):
         """
         :param input: WRITEME
@@ -156,7 +156,6 @@
         print '\t\thid_fn = ', hid_fn
         print '\t\trec_fn = ', rec_fn
         print '\t\treconstruction_cost_function = ', reconstruction_cost_function
-        print '\t\tscale_cost = ', scale_cost
 
         super(DAAig, self).__init__()
         self.random = T.RandomStreams()
@@ -171,7 +170,6 @@
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
-        self.scale_cost = scale_cost
 
         assert hid_fn in ('sigmoid_act','tanh_act')
         self.hid_fn = eval(hid_fn)
@@ -215,6 +213,7 @@
 
         self.noise_level = T.scalar('noise_level')
         self.noise_level_group = T.scalar('noise_level_group')
+        self.scale_cost = T.scalar('scale_cost')
 
         # leave the chance for subclasses to initialize (example convolutionnal to implement)
         if self.__class__ == DAAig:
@@ -304,8 +303,8 @@
         if self.input is not None:
             listweights += [self.wenc,self.wdec]
             listweightsenc += [self.wenc]
-        self.regularization = self.reg_coef * get_reg_cost(listweights,'l2')
-        self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2')
+        self.regularization = self.reg_coef * get_reg_cost(listweights,'l1')
+        self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l1')
 
     def define_cost(self, container):
         if self.reconstruction_cost_function_name == 'cross_entropy':
@@ -326,8 +325,9 @@
                     self.rec_name)
         # TOTAL COST
         if (self.input is not None) and (self.auxinput is not None):
-            container.reconstruction_cost = (T.constant(min(1,1+self.scale_cost)) *container.reconstruction_cost_in +\
-                    T.constant(min(1,1-self.scale_cost)) * container.reconstruction_cost_aux )
+            container.reconstruction_cost = (T.min(T.constant(1),T.constant(1)+self.scale_cost)) * \
+                    container.reconstruction_cost_in + (T.min(T.constant(1),T.constant(1)-self.scale_cost)) *\
+                    container.reconstruction_cost_aux
         else:
             if self.input is not None:
                 container.reconstruction_cost = container.reconstruction_cost_in
@@ -395,13 +395,14 @@
                     % self.corruption_pattern)
         return mask * self.input
 
-    def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
+    def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, scale_cost = 0,
                              seed=1, orthoinit = False, alloc=True, **init):
         super(DAAig, self)._instance_initialize(obj, **init)
 
         obj.reg_coef = reg_coef
         obj.noise_level = noise_level
         obj.noise_level_group = noise_level_group
+        obj.scale_cost = scale_cost
         if self.interface:
             obj.lr = lr # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module)
         else:
@@ -467,7 +468,7 @@
     def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None],
                  in_size = None, auxin_size = [None], n_hid = [1],
                  regularize = False, tie_weights = False, hid_fn = 'tanh_act',
-                 rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy', scale_cost=False,
+                 rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
                  n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
                  ignore_missing=None, reconstruct_missing=False,
                  corruption_pattern=None,
@@ -494,7 +495,6 @@
         hid_fn = listify(hid_fn,depth)
         rec_fn = listify(rec_fn,depth)
         reconstruction_cost_function = listify(reconstruction_cost_function,depth)
-        scale_cost = listify(scale_cost,depth)
         self.n_out = n_out
         self.target = target if target is not None else T.lvector('target')
         self.debugmethod = debugmethod
@@ -515,7 +515,6 @@
         print '\thid_fn = ', hid_fn
         print '\trec_fn = ', rec_fn
         print '\treconstruction_cost_function = ', reconstruction_cost_function
-        print '\tscale_cost = ', scale_cost
         print '\tn_out = ', self.n_out
 
         # init for model construction
@@ -578,7 +577,7 @@
             dict_params = dict(input = inputprec, in_size = in_sizeprec, auxin_size = auxin_size[i],
                     n_hid = self.n_hid[i], regularize = False, tie_weights = tie_weights[i], hid_fn = hid_fn[i],
                     rec_fn = rec_fn[i], reconstruction_cost_function = reconstruction_cost_function[i],
-                    scale_cost = scale_cost[i], interface = False, ignore_missing = self.ignore_missing,
+                    interface = False, ignore_missing = self.ignore_missing,
                     reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern)
             if auxin_size[i] is None:
                 offset +=1
@@ -720,17 +719,21 @@
 
         self.compute_totalgradients[-1] =\
                 theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
 
-    def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
+    def _instance_initialize(self,inst,unsup_lr = 0.01, sup_lr = 0.01, reg_coef = 0, scale_cost = 0,
                              noise_level = 0 , noise_level_group = 0, seed = 1, orthoinit = False, alloc = True,**init):
         super(StackedDAAig, self)._instance_initialize(inst, **init)
 
         inst.unsup_lr = unsup_lr
         inst.sup_lr = sup_lr
+
         for i in range(self.depth):
             print '\tLayer = ', i+1
-            inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\
-                    noise_level_group = noise_level_group, seed = seed + i, orthoinit = orthoinit, alloc = alloc)
+            inst.daaig[i].initialize(reg_coef = reg_coef[i] if type(reg_coef) is list else reg_coef, \
+                    noise_level = noise_level[i] if type(noise_level) is list else noise_level, \
+                    scale_cost = scale_cost[i] if type(scale_cost) is list else scale_cost, \
+                    noise_level_group = noise_level_group[i] if type(noise_level_group) is list else noise_level_group, \
+                    seed = seed + i, orthoinit = orthoinit, alloc = alloc)
         print '\tLayer supervised'
         inst.daaig[-1].initialize()
         if alloc:
@@ -742,8 +745,9 @@
         else:
             inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
                     low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)))
-        inst.daaig[-1].l1 = 0
-        inst.daaig[-1].l2 = reg_coef #only l2 norm for regularisation to be consitent with the unsup regularisation
+        inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef
+        inst.daaig[-1].l2 = 0
+        #only l1 norm for regularisation to be consitent with the unsup regularisation
 
     def _instance_save(self,inst,save_dir=''):
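For context on what the new variable does: `scale_cost` changes from a constructor argument folded into the graph as a constant into a Theano scalar (`T.scalar('scale_cost')`) that is set at instance-initialization time. When both an input and an auxiliary input are present, the total reconstruction cost is weighted as `min(1, 1 + scale_cost) * cost_in + min(1, 1 - scale_cost) * cost_aux`, which is explicit in the removed constant-folded expression. A minimal sketch of that arithmetic in plain Python (function and variable names here are illustrative, not from the changeset):

```python
def weighted_reconstruction_cost(cost_in, cost_aux, scale_cost=0.0):
    # Mirrors the weighting used in define_cost:
    #   min(1, 1 + scale_cost) scales the input reconstruction cost,
    #   min(1, 1 - scale_cost) scales the auxiliary reconstruction cost.
    # scale_cost is expected in [-1, 1]; 0 gives both terms full weight.
    return min(1, 1 + scale_cost) * cost_in + min(1, 1 - scale_cost) * cost_aux

print(weighted_reconstruction_cost(2.0, 4.0))        # 6.0 : equal weighting
print(weighted_reconstruction_cost(2.0, 4.0, 0.5))   # 4.0 : aux cost halved
print(weighted_reconstruction_cost(2.0, 4.0, -1.0))  # 4.0 : input cost zeroed
```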
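`StackedDAAig._instance_initialize` also now accepts either one shared value or a per-layer list for `reg_coef`, `noise_level`, `noise_level_group`, and `scale_cost`, via the inline `x[i] if type(x) is list else x` pattern, and the supervised layer switches from l2 to l1 regularization to match the unsupervised layers. A short sketch of the dispatch pattern (`per_layer` is a hypothetical helper, not part of the changeset):

```python
def per_layer(value, i):
    # Hypothetical helper mirroring the changeset's inline pattern:
    # a bare hyperparameter is shared across all layers, while a list
    # supplies one value per layer.
    return value[i] if type(value) is list else value

assert per_layer(0.01, 2) == 0.01                  # shared scalar
assert per_layer([0.1, 0.01, 0.001], 2) == 0.001   # per-layer list
```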