changeset 798:0b20301ded89

variable scale_cost for DAA_inputs_groups
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Tue, 21 Jul 2009 12:19:30 -0400
parents cc94cdd48d85
children 9dd5af3b26fe
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 21 insertions(+), 17 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon Jul 20 13:07:51 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jul 21 12:19:30 2009 -0400
@@ -107,7 +107,7 @@
                 in_size=None, auxin_size= None, n_hid=1,
                 regularize = False, tie_weights = False, hid_fn = 'tanh_act',
                 rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
-                scale_cost = False, interface = True, ignore_missing=None, reconstruct_missing=False,
+                interface = True, ignore_missing=None, reconstruct_missing=False,
                 corruption_pattern=None, **init):
         """
         :param input: WRITEME
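
This first hunk removes scale_cost from the DAAig constructor, so the cost trade-off is no longer fixed when the object is built. A hypothetical before/after call (argument values are illustrative, not from this file):

    # before this changeset, the weighting was frozen at construction:
    #     DAAig(input=x, in_size=20, n_hid=10, scale_cost=0.5)
    # after it, the constructor takes no scale_cost; the value is passed
    # to _instance_initialize instead (see the later hunks):
    #     DAAig(input=x, in_size=20, n_hid=10)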
@@ -156,7 +156,6 @@
         print '\t\thid_fn = ', hid_fn
         print '\t\trec_fn = ', rec_fn
         print '\t\treconstruction_cost_function = ', reconstruction_cost_function
-        print '\t\tscale_cost = ', scale_cost
         
         super(DAAig, self).__init__()
         self.random = T.RandomStreams()
@@ -171,7 +170,6 @@
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
-        self.scale_cost = scale_cost
         
         assert hid_fn in ('sigmoid_act','tanh_act')
         self.hid_fn = eval(hid_fn)
@@ -215,6 +213,7 @@
         
         self.noise_level = T.scalar('noise_level')
         self.noise_level_group = T.scalar('noise_level_group')
+        self.scale_cost = T.scalar('scale_cost')
         
         # leave the chance for subclasses to initialize (e.g. a convolutional implementation)
         if self.__class__ == DAAig:
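
Declaring scale_cost as a T.scalar leaves it free in the symbolic graph, so it can be bound per instance at initialization time instead of being baked in as a Python constant when the graph is built. A minimal standalone sketch of that pattern (plain Theano, not this module's API):

    import theano
    import theano.tensor as T

    s = T.dscalar('scale_cost')        # free symbolic hyperparameter
    c_in = T.dscalar('cost_in')
    c_aux = T.dscalar('cost_aux')
    total = T.minimum(1, 1 + s) * c_in + T.minimum(1, 1 - s) * c_aux
    f = theano.function([s, c_in, c_aux], total)
    print f(0.5, 2.0, 3.0)             # min(1,1.5)*2 + min(1,0.5)*3 = 3.5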
@@ -304,8 +303,8 @@
         if self.input is not None:
             listweights += [self.wenc,self.wdec]
             listweightsenc += [self.wenc]
-        self.regularization = self.reg_coef * get_reg_cost(listweights,'l2')
-        self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2')
+        self.regularization = self.reg_coef * get_reg_cost(listweights,'l1')
+        self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l1')
     
     def define_cost(self, container):
         if self.reconstruction_cost_function_name == 'cross_entropy':
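
This hunk switches the weight penalty from l2 to l1. The get_reg_cost helper is not shown in this diff; a hypothetical sketch of the two penalties it is assumed to compute over the listed weight matrices:

    import theano.tensor as T

    # hypothetical stand-in for get_reg_cost (not the module's code):
    # l1 sums absolute values and drives weights to exactly zero
    # (sparsity); l2 sums squares and only shrinks them smoothly.
    def get_reg_cost_sketch(weights, reg):
        if reg == 'l1':
            return sum(abs(w).sum() for w in weights)
        if reg == 'l2':
            return sum((w ** 2).sum() for w in weights)
        raise ValueError('unknown regularizer: %s' % reg)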
@@ -326,8 +325,9 @@
                     self.rec_name)
         # TOTAL COST
         if (self.input is not None) and (self.auxinput is not None):
-            container.reconstruction_cost = (T.constant(min(1,1+self.scale_cost)) *container.reconstruction_cost_in +\
-                T.constant(min(1,1-self.scale_cost)) * container.reconstruction_cost_aux )
+            container.reconstruction_cost = T.minimum(T.constant(1), T.constant(1) + self.scale_cost) * \
+                container.reconstruction_cost_in + T.minimum(T.constant(1), T.constant(1) - self.scale_cost) * \
+                container.reconstruction_cost_aux
         else:
             if self.input is not None:
                 container.reconstruction_cost = container.reconstruction_cost_in
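
The min(1, 1 ± scale_cost) factors trade the two reconstruction costs off against each other: for scale_cost in [-1, 1], one weight stays pinned at 1 while the other shrinks linearly. The same arithmetic in plain Python:

    def cost_weights(s):
        # (weight on input cost, weight on auxiliary cost)
        return min(1.0, 1.0 + s), min(1.0, 1.0 - s)

    for s in (-1.0, -0.5, 0.0, 0.5, 1.0):
        print s, cost_weights(s)
    # -1.0 -> (0.0, 1.0): auxiliary reconstruction cost only
    #  0.0 -> (1.0, 1.0): both costs fully weighted
    #  1.0 -> (1.0, 0.0): input reconstruction cost only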
@@ -395,13 +395,14 @@
                     % self.corruption_pattern)
         return mask * self.input
     
-    def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
+    def _instance_initialize(self, obj, lr = 1, reg_coef = 0, noise_level = 0, noise_level_group = 0, scale_cost = 0,
                             seed=1, orthoinit = False, alloc=True, **init):
         super(DAAig, self)._instance_initialize(obj, **init)
         
         obj.reg_coef = reg_coef
         obj.noise_level = noise_level
         obj.noise_level_group = noise_level_group
+        obj.scale_cost = scale_cost
         if self. interface:
             obj.lr = lr # useless if stacked (overridden by the sup_lr and unsup_lr of the StackedDAAig module)
         else:
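
scale_cost now arrives through _instance_initialize, alongside reg_coef and the noise levels. A hypothetical use, assuming the theano Module make()/initialize pattern this class is built on (all numeric values are illustrative):

    model = DAAig(input=T.dmatrix('input'), in_size=20, n_hid=10)
    inst = model.make()    # assumed Module instantiation
    inst.initialize(lr=0.1, reg_coef=1e-4, noise_level=0.3, scale_cost=0.5)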
@@ -467,7 +468,7 @@
     def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None],
                 in_size = None, auxin_size = [None], n_hid = [1],
                 regularize = False, tie_weights = False, hid_fn = 'tanh_act',
-                rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy', scale_cost=False,
+                rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
                 n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
                 ignore_missing=None, reconstruct_missing=False,
                 corruption_pattern=None,
@@ -494,7 +495,6 @@
         hid_fn = listify(hid_fn,depth)
         rec_fn = listify(rec_fn,depth)
         reconstruction_cost_function = listify(reconstruction_cost_function,depth)
-        scale_cost = listify(scale_cost,depth)
         self.n_out = n_out
         self.target = target if target is not None else T.lvector('target')
         self.debugmethod = debugmethod
@@ -515,7 +515,6 @@
         print '\thid_fn = ', hid_fn
         print '\trec_fn = ', rec_fn
         print '\treconstruction_cost_function = ', reconstruction_cost_function
-        print '\tscale_cost = ', scale_cost
         print '\tn_out = ', self.n_out
         
         # init for model construction
@@ -578,7 +577,7 @@
             dict_params = dict(input = inputprec, in_size = in_sizeprec, auxin_size = auxin_size[i],
                     n_hid = self.n_hid[i], regularize = False, tie_weights = tie_weights[i], hid_fn = hid_fn[i],
                     rec_fn = rec_fn[i], reconstruction_cost_function = reconstruction_cost_function[i],
-                    scale_cost = scale_cost[i], interface = False, ignore_missing = self.ignore_missing,
+                    interface = False, ignore_missing = self.ignore_missing,
                     reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern)
             if auxin_size[i] is None:
                 offset +=1
@@ -720,17 +719,21 @@
                 self.compute_totalgradients[-1] =\
                         theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
     
-    def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
+    def _instance_initialize(self,inst,unsup_lr = 0.01, sup_lr = 0.01, reg_coef = 0, scale_cost = 0,
                                 noise_level = 0 , noise_level_group = 0, seed = 1, orthoinit = False, alloc = True,**init):
         super(StackedDAAig, self)._instance_initialize(inst, **init)
         
         inst.unsup_lr = unsup_lr
         inst.sup_lr = sup_lr
         
+        
         for i in range(self.depth):
             print '\tLayer = ', i+1
-            inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\
-                    noise_level_group = noise_level_group, seed = seed + i, orthoinit = orthoinit, alloc = alloc)
+            inst.daaig[i].initialize(reg_coef = reg_coef[i] if type(reg_coef) is list else reg_coef, \
+                    noise_level = noise_level[i] if type(noise_level) is list else noise_level, \
+                    scale_cost = scale_cost[i] if type(scale_cost) is list else scale_cost, \
+                    noise_level_group = noise_level_group[i] if type(noise_level_group) is list else noise_level_group, \
+                    seed = seed + i, orthoinit = orthoinit, alloc = alloc)
         print '\tLayer supervised'
         inst.daaig[-1].initialize()
         if alloc:
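
Each hyperparameter of the stacked initializer may now be a single value shared by every layer or a list with one entry per layer. The broadcast idiom used above, as a standalone helper (a sketch, not part of the module):

    def per_layer(value, i):
        # list -> one value per layer; anything else -> shared by all layers
        return value[i] if type(value) is list else value

    print per_layer(0.1, 2)              # 0.1  (shared scalar)
    print per_layer([0.1, 0.2, 0.3], 2)  # 0.3  (value for layer 3)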
@@ -742,8 +745,9 @@
             else:
                 inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
                                     low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)))
-        inst.daaig[-1].l1 = 0
-        inst.daaig[-1].l2 = reg_coef #only l2 norm for regularisation to be consitent with the unsup regularisation
+        inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef
+        inst.daaig[-1].l2 = 0
+        # only l1 norm for regularisation, to be consistent with the unsup regularisation
     
     def _instance_save(self,inst,save_dir=''):