changeset 800:49ba5d622c3a

fixed cost scaling bug for DAA_inputs_groups
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Tue, 21 Jul 2009 17:22:20 -0400
parents 9dd5af3b26fe
children 0772b76c806d
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 19 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jul 21 13:31:43 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jul 21 17:22:20 2009 -0400
@@ -217,7 +217,8 @@
             self.lr = T.scalar('lr')
         self.noise_level = T.scalar('noise_level')
         self.noise_level_group = T.scalar('noise_level_group')
-        self.scale_cost = T.scalar('scale_cost')
+        self.scale_cost_in = T.scalar('scale_cost_in')
+        self.scale_cost_aux = T.scalar('scale_cost_aux')
         
         # leave the chance for subclasses to initialize (example convolutionnal to implement)
         if self.__class__ == DAAig:
@@ -335,8 +336,8 @@
         
         # TOTAL COST
         if (self.input is not None) and (self.auxinput is not None):
-            container.reconstruction_cost = (T.min(T.constant(1),T.constant(1)+self.scale_cost)) * \
-                container.reconstruction_cost_in + (T.min(T.constant(1),T.constant(1)-self.scale_cost)) *\
+            container.reconstruction_cost = self.scale_cost_in * \
+                container.reconstruction_cost_in +  self.scale_cost_aux*\
                 container.reconstruction_cost_aux
         else:
             if self.input is not None:
@@ -392,18 +393,17 @@
         self.activation = theano.Method(listin, self.clean.hidden_activation)
         self.representation = theano.Method(listin, self.clean.hidden)
     
-    def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, scale_cost = 0,
-                            seed=1, orthoinit = False, alloc=True, **init):
+    def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, scale_cost_in = 1,
+                            scale_cost_aux = 1 , seed=1, orthoinit = False, alloc=True, **init):
         super(DAAig, self)._instance_initialize(obj, **init)
         
         obj.reg_coef = reg_coef
         obj.noise_level = noise_level
         obj.noise_level_group = noise_level_group
-        obj.scale_cost = scale_cost
-        if self. interface:
-            obj.lr = lr # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module)
-        else:
-            obj.lr = None
+        obj.scale_cost_in = scale_cost_in
+        obj.scale_cost_aux = scale_cost_aux
+        obj.lr = lr  if self.interface else None
+        # when stacked, lr is useless (overridden by the sup_lr and unsup_lr of the StackedDAAig module)
         
         obj.random.initialize()
         obj.random.seed(seed)
@@ -421,8 +421,7 @@
             if self.input is not None:
                 wencshp = (self.in_size, self.n_hid)
                 wdecshp = tuple(reversed(wencshp))
-                print 'wencshp = ', wencshp
-                print 'wdecshp = ', wdecshp
+                obj.bdec = numpy.zeros(self.in_size)
                 obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
                 if not(self.tie_weights):
                     obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
@@ -430,13 +429,13 @@
                     obj.wenc = orthogonalinit(obj.wenc)
                     if not(self.tie_weights):
                         obj.wdec = orthogonalinit(obj.wdec,0)
-                obj.bdec = numpy.zeros(self.in_size)
+                print 'wencshp = ', wencshp
+                print 'wdecshp = ', wdecshp
             
             if self.auxinput is not None:
                 wauxencshp = [(i, self.n_hid) for i in self.auxin_size]
                 wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
-                print 'wauxencshp = ', wauxencshp
-                print 'wauxdecshp = ', wauxdecshp
+                obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
                 obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
                 if not(self.tie_weights):
                     obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
@@ -444,7 +443,8 @@
                     obj.wauxenc = [orthogonalinit(w) for w in obj.wauxenc]
                     if not(self.tie_weights):
                         obj.wauxdec = [orthogonalinit(w,0) for w in obj.wauxdec]
-                obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
+                print 'wauxencshp = ', wauxencshp
+                print 'wauxdecshp = ', wauxdecshp
             
             print 'self.inf = ', self.inf
             print 'self.hif = ', self.hif
@@ -698,7 +698,7 @@
                 self.compute_totalgradients[-1] =\
                         theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
     
-    def _instance_initialize(self,inst,unsup_lr = 0.01, sup_lr = 0.01, reg_coef = 0, scale_cost = 0,
+    def _instance_initialize(self,inst,unsup_lr = 0.01, sup_lr = 0.01, reg_coef = 0, scale_cost_in = 1, scale_cost_aux = 1,
                                 noise_level = 0 , noise_level_group = 0, seed = 1, orthoinit = False, alloc = True,**init):
         super(StackedDAAig, self)._instance_initialize(inst, **init)
         
@@ -709,7 +709,8 @@
             print '\tLayer = ', i+1
             inst.daaig[i].initialize(reg_coef = reg_coef[i] if type(reg_coef) is list else reg_coef, \
                     noise_level = noise_level[i] if type(noise_level) is list else noise_level, \
-                    scale_cost = scale_cost[i] if type(scale_cost) is list else scale_cost, \
+                    scale_cost_in = scale_cost_in[i] if type(scale_cost_in) is list else scale_cost_in, \
+                    scale_cost_aux = scale_cost_aux[i] if type(scale_cost_aux) is list else scale_cost_aux, \
                     noise_level_group = noise_level_group[i] if type(noise_level_group) is list else noise_level_group, \
                     seed = seed + i, orthoinit = orthoinit, alloc = alloc)