changeset 796:ef749d03d055

added separate gradient computation for the auxiliary and the normal inputs
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Mon, 20 Jul 2009 13:04:18 -0400
parents f30bb746f279
children cc94cdd48d85
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 100 insertions(+), 20 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Jul 15 13:19:56 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon Jul 20 13:04:18 2009 -0400
@@ -28,6 +28,27 @@
     file_handle.close()
     return rval
 
+# Weight initialisation utils --------------------------------------
+# Gram-Schmidt orthogonalisation of the columns (axis=1) or rows (axis=0)
+# of W, rescaling each one to the norm it had before; assumes the columns
+# within a block are linearly independent.
+# Time consuming, but this is just a test.
+def orthogonalinit(W, axis=1):
+    nb = W.shape[axis]
+    bn = W.shape[0] if axis == 1 else W.shape[1]
+    if axis == 0:
+        W = W.T
+    Worto = W.copy()
+    offset = 0
+    for i in range(nb):
+        # at most bn columns can be mutually orthogonal: start a new
+        # orthogonal block every bn columns
+        if i != 0 and i % bn == 0:
+            offset = offset + bn
+        # subtract the projections onto the previous columns of the block
+        for j in xrange(offset, i):
+            orthoproj = (Worto[:, i] * Worto[:, j]).sum() * Worto[:, j] / (Worto[:, j] * Worto[:, j]).sum()
+            orthoproj.shape = (bn, 1)
+            Worto[:, i:i+1] = Worto[:, i:i+1] - orthoproj
+        # rescale to the norm of the corresponding original column
+        Worto[:, i:i+1] = Worto[:, i:i+1] / \
+                    numpy.sqrt((Worto[:, i:i+1] * Worto[:, i:i+1]).sum(0)) * numpy.sqrt((W[:, i:i+1] * W[:, i:i+1]).sum(0))
+    return Worto if axis == 1 else Worto.T
 
 # Initialize containers:
 class CreateContainer:
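
A quick sanity check of the initialiser above (a sketch: it assumes only numpy and the orthogonalinit helper from this hunk; after the call the columns should be pairwise orthogonal while keeping the norms of the original columns):

    import numpy

    rng = numpy.random.RandomState(0)
    W = rng.uniform(low=-0.1, high=0.1, size=(5, 3))
    Wo = orthogonalinit(W, axis=1)

    print numpy.dot(Wo[:, 0], Wo[:, 1])      # ~0
    print numpy.dot(Wo[:, 1], Wo[:, 2])      # ~0
    print numpy.sqrt((W * W).sum(0))         # original column norms...
    print numpy.sqrt((Wo * Wo).sum(0))       # ...are preserved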
@@ -195,7 +216,7 @@
         self.noise_level = T.scalar('noise_level')
         self.noise_level_group = T.scalar('noise_level_group')
         
-        # leave the chance for subclasses to initialize
+        # leave the chance for subclasses to initialize (e.g. a convolutional variant, still to be implemented)
         if self.__class__ == DAAig:
             self.init_behavioural()
         print '\t\t**** end DAAig.__init__ ****'
@@ -354,7 +375,9 @@
             self.noisify = theano.Method(listin, self.noisy_input)
         if self.auxinput is not None:
             self.auxnoisify = theano.Method(listin, self.noisy_auxinput)
+        self.recactivation = theano.Method(listin, self.clean.rec_activation)
         self.reconstruction = theano.Method(listin, self.clean.rec)
+        self.activation = theano.Method(listin, self.clean.hidden_activation)
         self.representation = theano.Method(listin, self.clean.hidden)
     
     def corrupt_input(self):
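
The two new debug methods expose the pre-nonlinearity values alongside the existing post-nonlinearity ones. A sketch of the intended use (hypothetical names: daa is a made DAAig instance and batch the tuple of inputs its listin expects):

    act = daa.activation(*batch)          # hidden layer, before the nonlinearity
    hid = daa.representation(*batch)      # hidden layer, after the nonlinearity
    recact = daa.recactivation(*batch)    # reconstruction, before the nonlinearity
    rec = daa.reconstruction(*batch)      # reconstruction, after the nonlinearity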
@@ -373,7 +396,7 @@
         return mask * self.input
     
     def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
-                            seed=1, alloc=True, **init):
+                            seed=1, orthoinit=False, alloc=True, **init):
         super(DAAig, self)._instance_initialize(obj, **init)
         
         obj.reg_coef = reg_coef
@@ -405,9 +428,14 @@
                 wdecshp = tuple(reversed(wencshp))
                 print 'wencshp = ', wencshp
                 print 'wdecshp = ', wdecshp
-                obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
-                if not(self.tie_weights):
-                    obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
+                if not orthoinit:
+                    obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
+                    if not(self.tie_weights):
+                        obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
+                else:
+                    obj.wenc = orthogonalinit(self.R.uniform(size=wencshp, low = -self.inf, high = self.inf))
+                    if not(self.tie_weights):
+                        obj.wdec = orthogonalinit(self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif),0)
                 obj.bdec = numpy.zeros(self.in_size)
             
             if self.auxinput is not None:
@@ -415,9 +443,16 @@
                 wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
                 print 'wauxencshp = ', wauxencshp
                 print 'wauxdecshp = ', wauxdecshp
-                obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
-                if not(self.tie_weights):
-                    obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
+                if not orthoinit:
+                    obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
+                    if not(self.tie_weights):
+                        obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
+                else:
+                    obj.wauxenc = [orthogonalinit(self.R.uniform(size=i, low = -self.inf, high = self.inf)) \
+                                for i in wauxencshp]
+                    if not(self.tie_weights):
+                        obj.wauxdec = [orthogonalinit(self.R.uniform(size=i, low=-self.hif, high=self.hif),0) \
+                                for i in wauxdecshp]
                 obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
             
             print 'self.inf = ', self.inf
@@ -500,13 +535,17 @@
         
         # facultative methods
         if self.debugmethod:
+            self.activation = [None] * (self.depth)
             self.representation = [None] * (self.depth)
+            self.recactivation = [None] * (self.depth)
             self.reconstruction = [None] * (self.depth)
             self.noisyinputs = [None] * (self.depth)
             self.compute_localcost = [None] * (self.depth+1)
             self.compute_localgradients = [None] * (self.depth+1)
             self.compute_globalcost = [None] * (self.depth+1)
             self.compute_globalgradients = [None] * (self.depth+1)
+            self.compute_localgradients_in = [None] * (self.depth)
+            self.compute_localgradients_aux = [None] * (self.depth)
             if self.totalupdatebool:
                 self.compute_totalcost = [None] * (self.depth+1)
                 self.compute_totalgradients = [None] * (self.depth+1)
@@ -516,6 +555,8 @@
             self.regularizationenccost = [None] * (self.depth)
         self.localcost = [None] * (self.depth+1)
         self.localgradients = [None] * (self.depth+1)
+        self.localgradients_in = [None] * (self.depth)
+        self.localgradients_aux = [None] * (self.depth)
         self.globalcost = [None] * (self.depth+1)
         self.globalgradients = [None] * (self.depth+1)
         if self.totalupdatebool:
@@ -582,6 +623,10 @@
                     self.totalcost[i] += self.daaig[i].regularization
             
             self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
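+            # separated gradients: the noisy reconstruction cost restricted to
+            # the normal inputs and restricted to the auxiliary inputs, each
+            # derived wrt the layer parameters (a 0 constant when absent)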
+            self.localgradients_in[i] = T.grad(self.daaig[i].noise.reconstruction_cost_in, self.daaig[i].params) \
+                if inputprec is not None else T.constant(0)
+            self.localgradients_aux[i] = T.grad(self.daaig[i].noise.reconstruction_cost_aux,self.daaig[i].params) \
+                if auxin_size[i] is not None else T.constant(0)
             self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
             if self.totalupdatebool:
                 self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)
@@ -600,11 +645,15 @@
                 self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads)
             
             if self.debugmethod:
-                self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden_activation)
+                self.activation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden_activation)
+                self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
+                self.recactivation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec_activation)
                 self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
                 self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
                 self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
                 self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i])
+                self.compute_localgradients_in[i] = theano.Method(self.inputs[i],self.localgradients_in[i])
+                self.compute_localgradients_aux[i] = theano.Method(self.inputs[i],self.localgradients_aux[i])
                 self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
                 self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i])
                 if self.totalupdatebool:
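
This is the point of the changeset: the two contributions to the local gradient can now be inspected separately. A sketch of the intended use (hypothetical names: model is a made StackedDAAig instance built with debugmethod, batch a minibatch for layer i; each method returns one array per parameter, or 0 when that input group is absent):

    import numpy

    g_in = model.compute_localgradients_in[i](*batch)
    g_aux = model.compute_localgradients_aux[i](*batch)
    # compare the size of the two contributions, parameter by parameter
    for p_in, p_aux in zip(g_in, g_aux):
        print numpy.sqrt((p_in * p_in).sum()), numpy.sqrt((p_aux * p_aux).sum())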
@@ -648,13 +697,14 @@
         self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads)
         if self.totalupdatebool:
             self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads)
-        
-        totallocal_grads={}
-        for k in range(self.depth):
-            totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in
-                    zip(self.daaig[k].params,self.localgradients[k])))
-        totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])))
-        self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads)
+            # update every layer at once, each with its own local cost [no global cost backpropagated]
+            totallocal_grads={}
+            for k in range(self.depth):
+                totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in \
+                        zip(self.daaig[k].params,self.localgradients[k])))
+            totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in\
+                    zip(self.daaig[-1].params,self.localgradients[-1])))
+            self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads)
         
         # interface for the user
         self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
@@ -671,7 +721,7 @@
                         theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
     
     def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
-                                noise_level = 0 , noise_level_group = 0, seed = 1, alloc = True,**init):
+                                noise_level = 0 , noise_level_group = 0, seed = 1, orthoinit = False, alloc = True, **init):
         super(StackedDAAig, self)._instance_initialize(inst, **init)
         
         inst.unsup_lr = unsup_lr
@@ -680,14 +730,18 @@
         for i in range(self.depth):
             print '\tLayer = ', i+1
             inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\
-                    noise_level_group = noise_level_group, seed = seed + i, alloc = alloc)
+                    noise_level_group = noise_level_group, seed = seed + i, orthoinit = orthoinit, alloc = alloc)
         print '\tLayer supervised'
         inst.daaig[-1].initialize()
         if alloc:
             inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth)
             # init the logreg weights
-            inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
-                                low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+            if not orthoinit:
+                inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
+                                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+            else:
+                inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
+                                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)))
         inst.daaig[-1].l1 = 0
        inst.daaig[-1].l2 = reg_coef # only the l2 norm for regularisation, to be consistent with the unsup regularisation
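
The new orthoinit flag threads down to every layer's weight allocation. A sketch of turning it on (assuming the usual theano.Module workflow, where _instance_initialize is reached through inst.initialize; the hyperparameter values are placeholders):

    inst.initialize(unsup_lr=0.1, sup_lr=0.01, reg_coef=0,
                    noise_level=0.3, noise_level_group=0,
                    seed=1, orthoinit=True, alloc=True)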
     
@@ -755,4 +809,30 @@
         scannoise.R.rand.seed(seed)
         for i in range(self.depth):
             inst.daaig[i].random.seed(seed+i+1)
+    
+    def _instance_unsupupdate(self, inst, data, layer='all', typeup='local', printcost=False):
+        # One unsupervised update with the chosen cost: 'local', 'global',
+        # 'total' or 'totallocal' (every local cost at once; 'total' and
+        # 'totallocal' need totalupdatebool). With typeup 'local' or 'global'
+        # and layer 'all', data is a list with one minibatch per layer;
+        # otherwise a single minibatch for the chosen update.
+        cost = [None] * self.depth
+        if typeup == 'totallocal':
+            cost[-1] = inst.totallocalupdate(*data)
+        elif typeup == 'total':
+            if layer == 'all':
+                cost[-1] = inst.totalupdate[-1](*data)
+            else:
+                cost[layer] = inst.totalupdate[layer](*data)
+        else:
+            if layer == 'all':
+                for i in range(self.depth):
+                    if typeup == 'local':
+                        cost[i] = inst.localupdate[i](*data[i])
+                    elif typeup == 'global':
+                        cost[i] = inst.globalupdate[i](*data[i])
+            else:
+                if typeup == 'local':
+                    cost[layer] = inst.localupdate[layer](*data)
+                elif typeup == 'global':
+                    cost[layer] = inst.globalupdate[layer](*data)
+        if printcost:
+            print cost
+        return cost
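
A sketch of driving the new helper (hypothetical names: inst is a made StackedDAAig instance, databatches a list with one minibatch per layer):

    # one local update of every unsupervised layer, printing the costs
    inst.unsupupdate(databatches, layer='all', typeup='local', printcost=True)
    # one global update of the first layer only
    inst.unsupupdate(databatches[0], layer=0, typeup='global')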