changeset 799:9dd5af3b26fe

DAA_inputs_groups: readability improvements
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Tue, 21 Jul 2009 13:31:43 -0400
parents 0b20301ded89
children 49ba5d622c3a
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 100 insertions(+), 122 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jul 21 12:19:30 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Tue Jul 21 13:31:43 2009 -0400
@@ -50,6 +50,10 @@
                     numpy.sqrt((Worto[:,i:i+1]*Worto[:,i:i+1]).sum(0)) * numpy.sqrt((W[:,i:i+1]*W[:,i:i+1]).sum(0))
     return Worto if axis == 1 else Worto.T
 
+# @todo
+def PCAinit(data,nhid):
+    pass
+
 # Initialize containers:
 class CreateContainer:
     pass
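The PCAinit function added above is only a @todo placeholder. As a hedged sketch (not part of this changeset; the helper name and its use of numpy's SVD are assumptions), a PCA-based initialization of nhid hidden units could be taken from the leading principal directions of the data:

import numpy

def PCAinit_sketch(data, nhid):
    # Center the data and take its SVD; the rows of Vt are the principal
    # directions, so the first nhid of them (transposed) give an
    # (n_inputs, nhid) weight matrix aligned with the main directions of variance.
    centered = data - data.mean(axis=0)
    U, s, Vt = numpy.linalg.svd(centered, full_matrices=False)
    return Vt[:nhid].T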
@@ -57,7 +61,7 @@
 # regularisation utils:-------------------------------------------
 def lnorm(param, type='l2'):
     if type == 'l1':
-        return T.sum(T.abs(param))
+        return T.sum(T.abs_(param))
     if type == 'l2':
         return T.sum(param*param)
     raise NotImplementedError('Only l1 and l2 regularization are currently implemented')
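For reference, a small usage sketch of the corrected helper (assuming the Theano API of that era exposes T.abs_, T.sum and theano.function as used elsewhere; variable names here are illustrative only):

import numpy
import theano
import theano.tensor as T

W = T.dmatrix('W')
l1_fn = theano.function([W], T.sum(T.abs_(W)))   # lnorm(W, type='l1')
l2_fn = theano.function([W], T.sum(W * W))       # lnorm(W, type='l2')

w = numpy.array([[1., -2.], [3., -4.]])
print l1_fn(w), l2_fn(w)   # 10.0 30.0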
@@ -184,16 +188,17 @@
         
         ### DECLARE MODEL VARIABLES and default
         self.input = input
-        self.noisy_input = None
         if self.ignore_missing is not None and self.input is not None:
             no_missing = FillMissing(self.ignore_missing)(self.input)
             self.input = no_missing[0]  # With missing values replaced.
             self.input_missing_mask = no_missing[1] # Missingness pattern.
         else:
             self.input_missing_mask = None
+        
         self.auxinput = auxinput
         self.idx_list = T.ivector('idx_list') if self.auxinput is not None else None
-        self.noisy_idx_list, self.noisy_auxinput = None, None
+        
+        self.noisy_input, self.noisy_idx_list, self.noisy_auxinput = None, None, None
         
         #parameters
         self.benc = T.dvector('benc')
@@ -210,7 +215,6 @@
         #hyper-parameters
         if self.interface:
             self.lr = T.scalar('lr')
-        
         self.noise_level = T.scalar('noise_level')
         self.noise_level_group = T.scalar('noise_level_group')
         self.scale_cost = T.scalar('scale_cost')
@@ -226,7 +230,7 @@
             self.noisy_input = self.corrupt_input()
         if self.auxinput is not None:
             self.noisy_idx_list , self.noisy_auxinput = \
-                scannoise(self.idx_list, self.auxinput,self.noise_level, self.noise_level_group)
+                    scannoise(self.idx_list, self.auxinput,self.noise_level, self.noise_level_group)
         
         self.noise = CreateContainer()
         self.clean = CreateContainer()
@@ -240,25 +244,43 @@
         if self.interface:
             self.define_gradients()
             self.define_interface()
+    
+    def filter_up(self, vis, w, b=None):
+        out = T.dot(vis, w)
+        return out + b if b else out
+    filter_down = filter_up
+    
+    def corrupt_input(self):
+        if self.corruption_pattern is None:
+            mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level)
+        elif self.corruption_pattern == 'by_pair':
+            shape = T.shape(self.input)
+            # Do not ask me why, but just doing "/ 2" does not work (there is
+            # a bug in the optimizer).
+            shape = T.stack(shape[0], (shape[1] * 2) / 4)
+            mask = self.random.binomial(shape, 1, 1 - self.noise_level)
+            mask = T.horizontal_stack(mask, mask)
+        else:
+            raise ValueError('Unknown value for corruption_pattern: %s' % self.corruption_pattern)
+        return mask * self.input
      
     def define_behavioural(self, container, input, idx_list, auxinput):
         self.define_propup(container, input, idx_list , auxinput)
         container.hidden = self.hid_fn(container.hidden_activation)
+        
         self.define_propdown(container, idx_list , auxinput)
+        container.rec = self.rec_fn(container.rec_activation)
         if self.input is not None:
             container.rec_in = self.rec_fn(container.rec_activation_in)
         if (self.auxinput is not None):
             container.rec_aux = self.rec_fn(container.rec_activation_aux)
-        container.rec = self.rec_fn(container.rec_activation)
     
     def define_propup(self, container, input, idx_list, auxinput):
+        container.hidden_activation = self.benc
         if self.input is not None:
-            container.hidden_activation = self.filter_up(input, self.wenc, self.benc)
-            if self.auxinput is not None:
-                container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc)
-        else:
-            if self.auxinput is not None:
-                container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc
+            container.hidden_activation += self.filter_up(input, self.wenc)
+        if self.auxinput is not None:
+            container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc)
     
     def define_propdown(self, container, idx_list, auxinput):
         if self.input is not None:
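The rewritten define_propup above replaces the nested branches by a single additive pattern: start from the encoder bias and add each available input's contribution. A plain numpy analogue (hypothetical helper, not the Theano graph built here):

import numpy

def propup_sketch(benc, input=None, wenc=None, aux_contribution=None):
    # hidden_activation = benc (+ input . wenc) (+ auxiliary contribution)
    hidden_activation = benc.copy()
    if input is not None:
        hidden_activation = hidden_activation + numpy.dot(input, wenc)   # filter_up(input, wenc)
    if aux_contribution is not None:
        hidden_activation = hidden_activation + aux_contribution         # scandotenc(idx_list, auxinput, wauxenc)
    return hidden_activation

print propup_sketch(numpy.zeros(2), input=numpy.ones((1, 3)), wenc=numpy.ones((3, 2)))   # [[ 3.  3.]]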
@@ -270,21 +292,13 @@
         if (self.ignore_missing is not None and self.input is not None and not self.reconstruct_missing):
             # Apply mask to gradient to ensure we do not backpropagate on the
             # cost computed on missing inputs (that have been imputed).
-            container.rec_activation_in = mask_gradient(container.rec_activation_in,
-                    self.input_missing_mask)
+            container.rec_activation_in = mask_gradient(container.rec_activation_in, self.input_missing_mask)
         
         if (self.input is not None) and (self.auxinput is not None):
             container.rec_activation = T.join(1,container.rec_activation_in,container.rec_activation_aux)
         else:
-            if self.input is not None:
-                container.rec_activation = container.rec_activation_in
-            if (self.auxinput is not None):
-                container.rec_activation = container.rec_activation_aux
-    
-    def filter_up(self, vis, w, b=None):
-        out = T.dot(vis, w)
-        return out + b if b else out
-    filter_down = filter_up
+            container.rec_activation = container.rec_activation_in \
+                    if self.input is not None else container.rec_activation_aux
     
     def define_regularization(self):
         self.reg_coef = T.scalar('reg_coef')
@@ -295,6 +309,7 @@
                 self.Maskup = [self.Maskup]
             if type(self.Maskdown) is not list:
                 self.Maskdown = [self.Maskdown]
+        
         listweights = []
         listweightsenc = []
         if self.auxinput is not None:
@@ -303,26 +318,21 @@
         if self.input is not None:
             listweights += [self.wenc,self.wdec]
             listweightsenc += [self.wenc]
+        
         self.regularization = self.reg_coef * get_reg_cost(listweights,'l1')
         self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l1')
     
     def define_cost(self, container):
-        if self.reconstruction_cost_function_name == 'cross_entropy':
-            if (self.input is not None):
-                container.reconstruction_cost_in = \
-                    self.reconstruction_cost_function(self.input,container.rec_activation_in,self.rec_name)
-            if (self.auxinput is not None):
-                container.reconstruction_cost_aux = \
-                    self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput),container.rec_activation_aux,\
-                    self.rec_name)
-        else:
-            if (self.input is not None):
-                container.reconstruction_cost_in = \
-                    self.reconstruction_cost_function(self.input,container.rec_in,self.rec_name)
-            if (self.auxinput is not None):
-                container.reconstruction_cost_aux = \
-                    self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput),container.rec_aux,\
-                    self.rec_name)
+        tmpbool = (self.reconstruction_cost_function_name == 'cross_entropy')
+        if (self.input is not None):
+            container.reconstruction_cost_in = \
+                self.reconstruction_cost_function(self.input, container.rec_activation_in \
+                if tmpbool else container.rec_in, self.rec_name)
+        if (self.auxinput is not None):
+            container.reconstruction_cost_aux = \
+                self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), container.rec_activation_aux \
+                if tmpbool else container.rec_aux, self.rec_name)
+        
         # TOTAL COST
         if (self.input is not None) and (self.auxinput is not None):
             container.reconstruction_cost = (T.min(T.constant(1),T.constant(1)+self.scale_cost)) * \
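The tmpbool refactoring above collapses the two branches of define_cost into one call per input stream: a cross-entropy reconstruction cost is evaluated on the pre-nonlinearity activation, any other cost on the reconstruction after the output nonlinearity. A minimal stand-alone illustration of that selection (hypothetical names, and assuming a sigmoid output nonlinearity purely for the sake of the sketch):

import numpy

def sigmoid(a):
    return 1. / (1. + numpy.exp(-a))

def reconstruction_term(cost_name, rec_activation):
    # cross-entropy style costs are fed the pre-nonlinearity activation,
    # other costs the reconstruction after the output nonlinearity
    return rec_activation if cost_name == 'cross_entropy' else sigmoid(rec_activation)

a = numpy.array([-1., 0., 2.])
print reconstruction_term('cross_entropy', a)   # the raw activation
print reconstruction_term('quadratic', a)       # sigmoid(activation)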
@@ -342,14 +352,17 @@
     def define_params(self):
         if not hasattr(self,'params'):
             self.params = []
+        
         self.params += [self.benc]
         self.paramsenc = copy.copy(self.params)
+        
         if self.input is not None:
             self.params += [self.wenc] + [self.bdec]
             self.paramsenc += [self.wenc]
         if self.auxinput is not None:
             self.params += self.wauxenc + self.bauxdec
             self.paramsenc += self.wauxenc
+        
         if not(self.tie_weights):
             if self.input is not None:
                 self.params += [self.wdec]
@@ -358,43 +371,27 @@
     
     def define_gradients(self):
         self.gradients = T.grad(self.noise.cost, self.params)
-        self.updates = dict((p, p - self.lr * g) for p, g in \
-                zip(self.params, self.gradients))
+        self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gradients))
     
     def define_interface(self):
         # declare function to interface with module (if not stacked)
-        if self.input is None:
-            listin = [self.idx_list, self.auxinput]
+        listin = []
+        listout = []
+        if self.input is not None:
+            listin += [self.input]
+            listout += [self.noisy_input]
         if self.auxinput is None:
-            listin = [self.input]
-        if (self.input is not None) and (self.auxinput is not None):
-            listin =[self.input,self.idx_list, self.auxinput]
+            listin += [self.idx_list, self.auxinput]
+            listout += [self.noisy_auxinput]
+        
         self.update = theano.Method(listin, self.noise.cost, self.updates)
         self.compute_cost = theano.Method(listin, self.noise.cost)
-        if self.input is not None:
-            self.noisify = theano.Method(listin, self.noisy_input)
-        if self.auxinput is not None:
-            self.auxnoisify = theano.Method(listin, self.noisy_auxinput)
+        self.noisify = theano.Method(listin, listout)
         self.recactivation = theano.Method(listin, self.clean.rec_activation)
         self.reconstruction = theano.Method(listin, self.clean.rec)
         self.activation = theano.Method(listin, self.clean.hidden_activation)
         self.representation = theano.Method(listin, self.clean.hidden)
     
-    def corrupt_input(self):
-        if self.corruption_pattern is None:
-            mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level)
-        elif self.corruption_pattern == 'by_pair':
-            shape = T.shape(self.input)
-            # Do not ask me why, but just doing "/ 2" does not work (there is
-            # a bug in the optimizer).
-            shape = T.stack(shape[0], (shape[1] * 2) / 4)
-            mask = self.random.binomial(shape, 1, 1 - self.noise_level)
-            mask = T.horizontal_stack(mask, mask)
-        else:
-            raise ValueError('Unknown value for corruption_pattern: %s'
-                    % self.corruption_pattern)
-        return mask * self.input
-    
     def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, scale_cost = 0,
                             seed=1, orthoinit = False, alloc=True, **init):
         super(DAAig, self)._instance_initialize(obj, **init)
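The corrupt_input method removed here is the one moved near the top of the class earlier in this changeset. As a rough numpy analogue of its masking noise (hypothetical names, independent of the Theano RandomStreams actually used): each entry is kept with probability 1 - noise_level and zeroed otherwise, and the 'by_pair' variant draws one mask over half the columns and stacks it with itself, so each column shares its mask with the column half the width away.

import numpy

rng = numpy.random.RandomState(0)
x = numpy.ones((3, 6))
noise_level = 0.25

mask = rng.binomial(n=1, p=1 - noise_level, size=x.shape)   # elementwise masking
half = rng.binomial(n=1, p=1 - noise_level, size=(3, 3))
pair_mask = numpy.hstack([half, half])                      # 'by_pair': column i shares its mask with column i+3
print mask * x
print pair_mask * x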
@@ -409,12 +406,9 @@
             obj.lr = None
         
         obj.random.initialize()
-        if seed is not None:
-            obj.random.seed(seed)
+        obj.random.seed(seed)
         self.R = numpy.random.RandomState(seed)
         
-        obj.__hide__ = ['params']
-        
         if self.input is not None:
             self.inf = 1/numpy.sqrt(self.in_size)
         if self.auxinput is not None:
@@ -429,14 +423,13 @@
                 wdecshp = tuple(reversed(wencshp))
                 print 'wencshp = ', wencshp
                 print 'wdecshp = ', wdecshp
-                if not orthoinit:
-                    obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
+                obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
+                if not(self.tie_weights):
+                    obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
+                if orthoinit:
+                    obj.wenc = orthogonalinit(obj.wenc)
                     if not(self.tie_weights):
-                        obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
-                else:
-                    obj.wenc = orthogonalinit(self.R.uniform(size=wencshp, low = -self.inf, high = self.inf))
-                    if not(self.tie_weights):
-                        obj.wdec = orthogonalinit(self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif),0)
+                        obj.wdec = orthogonalinit(obj.wdec,0)
                 obj.bdec = numpy.zeros(self.in_size)
             
             if self.auxinput is not None:
@@ -444,16 +437,13 @@
                 wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
                 print 'wauxencshp = ', wauxencshp
                 print 'wauxdecshp = ', wauxdecshp
-                if not orthoinit:
-                    obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
+                obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
+                if not(self.tie_weights):
+                    obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
+                if orthoinit:
+                    obj.wauxenc = [orthogonalinit(w) for w in obj.wauxenc]
                     if not(self.tie_weights):
-                        obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
-                else:
-                    obj.wauxenc = [orthogonalinit(self.R.uniform(size=i, low = -self.inf, high = self.inf)) \
-                                for i in wauxencshp]
-                    if not(self.tie_weights):
-                        obj.wauxdec = [orthogonalinit(self.R.uniform(size=i, low=-self.hif, high=self.hif),0) \
-                                for i in wauxdecshp]
+                        obj.wauxdec = [orthogonalinit(w,0) for w in obj.wauxdec]
                 obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
             
             print 'self.inf = ', self.inf
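The reordering above (and the analogous change for the auxiliary weights) draws the uniform weights once and only then optionally orthogonalizes them, instead of duplicating the uniform draw in both branches. A hedged numpy sketch of that flow (helper names hypothetical; the orthogonalize argument stands in for the module's orthogonalinit):

import numpy

def init_weight_sketch(rng, shape, bound, orthoinit=False, orthogonalize=None):
    # single uniform draw in [-bound, bound] ...
    w = rng.uniform(size=shape, low=-bound, high=bound)
    # ... optionally replaced by its orthogonalized version
    if orthoinit and orthogonalize is not None:
        w = orthogonalize(w)
    return w

rng = numpy.random.RandomState(1)
print init_weight_sketch(rng, (4, 3), 1 / numpy.sqrt(4)).shape   # (4, 3)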
@@ -539,25 +529,25 @@
             self.recactivation = [None] * (self.depth)
             self.reconstruction = [None] * (self.depth)
             self.noisyinputs = [None] * (self.depth)
+            self.compute_localgradients_in = [None] * (self.depth)
+            self.compute_localgradients_aux = [None] * (self.depth)
             self.compute_localcost = [None] * (self.depth+1)
             self.compute_localgradients = [None] * (self.depth+1)
             self.compute_globalcost = [None] * (self.depth+1)
             self.compute_globalgradients = [None] * (self.depth+1)
-            self.compute_localgradients_in = [None] * (self.depth)
-            self.compute_localgradients_aux = [None] * (self.depth)
             if self.totalupdatebool:
                 self.compute_totalcost = [None] * (self.depth+1)
                 self.compute_totalgradients = [None] * (self.depth+1)
         
         # some theano Variables we want to keep track on
-        if self.regularize:
-            self.regularizationenccost = [None] * (self.depth)
+        self.localgradients_in = [None] * (self.depth)
+        self.localgradients_aux = [None] * (self.depth)
         self.localcost = [None] * (self.depth+1)
         self.localgradients = [None] * (self.depth+1)
-        self.localgradients_in = [None] * (self.depth)
-        self.localgradients_aux = [None] * (self.depth)
         self.globalcost = [None] * (self.depth+1)
         self.globalgradients = [None] * (self.depth+1)
+        if self.regularize:
+            self.regularizationenccost = [None] * (self.depth)
         if self.totalupdatebool:
             self.totalcost = [None] * (self.depth+1)
             self.totalgradients = [None] * (self.depth+1)
@@ -606,34 +596,27 @@
             self.localcost[i] = self.daaig[i].noise.cost
             self.globalcost[i] = self.daaig[i].noise.cost
             if self.totalupdatebool:
-                if i:
-                    self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost
-                else:
-                    self.totalcost[i] = self.daaig[i].noise.cost
+                self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost if i else self.daaig[i].noise.cost
             
             if self.regularize:
-                if i:
-                    self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc
-                else:
-                    self.regularizationenccost[i] = 0
+                self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc if i else 0
                 self.localcost[i] += self.daaig[i].regularization
                 self.globalcost[i] += self.regularizationenccost[i]
                 if self.totalupdatebool:
                     self.totalcost[i] += self.daaig[i].regularization
             
-            self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
             self.localgradients_in[i] = T.grad(self.daaig[i].noise.reconstruction_cost_in, self.daaig[i].params) \
                 if inputprec is not None else T.constant(0)
             self.localgradients_aux[i] = T.grad(self.daaig[i].noise.reconstruction_cost_aux,self.daaig[i].params) \
                 if auxin_size[i] is not None else T.constant(0)
+            self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
             self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
             if self.totalupdatebool:
                 self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)
             
             #create the updates dictionaries
-            local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i]))
-            global_grads = dict((j, j - self.unsup_lr * g)\
-                    for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i]))
+            local_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(self.daaig[i].params,self.localgradients[i]))
+            global_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i]))
             if self.totalupdatebool:
                 total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i]))
             
@@ -669,12 +652,10 @@
         self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,sigmoid_act(self.daaig[-2].clean.hidden_activation),self.target)
         paramstot += self.daaig[-1].params
         
-        if self.regularize:
-            self.localcost[-1] = self.daaig[-1].regularized_cost
-            self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1]
-        else:
-            self.localcost[-1] = self.daaig[-1].unregularized_cost
-            self.globalcost[-1] = self.daaig[-1].unregularized_cost
+        self.localcost[-1] = self.daaig[-1].regularized_cost \
+                if self.regularize else self.daaig[-1].unregularized_cost
+        self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] \
+                if self.regularize else self.daaig[-1].unregularized_cost
         
         if self.totalupdatebool:
             self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]]
@@ -682,12 +663,10 @@
         self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params)
         self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc)
         if self.totalupdatebool:
-            self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\
-                    T.grad(self.globalcost[-1], paramstot) ]
+            self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) , T.grad(self.globalcost[-1],paramstot) ]
         
-        local_grads = dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1]))
-        global_grads = dict((j, j - self.sup_lr * g)\
-                for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1]))
+        local_grads = dict((j,j-self.sup_lr*g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1]))
+        global_grads = dict((j,j-self.sup_lr*g) for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1]))
         if self.totalupdatebool:
             total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\
                     for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1]))
@@ -701,7 +680,7 @@
             for k in range(self.depth):
                 totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in \
                         zip(self.daaig[k].params,self.localgradients[k])))
-            totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in\
+            totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in
                     zip(self.daaig[-1].params,self.localgradients[-1])))
             self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads)
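The local, global and total update dictionaries built in this hunk all follow the same pattern as define_gradients: map every parameter to itself minus the learning rate times its gradient. A plain-Python analogue with toy values (hypothetical numbers, no Theano involved):

params = [1.0, 2.0, 3.0]
gradients = [0.5, -0.5, 0.25]
lr = 0.1

updates = dict((p, p - lr * g) for p, g in zip(params, gradients))
print sorted(updates.items())   # [(1.0, 0.95), (2.0, 2.05), (3.0, 2.975)]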
         
@@ -726,7 +705,6 @@
         inst.unsup_lr = unsup_lr
         inst.sup_lr = sup_lr
         
-        
         for i in range(self.depth):
             print '\tLayer = ', i+1
             inst.daaig[i].initialize(reg_coef = reg_coef[i] if type(reg_coef) is list else reg_coef, \
@@ -734,17 +712,17 @@
                     scale_cost = scale_cost[i] if type(scale_cost) is list else scale_cost, \
                     noise_level_group = noise_level_group[i] if type(noise_level_group) is list else noise_level_group, \
                     seed = seed + i, orthoinit = orthoinit, alloc = alloc)
+        
         print '\tLayer supervised'
         inst.daaig[-1].initialize()
+        
         if alloc:
             inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth)
             # init the logreg weights
-            if not orthoinit:
-                inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
-                                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
-            else:
-                inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
-                                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)))
+            inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
+                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+            if orthoinit:
+                inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].w)
         inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef
         inst.daaig[-1].l2 = 0
         #only l1 norm for regularisation to be consistent with the unsup regularisation