changeset 706:2b54c38e2c60

improved initialization of StackedDAAig
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Thu, 21 May 2009 19:52:46 -0400
parents eb91fa83e2c1
children 53a247cfee84
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 71 insertions(+), 45 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Thu May 21 14:11:54 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Thu May 21 19:52:46 2009 -0400
@@ -17,7 +17,7 @@
 	if type == 'l1':
 		return T.sum(T.abs_(param))
 	if type == 'l2':
-		return T.sum(param*param)
+		return T.sum(T.pow(param,2))
 	raise NotImplementedError('Only l1 and l2 regularization are currently implemented')
 
 def get_reg_cost(params, type):
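
The helper above builds one symbolic penalty term per parameter; get_reg_cost then sums it over the parameter list. A minimal standalone sketch of that pattern (illustrative names; T.abs_ is the spelling exported by theano.tensor):

    import theano.tensor as T

    def penalty(param, reg_type):
        # 'l1' sums absolute values, 'l2' sums squares.
        if reg_type == 'l1':
            return T.sum(T.abs_(param))
        if reg_type == 'l2':
            return T.sum(T.pow(param, 2))
        raise NotImplementedError('Only l1 and l2 regularization are currently implemented')

    def reg_cost(params, reg_type):
        # Accumulate the chosen penalty over every parameter tensor.
        return sum(penalty(p, reg_type) for p in params)
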
@@ -96,7 +96,8 @@
 			self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))]
 		
 		#hyper-parameters
-		self.lr = T.scalar('lr')
+		if self.interface:
+			self.lr = T.scalar('lr')
 		self.noise_level = T.scalar('noise_level')
 		self.noise_level_group = T.scalar('noise_level_group')
 		
@@ -249,7 +250,7 @@
 			return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
 	
 	def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
-							seed=1, alloc=True, **init):
+							seed=1, Alloc=True, **init):
 		super(DAAig, self)._instance_initialize(obj, **init)
 		
 		obj.reg_coef = reg_coef
@@ -276,7 +277,7 @@
 		self.hif = 1/numpy.sqrt(self.n_hid)
 		
 		
-		if alloc:
+		if Alloc:
 			if not(self.input is None):
 				wencshp = (self.in_size, self.n_hid)
 				wdecshp = tuple(reversed(wencshp))
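
The Alloc branch shapes the encoder weights as (in_size, n_hid) with the decoder transposed, and self.hif = 1/numpy.sqrt(self.n_hid) suggests fan-scaled bounds for the initial values. A hypothetical numpy sketch of such an allocation; the exact distribution used by _instance_initialize is not visible in this hunk:

    import numpy

    rng = numpy.random.RandomState(1)   # plays the role of the 'seed' argument
    in_size, n_hid = 20, 10
    inf = 1.0 / numpy.sqrt(in_size)     # input-side scale (assumed counterpart of hif)
    hif = 1.0 / numpy.sqrt(n_hid)       # hidden-side scale, as in the hunk above

    wencshp = (in_size, n_hid)
    wdecshp = tuple(reversed(wencshp))
    # Assumption: uniform draws within the fan-scaled bounds.
    wenc = rng.uniform(-hif, hif, size=wencshp)
    wdec = rng.uniform(-inf, inf, size=wdecshp)
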
@@ -312,7 +313,7 @@
 				in_size = None, auxin_size = [None], n_hid = [1],
 				regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
 				reconstruction_cost_function=cost.cross_entropy,
-				n_out = 2, target = None, totalupdatebool=True, **init):
+				n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init):
 		
 		super(StackedDAAig, self).__init__()
 		print '\t**** StackedDAAig.__init__ ****'
@@ -335,6 +336,7 @@
 		self.reconstruction_cost_function = reconstruction_cost_function
 		self.n_out = n_out
 		self.target = target if not(target is None) else T.lvector('target')
+		self.debugmethod = debugmethod
 		self.totalupdatebool = totalupdatebool
 		
 		# init for model construction
@@ -352,25 +354,33 @@
 		if self.totalupdatebool:
 			self.totalupdate = [None] * (self.depth+1) #update wrt the cost of all layers, backpropagated until the input layer
 		#
+		self.classify = None
 		
 		#other methods
-		self.representation = [None] * (self.depth+1)
-		self.reconstruction = [None] * (self.depth)
-		self.validate = [None] * (self.depth)
-		self.noisyinputs = [None] * (self.depth)
-		self.compute_localcost = [None] * (self.depth+1)
-		self.compute_globalcost = [None] * (self.depth+1)
-		if self.totalupdatebool:
-			self.compute_totalcost = [None] * (self.depth+1)
+		if self.debugmethod:
+			self.representation = [None] * (self.depth)
+			self.reconstruction = [None] * (self.depth)
+			self.validate = [None] * (self.depth)
+			self.noisyinputs = [None] * (self.depth)
+			self.compute_localcost = [None] * (self.depth+1)
+			self.compute_localgradients = [None] * (self.depth+1)
+			self.compute_globalcost = [None] * (self.depth+1)
+			self.compute_globalgradients = [None] * (self.depth+1)
+			if self.totalupdatebool:
+				self.compute_totalcost = [None] * (self.depth+1)
+				self.compute_totalgradients = [None] * (self.depth+1)
 		#
 		
 		# some theano Variables we want to keep track of
 		if self.regularize:
 			self.regularizationenccost = [None] * (self.depth)
 		self.localcost = [None] * (self.depth+1)
+		self.localgradients = [None] * (self.depth+1)
 		self.globalcost = [None] * (self.depth+1)
+		self.globalgradients = [None] * (self.depth+1)
 		if self.totalupdatebool:
 			self.totalcost = [None] * (self.depth+1)
+			self.totalgradients = [None] * (self.depth+1)
 		
 		#params to update and inputs initialization
 		paramstot = []
@@ -423,21 +433,24 @@
 				if i:
 					self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc
 				else:
-					self.regularizationenccost[i] = T.zero()
+					self.regularizationenccost[i] = 0
 				
 				self.localcost[i] += self.daaig[i].regularization
 				self.globalcost[i] += self.regularizationenccost[i]
 				if self.totalupdatebool:
 					self.totalcost[i] += self.daaig[i].regularization
 			
+			self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
+			self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
+			if self.totalupdatebool:
+				self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)
+			
 			#create the update dictionaries
-			local_grads = dict((j, j - self.unsup_lr * T.grad(self.localcost[i], j))\
-					for j in self.daaig[i].params)
-			global_grads = dict((j, j - self.unsup_lr * T.grad(self.globalcost[i], j))\
-					for j in (self.daaig[i].params+paramsenc))
+			local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i]))
+			global_grads = dict((j, j - self.unsup_lr * g)\
+					for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i]))
 			if self.totalupdatebool:
-				total_grads = dict((j, j - self.unsup_lr * T.grad(self.totalcost[i], j))\
-						for j in (paramstot))
+				total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i]))
 			
 			# method declaration
 			self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads)
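
The refactor in this hunk calls T.grad once per cost over the whole parameter list, stores the resulting gradient lists (so the compute_*gradients debug methods can return them), and reuses them when building the SGD update dictionaries, instead of invoking T.grad once per parameter inside each dict comprehension. A self-contained sketch of the pattern, written against theano.function rather than the older theano.Method module system used in this file; all names are illustrative:

    import numpy
    import theano
    import theano.tensor as T

    x = T.dmatrix('x')
    lr = T.dscalar('lr')
    w = theano.shared(numpy.zeros((5, 3)), name='w')
    b = theano.shared(numpy.zeros(3), name='b')
    params = [w, b]

    cost = T.sum(T.pow(T.dot(x, w) + b, 2))

    # One T.grad call over the whole list; the returned list can be kept
    # around for inspection, like self.localgradients[i] above.
    gradients = T.grad(cost, params)

    # Reuse the stored gradients when building the update dictionary.
    updates = dict((p, p - lr * g) for p, g in zip(params, gradients))

    step = theano.function([x, lr], cost, updates=updates)
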
@@ -445,15 +458,20 @@
 			if self.totalupdatebool:
 				self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads)
 			#
-			self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
-			self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
-			self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
-			self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
-			self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
-			self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
-			if self.totalupdatebool:
-				self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
+			if self.debugmethod:
+				self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
+				self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
+				self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
+				self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
+				self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
+				self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i])
+				self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
+				self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i])
+				if self.totalupdatebool:
+					self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
+					self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i])
 			#
+			
 			paramsenc += self.daaig[i].paramsenc
 			inputprec = self.daaig[i].clean.hidden
 			in_sizeprec = self.n_hid[i]
@@ -467,33 +485,41 @@
 		if self.regularize:
 			self.localcost[-1] = self.daaig[-1].regularized_cost
 			self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1]
-			if self.totalupdatebool:
-				self.totalcost[-1] = self.totalcost[-2] + self.daaig[-1].regularized_cost
 		else:
 			self.localcost[-1] = self.daaig[-1].unregularized_cost
 			self.globalcost[-1] = self.daaig[-1].unregularized_cost
-			if self.totalupdatebool:
-				self.totalcost[-1] = self.totalcost[-2] + self.daaig[-1].unregularized_cost
+		
+		if self.totalupdatebool:
+			self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]]
 		
-		local_grads = dict((j, j - self.sup_lr * T.grad(self.localcost[-1], j))\
-					for j in self.daaig[-1].params)
-		global_grads = dict((j, j - self.sup_lr * T.grad(self.globalcost[-1], j))\
-					for j in (self.daaig[-1].params+paramsenc))
+		self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params)
+		self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc)
 		if self.totalupdatebool:
-			total_grads = dict((j, j - \
-					(self.unsup_lr * T.grad(self.totalcost[-2], j) + self.sup_lr *T.grad(self.globalcost[-1], j)))\
-					for j in paramstot)
+			self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\
+					T.grad(self.globalcost[-1], paramstot) ]
+		
+		local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1]))
+		global_grads = dict((j, j - self.unsup_lr * g)\
+				for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1]))
+		if self.totalupdatebool:
+			total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\
+					for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1]))
 		
 		self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads)
 		self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads)
 		if self.totalupdatebool:
 			self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads)
-
-		self.representation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
-		self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
-		self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
-		if self.totalupdatebool:
-			self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
+		self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
+		
+		if self.debugmethod:
+			self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
+			self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1])
+			self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
+			self.compute_globalgradients[-1] = theano.Method(self.inputs[-1],self.globalgradients[-1])
+			if self.totalupdatebool:
+				self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
+				self.compute_totalgradients[-1] =\
+						theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
 	
 	def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
 								noise_level = 0 , noise_level_group = 0, seed = 1, Alloc = True,**init):
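
For the top (supervised) layer, the total update above keeps the unsupervised gradient (from totalcost[-2]) and the supervised gradient (from globalcost[-1]) separate, so each can be scaled by its own learning rate in a single step: j - unsup_lr * g1 - sup_lr * g2. A self-contained sketch of that two-rate update, with hypothetical costs and names:

    import numpy
    import theano
    import theano.tensor as T

    x = T.dmatrix('x')
    y = T.dvector('y')
    unsup_lr = T.dscalar('unsup_lr')
    sup_lr = T.dscalar('sup_lr')
    w = theano.shared(numpy.ones((4, 1)), name='w')
    paramstot = [w]

    unsup_cost = T.sum(T.pow(T.dot(x, w), 2))              # stands in for totalcost[-2]
    sup_cost = T.sum(T.pow(T.dot(x, w).flatten() - y, 2))  # stands in for globalcost[-1]

    g_unsup = T.grad(unsup_cost, paramstot)
    g_sup = T.grad(sup_cost, paramstot)

    # Each parameter takes one step against both gradients, with distinct rates.
    total_grads = dict((p, p - unsup_lr * g1 - sup_lr * g2)
                       for p, g1, g2 in zip(paramstot, g_unsup, g_sup))

    total_step = theano.function([x, y, unsup_lr, sup_lr],
                                 [unsup_cost, sup_cost], updates=total_grads)
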