changeset 708:53a247cfee84

Merged
author Olivier Delalleau <delallea@iro>
date Fri, 22 May 2009 10:03:29 -0400
parents f8bf9533f6b3 (current diff) 2b54c38e2c60 (diff)
children 55f77c7c3075
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 file changed, 108 insertions(+), 64 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Fri May 22 10:03:10 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Fri May 22 10:03:29 2009 -0400
@@ -17,7 +17,7 @@
 	if type == 'l1':
 		return T.sum(T.abs(param))
 	if type == 'l2':
-		return T.sum(param*param)
+		return T.sum(T.pow(param,2))
 	raise NotImplementedError('Only l1 and l2 regularization are currently implemented')
 
 def get_reg_cost(params, type):
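
For context on the hunk above: a minimal stand-alone sketch of the two penalties the regularization helper can return, written with plain Theano symbolic variables and theano.function rather than the Module interface used in this file (variable names here are illustrative, not taken from the changeset).

    import numpy
    import theano
    import theano.tensor as T

    # symbolic parameter and the two penalties handled by the helper above
    param = T.dmatrix('param')
    l1_cost = T.sum(T.abs_(param))    # L1: sum of absolute values (T.abs_ is the usual Theano spelling)
    l2_cost = T.sum(T.pow(param, 2))  # L2: sum of squares, as in the added line of the hunk

    penalties = theano.function([param], [l1_cost, l2_cost])

    w = numpy.array([[1.0, -2.0], [3.0, -4.0]])
    print penalties(w)   # L1 = 10.0, L2 = 30.0
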
@@ -96,7 +96,8 @@
 			self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))]
 		
 		#hyper-parameters
-		self.lr = T.scalar('lr')
+		if self.interface:
+			self.lr = T.scalar('lr')
 		self.noise_level = T.scalar('noise_level')
 		self.noise_level_group = T.scalar('noise_level_group')
 		
@@ -198,7 +199,7 @@
 		if not hasattr(self,'params'):
 			self.params = []
 		self.params += [self.benc]
-		self.paramsenc = self.params
+		self.paramsenc = copy.copy(self.params)
 		if not(self.input is None):
 			self.params += [self.wenc] + [self.bdec]
 			self.paramsenc += [self.wenc]
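
The switch to copy.copy above matters because plain assignment of a list only creates a second name for the same object: a later in-place += on self.params would then grow self.paramsenc as well. A small self-contained illustration of the difference:

    import copy

    # aliasing: both names point at the same list object
    params = ['benc']
    paramsenc = params
    params += ['wenc', 'bdec']
    print paramsenc          # ['benc', 'wenc', 'bdec'] -- grew along with params

    # shallow copy: the two lists now evolve independently
    params = ['benc']
    paramsenc = copy.copy(params)
    params += ['wenc', 'bdec']
    print paramsenc          # ['benc']
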
@@ -249,7 +250,7 @@
 			return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
 	
 	def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
-							seed=1, alloc=True, **init):
+							seed=1, Alloc=True, **init):
 		super(DAAig, self)._instance_initialize(obj, **init)
 		
 		obj.reg_coef = reg_coef
@@ -276,7 +277,7 @@
 		self.hif = 1/numpy.sqrt(self.n_hid)
 		
 		
-		if alloc:
+		if Alloc:
 			if not(self.input is None):
 				wencshp = (self.in_size, self.n_hid)
 				wdecshp = tuple(reversed(wencshp))
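
The Alloc branch above derives the encoder and decoder weight shapes from in_size and n_hid, and the 1/sqrt(n_hid) factor computed just before it is a fan-based scale of the kind commonly used for uniform weight initialization. The sketch below shows that classic scheme only as an assumption; the exact initializer used by the module is not visible in this hunk.

    import numpy

    def init_weights(in_size, n_hid, rng=numpy.random):
        # assumed fan-in scaled uniform initialization (illustrative, not from the changeset)
        scale = 1.0 / numpy.sqrt(in_size)
        wencshp = (in_size, n_hid)           # encoder weight shape, as in the hunk above
        wdecshp = tuple(reversed(wencshp))   # decoder weights use the transposed shape
        wenc = rng.uniform(-scale, scale, wencshp)
        wdec = rng.uniform(-scale, scale, wdecshp)
        return wenc, wdec

    wenc, wdec = init_weights(10, 5)
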
@@ -312,7 +313,7 @@
 				in_size = None, auxin_size = [None], n_hid = [1],
 				regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
 				reconstruction_cost_function=cost.cross_entropy,
-				n_out = 2, target = None, totalupdatebool=True, **init):
+				n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init):
 		
 		super(StackedDAAig, self).__init__()
 		print '\t**** StackedDAAig.__init__ ****'
@@ -321,6 +322,7 @@
 		print '\tin_size = ', in_size
 		print '\tauxin_size = ', auxin_size
 		print '\tn_hid = ', n_hid
+		
 		# save parameters
 		self.depth = depth
 		self.input = input
@@ -334,6 +336,7 @@
 		self.reconstruction_cost_function = reconstruction_cost_function
 		self.n_out = n_out
 		self.target = target if not(target is None) else T.lvector('target')
+		self.debugmethod = debugmethod
 		self.totalupdatebool = totalupdatebool
 		
 		# init for model construction
@@ -345,26 +348,41 @@
 		self.unsup_lr = T.dscalar('unsup_lr')
 		self.sup_lr = T.dscalar('sup_lr')
 		
-		# methods
+		# update methods
 		self.localupdate = [None] * (self.depth+1) #update only on the layer parameters
 		self.globalupdate = [None] * (self.depth+1)#update wrt the layer cost backpropagated until the input layer
 		if self.totalupdatebool:
 			self.totalupdate = [None] * (self.depth+1) #update wrt all the layers' cost backpropagated until the input layer
 		#
-		self.representation = [None] * (self.depth+1)
-		self.reconstruction = [None] * (self.depth)
-		self.compute_localcost = [None] * (self.depth+1)
-		self.compute_globalcost = [None] * (self.depth+1)
-		if self.totalupdatebool:
-			self.compute_totalcost = [None] * (self.depth+1)
-		self.validate = [None] * (self.depth)
-		self.noisyinputs = [None] * (self.depth)
+		self.classify = None
+		
+		# other methods
+		if self.debugmethod:
+			self.representation = [None] * (self.depth)
+			self.reconstruction = [None] * (self.depth)
+			self.validate = [None] * (self.depth)
+			self.noisyinputs = [None] * (self.depth)
+			self.compute_localcost = [None] * (self.depth+1)
+			self.compute_localgradients = [None] * (self.depth+1)
+			self.compute_globalcost = [None] * (self.depth+1)
+			self.compute_globalgradients = [None] * (self.depth+1)
+			if self.totalupdatebool:
+				self.compute_totalcost = [None] * (self.depth+1)
+				self.compute_totalgradients = [None] * (self.depth+1)
 		#
+		
+		# some theano Variables we want to keep track of
+		if self.regularize:
+			self.regularizationenccost = [None] * (self.depth)
 		self.localcost = [None] * (self.depth+1)
+		self.localgradients = [None] * (self.depth+1)
 		self.globalcost = [None] * (self.depth+1)
+		self.globalgradients = [None] * (self.depth+1)
 		if self.totalupdatebool:
 			self.totalcost = [None] * (self.depth+1)
+			self.totalgradients = [None] * (self.depth+1)
 		
+		#params to update and inputs initialization
 		paramstot = []
 		paramsenc = []
 		self.inputs = [None] * (self.depth+1)
@@ -376,6 +394,7 @@
 		
 		offset = 0
 		for i in range(self.depth):
+			
 			if auxin_size[i] is None:
 				offset +=1
 				param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\
@@ -383,64 +402,81 @@
 			else:
 				param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
 					False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
+			
 			print '\tLayer init= ', i+1
 			self.daaig[i] = DAAig(*param)
 			
+			# method input, outputs and parameters update
 			if i:
 				self.inputs[i] = copy.copy(self.inputs[i-1])
 			if not(auxin_size[i] is None):
 				self.inputs[i] += [self.daaig[i].idx_list,self.auxinput[i-offset]]
 			
+			noisyout = []
+			if not(inputprec is None):
+				noisyout += [self.daaig[i].noisy_input]
+			if not(auxin_size[i] is None):
+				noisyout += [self.daaig[i].noisy_auxinput]
+			
 			paramstot += self.daaig[i].params
 			
+			# save the costs
+			self.localcost[i] = self.daaig[i].noise.cost
+			self.globalcost[i] = self.daaig[i].noise.cost
+			if self.totalupdatebool:
+				if i:
+					self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost
+				else:
+					self.totalcost[i] = self.daaig[i].noise.cost
+			
 			if self.regularize:
-				self.localcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
-				self.globalcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
-				if self.totalupdatebool:
-					self.totalcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
-				for j in range(i):
-					self.globalcost[i] += self.daaig[j].regularizationenc
-					if self.totalupdatebool:
-						self.totalcost[i] += self.daaig[j].noise.cost+self.daaig[j].regularization
+				if i:
+					self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc
+				else:
+					self.regularizationenccost[i] = 0
 				
-			else:
-				self.localcost[i] = self.daaig[i].noise.cost
-				self.globalcost[i] = self.daaig[i].noise.cost
+				self.localcost[i] += self.daaig[i].regularization
+				self.globalcost[i] += self.regularizationenccost[i]
 				if self.totalupdatebool:
-					self.totalcost[i] = self.daaig[i].noise.cost
-					for j in range(i):
-						self.totalcost[i] += self.daaig[j].noise.cost
+					self.totalcost[i] += self.daaig[i].regularization
+			
+			self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
+			self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
+			if self.totalupdatebool:
+				self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)
 			
-			local_grads = dict((j, j - self.unsup_lr * T.grad(self.localcost[i], j))\
-					for j in self.daaig[i].params)
-			global_grads = dict((j, j - self.unsup_lr * T.grad(self.globalcost[i], j))\
-					for j in (self.daaig[i].params+paramsenc))
+			# create the update dictionaries
+			local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i]))
+			global_grads = dict((j, j - self.unsup_lr * g)\
+					for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i]))
 			if self.totalupdatebool:
-				total_grads = dict((j, j - self.unsup_lr * T.grad(self.totalcost[i], j))\
-						for j in (paramstot))
+				total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i]))
 			
+			# method declaration
 			self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads)
 			self.globalupdate[i] = theano.Method(self.inputs[i],self.globalcost[i],global_grads)
 			if self.totalupdatebool:
 				self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads)
 			#
-			self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
-			self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
-			self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
-			self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
-			if self.totalupdatebool:
-				self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
-			self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
-			noisyout = []
-			if not(inputprec is None):
-				noisyout += [self.daaig[i].noisy_input]
-			if not(auxin_size[i] is None):
-				noisyout += [self.daaig[i].noisy_auxinput]
-			self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
+			if self.debugmethod:
+				self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
+				self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
+				self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
+				self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
+				self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
+				self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i])
+				self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
+				self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i])
+				if self.totalupdatebool:
+					self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
+					self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i])
 			#
+			
 			paramsenc += self.daaig[i].paramsenc
 			inputprec = self.daaig[i].clean.hidden
 			in_sizeprec = self.n_hid[i]
+		
+		# supervised layer
 		print '\tLayer supervised init'
 		self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target]
 		self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target)
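
In the layer loop above, each gradient list is now built once with T.grad(cost, params) and zipped against the corresponding parameter list to form the update dictionary, instead of calling T.grad(cost, j) once per parameter. A minimal sketch of that pattern, assuming shared variables and theano.function in place of the Module/Method interface used in this file (all names below are illustrative):

    import numpy
    import theano
    import theano.tensor as T

    x = T.dvector('x')
    w = theano.shared(numpy.zeros((3, 2)), name='w')
    b = theano.shared(numpy.zeros(2), name='b')
    lr = T.dscalar('lr')

    cost = T.sum((T.dot(x, w) + b) ** 2)
    params = [w, b]

    # one symbolic call returns the whole gradient list, in the same order as params
    gradients = T.grad(cost, params)

    # zip params with their gradients to build the SGD update mapping
    updates = dict((p, p - lr * g) for p, g in zip(params, gradients))

    step = theano.function([x, lr], cost, updates=updates)
    print step(numpy.ones(3), 0.1)   # cost before the update

Keeping the gradient lists on the instance also lets the same symbolic expressions be reused both for the update dictionaries and for the compute_*gradients inspection Methods declared under debugmethod.
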
@@ -448,34 +484,42 @@
 		
 		if self.regularize:
 			self.localcost[-1] = self.daaig[-1].regularized_cost
-			self.globalcost[-1] = self.daaig[-1].regularized_cost
-			for j in range(self.depth):
-				self.globalcost[-1] += self.daaig[j].regularizationenc
+			self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1]
 		else:
 			self.localcost[-1] = self.daaig[-1].unregularized_cost
 			self.globalcost[-1] = self.daaig[-1].unregularized_cost
 		
 		if self.totalupdatebool:
-			self.totalcost[-1] = self.totalcost[-2] + self.localcost[-1]
+			self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]]
 		
-		local_grads = dict((j, j - self.sup_lr * T.grad(self.localcost[-1], j))\
-					for j in self.daaig[-1].params)
-		global_grads = dict((j, j - self.sup_lr * T.grad(self.globalcost[-1], j))\
-					for j in (self.daaig[-1].params+paramsenc))
+		self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params)
+		self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc)
 		if self.totalupdatebool:
-			total_grads = dict((j, j - \
-					(self.unsup_lr * T.grad(self.totalcost[-2], j) + self.sup_lr *T.grad(self.globalcost[-1], j)))\
-					for j in paramstot)
+			self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\
+					T.grad(self.globalcost[-1], paramstot) ]
+		
+		local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1]))
+		global_grads = dict((j, j - self.unsup_lr * g)\
+				for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1]))
+		if self.totalupdatebool:
+			total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\
+					for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1]))
 		
 		self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads)
 		self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads)
 		if self.totalupdatebool:
 			self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads)
-		self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
-		self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
-		if self.totalupdatebool:
-			self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
-		self.representation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
+		self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
+		
+		if self.debugmethod:
+			self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
+			self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1])
+			self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
+			self.compute_globalgradients[-1] = theano.Method(self.inputs[-1],self.globalgradients[-1])
+			if self.totalupdatebool:
+				self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
+				self.compute_totalgradients[-1] =\
+						theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
 	
 	def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
 								noise_level = 0 , noise_level_group = 0, seed = 1, alloc = True,**init):
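
For the supervised layer, the total update above keeps two gradient lists, one for the stacked unsupervised cost and one for the supervised cost, and applies a separate learning rate to each: p - unsup_lr * g1 - sup_lr * g2. A small self-contained sketch of that combination rule, again using shared variables and theano.function as stand-ins (names are illustrative):

    import numpy
    import theano
    import theano.tensor as T

    w = theano.shared(numpy.zeros(2), name='w')
    unsup_lr = T.dscalar('unsup_lr')
    sup_lr = T.dscalar('sup_lr')

    # two costs sharing the same parameter: an unsupervised one and a supervised one
    unsup_cost = T.sum(w ** 2)
    sup_cost = T.sum((w - 1) ** 2)

    paramstot = [w]
    g_unsup = T.grad(unsup_cost, paramstot)
    g_sup = T.grad(sup_cost, paramstot)

    # one update per parameter, each learning rate applied to its own gradient
    total_grads = dict((p, p - unsup_lr * g1 - sup_lr * g2)
                       for p, g1, g2 in zip(paramstot, g_unsup, g_sup))

    total_step = theano.function([unsup_lr, sup_lr], [unsup_cost, sup_cost], updates=total_grads)
    print total_step(0.1, 0.01)
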