changeset 702:f76079ba8d9a

added a DAAig module and a StackedDAAig module to algorithms.sandbox, to handle the case where the auxiliary inputs may be missing
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Wed, 20 May 2009 13:40:10 -0400
parents 113946723973
children 9078561a7c21
files pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat 1 files changed, 478 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed May 20 13:40:10 2009 -0400
@@ -0,0 +1,478 @@
+import numpy
+import theano
+import os, copy
+
+from theano import tensor as T
+from theano.compile import module
+from theano.tensor.nnet import sigmoid
+
+from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
+		scanmaskenc,scanmaskdec
+
+from pylearn.algorithms import cost
+from pylearn.algorithms.logistic_regression import LogRegN
+
+
+def lnorm(param, type='l2'):
+	if type == 'l1':
+		return T.sum(T.abs_(param))
+	if type == 'l2':
+		return T.sum(param*param)
+	raise NotImplementedError('Only l1 and l2 regularization are currently implemented')
+
+def get_reg_cost(params, type):
+	rcost = 0
+	for param in params:
+		rcost += lnorm(param, type)
+	return rcost
+
+
+def sigmoid_act(x):
+	return theano.tensor.nnet.sigmoid(x)
+
+def tanh_act(x):
+	return theano.tensor.tanh(x)
+
+def softsign_act(x):
+	# theano.sandbox.softsign is not imported at module level; import it here so the
+	# other activations do not depend on it
+	import theano.sandbox.softsign
+	return theano.sandbox.softsign.softsign(x)
+
+class ScratchPad:
+	"""Empty attribute container used to group the symbolic variables of one pass (clean or noisy)."""
+
+class DAAig(module.Module):
+	"""De-noising Auto-encoder that can take a dense input, a group of auxiliary
+	inputs (accessed through an index list), or both; either kind of input may be
+	missing (None), which is the case this module is designed to handle.
+	"""
+	
+	def __init__(self, input = None, auxinput = None,
+				in_size=None, auxin_size= None, n_hid=1,
+				regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
+				reconstruction_cost_function=cost.cross_entropy, interface = True,**init):
+		"""
+		:param input: symbolic dense input, or None if the layer only takes auxiliary inputs
+		:param auxinput: symbolic auxiliary inputs (grouped, accessed through idx_list), or None
+		:param regularize: if True, an l2 penalty on the weights is added to the cost
+		:param tie_weights: if True, the decoding weights are the transposed encoding weights
+		:param hid_fn: name of the activation, one of 'sigmoid_act', 'tanh_act', 'softsign_act'
+		:param reconstruction_cost_function: should return one cost per example (row)
+		:todo: Default noise level for all daa levels
+		"""
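+		# Purely illustrative example of a cost conforming to the contract above
+		# (hypothetical, not used anywhere in this module):
+		#   lambda target, output: T.sum(T.sqr(target - output), axis=1)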
+		print '\t\t**** DAAig.__init__ ****'
+		print '\t\tinput = ', input
+		print '\t\tauxinput = ', auxinput
+		print '\t\tin_size = ', in_size
+		print '\t\tauxin_size = ', auxin_size
+		print '\t\tn_hid = ', n_hid
+		
+		super(DAAig, self).__init__()
+		self.random = T.RandomStreams()
+		
+		# MODEL CONFIGURATION
+		self.in_size = in_size
+		self.auxin_size = auxin_size
+		self.n_hid = n_hid
+		self.regularize = regularize
+		self.tie_weights = tie_weights
+		self.reconstruction_cost_function = reconstruction_cost_function
+		self.interface = interface
+		
+		assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
+		self.hid_fn = eval(hid_fn)
+		
+		### DECLARE MODEL VARIABLES and default
+		self.input = input
+		self.noisy_input = None
+		self.auxinput = auxinput
+		self.idx_list = T.ivector('idx_list') if not(self.auxinput is None) else None
+		self.noisy_idx_list, self.noisy_auxinput = None, None
+		
+		#parameters
+		self.benc = T.dvector('benc')
+		if not(self.input is None):
+			self.wenc = T.dmatrix('wenc')
+			self.wdec = self.wenc.T if tie_weights else T.dmatrix('wdec')
+			self.bdec = T.dvector('bdec')
+		
+		if not(self.auxinput is None):
+			self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))]
+			self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))]
+			self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))]
+		
+		#hyper-parameters
+		self.lr = T.scalar('lr')
+		self.noise_level = T.scalar('noise_level')
+		self.noise_level_group = T.scalar('noise_level_group')
+		
+		# only build the graph here for DAAig itself; subclasses call init_behavioural after their own setup
+		if self.__class__ == DAAig:
+			self.init_behavioural()
+		print '\t\t**** end DAAig.__init__ ****'
+	
+	### BEHAVIOURAL MODEL
+	def init_behavioural(self):
+		if not(self.input is None):
+			self.noisy_input = self.corrupt_input()
+		if not(self.auxinput is None):
+			self.noisy_idx_list , self.noisy_auxinput = \
+				scannoise(self.idx_list,self.auxinput,self.noise_level,self.noise_level_group)
+		
+		self.noise = ScratchPad()
+		self.clean = ScratchPad()
+		
+		self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput)
+		self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput)
+		
+		self.define_regularization()  # call before cost
+		self.define_cost(self.clean)
+		self.define_cost(self.noise)
+		self.define_params()
+		if self.interface:
+			self.define_gradients()
+			self.define_interface()
+		
+	def define_behavioural(self,container, input, idx_list , auxinput):
+		self.define_propup(container, input, idx_list , auxinput)
+		container.hidden = self.hid_fn(container.hidden_activation)
+		self.define_propdown(container, idx_list , auxinput)
+		container.rec = self.hid_fn(container.rec_activation)
+		
+	def define_propup(self, container, input, idx_list , auxinput):
+		if not(self.input is None):
+			container.hidden_activation = self.filter_up(input,self.wenc,self.benc)
+			if not(self.auxinput is None):
+				container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc)
+		else:
+			if not(self.auxinput is None):
+				container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc
+		
+	# DEPENDENCY: define_propup
+	def define_propdown(self, container, idx_list , auxinput):
+		if not(self.input is None):
+			rec_activation1 = self.filter_down(container.hidden,self.wdec,self.bdec)
+		if not(self.auxinput is None):
+			rec_activation2 = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\
+					scanbiasdec(idx_list,auxinput,self.bauxdec)
+		
+		if not(self.input is None) and not(auxinput is None):
+			container.rec_activation = T.join(1,rec_activation1,rec_activation2)
+		else:
+			if not(self.input is None):
+				container.rec_activation = rec_activation1
+			else:
+				container.rec_activation = rec_activation2
+		
+	def filter_up(self, vis, w, b=None):
+		out = T.dot(vis, w)
+		# b may legitimately be None (no bias term)
+		return out + b if b is not None else out
+	filter_down = filter_up
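+	# filter_down reuses the same affine map: the decoding weights (wdec, wauxdec)
+	# are created (or aliased, when tied) already in the transposed (n_hid, n_visible)
+	# orientation, so decoding is just another dot-plus-bias.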
+	
+	# TODO: make the regularization type a parameter instead of hard-coding 'l2'?
+	def define_regularization(self):
+		self.reg_coef = T.scalar('reg_coef')
+		if not(self.auxinput is None):
+			self.Maskup = scanmaskenc(self.idx_list,self.wauxenc)
+			self.Maskdown = scanmaskdec(self.idx_list,self.wauxdec)
+			if not(type(self.Maskup) is list):
+				self.Maskup = [self.Maskup]
+			if not(type(self.Maskdown) is list):
+				self.Maskdown = [self.Maskdown]
+		listweights = []
+		listweightsenc = []
+		# penalize whichever weights are actually present (this also covers the
+		# case where both the dense input and the auxiliary inputs are given)
+		if not(self.auxinput is None):
+			listweights += [w*m for w,m in zip(self.Maskup,self.wauxenc)] + [w*m for w,m in zip(self.Maskdown,self.wauxdec)]
+			listweightsenc += [w*m for w,m in zip(self.Maskup,self.wauxenc)]
+		if not(self.input is None):
+			listweights += [self.wenc,self.wdec]
+			listweightsenc += [self.wenc]
+		self.regularization = self.reg_coef * get_reg_cost(listweights,'l2')
+		self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2')
+	
+	
+	# DEPENDENCY: define_behavioural, define_regularization
+	def define_cost(self, container):
+		container.reconstruction_cost = self.reconstruction_costs(container.rec)
+		# TOTAL COST
+		container.cost = container.reconstruction_cost
+		if self.regularize: # when stacked, regularization and cost are merged in the StackedDAAig module instead of here
+			container.cost = container.cost + self.regularization
+	
+	# DEPENDENCY: define_cost
+	def define_params(self):
+		if not hasattr(self,'params'):
+			self.params = []
+		self.params += [self.benc]
+		if not(self.input is None):
+			self.params += [self.wenc] + [self.bdec]
+		if not(self.auxinput is None):
+			self.params += self.wauxenc + self.bauxdec
+		# copy (not alias): self.params is extended in place below and the decoder-only
+		# parameters must not leak into paramsenc
+		self.paramsenc = copy.copy(self.params)
+		if not(self.tie_weights):
+			# with untied weights the decoding matrices are free parameters as well
+			if not(self.input is None):
+				self.params += [self.wdec]
+			if not(self.auxinput is None):
+				self.params += self.wauxdec
+	
+	# DEPENDENCY: define_cost, define_params
+	def define_gradients(self):
+		self.gradients = T.grad(self.noise.cost, self.params)
+		self.updates = dict((p, p - self.lr * g) for p, g in \
+				zip(self.params, self.gradients))
+	
+	
+	# DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients
+	def define_interface(self):
+		# declare the Methods exposed when the module is used standalone (not stacked)
+		if self.input is None:
+			listin = [self.idx_list, self.auxinput]
+		if self.auxinput is None:
+			listin = [self.input]
+		if not((self.input is None) or (self.auxinput is None)):
+			listin =[self.input,self.idx_list, self.auxinput]
+		self.update = theano.Method(listin, self.noise.cost, self.updates)
+		self.compute_cost = theano.Method(listin, self.noise.cost)
+		if not(self.input is None):
+			self.noisify = theano.Method(listin, self.noisy_input)
+		if not(self.auxinput is None):
+			self.auxnoisify = theano.Method(listin, self.noisy_auxinput)
+		self.reconstruction = theano.Method(listin, self.clean.rec)
+		self.representation = theano.Method(listin, self.clean.hidden)
+		self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec])
+	
+	def corrupt_input(self):
+		return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
+	
+	def reconstruction_costs(self, rec):
+		if self.input is None:
+			return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec)
+		if self.auxinput is None:
+			return self.reconstruction_cost_function(self.input, rec)
+		if not((self.input is None) or (self.auxinput is None)):
+			return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
+	
+	def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
+							seed=1, alloc=True, **init):
+		super(DAAig, self)._instance_initialize(obj, **init)
+		
+		obj.reg_coef = reg_coef
+		obj.noise_level = noise_level
+		obj.noise_level_group = noise_level_group
+		if self.interface:
+			obj.lr = lr # useless when stacked (overridden by the sup_lr and unsup_lr of the StackedDAAig module)
+		else:
+			obj.lr = None
+		
+		obj.random.initialize()
+		if seed is not None:
+			obj.random.seed(seed)
+		self.R = numpy.random.RandomState(seed)
+		
+		obj.__hide__ = ['params']
+		
+		if not(self.input is None):
+			self.inf = 1/numpy.sqrt(self.in_size)
+		if not(self.auxinput is None):
+			self.inf = 1/numpy.sqrt(sum(self.auxin_size))
+		if not(self.auxinput is None or self.input is None):
+			self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size)
+		self.hif = 1/numpy.sqrt(self.n_hid)
+		
+		
+		if alloc:
+			if not(self.input is None):
+				wencshp = (self.in_size, self.n_hid)
+				wdecshp = tuple(reversed(wencshp))
+				print 'wencshp = ', wencshp
+				print 'wdecshp = ', wdecshp
+				
+				obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
+				if not self.tie_weights:
+					obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
+				obj.bdec = numpy.zeros(self.in_size)
+			
+			if not(self.auxinput is None):
+				wauxencshp = [(i, self.n_hid) for i in self.auxin_size]
+				wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
+				print 'wauxencshp = ', wauxencshp
+				print 'wauxdecshp = ', wauxdecshp
+				
+				obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
+				if not self.tie_weights:
+					obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
+				obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
+			
+			print 'self.inf = ', self.inf
+			print 'self.hif = ', self.hif
+			
+			obj.benc = numpy.zeros(self.n_hid)
+			
+
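+# A hypothetical, minimal usage sketch of a standalone DAAig (interface=True),
+# assuming the old theano.compile.module workflow in which make(...) compiles the
+# module and forwards its keyword arguments to _instance_initialize; the names
+# x and batch below are illustrative only:
+#
+#	x = T.dmatrix('x')
+#	model = DAAig(input=x, in_size=784, n_hid=100, tie_weights=True)
+#	daa = model.make(lr=0.01, noise_level=0.3, seed=1, alloc=True)
+#	step_cost = daa.update(batch)        # one stochastic gradient step on a minibatch
+#	codes = daa.representation(batch)    # clean hidden representation of the batch
+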
+#-----------------------------------------------------------------------------------------------------------------------
+
+class StackedDAAig(module.Module):
+	def __init__(self, depth = 1, input = None, auxinput = [None],
+				in_size = None, auxin_size = [[None]], n_hid = [1],
+				regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
+				reconstruction_cost_function=cost.cross_entropy,
+				n_out = 2, target = None, **init):
+		
+		super(StackedDAAig, self).__init__()
+		print '\t**** StackedDAAig.__init__ ****'
+		print '\tinput = ', input
+		print '\tauxinput = ', auxinput
+		print '\tin_size = ', in_size
+		print '\tauxin_size = ', auxin_size
+		print '\tn_hid = ', n_hid
+		# save parameters
+		self.depth = depth
+		self.input = input
+		self.auxinput = auxinput
+		self.in_size = in_size
+		self.auxin_size = auxin_size
+		self.n_hid = n_hid
+		self.regularize = regularize
+		self.tie_weights = tie_weights
+		self.hid_fn = hid_fn
+		self.reconstruction_cost_function = reconstruction_cost_function
+		self.n_out = n_out
+		self.target = target
+		
+		# init for model construction
+		inputprec = input
+		in_sizeprec = in_size
+		self.daaig = [None] * (self.depth+1)
+		
+		#hyper parameters
+		self.unsup_lr = T.dscalar('unsup_lr')
+		self.sup_lr = T.dscalar('sup_lr')
+		
+		# methods
+		self.localupdate = [None] * (self.depth+1) # update only that layer's parameters
+		self.globalupdate = [None] * (self.depth+1) # update wrt that layer's cost, backpropagated down to the input layer
+		self.totalupdate = [None] * (self.depth+1) # update wrt all layers' costs, backpropagated down to the input layer
+		#
+		self.representation = [None] * (self.depth+1)
+		self.reconstruction = [None] * (self.depth)
+		self.compute_localcost = [None] * (self.depth+1)
+		self.compute_globalcost = [None] * (self.depth+1)
+		self.compute_totalcost = [None] * (self.depth+1)
+		self.validate = [None] * (self.depth)
+		self.noisyinputs = [None] * (self.depth)
+		#
+		self.localcost = [None] * (self.depth+1)
+		self.globalcost = [None] * (self.depth+1)
+		self.totalcost = [None] * (self.depth+1)
+		
+		paramstot = []
+		paramsenc = []
+		self.inputs = [None] * (self.depth+1)
+		
+		if not(self.input is None):
+			self.inputs[0] = [self.input]
+		else:
+			self.inputs[0] = []
+		
+		offset = 0
+		for i in range(self.depth):
+			if auxin_size[i] is None:
+				offset +=1
+				param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\
+					False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
+			else:
+				param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
+					False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
+			print '\tLayer init= ', i+1
+			self.daaig[i] = DAAig(*param)
+			
+			if i:
+				self.inputs[i] = copy.copy(self.inputs[i-1])
+			if not(auxin_size[i] is None):
+				self.inputs[i] += [self.daaig[i].idx_list,self.auxinput[i-offset]]
+			
+			paramstot += self.daaig[i].params
+			
+			if self.regularize:
+				self.localcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
+				self.globalcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
+				self.totalcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
+				for j in range(i):
+					self.globalcost[i] += self.daaig[j].regularizationenc
+					self.totalcost[i] += self.daaig[j].noise.cost+self.daaig[j].regularization
+				
+			else:
+				self.localcost[i] = self.daaig[i].noise.cost
+				self.globalcost[i] = self.daaig[i].noise.cost
+				self.totalcost[i] = self.daaig[i].noise.cost
+				for j in range(i):
+					self.totalcost[i] += self.daaig[j].noise.cost
+			
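+			# despite their names, local_grads / global_grads / total_grads are SGD update
+			# dictionaries (param -> param - unsup_lr * grad) for the three cost flavours above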
+			local_grads = dict((j, j - self.unsup_lr * T.grad(self.localcost[i], j))\
+					for j in self.daaig[i].params)
+			global_grads = dict((j, j - self.unsup_lr * T.grad(self.globalcost[i], j))\
+					for j in (self.daaig[i].params+paramsenc))
+			total_grads = dict((j, j - self.unsup_lr * T.grad(self.totalcost[i], j))\
+					for j in (paramstot))
+			
+			self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads)
+			self.globalupdate[i] = theano.Method(self.inputs[i],self.globalcost[i],global_grads)
+			self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads)
+			#
+			self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
+			self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
+			self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
+			self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
+			self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
+			self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
+			noisyout = []
+			if not(inputprec is None):
+				noisyout += [self.daaig[i].noisy_input]
+			if not(auxin_size[i] is None):
+				noisyout += [self.daaig[i].noisy_auxinput]
+			self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
+			#
+			paramsenc += self.daaig[i].paramsenc
+			inputprec = self.daaig[i].clean.hidden
+			in_sizeprec = self.n_hid[i]
+		print '\tLayer supervised init'
+		self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target]
+		self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target)
+		paramstot += self.daaig[-1].params
+		
+		if self.regularize:
+			self.localcost[-1] = self.daaig[-1].regularized_cost
+			self.globalcost[-1] = self.daaig[-1].regularized_cost
+			for j in range(self.depth):
+				self.globalcost[-1] += self.daaig[j].regularizationenc
+		else:
+			self.localcost[-1] = self.daaig[-1].unregularized_cost
+			self.globalcost[-1] = self.daaig[-1].unregularized_cost
+		
+		self.totalcost[-1] = self.totalcost[-2] + self.localcost[-1]
+		
+		local_grads = dict((j, j - self.sup_lr * T.grad(self.localcost[-1], j))\
+					for j in self.daaig[-1].params)
+		global_grads = dict((j, j - self.sup_lr * T.grad(self.globalcost[-1], j))\
+					for j in (self.daaig[-1].params+paramsenc))
+		total_grads = dict((j, j - \
+				(self.unsup_lr * T.grad(self.totalcost[-2], j) + self.sup_lr *T.grad(self.globalcost[-1], j)))\
+				for j in paramstot)
+		
+		self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads)
+		self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads)
+		self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads)
+		self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
+		self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
+		self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
+		self.representation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
+	
+	def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
+								noise_level = 0 , noise_level_group = 0, seed = 1, Alloc = True,**init):
+		super(StackedDAAig, self)._instance_initialize(inst, **init)
+		
+		inst.unsup_lr = unsup_lr
+		inst.sup_lr = sup_lr
+		
+		for i in range(self.depth):
+			print '\tLayer = ', i+1
+			inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\
+					noise_level_group = noise_level_group, seed = seed, alloc = Alloc)
+		print '\tLayer supervised'
+		inst.daaig[-1].initialize()
+		inst.daaig[-1].l1 = 0
+		inst.daaig[-1].l2 = reg_coef # only the l2 norm is used, to stay consistent with the unsupervised regularization
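+
+
+# A hypothetical sketch of how the stacked module might be driven, again assuming
+# the old theano.compile.module workflow (make(...) compiles the module and forwards
+# its keyword arguments to _instance_initialize); x, y and the data variables below
+# are illustrative only:
+#
+#	x, y = T.dmatrix('x'), T.ivector('y')
+#	model = StackedDAAig(depth=2, input=x, auxinput=[None, None],
+#			in_size=784, auxin_size=[None, None], n_hid=[200, 100],
+#			regularize=False, n_out=10, target=y)
+#	sdaa = model.make(unsup_lr=0.1, sup_lr=0.01, noise_level=0.3, seed=1, Alloc=True)
+#	for i in range(model.depth):                  # greedy layer-wise pre-training
+#		for batch_x in unsup_batches:
+#			sdaa.localupdate[i](batch_x)
+#	for batch_x, batch_y in sup_batches:          # supervised fine-tuning of the top layer
+#		sdaa.globalupdate[-1](batch_x, batch_y)
+
+
+if __name__ == '__main__':
+	# Minimal smoke test of the regularization helpers defined at the top of this
+	# file; a small sketch that exercises nothing but lnorm/get_reg_cost.
+	w1, w2 = T.dmatrix('w1'), T.dmatrix('w2')
+	l2_penalty = theano.function([w1, w2], get_reg_cost([w1, w2], 'l2'))
+	l1_penalty = theano.function([w1, w2], get_reg_cost([w1, w2], 'l1'))
+	a = numpy.ones((2, 3))
+	b = 2 * numpy.ones((3, 2))
+	print 'l2 penalty =', l2_penalty(a, b)	# 6*1 + 6*4 = 30.0
+	print 'l1 penalty =', l1_penalty(a, b)	# 6*1 + 6*2 = 18.0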
\ No newline at end of file