changeset 692:5ca1a8e859db

merge
author James Bergstra <bergstrj@iro.umontreal.ca>
date Thu, 14 May 2009 17:00:22 -0400
parents e69249897f89 (current diff) 0457dfa6fcad (diff)
children ee7026de9681
files
diffstat 1 files changed, 502 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/sandbox/scan_inputs_groups.py	Thu May 14 17:00:22 2009 -0400
@@ -0,0 +1,502 @@
+import numpy
+import theano
+from theano import tensor as T
+from theano.gof import Op
+from theano.gof import Apply
+from theano import scalar as scal
+
+# These Ops allow us to deal efficiently with static groups of possibly missing inputs in the dense DAA framework
+# (for example, multimodal data where an entire modality is sometimes missing).
+# The inputs are represented by an index list and a theano.generic variable (a list of matrices (numpy arrays);
+# each element corresponds to an available modality, and the index list indicates the weights
+# associated with it).
+# Example of index list: [1, 0, -3]
+#	*the 1 says that the first element of the input list refers to the first element of the weights_list
+#		(auxiliary target as input)
+#								if indexlist[i]>0 it refers to Weightslist[indexlist[i]-1]
+#	*the 0 means that the second element of the input list is neither encoded nor decoded (it is replaced by zeros)
+#		this is not efficient, so in this case it is better to give: [1,-3] and [inputslist[0],inputslist[2]]
+#		but it allows us to deal with empty lists: give indexlist = [.0] and inputlist=[[.0]]
+#	*when an index is negative, the input is not used for encoding but is still reconstructed
+#		(auxiliary target as output)
+#								if indexlist[i]<0 it refers to Weightslist[-indexlist[i]-1]
+#
+# An entire batch should have the same available inputs configuration.
+#
+# Dense DAA Example:----------------------------------------------------------------------------
+#
+#from theano.tensor.nnet import sigmoid
+#
+#nb_modality = 4
+#wenc = [T.dmatrix('wenc%s'%i) for i in range(nb_modality)]
+#wdec = [T.dmatrix('wdec%s'%i) for i in range(nb_modality)]
+#benc = T.dvector('benc')
+#bdec = [T.dvector('bdec%s'%i) for i in range(nb_modality)]
+#vectin = T.ivector('vectin')
+#inputpart = theano.generic('inputpart')
+#noise_bit = T.dscalar('noise_bit')
+#noise_group = T.dscalar('noise_group')
+#
+#[vectin2,inputpart2] = scannoise(vectin,inputpart,noise_bit,noise_group)
+#hid = scandotenc(vectin2, inputpart2, wenc)
+#acthid = sigmoid(hid + benc)
+#dec = sigmoid(scanbiasdec(vectin2,inputpart2,bdec) + scandotdec(vectin2, inputpart2,acthid,wdec))
+#cost = T.sum(T.sum(T.sqr( scaninputs(vectin,inputpart) - dec ),1),0)
+
+# Checking inputs in make_node methods----------------------
+def Checkidx_list(idx_list):
+	idx_list = T.as_tensor_variable(idx_list)
+	nidx = idx_list.type.ndim
+	if nidx != 1: raise TypeError('not vector', idx_list)
+	return idx_list
+
+def Checkhidd(hidd):
+	hidd = T.as_tensor_variable(hidd)
+	nhidd = hidd.type.ndim
+	if nhidd not in (1,2): raise TypeError('not matrix or vector', hidd)
+	return hidd
+
+def Checkweights_list(weights_list):
+	weights_list = map(T.as_tensor_variable, weights_list)
+	for i in range(len(weights_list)):
+		nweights = weights_list[i].type.ndim
+		if nweights not in (1,2): raise TypeError('not matrix or vector', weights_list[i])
+	return weights_list
+
+def Checkbias_list(bias_list):
+	bias_list = map(T.as_tensor_variable, bias_list)
+	for i in range(len(bias_list)):
+		nbias = bias_list[i].type.ndim
+		if nbias != 1: raise TypeError('not vector', bias_list[i])
+	return bias_list
+
+# Encoding scan dot product------------------------------------
+class ScanDotEnc(Op):
+	"""This Op takes an index list (as tensor.ivector), a list of matrices representing
+	the available inputs (as theano.generic), and all the encoding weights tensor.dmatrix of the model. It will select the
+	weights corresponding to the inputs (according to index list) and compute only the necessary dot products.
+	
+	The output is the sum of dot(inputs[i], weights[idx[i]-1]) over the strictly positive entries
+	of the index list; entries that are zero or negative are skipped."""
+	def __init__(self):
+		"""Build and compile the helper Module whose methods perform() calls."""
+		#Create Theano methods to do the dot products with blas or at least in C.
+		self.M=theano.Module()
+		inputs = T.dmatrix('input')
+		weights = T.dmatrix('weights')
+		self.M.hid = T.dmatrix('hid')
+		self.M.resultin = self.M.hid + T.dot(inputs,weights)
+		result = T.dot(inputs,weights)
+		
+		# dotin returns nothing; its third argument maps the member hid to hid + dot(inputs, weights)
+		# (presumably the Method's update dictionary -- confirm against the theano.Method API).
+		# dot simply returns dot(inputs, weights).
+		self.M.dotin = theano.Method([inputs,weights],None,{self.M.hid : self.M.resultin})
+		self.M.dot = theano.Method([inputs,weights],result)
+		self.m = self.M.make()
+	
+	def make_node(self, idx_list, inputs_list, weights_list):
+		"""Validate idx_list (vector) and weights_list (vectors or matrices) and build the Apply node.
+		
+		The node inputs are [idx_list, inputs_list] followed by the weights; the single output is a dmatrix."""
+		idx_list = Checkidx_list(idx_list)
+		weights_list = Checkweights_list(weights_list)
+		return Apply(self, [idx_list] + [inputs_list] + weights_list, [T.dmatrix()])
+	
+	def perform(self, node, args, (hid,)):
+		"""Compute the encoding pre-activation and store it in hid[0].
+		
+		args[0] is the index list, args[1] the list of input matrices and args[2:] the weight
+		matrices (the order used by make_node).
+		Raises NotImplementedError when the index list, inputs and weights are inconsistent."""
+		idx_list = args[0]
+		# becomes True once the module member hid holds a first dot product
+		hidcalc = False
+		
+		# reference sizes: rows of the first input, columns of the first weight matrix
+		batchsize = (args[1][0].shape)[0]
+		n_hid = (args[2].shape)[1]
+		if len(idx_list) != len(args[1]) :
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		# there are len(args)-2 weight matrices and indexes are 1-based
+		if max(idx_list) >= (len(args)-2)+1 :
+			raise NotImplementedError('index superior to weight list length',idx_list)
+		for i in range(len(args[1])):
+			if (args[1][i].shape)[0] != batchsize:
+				raise NotImplementedError('different batchsize in the inputs list',args[1][i])
+		for i in range(len(args)-2):
+			if (args[2+i].shape)[1] != n_hid:
+				raise NotImplementedError('different length of hidden in the weights list',args[2+i])
+		
+		for i in range(len(idx_list)):
+			if idx_list[i]>0:
+				# index k selects the weight matrix stored at args[2+k-1]
+				if hidcalc:
+					self.m.dotin(args[1][i],args[2+int(idx_list[i]-1)])
+				else:
+					self.m.hid = self.m.dot(args[1][i],args[2+int(idx_list[i]-1)])
+					hidcalc = True
+		
+		# no positive index at all: the output is a zero matrix of the expected size
+		if not hidcalc:
+			hid[0] = numpy.zeros([batchsize,n_hid])
+		else:
+			hid[0] = self.m.hid
+		
+	
+	def grad(self, args, gz):
+		"""Return None for the index list and the inputs list, followed by the outputs of ScanDotEncGrad."""
+		return [None, None] + ScanDotEncGrad()(args,gz)
+	
+	def __hash__(self):
+		"""Return the same hash for every instance (hash of the class xor a fixed constant)."""
+		return hash(ScanDotEnc)^58994
+	
+	def __str__(self):
+		return "ScanDotEnc"
+
+# module-level instance used as the callable op
+scandotenc=ScanDotEnc()
+
+class ScanDotEncGrad(Op):
+	"""This Op computes the gradient wrt the weights for ScanDotEnc.
+	
+	Its inputs are the inputs of ScanDotEnc followed by the gradient on the ScanDotEnc output;
+	it has one dmatrix output per weight matrix."""
+	def __init__(self):
+		"""Build and compile the helper Module whose methods perform() calls."""
+		#Create Theano methods to do the dot products with blas or at least in C.
+		self.M=theano.Module()
+		input1 = T.dmatrix('input1')
+		self.M.g_out = T.dmatrix('g_out')
+		result = T.dmatrix('result')
+		input2=T.transpose(input1)
+		self.M.resultin = result + T.dot(input2,self.M.g_out)
+		self.M.result = T.dot(input2,self.M.g_out)
+		
+		# dot returns dot(input1.T, g_out); dotin returns the same product added to a given result
+		self.M.dotin = theano.Method([input1,result],self.M.resultin)
+		self.M.dot = theano.Method([input1],self.M.result)
+		self.m = self.M.make()
+	
+	def make_node(self, args, g_out):
+		"""Validate the index list and the weights, then build the Apply node.
+		
+		The node inputs are args + g_out (so both are expected to be lists) and there is one
+		dmatrix output for each element of args after the first two."""
+		# the two calls below are only used for their checks; their return values are not used
+		idx_list = Checkidx_list(args[0])
+		weights_list = Checkweights_list(args[2:])
+		return Apply(self, args + g_out, [T.dmatrix() for i in xrange(2,len(args))])
+	
+	def perform(self, node, args, z):
+		"""Compute the gradient wrt every weight matrix.
+		
+		args[0] is the index list, args[1] the list of input matrices, args[2:-1] the weight
+		matrices and args[-1] the gradient on the output. z[k][0] receives the gradient of the
+		weight matrix args[2+k].
+		Raises NotImplementedError when the index list, inputs and weights are inconsistent."""
+		idx_list = args[0]
+		self.m.g_out = args[-1]
+		
+		# reference sizes: rows of the first input, columns of the first weight matrix
+		batchsize = (args[1][0].shape)[0]
+		n_hid = (args[2].shape)[1]
+		if len(idx_list) != len(args[1]) :
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		# there are len(args)-3 weight matrices and indexes are 1-based
+		if max(idx_list) >= (len(args)-3)+1 :
+			raise NotImplementedError('index superior to weight list length',idx_list)
+		for i in range(len(args[1])):
+			if (args[1][i].shape)[0] != batchsize:
+				raise NotImplementedError('different batchsize in the inputs list',args[1][i])
+		for i in range(len(args)-3):
+			if (args[2+i].shape)[1] != n_hid:
+				raise NotImplementedError('different length of hidden in the weights list',args[2+i])
+		
+		# zcalc[k] is True once z[k][0] has been written during this call
+		zcalc = [False for i in range(len(args)-3)]
+		
+		for i in range(len(idx_list)):
+			if idx_list[i]>0:
+				# a weight matrix selected by several inputs accumulates their contributions
+				if zcalc[int(idx_list[i]-1)]:
+					z[int(idx_list[i]-1)][0] = self.m.dotin(args[1][i],z[int(idx_list[i]-1)][0])
+				else:
+					z[int(idx_list[i]-1)][0] = self.m.dot(args[1][i])
+					zcalc[int(idx_list[i]-1)] = True
+		
+		# weight matrices that no positive index selected get a zero gradient of their own shape
+		for i in range(len(args)-3):
+			if not zcalc[i]:
+				shp = args[2+i].shape
+				z[i][0] = numpy.zeros((shp[0],shp[1]))
+		
+	def __hash__(self):
+		"""Return the same hash for every instance (hash of the class xor a fixed constant)."""
+		return hash(ScanDotEncGrad)^15684
+		
+	def __str__(self):
+		return "ScanDotEncGrad"
+
+# Decoding scan dot product------------------------------------
+class ScanDotDec(Op):
+	"""This Op takes an index list (as tensor.ivector), a list of matrices representing
+	the available inputs (as theano.generic), the hidden layer of the DAA (theano.dmatrix)
+	and all the decoding weights tensor.dmatrix of the model. It will select the
+	weights corresponding to the available inputs (according to index list) and compute
+	only the necessary dot products. The outputs will be concatenated and will represent
+	the reconstruction of the different modality in the same order than the index list"""
+	def __init__(self):
+		#Create Theano methods to do the dot products with blas or at least in C.
+		self.M=theano.Module()
+		weights = T.dmatrix('weights')
+		self.M.hid = T.dmatrix('hid')
+		oldval = T.dmatrix('oldval')
+		resultin = oldval + T.dot(self.M.hid,weights)
+		result = T.dot(self.M.hid,weights)
+		
+		self.M.dotin = theano.Method([weights,oldval],resultin)
+		self.M.dot = theano.Method([weights],result)
+		self.m = self.M.make()
+	
+	def make_node(self, idx_list, input_list, hidd, weights_list):
+		idx_list = Checkidx_list(idx_list)
+		hidd = Checkhidd(hidd)
+		weights_list = Checkweights_list(weights_list)
+		return Apply(self, [idx_list] + [input_list] +[hidd] + weights_list,[T.dmatrix()])
+	
+	def perform(self, node, args, (z,)):
+		
+		idx_list = abs(args[0])
+		self.m.hid = args[2]
+		
+		batchsize = (self.m.hid.shape)[0]
+		n_hid = self.m.hid.shape[1]
+		if max(idx_list) >= len(args)-3+1 :
+			raise NotImplementedError('index superior to weight list length',idx_list)
+		if len(idx_list) != len(args[1]) :
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		for i in range(len(args)-3):
+			if (args[3+i].shape)[0] != n_hid:
+				raise NotImplementedError('different length of hidden in the weights list',args[3+i])
+		
+		zcalc = [False for i in idx_list]
+		z[0] = [None for i in idx_list]
+		
+		for i in range(len(idx_list)):
+			if idx_list[i]>0:
+				if zcalc[i]:
+					z[0][i] = self.m.dotin(args[3+int(idx_list[i]-1)],z[0][i])
+				else:
+					z[0][i] = self.m.dot(args[3+int(idx_list[i]-1)])
+					zcalc[i] = True
+		
+		for i in range(len(idx_list)):
+			if not zcalc[i]:
+				shp = args[1][int(idx_list[i]-1)].shape
+				z[0][i] = numpy.zeros((batchsize,shp[1]))
+		
+		z[0] = numpy.concatenate(z[0],1)
+		
+	def grad(self, args, gz):
+		return [None, None] + ScanDotDecGrad()(args,gz)
+	
+	def __hash__(self):
+		return hash(ScanDotDec)^73568
+	
+	def __str__(self):
+		return "ScanDotDec"
+
+scandotdec=ScanDotDec()
+
+class ScanDotDecGrad(Op):
+	"""This Op computes the gradient wrt the weights for ScanDotDec"""
+	def __init__(self):
+		self.M=theano.Module()
+		gout = T.dmatrix('gout')
+		self.M.hidt = T.dmatrix('hid')
+		oldval = T.dmatrix('oldval')
+		resultin1 = oldval + T.dot(self.M.hidt,gout)
+		result1 = T.dot(self.M.hidt,gout)
+		weights = T.dmatrix('weights')
+		weights2 = T.transpose(weights)
+		resultin2 = oldval + T.dot(gout,weights2)
+		result2 = T.dot(gout,weights2)
+		
+		self.M.dotin1 = theano.Method([gout,oldval],resultin1)
+		self.M.dot1 = theano.Method([gout],result1)
+		self.M.dotin2 = theano.Method([gout,weights,oldval],resultin2)
+		self.M.dot2 = theano.Method([gout,weights],result2)
+		self.m = self.M.make()
+	
+	
+	def make_node(self, args, g_out):
+		idx_list = Checkidx_list(args[0])
+		hidd = Checkhidd(args[2])
+		weights_list = Checkweights_list(args[3:])
+		return Apply(self, args + g_out, [T.dmatrix() for i in xrange(2,len(args))])
+	
+	def perform(self, node, args, z):
+		idx_list = abs(args[0])
+		self.m.hidt = args[2].T
+		
+		batchsize = (self.m.hidt.shape)[1]
+		n_hid = self.m.hidt.shape[0]
+		if max(idx_list) >= len(args)-4+1 :
+			raise NotImplementedError('index superior to weight list length',idx_list)
+		if len(idx_list) != len(args[1]) :
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		for i in range(len(args)-4):
+			if (args[3+i].shape)[0] != n_hid:
+				raise NotImplementedError('different length of hidden in the weights list',args[3+i])
+		
+		zidx=numpy.zeros((len(idx_list)+1))
+		
+		for i in range(len(idx_list)):
+			if idx_list[i] == 0:
+				zidx[i+1] = (args[1][i].shape)[1]
+			else:
+				zidx[i+1] = (args[3+idx_list[i]-1].shape)[1]
+		
+		zidx=zidx.cumsum()
+		hidcalc = False
+		zcalc = [False for i in range((len(args)-4))]
+		
+		for i in range(len(idx_list)):
+			if idx_list[i]>0:
+				if zcalc[int(idx_list[i])-1]:
+					z[int(idx_list[i])][0] = self.m.dotin1(args[-1][:,zidx[i]:zidx[i+1]],z[int(idx_list[i])][0])
+				else:
+					z[int(idx_list[i])][0] = self.m.dot1(args[-1][:,zidx[i]:zidx[i+1]])
+					zcalc[int(idx_list[i])-1] = True
+				if hidcalc:
+					z[0][0] = self.m.dotin2(args[-1][:,zidx[i]:zidx[i+1]],args[3+int(idx_list[i]-1)],z[0][0])
+				else:
+					z[0][0] = self.m.dot2(args[-1][:,zidx[i]:zidx[i+1]],args[3+int(idx_list[i]-1)])
+					hidcalc = True
+		
+		if not hidcalc:
+			z[0][0] = numpy.zeros((self.m.hidt.shape[1],self.m.hidt.shape[0]))
+		
+		for i in range((len(args)-4)):
+			if not zcalc[i]:
+				shp = args[3+i].shape
+				z[i+1][0] = numpy.zeros((shp[0],shp[1]))
+		
+		
+	def __hash__(self):
+		return hash(ScanDotDecGrad)^87445
+	
+	def __str__(self):
+		return "ScanDotDecGrad"
+
+# DAA input noise------------------------------------
+class ScanNoise(Op):
+	"""This Op takes an index list (as tensor.ivector), a list of matrices representing
+	the available inputs (as theano.generic), a probability of individual bit masking and
+	a probability of modality masking. It will return the inputs list with randoms zeros entry
+	and the index list with some positive values changed to negative values (groups masking)"""
+	def __init__(self, seed = 1):
+		"""Build and compile the helper Module holding the random stream and the two noise methods.
+		
+		seed is given to T.RandomStreams and kept in self.seed (it is also used by __hash__)."""
+		self.M=theano.Module()
+		self.M.rand = T.RandomStreams(seed)
+		self.seed = seed
+		mat = T.matrix('mat')
+		noise_level_bit = T.dscalar('noise_level_bit')
+		noise_level_group = T.dscalar('noise_level_group')
+		# out1: mat multiplied elementwise by a binomial draw of the same shape (bit masking)
+		self.M.out1 = self.M.rand.binomial(T.shape(mat), 1, 1 - noise_level_bit) * mat
+		# out2: a single (1,1) binomial draw deciding whether a whole group is kept
+		self.M.out2 = self.M.rand.binomial((1,1), 1, 1 - noise_level_group)
+		
+		self.M.noisify_bit = theano.Method([mat,noise_level_bit],self.M.out1)
+		self.M.noisify_group_bool = theano.Method([noise_level_group],self.M.out2)
+		self.R = self.M.make()
+		self.R.rand.initialize()
+	
+	def make_node(self, idx_list, inputs_list, noise_level_bit, noise_level_group):
+		"""Validate idx_list (vector) and build the Apply node.
+		
+		The outputs are an ivector (the new index list) and a theano.generic (the new inputs list)."""
+		idx_list = Checkidx_list(idx_list)
+		return Apply(self, [idx_list] + [inputs_list] + [noise_level_bit] + [noise_level_group],\
+				[T.ivector(), theano.generic()])
+	
+	def perform(self, node, (idx_list,inputs_list,noise_level_bit,noise_level_group), (y,z)):
+		"""Apply group masking to the index list (y[0]) and bit masking to the inputs (z[0]).
+		
+		Raises NotImplementedError when idx_list and inputs_list do not have the same length."""
+		
+		if len(idx_list) != len(inputs_list) :
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		
+		# a positive index is negated when the group draw is falsy; other indexes are kept as they are
+		y[0] = [-i if (i>0 and not(self.R.noisify_group_bool(noise_level_group))) else i for i in idx_list]
+		# inputs whose new index is positive get bit noise; all the others become zeros of the same shape
+		z[0] = [(self.R.noisify_bit(inputs_list[i],noise_level_bit) if y[0][i]>0 else numpy.zeros((inputs_list[i].shape)))\
+				for i in range(len(inputs_list))]
+	
+	def grad(self,args,gz):
+		"""Return None for each of the four inputs."""
+		return [None,None,None,None]
+	
+	
+	def __hash__(self):
+		"""Combine the hashes of the class, the seed and the compiled module with a fixed constant."""
+		return hash(ScanNoise)^hash(self.seed)^hash(self.R)^12254
+	
+	def __str__(self):
+		return "ScanNoise"
+
+# module-level instance used as the callable op (default seed)
+scannoise=ScanNoise()
+
+# Total input matrix construction------------------------------------
+class ScanInputs(Op):
+	"""This Op takes an index list (as tensor.ivector) and a list of matrices representing
+	the available inputs (as theano.generic). It will construct the appropriate tensor.dmatrix
+	to compare to the reconstruction obtained with ScanDotDec"""
+	def make_node(self, idx_list, inputs_list):
+		idx_list = Checkidx_list(idx_list)
+		return Apply(self, [idx_list] + [inputs_list],[T.dmatrix()])
+	
+	def perform(self, node, (idx_list, inputs_list), (z,)):
+		
+		if len(idx_list) != len(inputs_list):
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		
+		for i in range(len(idx_list)):
+			if idx_list[i] == 0:
+				inputs_list[i] = 0 * inputs_list[i]
+		
+		z[0] = numpy.concatenate(inputs_list,1)
+	
+	def grad(self,args,gz):
+		return [None,None]
+	
+	def __hash__(self):
+		return hash(ScanInputs)^75902
+	
+	def __str__(self):
+		return "ScanInputs"
+
+scaninputs=ScanInputs()
+
+# Decoding bias vector construction------------------------------------
+class ScanBiasDec(Op):
+	"""This Op takes an index list (as tensor.ivector), a list of matrices representing
+	the available inputs (as theano.generic) and the decoding bias tensor.dvector.
+	It will construct the appropriate bias tensor.dvector
+	to add to the reconstruction obtained with ScanDotDec"""
+	def make_node(self, idx_list, input_list, bias_list):
+		idx_list = Checkidx_list(idx_list)
+		bias_list = Checkbias_list(bias_list)
+		return Apply(self, [idx_list] + [input_list] + bias_list, [T.dvector()])
+	
+	def perform(self, node, args, (z,)):
+		idx_list = abs(args[0])
+		
+		if max(idx_list) >= (len(args)-2)+1 :
+			raise NotImplementedError('index superior to bias list length',idx_list)
+		if len(idx_list) != len(args[1]) :
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		z[0] = [args[idx_list[i]+1] if idx_list[i] != 0 else numpy.zeros(args[1][i].shape[1]) \
+				for i in range(len(idx_list))]
+		z[0] = numpy.concatenate(z[0],1)
+	
+	def __hash__(self):
+		return hash(ScanBiasDec)^60056
+	
+	def grad(self,args,gz):
+		return [None,None] + ScanBiasDecGrad()(args,gz)
+	
+	def __str__(self):
+		return "ScanBiasDec"
+
+scanbiasdec=ScanBiasDec()
+
+class ScanBiasDecGrad(Op):
+	"""This Op computes the gradient wrt the bias for ScanBiasDec"""
+	def make_node(self, args, g_out):
+		idx_list = Checkidx_list(args[0])
+		bias_list = Checkbias_list(args[2:])
+		return Apply(self, args + g_out, [T.dvector() for i in range(len(args)-2)])
+	
+	def perform(self, node, args, z):
+		idx_list = abs(args[0])
+		
+		if max(idx_list) >= (len(args)-3)+1 :
+			raise NotImplementedError('index superior to bias list length',idx_list)
+		if len(idx_list) != len(args[1]) :
+			raise NotImplementedError('size of index different of inputs list size',idx_list)
+		
+		zidx=numpy.zeros((len(idx_list)+1))
+		for i in range(len(idx_list)):
+			if idx_list[i] == 0:
+				zidx[i+1] = (args[1][i].shape)[1]
+			else:
+				zidx[i+1] = (args[2+idx_list[i]-1].size)
+		zidx=zidx.cumsum()
+		zcalc = [False for i in range((len(args)-3))]
+		
+		for i in range(len(idx_list)):
+			if idx_list[i]>0:
+				if zcalc[int(idx_list[i])-1]:
+					z[int(idx_list[i])-1][0] += args[-1][zidx[i]:zidx[i+1]]
+				else:
+					z[int(idx_list[i])-1][0] = args[-1][zidx[i]:zidx[i+1]]
+					zcalc[int(idx_list[i])-1] = True
+		
+		for i in range((len(args)-3)):
+			if not zcalc[i]:
+				shp = args[2+i].size
+				z[i][0] = numpy.zeros(shp)
+		
+	
+	def __hash__(self):
+		return hash(ScanBiasDecGrad)^41256
+	
+	def __str__(self):
+		return "ScanBiasDecGrad"