view pylearn/algorithms/sandbox/DAA_inputs_groups.py @ 706:2b54c38e2c60

improved initialization of StackedDAAig
author Xavier Glorot <glorotxa@iro.umontreal.ca>
date Thu, 21 May 2009 19:52:46 -0400
parents eb91fa83e2c1
children 53a247cfee84

import numpy
import theano
import os, copy

from theano import tensor as T
from theano.compile import module
from theano.tensor.nnet import sigmoid
import theano.sandbox.softsign  # needed so that theano.sandbox.softsign.softsign is resolvable below

from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
		scanmaskenc,scanmaskdec

from pylearn.algorithms import cost
from pylearn.algorithms.logistic_regression import LogRegN


def lnorm(param, type='l2'):
	if type == 'l1':
		return T.sum(T.abs_(param))
	if type == 'l2':
		return T.sum(T.pow(param,2))
	raise NotImplementedError('Only l1 and l2 regularization are currently implemented')

def get_reg_cost(params, type):
	rcost = 0
	for param in params:
		rcost += lnorm(param, type)
	return rcost
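# Usage sketch for the helpers above: get_reg_cost builds a single symbolic scalar from a
# list of weight variables.  With two hypothetical matrices w1 and w2 (names chosen only
# for illustration):
#
#     w1 = T.dmatrix('w1'); w2 = T.dmatrix('w2')
#     l2_penalty = get_reg_cost([w1, w2], 'l2')   # equivalent to T.sum(w1**2) + T.sum(w2**2)
#
# The DAAig class below scales this penalty by a symbolic reg_coef scalar.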


def sigmoid_act(x):
	return theano.tensor.nnet.sigmoid(x)

def tanh_act(x):
	return theano.tensor.tanh(x)

def softsign_act(x):
	return theano.sandbox.softsign.softsign(x)
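# Note on the activations above: sigmoid_act maps to (0, 1), while tanh_act and softsign_act
# map to (-1, 1).  With the default cross-entropy reconstruction cost, which assumes
# reconstructions in (0, 1), the sigmoid is typically the appropriate choice.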

class ScratchPad:
	pass

class DAAig(module.Module):
	"""De-noising Auto-encoder
	"""
	
	def __init__(self, input = None, auxinput = None,
				in_size=None, auxin_size= None, n_hid=1,
				regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
				reconstruction_cost_function=cost.cross_entropy, interface = True,**init):
		"""
		:param regularize: WRITEME
		:param tie_weights: WRITEME
		:param hid_fn: WRITEME
		:param reconstruction_cost: Should return one cost per example (row)
		:todo: Default noise level for all daa levels
		"""
		print '\t\t**** DAAig.__init__ ****'
		print '\t\tinput = ', input
		print '\t\tauxinput = ', auxinput
		print '\t\tin_size = ', in_size
		print '\t\tauxin_size = ', auxin_size
		print '\t\tn_hid = ', n_hid
		
		super(DAAig, self).__init__()
		self.random = T.RandomStreams()
		
		# MODEL CONFIGURATION
		self.in_size = in_size
		self.auxin_size = auxin_size
		self.n_hid = n_hid
		self.regularize = regularize
		self.tie_weights = tie_weights
		self.reconstruction_cost_function = reconstruction_cost_function
		self.interface = interface
		
		assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
		self.hid_fn = eval(hid_fn)
		
		### DECLARE MODEL VARIABLES and default
		self.input = input
		self.noisy_input = None
		self.auxinput = auxinput
		self.idx_list = T.ivector('idx_list') if not(self.auxinput is None) else None
		self.noisy_idx_list, self.noisy_auxinput = None, None
		
		#parameters
		self.benc = T.dvector('benc')
		if not(self.input is None):
			self.wenc = T.dmatrix('wenc')
			self.wdec = self.wenc.T if tie_weights else T.dmatrix('wdec')
			self.bdec = T.dvector('bdec')
		
		if not(self.auxinput is None):
			self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))]
			self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))]
			self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))]
		
		#hyper-parameters
		if self.interface:
			self.lr = T.scalar('lr')
		self.noise_level = T.scalar('noise_level')
		self.noise_level_group = T.scalar('noise_level_group')
		
		# build the behavioural graph only for the base class, so that subclasses can complete their own initialization first
		if self.__class__ == DAAig:
			self.init_behavioural()
		print '\t\t**** end DAAig.__init__ ****'
	
	### BEHAVIOURAL MODEL
	def init_behavioural(self):
		if not(self.input is None):
			self.noisy_input = self.corrupt_input()
		if not(self.auxinput is None):
			self.noisy_idx_list , self.noisy_auxinput = \
				scannoise(self.idx_list,self.auxinput,self.noise_level,self.noise_level_group)
		
		self.noise = ScratchPad()
		self.clean = ScratchPad()
		
		self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput)
		self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput)
		
		self.define_regularization()  # call before cost
		self.define_cost(self.clean)
		self.define_cost(self.noise)
		self.define_params()
		if self.interface:
			self.define_gradients()
			self.define_interface()
		
	def define_behavioural(self,container, input, idx_list , auxinput):
		self.define_propup(container, input, idx_list , auxinput)
		container.hidden = self.hid_fn(container.hidden_activation)
		self.define_propdown(container, idx_list , auxinput)
		container.rec = self.hid_fn(container.rec_activation)
		
	def define_propup(self, container, input, idx_list , auxinput):
		if not(self.input is None):
			container.hidden_activation = self.filter_up(input,self.wenc,self.benc)
			if not(self.auxinput is None):
				container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc)
		else:
			if not(self.auxinput is None):
				container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc
		
	# DEPENDENCY: define_propup
	def define_propdown(self, container, idx_list , auxinput):
		if not(self.input is None):
			rec_activation1 = self.filter_down(container.hidden,self.wdec,self.bdec)
		if not(self.auxinput is None):
			rec_activation2 = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\
					scanbiasdec(idx_list,auxinput,self.bauxdec)
		
		if not(self.input is None) and not(auxinput is None):
			container.rec_activation = T.join(1,rec_activation1,rec_activation2)
		else:
			if not(self.input is None):
				container.rec_activation = rec_activation1
			else:
				container.rec_activation = rec_activation2
		
	def filter_up(self, vis, w, b=None):
		out = T.dot(vis, w)
		return out + b if b is not None else out
	filter_down = filter_up
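	# filter_up/filter_down implement the affine maps of the encoder and decoder:
	# hidden_activation = dot(visible, wenc) + benc and rec_activation = dot(hidden, wdec) + bdec.
	# Shape sketch (hypothetical sizes): with visible of shape (batch, in_size) and wenc of
	# shape (in_size, n_hid), the result has shape (batch, n_hid).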
	
	# TODO: fix regularization type (outside parameter ?)
	def define_regularization(self):
		self.reg_coef = T.scalar('reg_coef')
		if not(self.auxinput is None):
			self.Maskup = scanmaskenc(self.idx_list,self.wauxenc)
			self.Maskdown = scanmaskdec(self.idx_list,self.wauxdec)
			if not(type(self.Maskup) is list):
				self.Maskup = [self.Maskup]
			if not(type(self.Maskdown) is list):
				self.Maskdown = [self.Maskdown]
		listweights = []
		listweightsenc = []
		if not(self.auxinput is None):
			listweights += [w*m for w,m in zip(self.Maskup,self.wauxenc)] + [w*m for w,m in zip(self.Maskdown,self.wauxdec)]
			listweightsenc += [w*m for w,m in zip(self.Maskup,self.wauxenc)]
		if not(self.input is None):
			listweights += [self.wenc,self.wdec]
			listweightsenc += [self.wenc]
		self.regularization = self.reg_coef * get_reg_cost(listweights,'l2')
		self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2')
	
	
	# DEPENDENCY: define_behavioural, define_regularization
	def define_cost(self, container):
		container.reconstruction_cost = self.reconstruction_costs(container.rec)
		# TOTAL COST
		container.cost = container.reconstruction_cost
		if self.regularize: # when stacked, the regularization is merged into the cost by the StackedDAAig module, not here
			container.cost = container.cost + self.regularization
	
	# DEPENDENCY: define_cost
	def define_params(self):
		if not hasattr(self,'params'):
			self.params = []
		self.params += [self.benc]
		self.paramsenc = copy.copy(self.params)
		if not(self.input is None):
			self.params += [self.wenc] + [self.bdec]
			self.paramsenc += [self.wenc]
		if not(self.auxinput is None):
			self.params += self.wauxenc + self.bauxdec
			self.paramsenc += self.wauxenc
		if not(self.tie_weights):
			if not(self.input is None):
				self.params += [self.wdec]
			if not(self.auxinput is None):
				self.params += self.wauxdec
	
	# DEPENDENCY: define_cost, define_gradients
	def define_gradients(self):
		self.gradients = T.grad(self.noise.cost, self.params)
		self.updates = dict((p, p - self.lr * g) for p, g in \
				zip(self.params, self.gradients))
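	# The updates above are plain stochastic gradient descent on the noisy-input cost:
	# for every parameter p with gradient g = dC_noise/dp, the stored update is p <- p - lr * g.
	# For instance, the entry for the encoder weights is
	#     updates[wenc] == wenc - lr * T.grad(noise.cost, wenc)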
	
	
	# DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients
	def define_interface(self):
		# declare function to interface with module (if not stacked)
		if self.input is None:
			listin = [self.idx_list, self.auxinput]
		if self.auxinput is None:
			listin = [self.input]
		if not((self.input is None) or (self.auxinput is None)):
			listin =[self.input,self.idx_list, self.auxinput]
		self.update = theano.Method(listin, self.noise.cost, self.updates)
		self.compute_cost = theano.Method(listin, self.noise.cost)
		if not(self.input is None):
			self.noisify = theano.Method(listin, self.noisy_input)
		if not(self.auxinput is None):
			self.auxnoisify = theano.Method(listin, self.noisy_auxinput)
		self.reconstruction = theano.Method(listin, self.clean.rec)
		self.representation = theano.Method(listin, self.clean.hidden)
		self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec])
	
	def corrupt_input(self):
		return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input
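	# corrupt_input applies masking noise: each input coordinate is kept with probability
	# 1 - noise_level and zeroed otherwise (the binomial draw with n=1 yields a 0/1 mask).
	# For example, with noise_level = 0.25, roughly a quarter of the entries of each example
	# are set to 0 before encoding.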
	
	def reconstruction_costs(self, rec):
		if self.input is None:
			return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec)
		if self.auxinput is None:
			return self.reconstruction_cost_function(self.input, rec)
		if not((self.input is None) or (self.auxinput is None)):
			return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
	
	def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
							seed=1, Alloc=True, **init):
		super(DAAig, self)._instance_initialize(obj, **init)
		
		obj.reg_coef = reg_coef
		obj.noise_level = noise_level
		obj.noise_level_group = noise_level_group
		if self.interface:
			obj.lr = lr # if stacked, this is unused (overridden by the sup_lr and unsup_lr of the StackedDAAig module)
		else:
			obj.lr = None
		
		obj.random.initialize()
		if seed is not None:
			obj.random.seed(seed)
		self.R = numpy.random.RandomState(seed)
		
		obj.__hide__ = ['params']
		
		if not(self.input is None):
			self.inf = 1/numpy.sqrt(self.in_size)
		if not(self.auxinput is None):
			self.inf = 1/numpy.sqrt(sum(self.auxin_size))
		if not(self.auxinput is None or self.input is None):
			self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size)
		self.hif = 1/numpy.sqrt(self.n_hid)
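		# Weight initialization scheme used below: encoder weights are drawn uniformly in
		# [-1/sqrt(fan_in), 1/sqrt(fan_in)] (self.inf) and untied decoder weights in
		# [-1/sqrt(n_hid), 1/sqrt(n_hid)] (self.hif); all biases start at zero.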
		
		
		if Alloc:
			if not(self.input is None):
				wencshp = (self.in_size, self.n_hid)
				wdecshp = tuple(reversed(wencshp))
				print 'wencshp = ', wencshp
				print 'wdecshp = ', wdecshp
				
				obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
				if not self.tie_weights:
					obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
				obj.bdec = numpy.zeros(self.in_size)
			
			if not(self.auxinput is None):
				wauxencshp = [(i, self.n_hid) for i in self.auxin_size]
				wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
				print 'wauxencshp = ', wauxencshp
				print 'wauxdecshp = ', wauxdecshp
				
				obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
				if not self.tie_weights:
					obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
				obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
			
			print 'self.inf = ', self.inf
			print 'self.hif = ', self.hif
			
			obj.benc = numpy.zeros(self.n_hid)
			

#-----------------------------------------------------------------------------------------------------------------------

class StackedDAAig(module.Module):
	def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None],
				in_size = None, auxin_size = [None], n_hid = [1],
				regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
				reconstruction_cost_function=cost.cross_entropy,
				n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init):
		
		super(StackedDAAig, self).__init__()
		print '\t**** StackedDAAig.__init__ ****'
		print '\tinput = ', input
		print '\tauxinput = ', auxinput
		print '\tin_size = ', in_size
		print '\tauxin_size = ', auxin_size
		print '\tn_hid = ', n_hid
		
		# save parameters
		self.depth = depth
		self.input = input
		self.auxinput = auxinput
		self.in_size = in_size
		self.auxin_size = auxin_size
		self.n_hid = n_hid
		self.regularize = regularize
		self.tie_weights = tie_weights
		self.hid_fn = hid_fn
		self.reconstruction_cost_function = reconstruction_cost_function
		self.n_out = n_out
		self.target = target if not(target is None) else T.lvector('target')
		self.debugmethod = debugmethod
		self.totalupdatebool = totalupdatebool
		
		# init for model construction
		inputprec = input
		in_sizeprec = in_size
		self.daaig = [None] * (self.depth+1)
		
		#hyper parameters
		self.unsup_lr = T.dscalar('unsup_lr')
		self.sup_lr = T.dscalar('sup_lr')
		
		# updatemethods
		self.localupdate = [None] * (self.depth+1) #update only the parameters of the layer
		self.globalupdate = [None] * (self.depth+1) #update wrt the layer cost, backpropagated down to the input layer
		if self.totalupdatebool:
			self.totalupdate = [None] * (self.depth+1) #update wrt all the layers' costs, backpropagated down to the input layer
		#
		self.classify = None
		
		#others methods
		if self.debugmethod:
			self.representation = [None] * (self.depth)
			self.reconstruction = [None] * (self.depth)
			self.validate = [None] * (self.depth)
			self.noisyinputs = [None] * (self.depth)
			self.compute_localcost = [None] * (self.depth+1)
			self.compute_localgradients = [None] * (self.depth+1)
			self.compute_globalcost = [None] * (self.depth+1)
			self.compute_globalgradients = [None] * (self.depth+1)
			if self.totalupdatebool:
				self.compute_totalcost = [None] * (self.depth+1)
				self.compute_totalgradients = [None] * (self.depth+1)
		#
		
		# some theano Variables we want to keep track on
		if self.regularize:
			self.regularizationenccost = [None] * (self.depth)
		self.localcost = [None] * (self.depth+1)
		self.localgradients = [None] * (self.depth+1)
		self.globalcost = [None] * (self.depth+1)
		self.globalgradients = [None] * (self.depth+1)
		if self.totalupdatebool:
			self.totalcost = [None] * (self.depth+1)
			self.totalgradients = [None] * (self.depth+1)
		
		#params to update and inputs initialization
		paramstot = []
		paramsenc = []
		self.inputs = [None] * (self.depth+1)
		
		if not(self.input is None):
			self.inputs[0] = [self.input]
		else:
			self.inputs[0] = []
		
		offset = 0
		for i in range(self.depth):
			
			if auxin_size[i] is None:
				offset +=1
				param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\
					False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
			else:
				param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
					False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
			
			print '\tLayer init= ', i+1
			self.daaig[i] = DAAig(*param)
			
			# method input, outputs and parameters update
			if i:
				self.inputs[i] = copy.copy(self.inputs[i-1])
			if not(auxin_size[i] is None):
				self.inputs[i] += [self.daaig[i].idx_list,self.auxinput[i-offset]]
			
			noisyout = []
			if not(inputprec is None):
				noisyout += [self.daaig[i].noisy_input]
			if not(auxin_size[i] is None):
				noisyout += [self.daaig[i].noisy_auxinput]
			
			paramstot += self.daaig[i].params
			
			# save the costs
			self.localcost[i] = self.daaig[i].noise.cost
			self.globalcost[i] = self.daaig[i].noise.cost
			if self.totalupdatebool:
				if i:
					self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost
				else:
					self.totalcost[i] = self.daaig[i].noise.cost
			
			if self.regularize:
				if i:
					self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc
				else:
					self.regularizationenccost[i] = 0
				
				self.localcost[i] += self.daaig[i].regularization
				self.globalcost[i] += self.regularizationenccost[i]
				if self.totalupdatebool:
					self.totalcost[i] += self.daaig[i].regularization
			
			self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
			self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
			if self.totalupdatebool:
				self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)
			
			#create the updates dictionaries
			local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i]))
			global_grads = dict((j, j - self.unsup_lr * g)\
					for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i]))
			if self.totalupdatebool:
				total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i]))
			
			# method declaration
			self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads)
			self.globalupdate[i] = theano.Method(self.inputs[i],self.globalcost[i],global_grads)
			if self.totalupdatebool:
				self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads)
			#
			if self.debugmethod:
				self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
				self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
				self.validate[i] = theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
				self.noisyinputs[i] = theano.Method(self.inputs[i], noisyout)
				self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
				self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i])
				self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
				self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i])
				if self.totalupdatebool:
					self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
					self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i])
			#
			
			paramsenc += self.daaig[i].paramsenc
			inputprec = self.daaig[i].clean.hidden
			in_sizeprec = self.n_hid[i]
		
		# supervised layer
		print '\tLayer supervised init'
		self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target]
		self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target)
		paramstot += self.daaig[-1].params
		
		if self.regularize:
			self.localcost[-1] = self.daaig[-1].regularized_cost
			self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1]
		else:
			self.localcost[-1] = self.daaig[-1].unregularized_cost
			self.globalcost[-1] = self.daaig[-1].unregularized_cost
		
		if self.totalupdatebool:
			self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]]
		
		self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params)
		self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc)
		if self.totalupdatebool:
			self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\
					T.grad(self.globalcost[-1], paramstot) ]
		
		local_grads = dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1]))
		global_grads = dict((j, j - self.sup_lr * g)\
				for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1]))
		if self.totalupdatebool:
			total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\
					for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1]))
		
		self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads)
		self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads)
		if self.totalupdatebool:
			self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads)
		self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
		
		if self.debugmethod:
			self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
			self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1])
			self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
			self.compute_globalgradients[-1] = theano.Method(self.inputs[-1],self.globalgradients[-1])
			if self.totalupdatebool:
				self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
				self.compute_totalgradients[-1] =\
						theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
	
	def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
								noise_level = 0 , noise_level_group = 0, seed = 1, Alloc = True,**init):
		super(StackedDAAig, self)._instance_initialize(inst, **init)
		
		inst.unsup_lr = unsup_lr
		inst.sup_lr = sup_lr
		
		for i in range(self.depth):
			print '\tLayer = ', i+1
			inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\
					noise_level_group = noise_level_group, seed = seed, Alloc = Alloc)
		print '\tLayer supervised'
		inst.daaig[-1].initialize()
		inst.daaig[-1].l1 = 0
		inst.daaig[-1].l2 = reg_coef #only the l2 norm is used here, to be consistent with the unsupervised regularization
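
#-----------------------------------------------------------------------------------------------------------------------
# Minimal usage sketch (illustrative only).  It assumes the old theano.compile.module API,
# where Module.make(**kwargs) builds an instance and forwards the keyword arguments to
# _instance_initialize; the sizes, depth and learning rates below are arbitrary placeholders.
if __name__ == '__main__':
	model = StackedDAAig(depth = 2, input = T.dmatrix('input'), auxinput = [None, None],
			in_size = 20, auxin_size = [None, None], n_hid = [10, 5],
			regularize = False, tie_weights = True, totalupdatebool = False)
	inst = model.make(unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
			noise_level = 0.25, noise_level_group = 0, seed = 1, Alloc = True)
	# Given a (batch, 20) numpy array x and an integer label vector y,
	# inst.globalupdate[0](x) would perform one unsupervised update of the first layer,
	# inst.globalupdate[-1](x, y) one supervised update, and inst.classify(x) would
	# return the predicted classes.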