changeset 702:f76079ba8d9a
added a DAAig module and a StackedDAAig module to algorithms.sandbox to deal with possibly missing auxiliary input
author   | Xavier Glorot <glorotxa@iro.umontreal.ca>
date     | Wed, 20 May 2009 13:40:10 -0400
parents  | 113946723973
children | 9078561a7c21
files    | pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat | 1 files changed, 478 insertions(+), 0 deletions(-)
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed May 20 13:40:10 2009 -0400
@@ -0,0 +1,478 @@

"""Denoising auto-encoder modules (DAAig and StackedDAAig) that handle an
auxiliary input, made of several groups, which may be missing."""

import numpy
import theano
import theano.sandbox.softsign  # needed for softsign_act below
import os, copy

from theano import tensor as T
from theano.compile import module
from theano.tensor.nnet import sigmoid

from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
        scanmaskenc, scanmaskdec

from pylearn.algorithms import cost
from pylearn.algorithms.logistic_regression import LogRegN


def lnorm(param, type='l2'):
    if type == 'l1':
        return T.sum(T.abs_(param))
    if type == 'l2':
        return T.sum(param*param)
    raise NotImplementedError('Only l1 and l2 regularization are currently implemented')

def get_reg_cost(params, type):
    rcost = 0
    for param in params:
        rcost += lnorm(param, type)
    return rcost


def sigmoid_act(x):
    return theano.tensor.nnet.sigmoid(x)

def tanh_act(x):
    return theano.tensor.tanh(x)

def softsign_act(x):
    return theano.sandbox.softsign.softsign(x)

class ScratchPad:
    pass

class DAAig(module.Module):
    """De-noising Auto-encoder
    """

    def __init__(self, input = None, auxinput = None,
                 in_size = None, auxin_size = None, n_hid = 1,
                 regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
                 reconstruction_cost_function = cost.cross_entropy, interface = True, **init):
        """
        :param regularize: WRITEME
        :param tie_weights: WRITEME
        :param hid_fn: WRITEME
        :param reconstruction_cost_function: Should return one cost per example (row)
        :todo: Default noise level for all daa levels
        """
        print '\t\t**** DAAig.__init__ ****'
        print '\t\tinput = ', input
        print '\t\tauxinput = ', auxinput
        print '\t\tin_size = ', in_size
        print '\t\tauxin_size = ', auxin_size
        print '\t\tn_hid = ', n_hid

        super(DAAig, self).__init__()
        self.random = T.RandomStreams()

        # MODEL CONFIGURATION
        self.in_size = in_size
        self.auxin_size = auxin_size
        self.n_hid = n_hid
        self.regularize = regularize
        self.tie_weights = tie_weights
        self.reconstruction_cost_function = reconstruction_cost_function
        self.interface = interface

        assert hid_fn in ('sigmoid_act', 'tanh_act', 'softsign_act')
        self.hid_fn = eval(hid_fn)

        ### DECLARE MODEL VARIABLES and defaults
        self.input = input
        self.noisy_input = None
        self.auxinput = auxinput
        self.idx_list = T.ivector('idx_list') if not(self.auxinput is None) else None
        self.noisy_idx_list, self.noisy_auxinput = None, None

        # parameters
        self.benc = T.dvector('benc')
        if not(self.input is None):
            self.wenc = T.dmatrix('wenc')
            self.wdec = self.wenc.T if tie_weights else T.dmatrix('wdec')
            self.bdec = T.dvector('bdec')

        if not(self.auxinput is None):
            self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))]
            self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))]
            self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))]

        # hyper-parameters
        self.lr = T.scalar('lr')
        self.noise_level = T.scalar('noise_level')
        self.noise_level_group = T.scalar('noise_level_group')

        # leave the chance for subclasses to initialize
        if self.__class__ == DAAig:
            self.init_behavioural()
        print '\t\t**** end DAAig.__init__ ****'

    ### BEHAVIOURAL MODEL
    def init_behavioural(self):
        if not(self.input is None):
            self.noisy_input = self.corrupt_input()
        if not(self.auxinput is None):
            self.noisy_idx_list, self.noisy_auxinput = \
                    scannoise(self.idx_list, self.auxinput, self.noise_level, self.noise_level_group)

        self.noise = ScratchPad()
        self.clean = ScratchPad()

        self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput)
        self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput)

        self.define_regularization()  # call before cost
        self.define_cost(self.clean)
        self.define_cost(self.noise)
        self.define_params()
        if self.interface:
            self.define_gradients()
            self.define_interface()

    def define_behavioural(self, container, input, idx_list, auxinput):
        self.define_propup(container, input, idx_list, auxinput)
        container.hidden = self.hid_fn(container.hidden_activation)
        self.define_propdown(container, idx_list, auxinput)
        container.rec = self.hid_fn(container.rec_activation)

    def define_propup(self, container, input, idx_list, auxinput):
        if not(self.input is None):
            container.hidden_activation = self.filter_up(input, self.wenc, self.benc)
            if not(self.auxinput is None):
                container.hidden_activation += scandotenc(idx_list, auxinput, self.wauxenc)
        else:
            if not(self.auxinput is None):
                container.hidden_activation = scandotenc(idx_list, auxinput, self.wauxenc) + self.benc

    # DEPENDENCY: define_propup
    def define_propdown(self, container, idx_list, auxinput):
        if not(self.input is None):
            rec_activation1 = self.filter_down(container.hidden, self.wdec, self.bdec)
        if not(self.auxinput is None):
            rec_activation2 = scandotdec(idx_list, auxinput, container.hidden, self.wauxdec) +\
                    scanbiasdec(idx_list, auxinput, self.bauxdec)

        if not(self.input is None) and not(auxinput is None):
            container.rec_activation = T.join(1, rec_activation1, rec_activation2)
        else:
            if not(self.input is None):
                container.rec_activation = rec_activation1
            else:
                container.rec_activation = rec_activation2

    def filter_up(self, vis, w, b=None):
        out = T.dot(vis, w)
        return out + b if b is not None else out
    filter_down = filter_up

    # TODO: fix regularization type (outside parameter ?)
    def define_regularization(self):
        self.reg_coef = T.scalar('reg_coef')
        if not(self.auxinput is None):
            self.Maskup = scanmaskenc(self.idx_list, self.wauxenc)
            self.Maskdown = scanmaskdec(self.idx_list, self.wauxdec)
            if not(type(self.Maskup) is list):
                self.Maskup = [self.Maskup]
            if not(type(self.Maskdown) is list):
                self.Maskdown = [self.Maskdown]
        listweights = []
        listweightsenc = []
        if not(self.auxinput is None):
            listweights += [w*m for w, m in zip(self.Maskup, self.wauxenc)] + [w*m for w, m in zip(self.Maskdown, self.wauxdec)]
            listweightsenc += [w*m for w, m in zip(self.Maskup, self.wauxenc)]
        if not(self.input is None):
            listweights += [self.wenc, self.wdec]
            listweightsenc += [self.wenc]
        self.regularization = self.reg_coef * get_reg_cost(listweights, 'l2')
        self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc, 'l2')

    # DEPENDENCY: define_behavioural, define_regularization
    def define_cost(self, container):
        container.reconstruction_cost = self.reconstruction_costs(container.rec)
        # TOTAL COST
        container.cost = container.reconstruction_cost
        if self.regularize:  # if stacked, don't merge regularization and cost here but in the StackedDAAig module
            container.cost = container.cost + self.regularization

    # DEPENDENCY: define_cost
    def define_params(self):
        if not hasattr(self, 'params'):
            self.params = []
        self.params += [self.benc]
        if not(self.input is None):
            self.params += [self.wenc] + [self.bdec]
        if not(self.auxinput is None):
            self.params += self.wauxenc + self.bauxdec
        self.paramsenc = self.params
        if not(self.tie_weights):
            if not(self.input is None):
                self.params += [self.wdec]
            if not(self.auxinput is None):
                self.params += self.wauxdec

    # DEPENDENCY: define_cost, define_params
    def define_gradients(self):
        self.gradients = T.grad(self.noise.cost, self.params)
        self.updates = dict((p, p - self.lr * g) for p, g in \
                zip(self.params, self.gradients))

    # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients
    def define_interface(self):
        # declare functions to interface with the module (if not stacked)
        if self.input is None:
            listin = [self.idx_list, self.auxinput]
        if self.auxinput is None:
            listin = [self.input]
        if not((self.input is None) or (self.auxinput is None)):
            listin = [self.input, self.idx_list, self.auxinput]
        self.update = theano.Method(listin, self.noise.cost, self.updates)
        self.compute_cost = theano.Method(listin, self.noise.cost)
        if not(self.input is None):
            self.noisify = theano.Method(listin, self.noisy_input)
        if not(self.auxinput is None):
            self.auxnoisify = theano.Method(listin, self.noisy_auxinput)
        self.reconstruction = theano.Method(listin, self.clean.rec)
        self.representation = theano.Method(listin, self.clean.hidden)
        self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec])

    def corrupt_input(self):
        return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input

    def reconstruction_costs(self, rec):
        if self.input is None:
            return self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), rec)
        if self.auxinput is None:
            return self.reconstruction_cost_function(self.input, rec)
        if not((self.input is None) or (self.auxinput is None)):
            return self.reconstruction_cost_function(T.join(1, self.input, scaninputs(self.idx_list, self.auxinput)), rec)

    def _instance_initialize(self, obj, lr = 1, reg_coef = 0, noise_level = 0, noise_level_group = 0,
                             seed = 1, alloc = True, **init):
        super(DAAig, self)._instance_initialize(obj, **init)

        obj.reg_coef = reg_coef
        obj.noise_level = noise_level
        obj.noise_level_group = noise_level_group
        if self.interface:
            obj.lr = lr  # if stacked, useless (overridden by the sup_lr and unsup_lr of the StackedDAAig module)
        else:
            obj.lr = None

        obj.random.initialize()
        if seed is not None:
            obj.random.seed(seed)
        self.R = numpy.random.RandomState(seed)

        obj.__hide__ = ['params']

        if not(self.input is None):
            self.inf = 1/numpy.sqrt(self.in_size)
        if not(self.auxinput is None):
            self.inf = 1/numpy.sqrt(sum(self.auxin_size))
        if not(self.auxinput is None or self.input is None):
            self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size)
        self.hif = 1/numpy.sqrt(self.n_hid)

        if alloc:
            if not(self.input is None):
                wencshp = (self.in_size, self.n_hid)
                wdecshp = tuple(reversed(wencshp))
                print 'wencshp = ', wencshp
                print 'wdecshp = ', wdecshp

                obj.wenc = self.R.uniform(size=wencshp, low=-self.inf, high=self.inf)
                if not self.tie_weights:
                    obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
                obj.bdec = numpy.zeros(self.in_size)

            if not(self.auxinput is None):
                wauxencshp = [(i, self.n_hid) for i in self.auxin_size]
                wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
                print 'wauxencshp = ', wauxencshp
                print 'wauxdecshp = ', wauxdecshp

                obj.wauxenc = [self.R.uniform(size=i, low=-self.inf, high=self.inf) for i in wauxencshp]
                if not self.tie_weights:
                    obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
                obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]

            print 'self.inf = ', self.inf
            print 'self.hif = ', self.hif

            obj.benc = numpy.zeros(self.n_hid)


#-----------------------------------------------------------------------------------------------------------------------

class StackedDAAig(module.Module):
    def __init__(self, depth = 1, input = None, auxinput = [None],
                 in_size = None, auxin_size = [[None]], n_hid = [1],
                 regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
                 reconstruction_cost_function = cost.cross_entropy,
                 n_out = 2, target = None, **init):

        super(StackedDAAig, self).__init__()
        print '\t**** StackedDAAig.__init__ ****'
        print '\tinput = ', input
        print '\tauxinput = ', auxinput
        print '\tin_size = ', in_size
        print '\tauxin_size = ', auxin_size
        print '\tn_hid = ', n_hid

        # save parameters
        self.depth = depth
        self.input = input
        self.auxinput = auxinput
        self.in_size = in_size
        self.auxin_size = auxin_size
        self.n_hid = n_hid
        self.regularize = regularize
        self.tie_weights = tie_weights
        self.hid_fn = hid_fn
        self.reconstruction_cost_function = reconstruction_cost_function
        self.n_out = n_out
        self.target = target

        # init for model construction
        inputprec = input
        in_sizeprec = in_size
        self.daaig = [None] * (self.depth+1)

        # hyper-parameters
        self.unsup_lr = T.dscalar('unsup_lr')
        self.sup_lr = T.dscalar('sup_lr')

        # methods
        self.localupdate = [None] * (self.depth+1)   # update only the layer parameters
        self.globalupdate = [None] * (self.depth+1)  # update wrt the layer cost, backpropagated down to the input layer
        self.totalupdate = [None] * (self.depth+1)   # update wrt all the layers' costs, backpropagated down to the input layer
        #
        self.representation = [None] * (self.depth+1)
        self.reconstruction = [None] * (self.depth)
        self.compute_localcost = [None] * (self.depth+1)
        self.compute_globalcost = [None] * (self.depth+1)
        self.compute_totalcost = [None] * (self.depth+1)
        self.validate = [None] * (self.depth)
        self.noisyinputs = [None] * (self.depth)
        #
        self.localcost = [None] * (self.depth+1)
        self.globalcost = [None] * (self.depth+1)
        self.totalcost = [None] * (self.depth+1)

        paramstot = []
        paramsenc = []
        self.inputs = [None] * (self.depth+1)

        if not(self.input is None):
            self.inputs[0] = [self.input]
        else:
            self.inputs[0] = []

        offset = 0
        for i in range(self.depth):
            if auxin_size[i] is None:
                offset += 1
                param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\
                        False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function, False]
            else:
                param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
                        False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function, False]
            print '\tLayer init= ', i+1
            self.daaig[i] = DAAig(*param)

            if i:
                self.inputs[i] = copy.copy(self.inputs[i-1])
            if not(auxin_size[i] is None):
                self.inputs[i] += [self.daaig[i].idx_list, self.auxinput[i-offset]]

            paramstot += self.daaig[i].params

            if self.regularize:
                self.localcost[i] = self.daaig[i].noise.cost + self.daaig[i].regularization
                self.globalcost[i] = self.daaig[i].noise.cost + self.daaig[i].regularization
                self.totalcost[i] = self.daaig[i].noise.cost + self.daaig[i].regularization
                for j in range(i):
                    self.globalcost[i] += self.daaig[j].regularizationenc
                    self.totalcost[i] += self.daaig[j].noise.cost + self.daaig[j].regularization
            else:
                self.localcost[i] = self.daaig[i].noise.cost
                self.globalcost[i] = self.daaig[i].noise.cost
                self.totalcost[i] = self.daaig[i].noise.cost
                for j in range(i):
                    self.totalcost[i] += self.daaig[j].noise.cost

            local_grads = dict((j, j - self.unsup_lr * T.grad(self.localcost[i], j))\
                    for j in self.daaig[i].params)
            global_grads = dict((j, j - self.unsup_lr * T.grad(self.globalcost[i], j))\
                    for j in (self.daaig[i].params + paramsenc))
            total_grads = dict((j, j - self.unsup_lr * T.grad(self.totalcost[i], j))\
                    for j in (paramstot))

            self.localupdate[i] = theano.Method(self.inputs[i], self.localcost[i], local_grads)
            self.globalupdate[i] = theano.Method(self.inputs[i], self.globalcost[i], global_grads)
            self.totalupdate[i] = theano.Method(self.inputs[i], self.totalcost[i], total_grads)
            #
            self.representation[i] = theano.Method(self.inputs[i], self.daaig[i].clean.hidden)
            self.reconstruction[i] = theano.Method(self.inputs[i], self.daaig[i].clean.rec)
            self.compute_localcost[i] = theano.Method(self.inputs[i], self.localcost[i])
            self.compute_globalcost[i] = theano.Method(self.inputs[i], self.globalcost[i])
            self.compute_totalcost[i] = theano.Method(self.inputs[i], self.totalcost[i])
            self.validate[i] = theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
            noisyout = []
            if not(inputprec is None):
                noisyout += [self.daaig[i].noisy_input]
            if not(auxin_size[i] is None):
                noisyout += [self.daaig[i].noisy_auxinput]
            self.noisyinputs[i] = theano.Method(self.inputs[i], noisyout)
            #
            paramsenc += self.daaig[i].paramsenc
            inputprec = self.daaig[i].clean.hidden
            in_sizeprec = self.n_hid[i]

        print '\tLayer supervised init'
        self.inputs[-1] = copy.copy(self.inputs[-2]) + [self.target]
        self.daaig[-1] = LogRegN(in_sizeprec, self.n_out, inputprec, self.target)
        paramstot += self.daaig[-1].params

        if self.regularize:
            self.localcost[-1] = self.daaig[-1].regularized_cost
            self.globalcost[-1] = self.daaig[-1].regularized_cost
            for j in range(self.depth):
                self.globalcost[-1] += self.daaig[j].regularizationenc
        else:
            self.localcost[-1] = self.daaig[-1].unregularized_cost
            self.globalcost[-1] = self.daaig[-1].unregularized_cost

        self.totalcost[-1] = self.totalcost[-2] + self.localcost[-1]

        local_grads = dict((j, j - self.sup_lr * T.grad(self.localcost[-1], j))\
                for j in self.daaig[-1].params)
        global_grads = dict((j, j - self.sup_lr * T.grad(self.globalcost[-1], j))\
                for j in (self.daaig[-1].params + paramsenc))
        total_grads = dict((j, j - \
                (self.unsup_lr * T.grad(self.totalcost[-2], j) + self.sup_lr * T.grad(self.globalcost[-1], j)))\
                for j in paramstot)

        self.localupdate[-1] = theano.Method(self.inputs[-1], self.localcost[-1], local_grads)
        self.globalupdate[-1] = theano.Method(self.inputs[-1], self.globalcost[-1], global_grads)
        self.totalupdate[-1] = theano.Method(self.inputs[-1], self.totalcost[-1], total_grads)
        self.compute_localcost[-1] = theano.Method(self.inputs[-1], self.localcost[-1])
        self.compute_globalcost[-1] = theano.Method(self.inputs[-1], self.globalcost[-1])
        self.compute_totalcost[-1] = theano.Method(self.inputs[-1], self.totalcost[-1])
        self.representation[-1] = theano.Method(self.inputs[-2], self.daaig[-1].argmax_standalone)

    def _instance_initialize(self, inst, unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
                             noise_level = 0, noise_level_group = 0, seed = 1, Alloc = True, **init):
        super(StackedDAAig, self)._instance_initialize(inst, **init)

        inst.unsup_lr = unsup_lr
        inst.sup_lr = sup_lr

        for i in range(self.depth):
            print '\tLayer = ', i+1
            inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\
                    noise_level_group = noise_level_group, seed = seed, alloc = Alloc)
        print '\tLayer supervised'
        inst.daaig[-1].initialize()
        inst.daaig[-1].l1 = 0
        inst.daaig[-1].l2 = reg_coef  # only l2 norm for regularization, to be consistent with the unsup regularization
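
For context, here is a minimal usage sketch (not part of this changeset) of how the new StackedDAAig module might be driven when no auxiliary input is available, which is exactly the case the module is meant to tolerate. It assumes the 2009-era Theano module system, where make() is assumed to compile a module.Module and forward its keyword arguments to _instance_initialize; the variable names, layer sizes, and toy data below are illustrative assumptions, not an API guaranteed by this revision.

    # Hypothetical usage sketch of StackedDAAig without auxiliary input
    # (auxinput/auxin_size left "missing"); names and sizes are made up.
    import numpy
    import theano.tensor as T
    from pylearn.algorithms.sandbox.DAA_inputs_groups import StackedDAAig

    x = T.dmatrix('x')   # dense input block
    y = T.lvector('y')   # class targets for the supervised top layer

    model = StackedDAAig(depth=2, input=x, auxinput=[None, None],
                         in_size=20, auxin_size=[None, None], n_hid=[15, 10],
                         regularize=False, tie_weights=False,
                         n_out=2, target=y)

    # Compile and initialize an instance (kwargs assumed to reach
    # _instance_initialize through the old module-system make()).
    inst = model.make(unsup_lr=0.1, sup_lr=0.01, noise_level=0.3, seed=1)

    data = numpy.random.rand(5, 20)           # toy minibatch
    labels = numpy.random.randint(0, 2, 5)    # toy targets

    # Greedy unsupervised updates, one Method per layer.
    c0 = inst.localupdate[0](data)
    c1 = inst.localupdate[1](data)

    # Supervised update of the top logistic-regression layer (inputs + target).
    csup = inst.globalupdate[-1](data, labels)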