view pylearn/algorithms/sandbox/DAA_inputs_groups.py @ 714:8d5d42274bd1
improved readability DAA_inputs_groups and scan_inputs_groups
| author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
|---|---|
| date | Fri, 22 May 2009 15:14:34 -0400 |
| parents | a268c5ea0db4 |
| children | 573e3370d0fa |
import numpy
import theano
import os, copy

from theano import tensor as T
from theano.compile import module
from theano.tensor.nnet import sigmoid

from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
        scanmaskenc, scanmaskdec
from pylearn.algorithms import cost
from pylearn.algorithms.logistic_regression import LogRegN


def lnorm(param, type='l2'):
    if type == 'l1':
        return T.sum(T.abs_(param))
    if type == 'l2':
        return T.sum(T.pow(param, 2))
    raise NotImplementedError('Only l1 and l2 regularization are currently implemented')

def get_reg_cost(params, type):
    rcost = 0
    for param in params:
        rcost += lnorm(param, type)
    return rcost

def sigmoid_act(x):
    return theano.tensor.nnet.sigmoid(x)

def tanh_act(x):
    return theano.tensor.tanh(x)

def softsign_act(x):
    return theano.sandbox.softsign.softsign(x)

class ScratchPad:
    pass

class DAAig(module.Module):
    """De-noising Auto-encoder
    """

    def __init__(self, input=None, auxinput=None,
                 in_size=None, auxin_size=None, n_hid=1,
                 regularize=False, tie_weights=False, hid_fn='sigmoid_act',
                 reconstruction_cost_function=cost.cross_entropy, interface=True, **init):
        """
        :param regularize: WRITEME
        :param tie_weights: WRITEME
        :param hid_fn: WRITEME
        :param reconstruction_cost: Should return one cost per example (row)
        :todo: Default noise level for all daa levels
        """
        print '\t\t**** DAAig.__init__ ****'
        print '\t\tinput = ', input
        print '\t\tauxinput = ', auxinput
        print '\t\tin_size = ', in_size
        print '\t\tauxin_size = ', auxin_size
        print '\t\tn_hid = ', n_hid

        super(DAAig, self).__init__()
        self.random = T.RandomStreams()

        # MODEL CONFIGURATION
        self.in_size = in_size
        self.auxin_size = auxin_size
        self.n_hid = n_hid
        self.regularize = regularize
        self.tie_weights = tie_weights
        self.reconstruction_cost_function = reconstruction_cost_function
        self.interface = interface

        assert hid_fn in ('sigmoid_act', 'tanh_act', 'softsign_act')
        self.hid_fn = eval(hid_fn)

        ### DECLARE MODEL VARIABLES and default
        self.input = input
        self.noisy_input = None
        self.auxinput = auxinput
        self.idx_list = T.ivector('idx_list') if self.auxinput is not None else None
        self.noisy_idx_list, self.noisy_auxinput = None, None

        # parameters
        self.benc = T.dvector('benc')
        if self.input is not None:
            self.wenc = T.dmatrix('wenc')
            self.wdec = self.wenc.T if tie_weights else T.dmatrix('wdec')
            self.bdec = T.dvector('bdec')

        if self.auxinput is not None:
            self.wauxenc = [T.dmatrix('wauxenc%s' % i) for i in range(len(auxin_size))]
            self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s' % i)
                            for i in range(len(auxin_size))]
            self.bauxdec = [T.dvector('bauxdec%s' % i) for i in range(len(auxin_size))]

        # hyper-parameters
        if self.interface:
            self.lr = T.scalar('lr')
        self.noise_level = T.scalar('noise_level')
        self.noise_level_group = T.scalar('noise_level_group')

        # leave the chance for subclasses to initialize
        if self.__class__ == DAAig:
            self.init_behavioural()
        print '\t\t**** end DAAig.__init__ ****'

    ### BEHAVIOURAL MODEL
    def init_behavioural(self):
        if self.input is not None:
            self.noisy_input = self.corrupt_input()
        if self.auxinput is not None:
            self.noisy_idx_list, self.noisy_auxinput = \
                    scannoise(self.idx_list, self.auxinput, self.noise_level, self.noise_level_group)

        self.noise = ScratchPad()
        self.clean = ScratchPad()

        self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput)
        self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput)

        self.define_regularization()  # call before cost
        self.define_cost(self.clean)
        self.define_cost(self.noise)
        self.define_params()
        if self.interface:
            self.define_gradients()
            self.define_interface()

    def define_behavioural(self, container, input, idx_list, auxinput):
        self.define_propup(container, input, idx_list, auxinput)
        container.hidden = self.hid_fn(container.hidden_activation)
        self.define_propdown(container, idx_list, auxinput)
        container.rec = self.hid_fn(container.rec_activation)

    def define_propup(self, container, input, idx_list, auxinput):
        if self.input is not None:
            container.hidden_activation = self.filter_up(input, self.wenc, self.benc)
            if self.auxinput is not None:
                container.hidden_activation += scandotenc(idx_list, auxinput, self.wauxenc)
        else:
            if self.auxinput is not None:
                container.hidden_activation = scandotenc(idx_list, auxinput, self.wauxenc) + self.benc

    # DEPENDENCY: define_propup
    def define_propdown(self, container, idx_list, auxinput):
        if self.input is not None:
            rec_activation1 = self.filter_down(container.hidden, self.wdec, self.bdec)
        if self.auxinput is not None:
            rec_activation2 = scandotdec(idx_list, auxinput, container.hidden, self.wauxdec) +\
                    scanbiasdec(idx_list, auxinput, self.bauxdec)

        if (self.input is not None) and (self.auxinput is not None):
            container.rec_activation = T.join(1, rec_activation1, rec_activation2)
        else:
            if self.input is not None:
                container.rec_activation = rec_activation1
            else:
                container.rec_activation = rec_activation2

    def filter_up(self, vis, w, b=None):
        out = T.dot(vis, w)
        return out + b if b is not None else out
    filter_down = filter_up

    # TODO: fix regularization type (outside parameter ?)
    def define_regularization(self):
        self.reg_coef = T.scalar('reg_coef')
        if self.auxinput is not None:
            self.Maskup = scanmaskenc(self.idx_list, self.wauxenc)
            self.Maskdown = scanmaskdec(self.idx_list, self.wauxdec)
            if type(self.Maskup) is not list:
                self.Maskup = [self.Maskup]
            if type(self.Maskdown) is not list:
                self.Maskdown = [self.Maskdown]
        listweights = []
        listweightsenc = []
        if self.auxinput is not None:
            listweights += [w*m for w, m in zip(self.Maskup, self.wauxenc)] + \
                    [w*m for w, m in zip(self.Maskdown, self.wauxdec)]
            listweightsenc += [w*m for w, m in zip(self.Maskup, self.wauxenc)]
        if self.input is not None:
            listweights += [self.wenc, self.wdec]
            listweightsenc += [self.wenc]
        self.regularization = self.reg_coef * get_reg_cost(listweights, 'l2')
        self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc, 'l2')

    # DEPENDENCY: define_behavioural, define_regularization
    def define_cost(self, container):
        container.reconstruction_cost = self.reconstruction_costs(container.rec)
        # TOTAL COST
        container.cost = container.reconstruction_cost
        if self.regularize:
            # if stacked, don't merge regularization and cost here but in the StackedDAAig module
            container.cost = container.cost + self.regularization

    # DEPENDENCY: define_cost
    def define_params(self):
        if not hasattr(self, 'params'):
            self.params = []
        self.params += [self.benc]
        self.paramsenc = copy.copy(self.params)
        if self.input is not None:
            self.params += [self.wenc] + [self.bdec]
            self.paramsenc += [self.wenc]
        if self.auxinput is not None:
            self.params += self.wauxenc + self.bauxdec
            self.paramsenc += self.wauxenc
        if not(self.tie_weights):
            if self.input is not None:
                self.params += [self.wdec]
            if self.auxinput is not None:
                self.params += self.wauxdec

    # DEPENDENCY: define_cost, define_gradients
    def define_gradients(self):
        self.gradients = T.grad(self.noise.cost, self.params)
        self.updates = dict((p, p - self.lr * g) for p, g in \
                zip(self.params, self.gradients))

    # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients
    def define_interface(self):
        # declare functions to interface with the module (if not stacked)
        if self.input is None:
            listin = [self.idx_list, self.auxinput]
        if self.auxinput is None:
            listin = [self.input]
        if (self.input is not None) and (self.auxinput is not None):
            listin = [self.input, self.idx_list, self.auxinput]
        self.update = theano.Method(listin, self.noise.cost, self.updates)
        self.compute_cost = theano.Method(listin, self.noise.cost)
        if self.input is not None:
            self.noisify = theano.Method(listin, self.noisy_input)
        if self.auxinput is not None:
            self.auxnoisify = theano.Method(listin, self.noisy_auxinput)
        self.reconstruction = theano.Method(listin, self.clean.rec)
        self.representation = theano.Method(listin, self.clean.hidden)
        self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec])

    def corrupt_input(self):
        return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input

    def reconstruction_costs(self, rec):
        if (self.input is not None) and (self.auxinput is not None):
            return self.reconstruction_cost_function(T.join(1, self.input,
                    scaninputs(self.idx_list, self.auxinput)), rec)
        if self.input is not None:
            return self.reconstruction_cost_function(self.input, rec)
        if self.auxinput is not None:
            return self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), rec)
        # All cases should be covered above. If not, something is wrong!
        assert False

    def _instance_initialize(self, obj, lr=1, reg_coef=0, noise_level=0,
                             noise_level_group=0, seed=1, alloc=True, **init):
        super(DAAig, self)._instance_initialize(obj, **init)

        obj.reg_coef = reg_coef
        obj.noise_level = noise_level
        obj.noise_level_group = noise_level_group
        if self.interface:
            obj.lr = lr  # if stacked, useless (overridden by the sup_lr and unsup_lr of the StackedDAAig module)
        else:
            obj.lr = None

        obj.random.initialize()
        if seed is not None:
            obj.random.seed(seed)
        self.R = numpy.random.RandomState(seed)

        obj.__hide__ = ['params']

        if self.input is not None:
            self.inf = 1/numpy.sqrt(self.in_size)
        if self.auxinput is not None:
            self.inf = 1/numpy.sqrt(sum(self.auxin_size))
        if (self.auxinput is not None) and (self.input is not None):
            self.inf = 1/numpy.sqrt(sum(self.auxin_size) + self.in_size)
        self.hif = 1/numpy.sqrt(self.n_hid)

        if alloc:
            if self.input is not None:
                wencshp = (self.in_size, self.n_hid)
                wdecshp = tuple(reversed(wencshp))
                print 'wencshp = ', wencshp
                print 'wdecshp = ', wdecshp

                obj.wenc = self.R.uniform(size=wencshp, low=-self.inf, high=self.inf)
                if not(self.tie_weights):
                    obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
                obj.bdec = numpy.zeros(self.in_size)

            if self.auxinput is not None:
                wauxencshp = [(i, self.n_hid) for i in self.auxin_size]
                wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
                print 'wauxencshp = ', wauxencshp
                print 'wauxdecshp = ', wauxdecshp

                obj.wauxenc = [self.R.uniform(size=i, low=-self.inf, high=self.inf) for i in wauxencshp]
                if not(self.tie_weights):
                    obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
                obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]

            print 'self.inf = ', self.inf
            print 'self.hif = ', self.hif

            obj.benc = numpy.zeros(self.n_hid)

#-----------------------------------------------------------------------------------------------------------------------

class StackedDAAig(module.Module):
    def __init__(self, depth=1, input=T.dmatrix('input'), auxinput=[None],
                 in_size=None, auxin_size=[None], n_hid=[1],
                 regularize=False, tie_weights=False, hid_fn='sigmoid_act',
                 reconstruction_cost_function=cost.cross_entropy, n_out=2, target=None,
                 debugmethod=False, totalupdatebool=False, **init):

        super(StackedDAAig, self).__init__()
        print '\t**** StackedDAAig.__init__ ****'
        print '\tinput = ', input
        print '\tauxinput = ', auxinput
        print '\tin_size = ', in_size
        print '\tauxin_size = ', auxin_size
        print '\tn_hid = ', n_hid

        # save parameters
        self.depth = depth
        self.input = input
        self.auxinput = auxinput
        self.in_size = in_size
        self.auxin_size = auxin_size
        self.n_hid = n_hid
        self.regularize = regularize
        self.tie_weights = tie_weights
        self.hid_fn = hid_fn
        self.reconstruction_cost_function = reconstruction_cost_function
        self.n_out = n_out
        self.target = target if target is not None else T.lvector('target')
        self.debugmethod = debugmethod
        self.totalupdatebool = totalupdatebool

        # init for model construction
        inputprec = input
        in_sizeprec = in_size
        self.daaig = [None] * (self.depth+1)

        # hyper parameters
        self.unsup_lr = T.dscalar('unsup_lr')
        self.sup_lr = T.dscalar('sup_lr')

        # update methods
        self.localupdate = [None] * (self.depth+1)  # update only the layer parameters
        self.globalupdate = [None] * (self.depth+1) # update wrt the layer cost backproped until the input layer
        if self.totalupdatebool:
            self.totalupdate = [None] * (self.depth+1) # update wrt all the layers cost backproped until the input layer
        #
        self.classify = None

        # other methods
        if self.debugmethod:
            self.representation = [None] * (self.depth)
            self.reconstruction = [None] * (self.depth)
            self.validate = [None] * (self.depth)
            self.noisyinputs = [None] * (self.depth)
            self.compute_localcost = [None] * (self.depth+1)
            self.compute_localgradients = [None] * (self.depth+1)
            self.compute_globalcost = [None] * (self.depth+1)
            self.compute_globalgradients = [None] * (self.depth+1)
            if self.totalupdatebool:
                self.compute_totalcost = [None] * (self.depth+1)
                self.compute_totalgradients = [None] * (self.depth+1)
        #

        # some theano Variables we want to keep track of
        if self.regularize:
            self.regularizationenccost = [None] * (self.depth)
        self.localcost = [None] * (self.depth+1)
        self.localgradients = [None] * (self.depth+1)
        self.globalcost = [None] * (self.depth+1)
        self.globalgradients = [None] * (self.depth+1)
        if self.totalupdatebool:
            self.totalcost = [None] * (self.depth+1)
            self.totalgradients = [None] * (self.depth+1)

        # params to update and inputs initialization
        paramstot = []
        paramsenc = []
        self.inputs = [None] * (self.depth+1)

        if self.input is not None:
            self.inputs[0] = [self.input]
        else:
            self.inputs[0] = []

        offset = 0
        for i in range(self.depth):
            if auxin_size[i] is None:
                offset += 1
                param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\
                        False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function, False]
            else:
                param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
                        False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function, False]

            print '\tLayer init= ', i+1
            self.daaig[i] = DAAig(*param)

            # method inputs, outputs and parameters update
            if i:
                self.inputs[i] = copy.copy(self.inputs[i-1])
            if auxin_size[i] is not None:
                self.inputs[i] += [self.daaig[i].idx_list, self.auxinput[i-offset]]

            noisyout = []
            if inputprec is not None:
                noisyout += [self.daaig[i].noisy_input]
            if auxin_size[i] is not None:
                noisyout += [self.daaig[i].noisy_auxinput]

            paramstot += self.daaig[i].params

            # save the costs
            self.localcost[i] = self.daaig[i].noise.cost
            self.globalcost[i] = self.daaig[i].noise.cost
            if self.totalupdatebool:
                if i:
                    self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost
                else:
                    self.totalcost[i] = self.daaig[i].noise.cost

            if self.regularize:
                if i:
                    self.regularizationenccost[i] = self.regularizationenccost[i-1] + self.daaig[i-1].regularizationenc
                else:
                    self.regularizationenccost[i] = 0
                self.localcost[i] += self.daaig[i].regularization
                self.globalcost[i] += self.regularizationenccost[i]
                if self.totalupdatebool:
                    self.totalcost[i] += self.daaig[i].regularization

            self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
            self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
            if self.totalupdatebool:
                self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)

            # create the updates dictionaries
            local_grads = dict((j, j - self.unsup_lr * g) for j, g in zip(self.daaig[i].params, self.localgradients[i]))
            global_grads = dict((j, j - self.unsup_lr * g)\
                    for j, g in zip(self.daaig[i].params+paramsenc, self.globalgradients[i]))
            if self.totalupdatebool:
                total_grads = dict((j, j - self.unsup_lr * g) for j, g in zip(paramstot, self.totalgradients[i]))

            # method declaration
            self.localupdate[i] = theano.Method(self.inputs[i], self.localcost[i], local_grads)
            self.globalupdate[i] = theano.Method(self.inputs[i], self.globalcost[i], global_grads)
            if self.totalupdatebool:
                self.totalupdate[i] = theano.Method(self.inputs[i], self.totalcost[i], total_grads)
            #
            if self.debugmethod:
                self.representation[i] = theano.Method(self.inputs[i], self.daaig[i].clean.hidden)
                self.reconstruction[i] = theano.Method(self.inputs[i], self.daaig[i].clean.rec)
                self.validate[i] = theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
                self.noisyinputs[i] = theano.Method(self.inputs[i], noisyout)
                self.compute_localcost[i] = theano.Method(self.inputs[i], self.localcost[i])
                self.compute_localgradients[i] = theano.Method(self.inputs[i], self.localgradients[i])
                self.compute_globalcost[i] = theano.Method(self.inputs[i], self.globalcost[i])
                self.compute_globalgradients[i] = theano.Method(self.inputs[i], self.globalgradients[i])
                if self.totalupdatebool:
                    self.compute_totalcost[i] = theano.Method(self.inputs[i], self.totalcost[i])
                    self.compute_totalgradients[i] = theano.Method(self.inputs[i], self.totalgradients[i])
            #

            paramsenc += self.daaig[i].paramsenc
            inputprec = self.daaig[i].clean.hidden
            in_sizeprec = self.n_hid[i]

        # supervised layer
        print '\tLayer supervised init'
        self.inputs[-1] = copy.copy(self.inputs[-2]) + [self.target]
        self.daaig[-1] = LogRegN(in_sizeprec, self.n_out, inputprec, self.target)
        paramstot += self.daaig[-1].params

        if self.regularize:
            self.localcost[-1] = self.daaig[-1].regularized_cost
            self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1]
        else:
            self.localcost[-1] = self.daaig[-1].unregularized_cost
            self.globalcost[-1] = self.daaig[-1].unregularized_cost

        if self.totalupdatebool:
            self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]]

        self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params)
        self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc)
        if self.totalupdatebool:
            self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot),\
                    T.grad(self.globalcost[-1], paramstot)]

        local_grads = dict((j, j - self.unsup_lr * g) for j, g in zip(self.daaig[-1].params, self.localgradients[-1]))
        global_grads = dict((j, j - self.unsup_lr * g)\
                for j, g in zip(self.daaig[-1].params+paramsenc, self.globalgradients[-1]))
        if self.totalupdatebool:
            total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\
                    for j, g1, g2 in zip(paramstot, self.totalgradients[-1][0], self.totalgradients[-1][1]))

        self.localupdate[-1] = theano.Method(self.inputs[-1], self.localcost[-1], local_grads)
        self.globalupdate[-1] = theano.Method(self.inputs[-1], self.globalcost[-1], global_grads)
        if self.totalupdatebool:
            self.totalupdate[-1] = theano.Method(self.inputs[-1], self.totalcost[-1], total_grads)
        self.classify = theano.Method(self.inputs[-2], self.daaig[-1].argmax_standalone)

        if self.debugmethod:
            self.compute_localcost[-1] = theano.Method(self.inputs[-1], self.localcost[-1])
            self.compute_localgradients[-1] = theano.Method(self.inputs[-1], self.localgradients[-1])
            self.compute_globalcost[-1] = theano.Method(self.inputs[-1], self.globalcost[-1])
            self.compute_globalgradients[-1] = theano.Method(self.inputs[-1], self.globalgradients[-1])
            if self.totalupdatebool:
                self.compute_totalcost[-1] = theano.Method(self.inputs[-1], self.totalcost[-1])
                self.compute_totalgradients[-1] =\
                        theano.Method(self.inputs[-1], self.totalgradients[-1][0]+self.totalgradients[-1][1])

    def _instance_initialize(self, inst, unsup_lr=0.1, sup_lr=0.01, reg_coef=0,
                             noise_level=0, noise_level_group=0, seed=1, alloc=True, **init):
        super(StackedDAAig, self)._instance_initialize(inst, **init)

        inst.unsup_lr = unsup_lr
        inst.sup_lr = sup_lr

        for i in range(self.depth):
            print '\tLayer = ', i+1
            inst.daaig[i].initialize(reg_coef=reg_coef, noise_level=noise_level,\
                    noise_level_group=noise_level_group, seed=seed, alloc=alloc)

        print '\tLayer supervised'
        inst.daaig[-1].initialize()
        inst.daaig[-1].l1 = 0
        inst.daaig[-1].l2 = reg_coef  # only l2 norm for regularisation, to be consistent with the unsup regularisation
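
Below is a minimal usage sketch (not part of the original file). It assumes the old theano.compile.module framework, where Module.make() compiles the module and forwards its keyword arguments to _instance_initialize, and where list-of-Method attributes such as globalupdate, as well as the classify Method, become callables on the compiled instance. All sizes, learning rates and data are made up for illustration.

import numpy
from theano import tensor as T
from pylearn.algorithms.sandbox.DAA_inputs_groups import StackedDAAig

# Build a 2-layer stacked DAA on a purely dense input (no auxiliary input groups).
x = T.dmatrix('x')
model = StackedDAAig(depth=2, input=x, auxinput=[None, None],
                     in_size=20, auxin_size=[None, None], n_hid=[10, 10],
                     regularize=False, tie_weights=True, n_out=2)

# Compile and initialize (assumption: make() forwards kwargs to _instance_initialize).
inst = model.make(unsup_lr=0.05, sup_lr=0.01, noise_level=0.3, seed=1, alloc=True)

data = numpy.random.rand(5, 20)                  # 5 examples, 20 features (hypothetical)
labels = numpy.random.randint(0, 2, size=5)      # hypothetical class targets

for i in range(model.depth):                     # greedy layer-wise unsupervised updates
    inst.globalupdate[i](data)
inst.globalupdate[-1](data, labels)              # one supervised update of the top layer
predictions = inst.classify(data)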