changeset 796:ef749d03d055
added separate gradient computation for auxiliary and normal inputs
| author | Xavier Glorot <glorotxa@iro.umontreal.ca> |
|---|---|
| date | Mon, 20 Jul 2009 13:04:18 -0400 |
| parents | f30bb746f279 |
| children | cc94cdd48d85 |
| files | pylearn/algorithms/sandbox/DAA_inputs_groups.py |
| diffstat | 1 files changed, 100 insertions(+), 20 deletions(-) |
```diff
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Wed Jul 15 13:19:56 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Mon Jul 20 13:04:18 2009 -0400
@@ -28,6 +28,27 @@
     file_handle.close()
     return rval
 
+# Weight initialisation utils--------------------------------------
+# time consuming but just a test
+def orthogonalinit(W,axis=1):
+    nb = W.shape[axis]
+    bn = W.shape[0] if axis is 1 else W.shape[1]
+    if axis == 0:
+        W=W.T
+    Worto = copy.copy(W)
+    offset=0
+    tmp=[]
+    for i in range(nb):
+        if i==bn:
+            offset=offset+bn
+        if i-offset != 0:
+            for j in xrange(offset,i):
+                orthoproj = (Worto[:,i]*Worto[:,j]).sum()*Worto[:,j]/(Worto[:,j]*Worto[:,j]).sum()
+                orthoproj.shape=(bn,1)
+                Worto[:,i:i+1] = Worto[:,i:i+1] - orthoproj
+        Worto[:,i:i+1] = Worto[:,i:i+1] / \
+                numpy.sqrt((Worto[:,i:i+1]*Worto[:,i:i+1]).sum(0)) * numpy.sqrt((W[:,i:i+1]*W[:,i:i+1]).sum(0))
+    return Worto if axis == 1 else Worto.T
 
 # Initialize containers:
 class CreateContainer:
@@ -195,7 +216,7 @@
         self.noise_level = T.scalar('noise_level')
         self.noise_level_group = T.scalar('noise_level_group')
 
-        # leave the chance for subclasses to initialize
+        # leave the chance for subclasses to initialize (example convolutionnal to implement)
         if self.__class__ == DAAig:
             self.init_behavioural()
         print '\t\t**** end DAAig.__init__ ****'
@@ -354,7 +375,9 @@
         self.noisify = theano.Method(listin, self.noisy_input)
         if self.auxinput is not None:
             self.auxnoisify = theano.Method(listin, self.noisy_auxinput)
+        self.recactivation = theano.Method(listin, self.clean.rec_activation)
         self.reconstruction = theano.Method(listin, self.clean.rec)
+        self.activation = theano.Method(listin, self.clean.hidden_activation)
         self.representation = theano.Method(listin, self.clean.hidden)
 
     def corrupt_input(self):
@@ -373,7 +396,7 @@
         return mask * self.input
 
     def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
-            seed=1, alloc=True, **init):
+            seed=1, orthoinit = False, alloc=True, **init):
         super(DAAig, self)._instance_initialize(obj, **init)
 
         obj.reg_coef = reg_coef
@@ -405,9 +428,14 @@
             wdecshp = tuple(reversed(wencshp))
             print 'wencshp = ', wencshp
             print 'wdecshp = ', wdecshp
-            obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
-            if not(self.tie_weights):
-                obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
+            if not orthoinit:
+                obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)
+                if not(self.tie_weights):
+                    obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif)
+            else:
+                obj.wenc = orthogonalinit(self.R.uniform(size=wencshp, low = -self.inf, high = self.inf))
+                if not(self.tie_weights):
+                    obj.wdec = orthogonalinit(self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif),0)
             obj.bdec = numpy.zeros(self.in_size)
 
             if self.auxinput is not None:
@@ -415,9 +443,16 @@
                 wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
                 print 'wauxencshp = ', wauxencshp
                 print 'wauxdecshp = ', wauxdecshp
-                obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
-                if not(self.tie_weights):
-                    obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
+                if not orthoinit:
+                    obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
+                    if not(self.tie_weights):
+                        obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
+                else:
+                    obj.wauxenc = [orthogonalinit(self.R.uniform(size=i, low = -self.inf, high = self.inf)) \
+                            for i in wauxencshp]
+                    if not(self.tie_weights):
+                        obj.wauxdec = [orthogonalinit(self.R.uniform(size=i, low=-self.hif, high=self.hif),0) \
+                                for i in wauxdecshp]
                 obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
 
             print 'self.inf = ', self.inf
@@ -500,13 +535,17 @@
 
         # facultative methods
         if self.debugmethod:
+            self.activation = [None] * (self.depth)
             self.representation = [None] * (self.depth)
+            self.recactivation = [None] * (self.depth)
             self.reconstruction = [None] * (self.depth)
             self.noisyinputs = [None] * (self.depth)
             self.compute_localcost = [None] * (self.depth+1)
             self.compute_localgradients = [None] * (self.depth+1)
             self.compute_globalcost = [None] * (self.depth+1)
             self.compute_globalgradients = [None] * (self.depth+1)
+            self.compute_localgradients_in = [None] * (self.depth)
+            self.compute_localgradients_aux = [None] * (self.depth)
             if self.totalupdatebool:
                 self.compute_totalcost = [None] * (self.depth+1)
                 self.compute_totalgradients = [None] * (self.depth+1)
@@ -516,6 +555,8 @@
         self.regularizationenccost = [None] * (self.depth)
         self.localcost = [None] * (self.depth+1)
         self.localgradients = [None] * (self.depth+1)
+        self.localgradients_in = [None] * (self.depth)
+        self.localgradients_aux = [None] * (self.depth)
         self.globalcost = [None] * (self.depth+1)
         self.globalgradients = [None] * (self.depth+1)
         if self.totalupdatebool:
@@ -582,6 +623,10 @@
                 self.totalcost[i] += self.daaig[i].regularization
 
             self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
+            self.localgradients_in[i] = T.grad(self.daaig[i].noise.reconstruction_cost_in, self.daaig[i].params) \
+                    if inputprec is not None else T.constant(0)
+            self.localgradients_aux[i] = T.grad(self.daaig[i].noise.reconstruction_cost_aux,self.daaig[i].params) \
+                    if auxin_size[i] is not None else T.constant(0)
            self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
             if self.totalupdatebool:
                 self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)
@@ -600,11 +645,15 @@
                 self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads)
 
             if self.debugmethod:
-                self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden_activation)
+                self.activation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden_activation)
+                self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
+                self.recactivation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec_activation)
                 self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
                 self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
                 self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
                 self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i])
+                self.compute_localgradients_in[i] = theano.Method(self.inputs[i],self.localgradients_in[i])
+                self.compute_localgradients_aux[i] = theano.Method(self.inputs[i],self.localgradients_aux[i])
                 self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
                 self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i])
                 if self.totalupdatebool:
@@ -648,13 +697,14 @@
         self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads)
         if self.totalupdatebool:
             self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads)
-
-        totallocal_grads={}
-        for k in range(self.depth):
-            totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in
-                zip(self.daaig[k].params,self.localgradients[k])))
-        totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])))
-        self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads)
+        # total update of each local cost [no global cost backpropagated]
+        totallocal_grads={}
+        for k in range(self.depth):
+            totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in \
+                    zip(self.daaig[k].params,self.localgradients[k])))
+        totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in\
+                zip(self.daaig[-1].params,self.localgradients[-1])))
+        self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads)
 
         # interface for the user
         self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
@@ -671,7 +721,7 @@
                     theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
 
     def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0,
-            noise_level = 0 , noise_level_group = 0, seed = 1, alloc = True,**init):
+            noise_level = 0 , noise_level_group = 0, seed = 1, orthoinit = False, alloc = True,**init):
         super(StackedDAAig, self)._instance_initialize(inst, **init)
 
         inst.unsup_lr = unsup_lr
@@ -680,14 +730,18 @@
         for i in range(self.depth):
             print '\tLayer = ', i+1
             inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\
-                    noise_level_group = noise_level_group, seed = seed + i, alloc = alloc)
+                    noise_level_group = noise_level_group, seed = seed + i, orthoinit = orthoinit, alloc = alloc)
         print '\tLayer supervised'
         inst.daaig[-1].initialize()
 
         if alloc:
             inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth)
             # init the logreg weights
-            inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
-                    low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+            if not orthoinit:
+                inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
+                        low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))
+            else:
+                inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\
+                        low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)))
         inst.daaig[-1].l1 = 0
         inst.daaig[-1].l2 = reg_coef #only l2 norm for regularisation to be consitent with the unsup regularisation
@@ -755,4 +809,30 @@
             scannoise.R.rand.seed(seed)
         for i in range(self.depth):
             inst.daaig[i].random.seed(seed+i+1)
+
+    def _instance_unsupupdate(self,inst,data,layer='all',typeup = 'local',printcost = False):
+        cost = [None]*self.depth
+        if typeup is 'totallocal':
+            cost[-1] = inst.totallocalupdate(*data[i])
+        else:
+            if typeup is 'total':
+                if layer is 'all':
+                    cost[-1] = inst.totalupdate[-1](*data[i])
+                else:
+                    cost[layer] = inst.totalupdate[layer](*data[i])
+            else:
+                if layer is 'all':
+                    for i in range(self.depth):
+                        if typeup == 'local':
+                            cost[i] = inst.localupdate[i](*data[i])
+                        if typeup == 'global':
+                            cost[i] = inst.globalupdate[i](*data[i])
+                else:
+                    if typeup == 'local':
+                        cost[layer] = inst.localupdate[i](*data)
+                    if typeup == 'global':
+                        cost[layer] = inst.globalupdate[i](*data)
+        if printcost:
+            print cost
+
```
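Besides the separated `localgradients_in` / `localgradients_aux` terms and their `compute_localgradients_in` / `compute_localgradients_aux` debug methods, the other notable addition is the `orthogonalinit` weight initialiser. Below is a minimal standalone sketch of that Gram-Schmidt idea in modern NumPy (the committed code targets Python 2, using `copy.copy` and `xrange`); `orthogonal_init` is an illustrative rename, not the committed function, and its block-restart test `i - offset == dim` generalises the committed single `i == bn` check. Each column is orthogonalised against the earlier columns of its block, then rescaled back to its original norm.

```python
import numpy


def orthogonal_init(W, axis=1):
    """Orthogonalise the vectors of W along `axis`, keeping their norms.

    Sketch of the changeset's `orthogonalinit`: Gram-Schmidt projections
    over columns (axis=1) or rows (axis=0). At most `dim` vectors can be
    mutually orthogonal, so the projection window restarts after each
    block of `dim` vectors.
    """
    if axis == 0:
        return orthogonal_init(W.T, axis=1).T
    dim, n = W.shape
    V = W.astype(float).copy()
    offset = 0
    for i in range(n):
        if i - offset == dim:          # start a fresh orthogonal block
            offset += dim
        for j in range(offset, i):     # remove components along earlier columns
            V[:, i] -= (V[:, i] @ V[:, j]) / (V[:, j] @ V[:, j]) * V[:, j]
        # rescale to the original column norm, as the committed code does
        V[:, i] *= numpy.linalg.norm(W[:, i]) / numpy.linalg.norm(V[:, i])
    return V


# usage: orthogonalise a uniform random encoder matrix and check the result
rng = numpy.random.RandomState(1)
W = orthogonal_init(rng.uniform(-1.0, 1.0, size=(5, 4)))
print(numpy.round(W.T @ W, 6))   # off-diagonal entries are ~0
```

One caveat on the new `_instance_unsupupdate` dispatcher: the 'totallocal' and 'total' branches read `data[i]` before `i` is bound, and the single-layer 'local'/'global' branch indexes `inst.localupdate[i]` / `inst.globalupdate[i]` rather than `[layer]`; presumably `data`, `data[layer]`, and `[layer]` were intended.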