changeset 708:53a247cfee84
Merged
author      Olivier Delalleau <delallea@iro>
date        Fri, 22 May 2009 10:03:29 -0400
parents     f8bf9533f6b3 (current diff)  2b54c38e2c60 (diff)
children    55f77c7c3075
files       pylearn/algorithms/sandbox/DAA_inputs_groups.py
diffstat    1 files changed, 108 insertions(+), 64 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Fri May 22 10:03:10 2009 -0400
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py	Fri May 22 10:03:29 2009 -0400
@@ -17,7 +17,7 @@
     if type == 'l1':
         return T.sum(T.abs(param))
     if type == 'l2':
-        return T.sum(param*param)
+        return T.sum(T.pow(param,2))
     raise NotImplementedError('Only l1 and l2 regularization are currently implemented')
 
 def get_reg_cost(params, type):
@@ -96,7 +96,8 @@
         self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))]
 
         #hyper-parameters
-        self.lr = T.scalar('lr')
+        if self.interface:
+            self.lr = T.scalar('lr')
         self.noise_level = T.scalar('noise_level')
         self.noise_level_group = T.scalar('noise_level_group')
 
@@ -198,7 +199,7 @@
         if not hasattr(self,'params'):
             self.params = []
         self.params += [self.benc]
-        self.paramsenc = self.params
+        self.paramsenc = copy.copy(self.params)
         if not(self.input is None):
             self.params += [self.wenc] + [self.bdec]
             self.paramsenc += [self.wenc]
@@ -249,7 +250,7 @@
         return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec)
 
     def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0,
-            seed=1, alloc=True, **init):
+            seed=1, Alloc=True, **init):
         super(DAAig, self)._instance_initialize(obj, **init)
 
         obj.reg_coef = reg_coef
@@ -276,7 +277,7 @@
 
         self.hif = 1/numpy.sqrt(self.n_hid)
 
-        if alloc:
+        if Alloc:
             if not(self.input is None):
                 wencshp = (self.in_size, self.n_hid)
                 wdecshp = tuple(reversed(wencshp))
@@ -312,7 +313,7 @@
             in_size = None, auxin_size = [None], n_hid = [1],
             regularize = False, tie_weights = False, hid_fn = 'sigmoid_act',
             reconstruction_cost_function=cost.cross_entropy,
-            n_out = 2, target = None, totalupdatebool=True, **init):
+            n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init):
         super(StackedDAAig, self).__init__()
 
         print '\t**** StackedDAAig.__init__ ****'
@@ -321,6 +322,7 @@
         print '\tin_size = ', in_size
         print '\tauxin_size = ', auxin_size
         print '\tn_hid = ', n_hid
+
         # save parameters
         self.depth = depth
         self.input = input
@@ -334,6 +336,7 @@
         self.reconstruction_cost_function = reconstruction_cost_function
         self.n_out = n_out
         self.target = target if not(target is None) else T.lvector('target')
+        self.debugmethod = debugmethod
         self.totalupdatebool = totalupdatebool
 
         # init for model construction
@@ -345,26 +348,41 @@
         self.unsup_lr = T.dscalar('unsup_lr')
         self.sup_lr = T.dscalar('sup_lr')
 
-        # methods
+        # updatemethods
         self.localupdate = [None] * (self.depth+1) #update only on the layer parameters
         self.globalupdate = [None] * (self.depth+1)#update wrt the layer cost backproped untill the input layer
         if self.totalupdatebool:
             self.totalupdate = [None] * (self.depth+1) #update wrt all the layers cost backproped untill the input layer
         #
-        self.representation = [None] * (self.depth+1)
-        self.reconstruction = [None] * (self.depth)
-        self.compute_localcost = [None] * (self.depth+1)
-        self.compute_globalcost = [None] * (self.depth+1)
-        if self.totalupdatebool:
-            self.compute_totalcost = [None] * (self.depth+1)
-        self.validate = [None] * (self.depth)
-        self.noisyinputs = [None] * (self.depth)
+        self.classify = None
+
+        #others methods
+        if self.debugmethod:
+            self.representation = [None] * (self.depth)
+            self.reconstruction = [None] * (self.depth)
+            self.validate = [None] * (self.depth)
+            self.noisyinputs = [None] * (self.depth)
+            self.compute_localcost = [None] * (self.depth+1)
+            self.compute_localgradients = [None] * (self.depth+1)
+            self.compute_globalcost = [None] * (self.depth+1)
+            self.compute_globalgradients = [None] * (self.depth+1)
+            if self.totalupdatebool:
+                self.compute_totalcost = [None] * (self.depth+1)
+                self.compute_totalgradients = [None] * (self.depth+1)
        #
+
+        # some theano Variables we want to keep track on
+        if self.regularize:
+            self.regularizationenccost = [None] * (self.depth)
         self.localcost = [None] * (self.depth+1)
+        self.localgradients = [None] * (self.depth+1)
         self.globalcost = [None] * (self.depth+1)
+        self.globalgradients = [None] * (self.depth+1)
         if self.totalupdatebool:
             self.totalcost = [None] * (self.depth+1)
+            self.totalgradients = [None] * (self.depth+1)
+
         #params to update and inputs initialization
         paramstot = []
         paramsenc = []
         self.inputs = [None] * (self.depth+1)
@@ -376,6 +394,7 @@
 
         offset = 0
         for i in range(self.depth):
+
             if auxin_size[i] is None:
                 offset +=1
                 param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\
@@ -383,64 +402,81 @@
             else:
                 param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\
                         False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False]
+
             print '\tLayer init= ', i+1
             self.daaig[i] = DAAig(*param)
 
+            # method input, outputs and parameters update
             if i:
                 self.inputs[i] = copy.copy(self.inputs[i-1])
             if not(auxin_size[i] is None):
                 self.inputs[i] += [self.daaig[i].idx_list,self.auxinput[i-offset]]
 
+            noisyout = []
+            if not(inputprec is None):
+                noisyout += [self.daaig[i].noisy_input]
+            if not(auxin_size[i] is None):
+                noisyout += [self.daaig[i].noisy_auxinput]
+
             paramstot += self.daaig[i].params
 
+            # save the costs
+            self.localcost[i] = self.daaig[i].noise.cost
+            self.globalcost[i] = self.daaig[i].noise.cost
+            if self.totalupdatebool:
+                if i:
+                    self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost
+                else:
+                    self.totalcost[i] = self.daaig[i].noise.cost
+
             if self.regularize:
-                self.localcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
-                self.globalcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
-                if self.totalupdatebool:
-                    self.totalcost[i] = self.daaig[i].noise.cost+self.daaig[i].regularization
-                for j in range(i):
-                    self.globalcost[i] += self.daaig[j].regularizationenc
-                    if self.totalupdatebool:
-                        self.totalcost[i] += self.daaig[j].noise.cost+self.daaig[j].regularization
+                if i:
+                    self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc
+                else:
+                    self.regularizationenccost[i] = 0
-            else:
-                self.localcost[i] = self.daaig[i].noise.cost
-                self.globalcost[i] = self.daaig[i].noise.cost
+                self.localcost[i] += self.daaig[i].regularization
+                self.globalcost[i] += self.regularizationenccost[i]
                 if self.totalupdatebool:
-                    self.totalcost[i] = self.daaig[i].noise.cost
-                    for j in range(i):
-                        self.totalcost[i] += self.daaig[j].noise.cost
+                    self.totalcost[i] += self.daaig[i].regularization
+
+            self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params)
+            self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc)
+            if self.totalupdatebool:
+                self.totalgradients[i] = T.grad(self.totalcost[i], paramstot)
 
-            local_grads = dict((j, j - self.unsup_lr * T.grad(self.localcost[i], j))\
-                    for j in self.daaig[i].params)
-            global_grads = dict((j, j - self.unsup_lr * T.grad(self.globalcost[i], j))\
-                    for j in (self.daaig[i].params+paramsenc))
+            #create the updates dictionnaries
+            local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i]))
+            global_grads = dict((j, j - self.unsup_lr * g)\
+                    for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i]))
             if self.totalupdatebool:
-                total_grads = dict((j, j - self.unsup_lr * T.grad(self.totalcost[i], j))\
-                        for j in (paramstot))
+                total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i]))
 
+            # method declaration
             self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads)
             self.globalupdate[i] = theano.Method(self.inputs[i],self.globalcost[i],global_grads)
             if self.totalupdatebool:
                 self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads)
             #
-            self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
-            self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
-            self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
-            self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
-            if self.totalupdatebool:
-                self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
-            self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
-            noisyout = []
-            if not(inputprec is None):
-                noisyout += [self.daaig[i].noisy_input]
-            if not(auxin_size[i] is None):
-                noisyout += [self.daaig[i].noisy_auxinput]
-            self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
+            if self.debugmethod:
+                self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
+                self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
+                self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec])
+                self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
+                self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
+                self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i])
+                self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i])
+                self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i])
                if self.totalupdatebool:
+                    self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i])
+                    self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i])
             #
+
             paramsenc += self.daaig[i].paramsenc
             inputprec = self.daaig[i].clean.hidden
             in_sizeprec = self.n_hid[i]
 
+
+        # supervised layer
         print '\tLayer supervised init'
         self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target]
         self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target)
@@ -448,34 +484,42 @@
 
         if self.regularize:
             self.localcost[-1] = self.daaig[-1].regularized_cost
-            self.globalcost[-1] = self.daaig[-1].regularized_cost
-            for j in range(self.depth):
-                self.globalcost[-1] += self.daaig[j].regularizationenc
+            self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1]
         else:
             self.localcost[-1] = self.daaig[-1].unregularized_cost
             self.globalcost[-1] = self.daaig[-1].unregularized_cost
 
         if self.totalupdatebool:
-            self.totalcost[-1] = self.totalcost[-2] + self.localcost[-1]
+            self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]]
 
-        local_grads = dict((j, j - self.sup_lr * T.grad(self.localcost[-1], j))\
-                for j in self.daaig[-1].params)
-        global_grads = dict((j, j - self.sup_lr * T.grad(self.globalcost[-1], j))\
-                for j in (self.daaig[-1].params+paramsenc))
+        self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params)
+        self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc)
         if self.totalupdatebool:
-            total_grads = dict((j, j - \
-                    (self.unsup_lr * T.grad(self.totalcost[-2], j) + self.sup_lr *T.grad(self.globalcost[-1], j)))\
-                    for j in paramstot)
+            self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\
+                    T.grad(self.globalcost[-1], paramstot) ]
+
+        local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1]))
+        global_grads = dict((j, j - self.unsup_lr * g)\
+                for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1]))
+        if self.totalupdatebool:
+            total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\
+                    for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1]))
 
         self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads)
         self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads)
         if self.totalupdatebool:
             self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads)
-        self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
-        self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
-        if self.totalupdatebool:
-            self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
-        self.representation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
+        self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone)
+
+        if self.debugmethod:
+            self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
+            self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1])
+            self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
+            self.compute_globalgradients[-1] = theano.Method(self.inputs[-1],self.globalgradients[-1])
            if self.totalupdatebool:
+                self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1])
+                self.compute_totalgradients[-1] =\
+                        theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1])
 
     def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0, noise_level = 0 , noise_level_group = 0,
             seed = 1, alloc = True,**init):
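Note on the update pattern this changeset moves to: instead of calling T.grad separately for each parameter inside the dict comprehension, the gradients are now computed once per cost with a single T.grad(cost, params) call (which returns a list aligned with params), kept on the instance so the new compute_*gradients debug methods can expose them, and then zipped with the parameters to build the update dictionaries. The following is a minimal standalone sketch of that pattern only; it uses theano.function and shared variables rather than the Module/theano.Method interface of this file, and all names in it (x, y, w, b, lr, train) are illustrative, not part of the changeset.

import numpy
import theano
import theano.tensor as T

# Toy model: linear regression with an l2 penalty written as T.sum(T.pow(w, 2)),
# mirroring the l2 regularizer form introduced in the first hunk.
x = T.dmatrix('x')
y = T.dvector('y')
w = theano.shared(numpy.zeros(3), name='w')
b = theano.shared(0.0, name='b')
lr = T.dscalar('lr')

pred = T.dot(x, w) + b
cost = T.sum(T.pow(pred - y, 2)) + 0.1 * T.sum(T.pow(w, 2))

# One T.grad call over the whole parameter list returns a gradient list aligned with params.
params = [w, b]
gradients = T.grad(cost, params)

# Build the update dictionary by zipping parameters with their precomputed gradients,
# as the patched local_grads/global_grads/total_grads dictionaries do.
updates = dict((p, p - lr * g) for p, g in zip(params, gradients))

train = theano.function([x, y, lr], cost, updates=updates)   # one SGD step per call
compute_gradients = theano.function([x, y], gradients)       # analogous to compute_localgradients

Keeping the symbolic gradient list around this way lets the same T.grad results back both the update methods and the debug methods without recomputing gradients per parameter.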