# HG changeset patch # User Xavier Glorot # Date 1242949966 14400 # Node ID 2b54c38e2c6098b91b42fb9906fc3f63fc0e93a9 # Parent eb91fa83e2c1a5378e02279f49cac9141c039d59 improved initialization of StackedDAAig diff -r eb91fa83e2c1 -r 2b54c38e2c60 pylearn/algorithms/sandbox/DAA_inputs_groups.py --- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py Thu May 21 14:11:54 2009 -0400 +++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py Thu May 21 19:52:46 2009 -0400 @@ -17,7 +17,7 @@ if type == 'l1': return T.sum(T.abs(param)) if type == 'l2': - return T.sum(param*param) + return T.sum(T.pow(param,2)) raise NotImplementedError('Only l1 and l2 regularization are currently implemented') def get_reg_cost(params, type): @@ -96,7 +96,8 @@ self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))] #hyper-parameters - self.lr = T.scalar('lr') + if self.interface: + self.lr = T.scalar('lr') self.noise_level = T.scalar('noise_level') self.noise_level_group = T.scalar('noise_level_group') @@ -249,7 +250,7 @@ return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec) def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, - seed=1, alloc=True, **init): + seed=1, Alloc=True, **init): super(DAAig, self)._instance_initialize(obj, **init) obj.reg_coef = reg_coef @@ -276,7 +277,7 @@ self.hif = 1/numpy.sqrt(self.n_hid) - if alloc: + if Alloc: if not(self.input is None): wencshp = (self.in_size, self.n_hid) wdecshp = tuple(reversed(wencshp)) @@ -312,7 +313,7 @@ in_size = None, auxin_size = [None], n_hid = [1], regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', reconstruction_cost_function=cost.cross_entropy, - n_out = 2, target = None, totalupdatebool=True, **init): + n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init): super(StackedDAAig, self).__init__() print '\t**** StackedDAAig.__init__ ****' @@ -335,6 +336,7 @@ self.reconstruction_cost_function = reconstruction_cost_function self.n_out = n_out self.target = target if not(target is None) else T.lvector('target') + self.debugmethod = debugmethod self.totalupdatebool = totalupdatebool # init for model construction @@ -352,25 +354,33 @@ if self.totalupdatebool: self.totalupdate = [None] * (self.depth+1) #update wrt all the layers cost backproped untill the input layer # + self.classify = None #others methods - self.representation = [None] * (self.depth+1) - self.reconstruction = [None] * (self.depth) - self.validate = [None] * (self.depth) - self.noisyinputs = [None] * (self.depth) - self.compute_localcost = [None] * (self.depth+1) - self.compute_globalcost = [None] * (self.depth+1) - if self.totalupdatebool: - self.compute_totalcost = [None] * (self.depth+1) + if self.debugmethod: + self.representation = [None] * (self.depth) + self.reconstruction = [None] * (self.depth) + self.validate = [None] * (self.depth) + self.noisyinputs = [None] * (self.depth) + self.compute_localcost = [None] * (self.depth+1) + self.compute_localgradients = [None] * (self.depth+1) + self.compute_globalcost = [None] * (self.depth+1) + self.compute_globalgradients = [None] * (self.depth+1) + if self.totalupdatebool: + self.compute_totalcost = [None] * (self.depth+1) + self.compute_totalgradients = [None] * (self.depth+1) # # some theano Variables we want to keep track on if self.regularize: self.regularizationenccost = [None] * (self.depth) self.localcost = [None] * (self.depth+1) + self.localgradients = [None] * (self.depth+1) self.globalcost = [None] * (self.depth+1) + self.globalgradients = [None] * (self.depth+1) if self.totalupdatebool: self.totalcost = [None] * (self.depth+1) + self.totalgradients = [None] * (self.depth+1) #params to update and inputs initialization paramstot = [] @@ -423,21 +433,24 @@ if i: self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc else: - self.regularizationenccost[i] = T.zero() + self.regularizationenccost[i] = 0 self.localcost[i] += self.daaig[i].regularization self.globalcost[i] += self.regularizationenccost[i] if self.totalupdatebool: self.totalcost[i] += self.daaig[i].regularization + self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params) + self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc) + if self.totalupdatebool: + self.totalgradients[i] = T.grad(self.totalcost[i], paramstot) + #create the updates dictionnaries - local_grads = dict((j, j - self.unsup_lr * T.grad(self.localcost[i], j))\ - for j in self.daaig[i].params) - global_grads = dict((j, j - self.unsup_lr * T.grad(self.globalcost[i], j))\ - for j in (self.daaig[i].params+paramsenc)) + local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) + global_grads = dict((j, j - self.unsup_lr * g)\ + for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i])) if self.totalupdatebool: - total_grads = dict((j, j - self.unsup_lr * T.grad(self.totalcost[i], j))\ - for j in (paramstot)) + total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i])) # method declaration self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads) @@ -445,15 +458,20 @@ if self.totalupdatebool: self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads) # - self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden) - self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec) - self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec]) - self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout) - self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i]) - self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i]) - if self.totalupdatebool: - self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i]) + if self.debugmethod: + self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden) + self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec) + self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec]) + self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout) + self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i]) + self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i]) + self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i]) + self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i]) + if self.totalupdatebool: + self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i]) + self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i]) # + paramsenc += self.daaig[i].paramsenc inputprec = self.daaig[i].clean.hidden in_sizeprec = self.n_hid[i] @@ -467,33 +485,41 @@ if self.regularize: self.localcost[-1] = self.daaig[-1].regularized_cost self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] - if self.totalupdatebool: - self.totalcost[-1] = self.totalcost[-2] + self.daaig[-1].regularized_cost else: self.localcost[-1] = self.daaig[-1].unregularized_cost self.globalcost[-1] = self.daaig[-1].unregularized_cost - if self.totalupdatebool: - self.totalcost[-1] = self.totalcost[-2] + self.daaig[-1].unregularized_cost + + if self.totalupdatebool: + self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]] - local_grads = dict((j, j - self.sup_lr * T.grad(self.localcost[-1], j))\ - for j in self.daaig[-1].params) - global_grads = dict((j, j - self.sup_lr * T.grad(self.globalcost[-1], j))\ - for j in (self.daaig[-1].params+paramsenc)) + self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params) + self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc) if self.totalupdatebool: - total_grads = dict((j, j - \ - (self.unsup_lr * T.grad(self.totalcost[-2], j) + self.sup_lr *T.grad(self.globalcost[-1], j)))\ - for j in paramstot) + self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\ + T.grad(self.globalcost[-1], paramstot) ] + + local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) + global_grads = dict((j, j - self.unsup_lr * g)\ + for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1])) + if self.totalupdatebool: + total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\ + for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1])) self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads) self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads) if self.totalupdatebool: self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads) - - self.representation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone) - self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1]) - self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1]) - if self.totalupdatebool: - self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1]) + self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone) + + if self.debugmethod: + self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1]) + self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1]) + self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1]) + self.compute_globalgradients[-1] = theano.Method(self.inputs[-1],self.globalgradients[-1]) + if self.totalupdatebool: + self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1]) + self.compute_totalgradients[-1] =\ + theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1]) def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0, noise_level = 0 , noise_level_group = 0, seed = 1, Alloc = True,**init):