# HG changeset patch # User Xavier Glorot # Date 1248197503 14400 # Node ID 9dd5af3b26fe53c23f829ba891f1f1d614556194 # Parent 0b20301ded8954d8775b7bc704e24d09d2b85f31 DAA_inputs_groups readability diff -r 0b20301ded89 -r 9dd5af3b26fe pylearn/algorithms/sandbox/DAA_inputs_groups.py --- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py Tue Jul 21 12:19:30 2009 -0400 +++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py Tue Jul 21 13:31:43 2009 -0400 @@ -50,6 +50,10 @@ numpy.sqrt((Worto[:,i:i+1]*Worto[:,i:i+1]).sum(0)) * numpy.sqrt((W[:,i:i+1]*W[:,i:i+1]).sum(0)) return Worto if axis == 1 else Worto.T +# @todo +def PCAinit(data,nhid): + pass + # Initialize containers: class CreateContainer: pass @@ -57,7 +61,7 @@ # regularisation utils:------------------------------------------- def lnorm(param, type='l2'): if type == 'l1': - return T.sum(T.abs(param)) + return T.sum(T.abs_(param)) if type == 'l2': return T.sum(param*param) raise NotImplementedError('Only l1 and l2 regularization are currently implemented') @@ -184,16 +188,17 @@ ### DECLARE MODEL VARIABLES and default self.input = input - self.noisy_input = None if self.ignore_missing is not None and self.input is not None: no_missing = FillMissing(self.ignore_missing)(self.input) self.input = no_missing[0] # With missing values replaced. self.input_missing_mask = no_missing[1] # Missingness pattern. else: self.input_missing_mask = None + self.auxinput = auxinput self.idx_list = T.ivector('idx_list') if self.auxinput is not None else None - self.noisy_idx_list, self.noisy_auxinput = None, None + + self.noisy_input, self.noisy_idx_list, self.noisy_auxinput = None , None, None #parameters self.benc = T.dvector('benc') @@ -210,7 +215,6 @@ #hyper-parameters if self.interface: self.lr = T.scalar('lr') - self.noise_level = T.scalar('noise_level') self.noise_level_group = T.scalar('noise_level_group') self.scale_cost = T.scalar('scale_cost') @@ -226,7 +230,7 @@ self.noisy_input = self.corrupt_input() if self.auxinput is not None: self.noisy_idx_list , self.noisy_auxinput = \ - scannoise(self.idx_list, self.auxinput,self.noise_level, self.noise_level_group) + scannoise(self.idx_list, self.auxinput,self.noise_level, self.noise_level_group) self.noise = CreateContainer() self.clean = CreateContainer() @@ -240,25 +244,43 @@ if self.interface: self.define_gradients() self.define_interface() + + def filter_up(self, vis, w, b=None): + out = T.dot(vis, w) + return out + b if b else out + filter_down = filter_up + + def corrupt_input(self): + if self.corruption_pattern is None: + mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) + elif self.corruption_pattern == 'by_pair': + shape = T.shape(self.input) + # Do not ask me why, but just doing "/ 2" does not work (there is + # a bug in the optimizer). + shape = T.stack(shape[0], (shape[1] * 2) / 4) + mask = self.random.binomial(shape, 1, 1 - self.noise_level) + mask = T.horizontal_stack(mask, mask) + else: + raise ValueError('Unknown value for corruption_pattern: %s' % self.corruption_pattern) + return mask * self.input def define_behavioural(self, container, input, idx_list, auxinput): self.define_propup(container, input, idx_list , auxinput) container.hidden = self.hid_fn(container.hidden_activation) + self.define_propdown(container, idx_list , auxinput) + container.rec = self.rec_fn(container.rec_activation) if self.input is not None: container.rec_in = self.rec_fn(container.rec_activation_in) if (self.auxinput is not None): container.rec_aux = self.rec_fn(container.rec_activation_aux) - container.rec = self.rec_fn(container.rec_activation) def define_propup(self, container, input, idx_list, auxinput): + container.hidden_activation = self.benc if self.input is not None: - container.hidden_activation = self.filter_up(input, self.wenc, self.benc) - if self.auxinput is not None: - container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) - else: - if self.auxinput is not None: - container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc + container.hidden_activation += self.filter_up(input, self.wenc) + if self.auxinput is not None: + container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) def define_propdown(self, container, idx_list, auxinput): if self.input is not None: @@ -270,21 +292,13 @@ if (self.ignore_missing is not None and self.input is not None and not self.reconstruct_missing): # Apply mask to gradient to ensure we do not backpropagate on the # cost computed on missing inputs (that have been imputed). - container.rec_activation_in = mask_gradient(container.rec_activation_in, - self.input_missing_mask) + container.rec_activation_in = mask_gradient(container.rec_activation_in, self.input_missing_mask) if (self.input is not None) and (self.auxinput is not None): container.rec_activation = T.join(1,container.rec_activation_in,container.rec_activation_aux) else: - if self.input is not None: - container.rec_activation = container.rec_activation_in - if (self.auxinput is not None): - container.rec_activation = container.rec_activation_aux - - def filter_up(self, vis, w, b=None): - out = T.dot(vis, w) - return out + b if b else out - filter_down = filter_up + container.rec_activation = container.rec_activation_in \ + if self.input is not None else container.rec_activation_aux def define_regularization(self): self.reg_coef = T.scalar('reg_coef') @@ -295,6 +309,7 @@ self.Maskup = [self.Maskup] if type(self.Maskdown) is not list: self.Maskdown = [self.Maskdown] + listweights = [] listweightsenc = [] if self.auxinput is not None: @@ -303,26 +318,21 @@ if self.input is not None: listweights += [self.wenc,self.wdec] listweightsenc += [self.wenc] + self.regularization = self.reg_coef * get_reg_cost(listweights,'l1') self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l1') def define_cost(self, container): - if self.reconstruction_cost_function_name == 'cross_entropy': - if (self.input is not None): - container.reconstruction_cost_in = \ - self.reconstruction_cost_function(self.input,container.rec_activation_in,self.rec_name) - if (self.auxinput is not None): - container.reconstruction_cost_aux = \ - self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput),container.rec_activation_aux,\ - self.rec_name) - else: - if (self.input is not None): - container.reconstruction_cost_in = \ - self.reconstruction_cost_function(self.input,container.rec_in,self.rec_name) - if (self.auxinput is not None): - container.reconstruction_cost_aux = \ - self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput),container.rec_aux,\ - self.rec_name) + tmpbool = (self.reconstruction_cost_function_name == 'cross_entropy') + if (self.input is not None): + container.reconstruction_cost_in = \ + self.reconstruction_cost_function(self.input, container.rec_activation_in \ + if tmpbool else container.rec_in, self.rec_name) + if (self.auxinput is not None): + container.reconstruction_cost_aux = \ + self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), container.rec_activation_aux \ + if tmpbool else container.rec_aux, self.rec_name) + # TOTAL COST if (self.input is not None) and (self.auxinput is not None): container.reconstruction_cost = (T.min(T.constant(1),T.constant(1)+self.scale_cost)) * \ @@ -342,14 +352,17 @@ def define_params(self): if not hasattr(self,'params'): self.params = [] + self.params += [self.benc] self.paramsenc = copy.copy(self.params) + if self.input is not None: self.params += [self.wenc] + [self.bdec] self.paramsenc += [self.wenc] if self.auxinput is not None: self.params += self.wauxenc + self.bauxdec self.paramsenc += self.wauxenc + if not(self.tie_weights): if self.input is not None: self.params += [self.wdec] @@ -358,43 +371,27 @@ def define_gradients(self): self.gradients = T.grad(self.noise.cost, self.params) - self.updates = dict((p, p - self.lr * g) for p, g in \ - zip(self.params, self.gradients)) + self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gradients)) def define_interface(self): # declare function to interface with module (if not stacked) - if self.input is None: - listin = [self.idx_list, self.auxinput] + listin = [] + listout = [] + if self.input is not None: + listin += [self.input] + listout += [self.noisy_input] if self.auxinput is None: - listin = [self.input] - if (self.input is not None) and (self.auxinput is not None): - listin =[self.input,self.idx_list, self.auxinput] + listin += [self.idx_list, self.auxinput] + listout += [self.noisy_auxinput] + self.update = theano.Method(listin, self.noise.cost, self.updates) self.compute_cost = theano.Method(listin, self.noise.cost) - if self.input is not None: - self.noisify = theano.Method(listin, self.noisy_input) - if self.auxinput is not None: - self.auxnoisify = theano.Method(listin, self.noisy_auxinput) + self.noisify = theano.Method(listin, listout) self.recactivation = theano.Method(listin, self.clean.rec_activation) self.reconstruction = theano.Method(listin, self.clean.rec) self.activation = theano.Method(listin, self.clean.hidden_activation) self.representation = theano.Method(listin, self.clean.hidden) - def corrupt_input(self): - if self.corruption_pattern is None: - mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) - elif self.corruption_pattern == 'by_pair': - shape = T.shape(self.input) - # Do not ask me why, but just doing "/ 2" does not work (there is - # a bug in the optimizer). - shape = T.stack(shape[0], (shape[1] * 2) / 4) - mask = self.random.binomial(shape, 1, 1 - self.noise_level) - mask = T.horizontal_stack(mask, mask) - else: - raise ValueError('Unknown value for corruption_pattern: %s' - % self.corruption_pattern) - return mask * self.input - def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, scale_cost = 0, seed=1, orthoinit = False, alloc=True, **init): super(DAAig, self)._instance_initialize(obj, **init) @@ -409,12 +406,9 @@ obj.lr = None obj.random.initialize() - if seed is not None: - obj.random.seed(seed) + obj.random.seed(seed) self.R = numpy.random.RandomState(seed) - obj.__hide__ = ['params'] - if self.input is not None: self.inf = 1/numpy.sqrt(self.in_size) if self.auxinput is not None: @@ -429,14 +423,13 @@ wdecshp = tuple(reversed(wencshp)) print 'wencshp = ', wencshp print 'wdecshp = ', wdecshp - if not orthoinit: - obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) + obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) + if not(self.tie_weights): + obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif) + if orthoinit: + obj.wenc = orthogonalinit(obj.wenc) if not(self.tie_weights): - obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif) - else: - obj.wenc = orthogonalinit(self.R.uniform(size=wencshp, low = -self.inf, high = self.inf)) - if not(self.tie_weights): - obj.wdec = orthogonalinit(self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif),0) + obj.wdec = orthogonalinit(obj.wdec,0) obj.bdec = numpy.zeros(self.in_size) if self.auxinput is not None: @@ -444,16 +437,13 @@ wauxdecshp = [tuple(reversed(i)) for i in wauxencshp] print 'wauxencshp = ', wauxencshp print 'wauxdecshp = ', wauxdecshp - if not orthoinit: - obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] + obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] + if not(self.tie_weights): + obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] + if orthoinit: + obj.wauxenc = [orthogonalinit(w) for w in obj.wauxenc] if not(self.tie_weights): - obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] - else: - obj.wauxenc = [orthogonalinit(self.R.uniform(size=i, low = -self.inf, high = self.inf)) \ - for i in wauxencshp] - if not(self.tie_weights): - obj.wauxdec = [orthogonalinit(self.R.uniform(size=i, low=-self.hif, high=self.hif),0) \ - for i in wauxdecshp] + obj.wauxdec = [orthogonalinit(w,0) for w in obj.wauxdec] obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size] print 'self.inf = ', self.inf @@ -539,25 +529,25 @@ self.recactivation = [None] * (self.depth) self.reconstruction = [None] * (self.depth) self.noisyinputs = [None] * (self.depth) + self.compute_localgradients_in = [None] * (self.depth) + self.compute_localgradients_aux = [None] * (self.depth) self.compute_localcost = [None] * (self.depth+1) self.compute_localgradients = [None] * (self.depth+1) self.compute_globalcost = [None] * (self.depth+1) self.compute_globalgradients = [None] * (self.depth+1) - self.compute_localgradients_in = [None] * (self.depth) - self.compute_localgradients_aux = [None] * (self.depth) if self.totalupdatebool: self.compute_totalcost = [None] * (self.depth+1) self.compute_totalgradients = [None] * (self.depth+1) # some theano Variables we want to keep track on - if self.regularize: - self.regularizationenccost = [None] * (self.depth) + self.localgradients_in = [None] * (self.depth) + self.localgradients_aux = [None] * (self.depth) self.localcost = [None] * (self.depth+1) self.localgradients = [None] * (self.depth+1) - self.localgradients_in = [None] * (self.depth) - self.localgradients_aux = [None] * (self.depth) self.globalcost = [None] * (self.depth+1) self.globalgradients = [None] * (self.depth+1) + if self.regularize: + self.regularizationenccost = [None] * (self.depth) if self.totalupdatebool: self.totalcost = [None] * (self.depth+1) self.totalgradients = [None] * (self.depth+1) @@ -606,34 +596,27 @@ self.localcost[i] = self.daaig[i].noise.cost self.globalcost[i] = self.daaig[i].noise.cost if self.totalupdatebool: - if i: - self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost - else: - self.totalcost[i] = self.daaig[i].noise.cost + self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost if i else self.daaig[i].noise.cost if self.regularize: - if i: - self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc - else: - self.regularizationenccost[i] = 0 + self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc if i else 0 self.localcost[i] += self.daaig[i].regularization self.globalcost[i] += self.regularizationenccost[i] if self.totalupdatebool: self.totalcost[i] += self.daaig[i].regularization - self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params) self.localgradients_in[i] = T.grad(self.daaig[i].noise.reconstruction_cost_in, self.daaig[i].params) \ if inputprec is not None else T.constant(0) self.localgradients_aux[i] = T.grad(self.daaig[i].noise.reconstruction_cost_aux,self.daaig[i].params) \ if auxin_size[i] is not None else T.constant(0) + self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params) self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc) if self.totalupdatebool: self.totalgradients[i] = T.grad(self.totalcost[i], paramstot) #create the updates dictionnaries - local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) - global_grads = dict((j, j - self.unsup_lr * g)\ - for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i])) + local_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) + global_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i])) if self.totalupdatebool: total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i])) @@ -669,12 +652,10 @@ self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,sigmoid_act(self.daaig[-2].clean.hidden_activation),self.target) paramstot += self.daaig[-1].params - if self.regularize: - self.localcost[-1] = self.daaig[-1].regularized_cost - self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] - else: - self.localcost[-1] = self.daaig[-1].unregularized_cost - self.globalcost[-1] = self.daaig[-1].unregularized_cost + self.localcost[-1] = self.daaig[-1].regularized_cost \ + if self.regularize else self.daaig[-1].unregularized_cost + self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] \ + if self.regularize else self.daaig[-1].unregularized_cost if self.totalupdatebool: self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]] @@ -682,12 +663,10 @@ self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params) self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc) if self.totalupdatebool: - self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\ - T.grad(self.globalcost[-1], paramstot) ] + self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) , T.grad(self.globalcost[-1],paramstot) ] - local_grads = dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) - global_grads = dict((j, j - self.sup_lr * g)\ - for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1])) + local_grads = dict((j,j-self.sup_lr*g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) + global_grads = dict((j,j-self.sup_lr*g) for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1])) if self.totalupdatebool: total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\ for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1])) @@ -701,7 +680,7 @@ for k in range(self.depth): totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in \ zip(self.daaig[k].params,self.localgradients[k]))) - totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in\ + totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1]))) self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads) @@ -726,7 +705,6 @@ inst.unsup_lr = unsup_lr inst.sup_lr = sup_lr - for i in range(self.depth): print '\tLayer = ', i+1 inst.daaig[i].initialize(reg_coef = reg_coef[i] if type(reg_coef) is list else reg_coef, \ @@ -734,17 +712,17 @@ scale_cost = scale_cost[i] if type(scale_cost) is list else scale_cost, \ noise_level_group = noise_level_group[i] if type(noise_level_group) is list else noise_level_group, \ seed = seed + i, orthoinit = orthoinit, alloc = alloc) + print '\tLayer supervised' inst.daaig[-1].initialize() + if alloc: inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth) # init the logreg weights - if not orthoinit: - inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\ - low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)) - else: - inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\ - low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid))) + inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\ + low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)) + if orthoinit: + inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].w) inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef inst.daaig[-1].l2 = 0 #only l1 norm for regularisation to be consitent with the unsup regularisation