# HG changeset patch # User Olivier Delalleau # Date 1243016018 14400 # Node ID a268c5ea0db4086008b0ce0430603f2d3971900a # Parent f308cc89360bc35e6b147af2cafd350fdc519c6b Replaced tabs by 4 blank spaces diff -r f308cc89360b -r a268c5ea0db4 pylearn/algorithms/sandbox/DAA_inputs_groups.py --- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py Fri May 22 14:12:17 2009 -0400 +++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py Fri May 22 14:13:38 2009 -0400 @@ -7,535 +7,535 @@ from theano.tensor.nnet import sigmoid from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \ - scanmaskenc,scanmaskdec + scanmaskenc,scanmaskdec from pylearn.algorithms import cost from pylearn.algorithms.logistic_regression import LogRegN def lnorm(param, type='l2'): - if type == 'l1': - return T.sum(T.abs(param)) - if type == 'l2': - return T.sum(T.pow(param,2)) - raise NotImplementedError('Only l1 and l2 regularization are currently implemented') + if type == 'l1': + return T.sum(T.abs(param)) + if type == 'l2': + return T.sum(T.pow(param,2)) + raise NotImplementedError('Only l1 and l2 regularization are currently implemented') def get_reg_cost(params, type): - rcost = 0 - for param in params: - rcost += lnorm(param, type) - return rcost + rcost = 0 + for param in params: + rcost += lnorm(param, type) + return rcost def sigmoid_act(x): - return theano.tensor.nnet.sigmoid(x) + return theano.tensor.nnet.sigmoid(x) def tanh_act(x): - return theano.tensor.tanh(x) + return theano.tensor.tanh(x) def softsign_act(x): - return theano.sandbox.softsign.softsign(x) + return theano.sandbox.softsign.softsign(x) class ScratchPad: - pass + pass class DAAig(module.Module): - """De-noising Auto-encoder - """ - - def __init__(self, input = None, auxinput = None, - in_size=None, auxin_size= None, n_hid=1, - regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', - reconstruction_cost_function=cost.cross_entropy, interface = True,**init): - """ - :param regularize: WRITEME - :param tie_weights: WRITEME - :param hid_fn: WRITEME - :param reconstruction_cost: Should return one cost per example (row) - :todo: Default noise level for all daa levels - """ - print '\t\t**** DAAig.__init__ ****' - print '\t\tinput = ', input - print '\t\tauxinput = ', auxinput - print '\t\tin_size = ', in_size - print '\t\tauxin_size = ', auxin_size - print '\t\tn_hid = ', n_hid - - super(DAAig, self).__init__() - self.random = T.RandomStreams() - - # MODEL CONFIGURATION - self.in_size = in_size - self.auxin_size = auxin_size - self.n_hid = n_hid - self.regularize = regularize - self.tie_weights = tie_weights - self.reconstruction_cost_function = reconstruction_cost_function - self.interface = interface - - assert hid_fn in ('sigmoid_act','tanh_act','softsign_act') - self.hid_fn = eval(hid_fn) - - ### DECLARE MODEL VARIABLES and default - self.input = input - self.noisy_input = None - self.auxinput = auxinput - self.idx_list = T.ivector('idx_list') if not(self.auxinput is None) else None - self.noisy_idx_list, self.noisy_auxinput = None, None - - #parameters - self.benc = T.dvector('benc') - if not(self.input is None): - self.wenc = T.dmatrix('wenc') - self.wdec = self.wenc.T if tie_weights else T.dmatrix('wdec') - self.bdec = T.dvector('bdec') - - if not(self.auxinput is None): - self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))] - self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))] - self.bauxdec = 
[T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))] - - #hyper-parameters - if self.interface: - self.lr = T.scalar('lr') - self.noise_level = T.scalar('noise_level') - self.noise_level_group = T.scalar('noise_level_group') - - # leave the chance for subclasses to initialize - if self.__class__ == DAAig: - self.init_behavioural() - print '\t\t**** end DAAig.__init__ ****' - - ### BEHAVIOURAL MODEL - def init_behavioural(self): - if not(self.input is None): - self.noisy_input = self.corrupt_input() - if not(self.auxinput is None): - self.noisy_idx_list , self.noisy_auxinput = \ - scannoise(self.idx_list,self.auxinput,self.noise_level,self.noise_level_group) - - self.noise = ScratchPad() - self.clean = ScratchPad() - - self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput) - self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput) - - self.define_regularization() # call before cost - self.define_cost(self.clean) - self.define_cost(self.noise) - self.define_params() - if self.interface: - self.define_gradients() - self.define_interface() - - def define_behavioural(self, container, input, idx_list, auxinput): - self.define_propup(container, input, idx_list , auxinput) - container.hidden = self.hid_fn(container.hidden_activation) - self.define_propdown(container, idx_list , auxinput) - container.rec = self.hid_fn(container.rec_activation) - - def define_propup(self, container, input, idx_list, auxinput): - if self.input is not None: + """De-noising Auto-encoder + """ + + def __init__(self, input = None, auxinput = None, + in_size=None, auxin_size= None, n_hid=1, + regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', + reconstruction_cost_function=cost.cross_entropy, interface = True,**init): + """ + :param regularize: WRITEME + :param tie_weights: WRITEME + :param hid_fn: WRITEME + :param reconstruction_cost: Should return one cost per example (row) + :todo: Default noise level for all daa levels + """ + print '\t\t**** DAAig.__init__ ****' + print '\t\tinput = ', input + print '\t\tauxinput = ', auxinput + print '\t\tin_size = ', in_size + print '\t\tauxin_size = ', auxin_size + print '\t\tn_hid = ', n_hid + + super(DAAig, self).__init__() + self.random = T.RandomStreams() + + # MODEL CONFIGURATION + self.in_size = in_size + self.auxin_size = auxin_size + self.n_hid = n_hid + self.regularize = regularize + self.tie_weights = tie_weights + self.reconstruction_cost_function = reconstruction_cost_function + self.interface = interface + + assert hid_fn in ('sigmoid_act','tanh_act','softsign_act') + self.hid_fn = eval(hid_fn) + + ### DECLARE MODEL VARIABLES and default + self.input = input + self.noisy_input = None + self.auxinput = auxinput + self.idx_list = T.ivector('idx_list') if not(self.auxinput is None) else None + self.noisy_idx_list, self.noisy_auxinput = None, None + + #parameters + self.benc = T.dvector('benc') + if not(self.input is None): + self.wenc = T.dmatrix('wenc') + self.wdec = self.wenc.T if tie_weights else T.dmatrix('wdec') + self.bdec = T.dvector('bdec') + + if not(self.auxinput is None): + self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))] + self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))] + self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))] + + #hyper-parameters + if self.interface: + self.lr = T.scalar('lr') + self.noise_level = T.scalar('noise_level') + self.noise_level_group = 
T.scalar('noise_level_group') + + # leave the chance for subclasses to initialize + if self.__class__ == DAAig: + self.init_behavioural() + print '\t\t**** end DAAig.__init__ ****' + + ### BEHAVIOURAL MODEL + def init_behavioural(self): + if not(self.input is None): + self.noisy_input = self.corrupt_input() + if not(self.auxinput is None): + self.noisy_idx_list , self.noisy_auxinput = \ + scannoise(self.idx_list,self.auxinput,self.noise_level,self.noise_level_group) + + self.noise = ScratchPad() + self.clean = ScratchPad() + + self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput) + self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput) + + self.define_regularization() # call before cost + self.define_cost(self.clean) + self.define_cost(self.noise) + self.define_params() + if self.interface: + self.define_gradients() + self.define_interface() + + def define_behavioural(self, container, input, idx_list, auxinput): + self.define_propup(container, input, idx_list , auxinput) + container.hidden = self.hid_fn(container.hidden_activation) + self.define_propdown(container, idx_list , auxinput) + container.rec = self.hid_fn(container.rec_activation) + + def define_propup(self, container, input, idx_list, auxinput): + if self.input is not None: container.hidden_activation = self.filter_up(input, self.wenc, self.benc) - if self.auxinput is not None: - container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) - else: - if self.auxinput is not None: - container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc - - # DEPENDENCY: define_propup - def define_propdown(self, container, idx_list, auxinput): - if not(self.input is None): - rec_activation1 = self.filter_down(container.hidden,self.wdec,self.bdec) - if not(self.auxinput is None): - rec_activation2 = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\ - scanbiasdec(idx_list,auxinput,self.bauxdec) - - if not(self.input is None) and not(auxinput is None): - container.rec_activation = T.join(1,rec_activation1,rec_activation2) - else: - if not(self.input is None): - container.rec_activation = rec_activation1 - else: - container.rec_activation = rec_activation2 - - def filter_up(self, vis, w, b=None): - out = T.dot(vis, w) - return out + b if b else out - filter_down = filter_up - - # TODO: fix regularization type (outside parameter ?) 
- def define_regularization(self): - self.reg_coef = T.scalar('reg_coef') - if not(self.auxinput is None): - self.Maskup = scanmaskenc(self.idx_list,self.wauxenc) - self.Maskdown = scanmaskdec(self.idx_list,self.wauxdec) - if not(type(self.Maskup) is list): - self.Maskup = [self.Maskup] - if not(type(self.Maskdown) is list): - self.Maskdown = [self.Maskdown] - listweights = [] - listweightsenc = [] - if not(self.auxinput is None): - listweights += [w*m for w,m in zip(self.Maskup,self.wauxenc)] + [w*m for w,m in zip(self.Maskdown,self.wauxdec)] - listweightsenc += [w*m for w,m in zip(self.Maskup,self.wauxenc)] - if not(self.input is None): - listweights += [self.wenc,self.wdec] - listweightsenc += [self.wenc] - self.regularization = self.reg_coef * get_reg_cost(listweights,'l2') - self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2') - - - # DEPENDENCY: define_behavioural, define_regularization - def define_cost(self, container): - container.reconstruction_cost = self.reconstruction_costs(container.rec) - # TOTAL COST - container.cost = container.reconstruction_cost - if self.regularize: #if stacked don't merge regularization and cost here but in the stackeddaaig module - container.cost = container.cost + self.regularization - - # DEPENDENCY: define_cost - def define_params(self): - if not hasattr(self,'params'): - self.params = [] - self.params += [self.benc] - self.paramsenc = copy.copy(self.params) - if not(self.input is None): - self.params += [self.wenc] + [self.bdec] - self.paramsenc += [self.wenc] - if not(self.auxinput is None): - self.params += self.wauxenc + self.bauxdec - self.paramsenc += self.wauxenc - if not(self.tie_weights): - if not(self.input is None): - self.params += [self.wdec] - if not(self.auxinput is None): - self.params += self.wauxdec - - # DEPENDENCY: define_cost, define_gradients - def define_gradients(self): - self.gradients = T.grad(self.noise.cost, self.params) - self.updates = dict((p, p - self.lr * g) for p, g in \ - zip(self.params, self.gradients)) - - - # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients - def define_interface(self): - # declare function to interface with module (if not stacked) - if self.input is None: - listin = [self.idx_list, self.auxinput] - if self.auxinput is None: - listin = [self.input] - if not((self.input is None) or (self.auxinput is None)): - listin =[self.input,self.idx_list, self.auxinput] - self.update = theano.Method(listin, self.noise.cost, self.updates) - self.compute_cost = theano.Method(listin, self.noise.cost) - if not(self.input is None): - self.noisify = theano.Method(listin, self.noisy_input) - if not(self.auxinput is None): - self.auxnoisify = theano.Method(listin, self.noisy_auxinput) - self.reconstruction = theano.Method(listin, self.clean.rec) - self.representation = theano.Method(listin, self.clean.hidden) - self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec]) - - def corrupt_input(self): - return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input - - def reconstruction_costs(self, rec): - if self.input is None: - return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec) - if self.auxinput is None: - return self.reconstruction_cost_function(self.input, rec) - if not((self.input is None) or (self.auxinput is None)): - return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec) + if self.auxinput is not None: + 
container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) + else: + if self.auxinput is not None: + container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc + + # DEPENDENCY: define_propup + def define_propdown(self, container, idx_list, auxinput): + if not(self.input is None): + rec_activation1 = self.filter_down(container.hidden,self.wdec,self.bdec) + if not(self.auxinput is None): + rec_activation2 = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\ + scanbiasdec(idx_list,auxinput,self.bauxdec) + + if not(self.input is None) and not(auxinput is None): + container.rec_activation = T.join(1,rec_activation1,rec_activation2) + else: + if not(self.input is None): + container.rec_activation = rec_activation1 + else: + container.rec_activation = rec_activation2 + + def filter_up(self, vis, w, b=None): + out = T.dot(vis, w) + return out + b if b else out + filter_down = filter_up + + # TODO: fix regularization type (outside parameter ?) + def define_regularization(self): + self.reg_coef = T.scalar('reg_coef') + if not(self.auxinput is None): + self.Maskup = scanmaskenc(self.idx_list,self.wauxenc) + self.Maskdown = scanmaskdec(self.idx_list,self.wauxdec) + if not(type(self.Maskup) is list): + self.Maskup = [self.Maskup] + if not(type(self.Maskdown) is list): + self.Maskdown = [self.Maskdown] + listweights = [] + listweightsenc = [] + if not(self.auxinput is None): + listweights += [w*m for w,m in zip(self.Maskup,self.wauxenc)] + [w*m for w,m in zip(self.Maskdown,self.wauxdec)] + listweightsenc += [w*m for w,m in zip(self.Maskup,self.wauxenc)] + if not(self.input is None): + listweights += [self.wenc,self.wdec] + listweightsenc += [self.wenc] + self.regularization = self.reg_coef * get_reg_cost(listweights,'l2') + self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2') + + + # DEPENDENCY: define_behavioural, define_regularization + def define_cost(self, container): + container.reconstruction_cost = self.reconstruction_costs(container.rec) + # TOTAL COST + container.cost = container.reconstruction_cost + if self.regularize: #if stacked don't merge regularization and cost here but in the stackeddaaig module + container.cost = container.cost + self.regularization + + # DEPENDENCY: define_cost + def define_params(self): + if not hasattr(self,'params'): + self.params = [] + self.params += [self.benc] + self.paramsenc = copy.copy(self.params) + if not(self.input is None): + self.params += [self.wenc] + [self.bdec] + self.paramsenc += [self.wenc] + if not(self.auxinput is None): + self.params += self.wauxenc + self.bauxdec + self.paramsenc += self.wauxenc + if not(self.tie_weights): + if not(self.input is None): + self.params += [self.wdec] + if not(self.auxinput is None): + self.params += self.wauxdec + + # DEPENDENCY: define_cost, define_gradients + def define_gradients(self): + self.gradients = T.grad(self.noise.cost, self.params) + self.updates = dict((p, p - self.lr * g) for p, g in \ + zip(self.params, self.gradients)) + + + # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients + def define_interface(self): + # declare function to interface with module (if not stacked) + if self.input is None: + listin = [self.idx_list, self.auxinput] + if self.auxinput is None: + listin = [self.input] + if not((self.input is None) or (self.auxinput is None)): + listin =[self.input,self.idx_list, self.auxinput] + self.update = theano.Method(listin, self.noise.cost, self.updates) + self.compute_cost = 
theano.Method(listin, self.noise.cost) + if not(self.input is None): + self.noisify = theano.Method(listin, self.noisy_input) + if not(self.auxinput is None): + self.auxnoisify = theano.Method(listin, self.noisy_auxinput) + self.reconstruction = theano.Method(listin, self.clean.rec) + self.representation = theano.Method(listin, self.clean.hidden) + self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec]) + + def corrupt_input(self): + return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input + + def reconstruction_costs(self, rec): + if self.input is None: + return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec) + if self.auxinput is None: + return self.reconstruction_cost_function(self.input, rec) + if not((self.input is None) or (self.auxinput is None)): + return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)), rec) # All cases should be covered above. If not, something is wrong! assert False - - def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, - seed=1, alloc=True, **init): - super(DAAig, self)._instance_initialize(obj, **init) - - obj.reg_coef = reg_coef - obj.noise_level = noise_level - obj.noise_level_group = noise_level_group - if self. interface: - obj.lr = lr # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module) - else: - obj.lr = None - - obj.random.initialize() - if seed is not None: - obj.random.seed(seed) - self.R = numpy.random.RandomState(seed) - - obj.__hide__ = ['params'] - - if not(self.input is None): - self.inf = 1/numpy.sqrt(self.in_size) - if not(self.auxinput is None): - self.inf = 1/numpy.sqrt(sum(self.auxin_size)) - if not(self.auxinput is None or self.input is None): - self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size) - self.hif = 1/numpy.sqrt(self.n_hid) - - - if alloc: - if not(self.input is None): - wencshp = (self.in_size, self.n_hid) - wdecshp = tuple(reversed(wencshp)) - print 'wencshp = ', wencshp - print 'wdecshp = ', wdecshp - - obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) - if not self.tie_weights: - obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif) - obj.bdec = numpy.zeros(self.in_size) - - if not(self.auxinput is None): - wauxencshp = [(i, self.n_hid) for i in self.auxin_size] - wauxdecshp = [tuple(reversed(i)) for i in wauxencshp] - print 'wauxencshp = ', wauxencshp - print 'wauxdecshp = ', wauxdecshp - - obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] - if not self.tie_weights: - obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] - obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size] - - print 'self.inf = ', self.inf - print 'self.hif = ', self.hif - - obj.benc = numpy.zeros(self.n_hid) - + + def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, + seed=1, alloc=True, **init): + super(DAAig, self)._instance_initialize(obj, **init) + + obj.reg_coef = reg_coef + obj.noise_level = noise_level + obj.noise_level_group = noise_level_group + if self. 
interface: + obj.lr = lr # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module) + else: + obj.lr = None + + obj.random.initialize() + if seed is not None: + obj.random.seed(seed) + self.R = numpy.random.RandomState(seed) + + obj.__hide__ = ['params'] + + if not(self.input is None): + self.inf = 1/numpy.sqrt(self.in_size) + if not(self.auxinput is None): + self.inf = 1/numpy.sqrt(sum(self.auxin_size)) + if not(self.auxinput is None or self.input is None): + self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size) + self.hif = 1/numpy.sqrt(self.n_hid) + + + if alloc: + if not(self.input is None): + wencshp = (self.in_size, self.n_hid) + wdecshp = tuple(reversed(wencshp)) + print 'wencshp = ', wencshp + print 'wdecshp = ', wdecshp + + obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) + if not self.tie_weights: + obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif) + obj.bdec = numpy.zeros(self.in_size) + + if not(self.auxinput is None): + wauxencshp = [(i, self.n_hid) for i in self.auxin_size] + wauxdecshp = [tuple(reversed(i)) for i in wauxencshp] + print 'wauxencshp = ', wauxencshp + print 'wauxdecshp = ', wauxdecshp + + obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] + if not self.tie_weights: + obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] + obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size] + + print 'self.inf = ', self.inf + print 'self.hif = ', self.hif + + obj.benc = numpy.zeros(self.n_hid) + #----------------------------------------------------------------------------------------------------------------------- class StackedDAAig(module.Module): - def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None], - in_size = None, auxin_size = [None], n_hid = [1], - regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', - reconstruction_cost_function=cost.cross_entropy, - n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init): - - super(StackedDAAig, self).__init__() - print '\t**** StackedDAAig.__init__ ****' - print '\tinput = ', input - print '\tauxinput = ', auxinput - print '\tin_size = ', in_size - print '\tauxin_size = ', auxin_size - print '\tn_hid = ', n_hid - - # save parameters - self.depth = depth - self.input = input - self.auxinput = auxinput - self.in_size = in_size - auxin_size = auxin_size - self.n_hid = n_hid - self.regularize = regularize - self.tie_weights = tie_weights - self.hid_fn = hid_fn - self.reconstruction_cost_function = reconstruction_cost_function - self.n_out = n_out - self.target = target if not(target is None) else T.lvector('target') - self.debugmethod = debugmethod - self.totalupdatebool = totalupdatebool - - # init for model construction - inputprec = input - in_sizeprec = in_size - self.daaig = [None] * (self.depth+1) - - #hyper parameters - self.unsup_lr = T.dscalar('unsup_lr') - self.sup_lr = T.dscalar('sup_lr') - - # updatemethods - self.localupdate = [None] * (self.depth+1) #update only on the layer parameters - self.globalupdate = [None] * (self.depth+1)#update wrt the layer cost backproped untill the input layer - if self.totalupdatebool: - self.totalupdate = [None] * (self.depth+1) #update wrt all the layers cost backproped untill the input layer - # - self.classify = None - - #others methods - if self.debugmethod: - self.representation = [None] * (self.depth) - self.reconstruction = [None] * (self.depth) - self.validate = [None] * 
(self.depth) - self.noisyinputs = [None] * (self.depth) - self.compute_localcost = [None] * (self.depth+1) - self.compute_localgradients = [None] * (self.depth+1) - self.compute_globalcost = [None] * (self.depth+1) - self.compute_globalgradients = [None] * (self.depth+1) - if self.totalupdatebool: - self.compute_totalcost = [None] * (self.depth+1) - self.compute_totalgradients = [None] * (self.depth+1) - # - - # some theano Variables we want to keep track on - if self.regularize: - self.regularizationenccost = [None] * (self.depth) - self.localcost = [None] * (self.depth+1) - self.localgradients = [None] * (self.depth+1) - self.globalcost = [None] * (self.depth+1) - self.globalgradients = [None] * (self.depth+1) - if self.totalupdatebool: - self.totalcost = [None] * (self.depth+1) - self.totalgradients = [None] * (self.depth+1) - - #params to update and inputs initialization - paramstot = [] - paramsenc = [] - self.inputs = [None] * (self.depth+1) - - if not(self.input is None): - self.inputs[0] = [self.input] - else: - self.inputs[0] = [] - - offset = 0 - for i in range(self.depth): - - if auxin_size[i] is None: - offset +=1 - param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\ - False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] - else: - param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\ - False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] - - print '\tLayer init= ', i+1 - self.daaig[i] = DAAig(*param) - - # method input, outputs and parameters update - if i: - self.inputs[i] = copy.copy(self.inputs[i-1]) - if not(auxin_size[i] is None): - self.inputs[i] += [self.daaig[i].idx_list,self.auxinput[i-offset]] - - noisyout = [] - if not(inputprec is None): - noisyout += [self.daaig[i].noisy_input] - if not(auxin_size[i] is None): - noisyout += [self.daaig[i].noisy_auxinput] - - paramstot += self.daaig[i].params - - # save the costs - self.localcost[i] = self.daaig[i].noise.cost - self.globalcost[i] = self.daaig[i].noise.cost - if self.totalupdatebool: - if i: - self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost - else: - self.totalcost[i] = self.daaig[i].noise.cost - - if self.regularize: - if i: - self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc - else: - self.regularizationenccost[i] = 0 - - self.localcost[i] += self.daaig[i].regularization - self.globalcost[i] += self.regularizationenccost[i] - if self.totalupdatebool: - self.totalcost[i] += self.daaig[i].regularization - - self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params) - self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc) - if self.totalupdatebool: - self.totalgradients[i] = T.grad(self.totalcost[i], paramstot) - - #create the updates dictionnaries - local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) - global_grads = dict((j, j - self.unsup_lr * g)\ - for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i])) - if self.totalupdatebool: - total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i])) - - # method declaration - self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads) - self.globalupdate[i] = theano.Method(self.inputs[i],self.globalcost[i],global_grads) - if self.totalupdatebool: - self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads) - # - 
if self.debugmethod: - self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden) - self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec) - self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec]) - self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout) - self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i]) - self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i]) - self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i]) - self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i]) - if self.totalupdatebool: - self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i]) - self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i]) - # - - paramsenc += self.daaig[i].paramsenc - inputprec = self.daaig[i].clean.hidden - in_sizeprec = self.n_hid[i] - - # supervised layer - print '\tLayer supervised init' - self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target] - self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target) - paramstot += self.daaig[-1].params - - if self.regularize: - self.localcost[-1] = self.daaig[-1].regularized_cost - self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] - else: - self.localcost[-1] = self.daaig[-1].unregularized_cost - self.globalcost[-1] = self.daaig[-1].unregularized_cost - - if self.totalupdatebool: - self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]] - - self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params) - self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc) - if self.totalupdatebool: - self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\ - T.grad(self.globalcost[-1], paramstot) ] - - local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) - global_grads = dict((j, j - self.unsup_lr * g)\ - for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1])) - if self.totalupdatebool: - total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\ - for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1])) - - self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads) - self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads) - if self.totalupdatebool: - self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads) - self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone) - - if self.debugmethod: - self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1]) - self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1]) - self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1]) - self.compute_globalgradients[-1] = theano.Method(self.inputs[-1],self.globalgradients[-1]) - if self.totalupdatebool: - self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1]) - self.compute_totalgradients[-1] =\ - theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1]) - - def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0, - noise_level = 0 , noise_level_group = 0, seed = 1, alloc = True,**init): - super(StackedDAAig, self)._instance_initialize(inst, **init) - - 
inst.unsup_lr = unsup_lr - inst.sup_lr = sup_lr - - for i in range(self.depth): - print '\tLayer = ', i+1 - inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\ - noise_level_group = noise_level_group, seed = seed, alloc = alloc) - print '\tLayer supervised' - inst.daaig[-1].initialize() - inst.daaig[-1].l1 = 0 - inst.daaig[-1].l2 = reg_coef #only l2 norm for regularisation to be consitent with the unsup regularisation + def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None], + in_size = None, auxin_size = [None], n_hid = [1], + regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', + reconstruction_cost_function=cost.cross_entropy, + n_out = 2, target = None, debugmethod = False, totalupdatebool=False, **init): + + super(StackedDAAig, self).__init__() + print '\t**** StackedDAAig.__init__ ****' + print '\tinput = ', input + print '\tauxinput = ', auxinput + print '\tin_size = ', in_size + print '\tauxin_size = ', auxin_size + print '\tn_hid = ', n_hid + + # save parameters + self.depth = depth + self.input = input + self.auxinput = auxinput + self.in_size = in_size + auxin_size = auxin_size + self.n_hid = n_hid + self.regularize = regularize + self.tie_weights = tie_weights + self.hid_fn = hid_fn + self.reconstruction_cost_function = reconstruction_cost_function + self.n_out = n_out + self.target = target if not(target is None) else T.lvector('target') + self.debugmethod = debugmethod + self.totalupdatebool = totalupdatebool + + # init for model construction + inputprec = input + in_sizeprec = in_size + self.daaig = [None] * (self.depth+1) + + #hyper parameters + self.unsup_lr = T.dscalar('unsup_lr') + self.sup_lr = T.dscalar('sup_lr') + + # updatemethods + self.localupdate = [None] * (self.depth+1) #update only on the layer parameters + self.globalupdate = [None] * (self.depth+1)#update wrt the layer cost backproped untill the input layer + if self.totalupdatebool: + self.totalupdate = [None] * (self.depth+1) #update wrt all the layers cost backproped untill the input layer + # + self.classify = None + + #others methods + if self.debugmethod: + self.representation = [None] * (self.depth) + self.reconstruction = [None] * (self.depth) + self.validate = [None] * (self.depth) + self.noisyinputs = [None] * (self.depth) + self.compute_localcost = [None] * (self.depth+1) + self.compute_localgradients = [None] * (self.depth+1) + self.compute_globalcost = [None] * (self.depth+1) + self.compute_globalgradients = [None] * (self.depth+1) + if self.totalupdatebool: + self.compute_totalcost = [None] * (self.depth+1) + self.compute_totalgradients = [None] * (self.depth+1) + # + + # some theano Variables we want to keep track on + if self.regularize: + self.regularizationenccost = [None] * (self.depth) + self.localcost = [None] * (self.depth+1) + self.localgradients = [None] * (self.depth+1) + self.globalcost = [None] * (self.depth+1) + self.globalgradients = [None] * (self.depth+1) + if self.totalupdatebool: + self.totalcost = [None] * (self.depth+1) + self.totalgradients = [None] * (self.depth+1) + + #params to update and inputs initialization + paramstot = [] + paramsenc = [] + self.inputs = [None] * (self.depth+1) + + if not(self.input is None): + self.inputs[0] = [self.input] + else: + self.inputs[0] = [] + + offset = 0 + for i in range(self.depth): + + if auxin_size[i] is None: + offset +=1 + param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\ + False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] + 
else: + param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\ + False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] + + print '\tLayer init= ', i+1 + self.daaig[i] = DAAig(*param) + + # method input, outputs and parameters update + if i: + self.inputs[i] = copy.copy(self.inputs[i-1]) + if not(auxin_size[i] is None): + self.inputs[i] += [self.daaig[i].idx_list,self.auxinput[i-offset]] + + noisyout = [] + if not(inputprec is None): + noisyout += [self.daaig[i].noisy_input] + if not(auxin_size[i] is None): + noisyout += [self.daaig[i].noisy_auxinput] + + paramstot += self.daaig[i].params + + # save the costs + self.localcost[i] = self.daaig[i].noise.cost + self.globalcost[i] = self.daaig[i].noise.cost + if self.totalupdatebool: + if i: + self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost + else: + self.totalcost[i] = self.daaig[i].noise.cost + + if self.regularize: + if i: + self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc + else: + self.regularizationenccost[i] = 0 + + self.localcost[i] += self.daaig[i].regularization + self.globalcost[i] += self.regularizationenccost[i] + if self.totalupdatebool: + self.totalcost[i] += self.daaig[i].regularization + + self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params) + self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc) + if self.totalupdatebool: + self.totalgradients[i] = T.grad(self.totalcost[i], paramstot) + + #create the updates dictionnaries + local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) + global_grads = dict((j, j - self.unsup_lr * g)\ + for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i])) + if self.totalupdatebool: + total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i])) + + # method declaration + self.localupdate[i] = theano.Method(self.inputs[i],self.localcost[i],local_grads) + self.globalupdate[i] = theano.Method(self.inputs[i],self.globalcost[i],global_grads) + if self.totalupdatebool: + self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads) + # + if self.debugmethod: + self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden) + self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec) + self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec]) + self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout) + self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i]) + self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i]) + self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i]) + self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i]) + if self.totalupdatebool: + self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i]) + self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i]) + # + + paramsenc += self.daaig[i].paramsenc + inputprec = self.daaig[i].clean.hidden + in_sizeprec = self.n_hid[i] + + # supervised layer + print '\tLayer supervised init' + self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target] + self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target) + paramstot += self.daaig[-1].params + + if self.regularize: + self.localcost[-1] = 
self.daaig[-1].regularized_cost + self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] + else: + self.localcost[-1] = self.daaig[-1].unregularized_cost + self.globalcost[-1] = self.daaig[-1].unregularized_cost + + if self.totalupdatebool: + self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]] + + self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params) + self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc) + if self.totalupdatebool: + self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\ + T.grad(self.globalcost[-1], paramstot) ] + + local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) + global_grads = dict((j, j - self.unsup_lr * g)\ + for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1])) + if self.totalupdatebool: + total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\ + for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1])) + + self.localupdate[-1] = theano.Method(self.inputs[-1],self.localcost[-1],local_grads) + self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads) + if self.totalupdatebool: + self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads) + self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone) + + if self.debugmethod: + self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1]) + self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1]) + self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1]) + self.compute_globalgradients[-1] = theano.Method(self.inputs[-1],self.globalgradients[-1]) + if self.totalupdatebool: + self.compute_totalcost[-1] = theano.Method(self.inputs[-1],self.totalcost[-1]) + self.compute_totalgradients[-1] =\ + theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1]) + + def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0, + noise_level = 0 , noise_level_group = 0, seed = 1, alloc = True,**init): + super(StackedDAAig, self)._instance_initialize(inst, **init) + + inst.unsup_lr = unsup_lr + inst.sup_lr = sup_lr + + for i in range(self.depth): + print '\tLayer = ', i+1 + inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\ + noise_level_group = noise_level_group, seed = seed, alloc = alloc) + print '\tLayer supervised' + inst.daaig[-1].initialize() + inst.daaig[-1].l1 = 0 + inst.daaig[-1].l2 = reg_coef #only l2 norm for regularisation to be consitent with the unsup regularisation
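
Note for review: the hunk above is whitespace-only (tabs replaced by 4 spaces), so the removed and added blocks are identical up to indentation. For readers skimming the patch, the per-layer computation that DAAig defines is: corrupt the input with a binomial keep-mask (corrupt_input), encode with sigmoid(x.wenc + benc) (define_propup), decode through the optionally tied decoder weights (define_propdown), score the reconstruction with cross-entropy (reconstruction_costs), and take one SGD step p <- p - lr*g on the noisy-branch cost (define_gradients). Below is a minimal NumPy sketch of that loop; it is an illustration only (plain NumPy instead of the Theano module machinery, hand-written gradients for the tied-weight sigmoid/cross-entropy case, toy sizes and made-up data), not code from the patch.

    import numpy as np

    rng = np.random.RandomState(1)

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def cross_entropy(target, rec, eps=1e-12):
        # one cost per row (as the docstring asks of reconstruction_cost),
        # averaged over the minibatch here to get a scalar training cost
        per_example = -np.sum(target * np.log(rec + eps)
                              + (1.0 - target) * np.log(1.0 - rec + eps), axis=1)
        return per_example.mean()

    in_size, n_hid, noise_level, lr = 20, 10, 0.3, 0.1

    # _instance_initialize: wenc ~ U(-1/sqrt(in_size), 1/sqrt(in_size)), zero biases;
    # tie_weights=True, so the decoder weight is wenc.T and not a separate parameter
    inf = 1.0 / np.sqrt(in_size)
    wenc = rng.uniform(-inf, inf, size=(in_size, n_hid))
    benc = np.zeros(n_hid)
    bdec = np.zeros(in_size)

    x = rng.binomial(1, 0.5, size=(5, in_size)).astype(float)   # toy binary minibatch

    # corrupt_input: keep each unit with probability 1 - noise_level
    noisy_x = rng.binomial(1, 1.0 - noise_level, size=x.shape) * x

    # define_propup / define_propdown on the noisy branch (filter_up / filter_down)
    hidden = sigmoid(np.dot(noisy_x, wenc) + benc)
    rec = sigmoid(np.dot(hidden, wenc.T) + bdec)

    cost_value = cross_entropy(x, rec)   # noisy-branch reconstruction cost

    # define_gradients: one SGD step, p <- p - lr * dcost/dp, with the gradients of
    # the sigmoid + cross-entropy pair written out by hand for the tied-weight case
    n = x.shape[0]
    delta_rec = (rec - x) / n                                   # d cost / d rec_activation
    delta_hid = np.dot(delta_rec, wenc) * hidden * (1.0 - hidden)
    grad_wenc = np.dot(noisy_x.T, delta_hid) + np.dot(delta_rec.T, hidden)
    wenc -= lr * grad_wenc
    benc -= lr * delta_hid.sum(axis=0)
    bdec -= lr * delta_rec.sum(axis=0)

In the stacked module, each layer's clean.hidden becomes the input of the next layer and a LogRegN layer on top provides the supervised cost; the sketch above covers a single unsupervised layer only.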