# HG changeset patch # User James Bergstra # Date 1252593050 14400 # Node ID bfc5914642ce8742936b9d82a2a384d997430884 # Parent e53c06901f8f864925ac89f75fa10cb515ed013b# Parent f1a29c772210ba7c35b96c1efbbe950b28fd77f0 merge diff -r e53c06901f8f -r bfc5914642ce .hgignore diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/aa.py --- a/pylearn/algorithms/aa.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/aa.py Thu Sep 10 10:30:50 2009 -0400 @@ -16,19 +16,19 @@ # ACQUIRE/MAKE INPUT if not input: input = T.matrix('input') - self.input = theano.External(input) + self.input = input # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) + self.lr = T.scalar() # PARAMETERS - self.w1 = theano.Member(T.matrix()) + self.w1 = T.matrix() if not tie_weights: - self.w2 = theano.Member(T.matrix()) + self.w2 = T.matrix() else: self.w2 = self.w1.T - self.b1 = theano.Member(T.vector()) - self.b2 = theano.Member(T.vector()) + self.b1 = T.vector() + self.b2 = T.vector() # HIDDEN LAYER self.hidden_activation = T.dot(input, self.w1) + self.b1 @@ -97,7 +97,7 @@ return T.sum(self.reconstruction_costs) def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) + self.l2_coef = T.scalar() if self.tie_weights: return self.l2_coef * T.sum(self.w1 * self.w1) else: diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/daa.py --- a/pylearn/algorithms/daa.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/daa.py Thu Sep 10 10:30:50 2009 -0400 @@ -49,19 +49,19 @@ # ACQUIRE/MAKE INPUT if not input: input = T.matrix('input') - self.input = theano.External(input) + self.input = input # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) + self.lr = T.scalar() # PARAMETERS - self.w1 = theano.Member(T.matrix()) + self.w1 = T.matrix() if not tie_weights: - self.w2 = theano.Member(T.matrix()) + self.w2 = T.matrix() else: self.w2 = self.w1.T - self.b1 = theano.Member(T.vector()) - self.b2 = theano.Member(T.vector()) + self.b1 = T.vector() + self.b2 = T.vector() # REGULARIZATION COST @@ -162,7 +162,7 @@ """ def build_corrupted_input(self): - self.noise_level = theano.Member(T.scalar()) + self.noise_level = T.scalar() return self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) * self.input def hid_activation_function(self, activation): @@ -175,7 +175,7 @@ return self.reconstruction_cost_function(self.input, output) def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) + self.l2_coef = T.scalar() if self.tie_weights: return self.l2_coef * T.sum(self.w1 * self.w1) else: diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/logistic_regression.py --- a/pylearn/algorithms/logistic_regression.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/logistic_regression.py Thu Sep 10 10:30:50 2009 -0400 @@ -33,18 +33,57 @@ self.n_in = n_in self.n_out = n_out - self.input = input if input is not None else T.matrix() - self.target = target if target is not None else T.lvector() + if input is not None: + self.input = input + else: + self.input = T.matrix() - self.w = w if w is not None else (T.dmatrix()) - self.b = b if b is not None else (T.dvector()) + if target is not None: + self.target = target + else: + self.target = T.lvector() + + #backport + #self.input = input if input is not None else T.matrix() + #self.target = target if target is not None else T.lvector() + + if w is not None: + self.w = w + else: + self.w = (T.dmatrix()) + if b is not None: + self.b = b + else: + self.b = (T.dvector()) + + #backport + #self.w = w if w is not None else 
(T.dmatrix()) + #self.b = b if b is not None else (T.dvector()) + + self.params = [] + for p in [self.w, self.b]: + if p.owner is None: + self.params += [p] + + #backport #the params of the model are the ones we fit to the data - self.params = [p for p in [self.w, self.b] if p.owner is None] + #self.params = [p for p in [self.w, self.b] if p.owner is None] + if l2 is not None: + self.l2 = l2 + else: + self.l2 = (T.dscalar()) + + if l1 is not None: + self.l1 = l1 + else: + self.l1 = (T.dscalar()) + + #backport #the hyper-parameters of the model are not fit to the data - self.l2 = l2 if l2 is not None else (T.dscalar()) - self.l1 = l1 if l1 is not None else (T.dscalar()) + #self.l2 = l2 if l2 is not None else (T.dscalar()) + #self.l1 = l1 if l1 is not None else (T.dscalar()) #here we actually build the model self.linear_output = T.dot(self.input, self.w) + self.b @@ -163,14 +202,46 @@ def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False): super(LogReg2, self).__init__() #boilerplate - self.input = (input) if input is not None else T.matrix('input') - self.targ = (targ) if targ is not None else T.lcol() + if input is not None: + self.input = (input) + else: + self.input = T.matrix('input') + + if targ is not None: + self.targ = (targ) + else: + self.targ = T.lcol() + + #self.input = (input) if input is not None else T.matrix('input') + #self.targ = (targ) if targ is not None else T.lcol() + + if w is not None: + self.w = (w) + else: + self.w = (T.dmatrix()) - self.w = (w) if w is not None else (T.dmatrix()) - self.b = (b) if b is not None else (T.dvector()) - self.lr = (lr) if lr is not None else (T.dscalar()) + if b is not None: + self.b = (b) + else: + self.b = (T.dvector()) + + if lr is not None: + self.lr = (lr) + else: + self.lr = (T.scalar()) - self.params = [p for p in [self.w, self.b] if p.owner is None] + #backport + #self.w = (w) if w is not None else (T.dmatrix()) + #self.b = (b) if b is not None else (T.dvector()) + #self.lr = (lr) if lr is not None else (T.dscalar()) + + self.params = [] + for p in [self.w, self.b]: + if p.owner is None: + self.params += [p] + + #backport + #self.params = [p for p in [self.w, self.b] if p.owner is None] output = nnet.sigmoid(T.dot(self.x, self.w) + self.b) xent = -self.targ * T.log(output) - (1.0 - self.targ) * T.log(1.0 - output) @@ -251,11 +322,23 @@ def __init__(self, n_in=None, n_out=None, w=None, b=None): super(LogRegNew, self).__init__() #boilerplate + if w is not None: + self.w = w + else: + self.w = (T.dmatrix()) + + if b is not None: + self.b = b + else: + self.b = (T.dvector()) + + self.n_in = n_in self.n_out = n_out - self.w = w if w is not None else (T.dmatrix()) - self.b = b if b is not None else (T.dvector()) + #backport + #self.w = w if w is not None else (T.dmatrix()) + #self.b = b if b is not None else (T.dvector()) def _instance_initialize(self, obj): obj.w = N.zeros((self.n_in, self.n_out)) diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/rbm.py --- a/pylearn/algorithms/rbm.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/rbm.py Thu Sep 10 10:30:50 2009 -0400 @@ -29,9 +29,9 @@ # symbolic theano stuff # what about multidimensional inputs/outputs ? do they have to be # flattened or should we used tensors instead ? 
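# The rewrite below follows the same pattern as aa.py and daa.py above: the old
# theano Module framework wrapped symbolic variables in module.Member(...) /
# theano.External(...), while the updated code builds the bare tensor variables
# directly. A minimal before/after sketch (assuming `from theano import tensor as T`):
#     old:  self.w  = module.Member(T.dmatrix())
#     new:  self.w  = T.dmatrix()
#     old:  self.lr = theano.Member(T.scalar())
#     new:  self.lr = T.scalar()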
- self.w = w if w is not None else module.Member(T.dmatrix()) - self.visb = visb if visb is not None else module.Member(T.dvector()) - self.hidb = hidb if hidb is not None else module.Member(T.dvector()) + self.w = w if w is not None else T.dmatrix() + self.visb = visb if visb is not None else T.dvector() + self.hidb = hidb if hidb is not None else T.dvector() self.seed = seed; # 1-step Markov chain diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/regressor.py --- a/pylearn/algorithms/regressor.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/regressor.py Thu Sep 10 10:30:50 2009 -0400 @@ -13,15 +13,25 @@ self.regularize = regularize # ACQUIRE/MAKE INPUT AND TARGET - self.input = theano.External(input) if input else T.matrix('input') - self.target = theano.External(target) if target else T.matrix('target') + if input: + self.input = input + else: + self.target = target + + if target: + self.target = target + else: + self.target = T.dmatrix('target') + #backport + #self.input = input if input else T.matrix('input') + #self.target = target if target else T.matrix('target') # HYPER-PARAMETERS - self.lr = theano.Member(T.scalar()) + self.lr = T.scalar() # PARAMETERS - self.w = theano.Member(T.matrix()) - self.b = theano.Member(T.vector()) + self.w = T.matrix() + self.b = T.vector() # OUTPUT self.output_activation = T.dot(self.input, self.w) + self.b @@ -96,7 +106,7 @@ return T.mean(self.regression_costs) def build_regularization(self): - self.l2_coef = theano.Member(T.scalar()) + self.l2_coef = T.scalar() return self.l2_coef * T.sum(self.w * self.w) def _instance_initialize(self, obj, input_size = None, output_size = 1, seed = None, **init): diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/rnn.py --- a/pylearn/algorithms/rnn.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/rnn.py Thu Sep 10 10:30:50 2009 -0400 @@ -1,6 +1,6 @@ #!/usr/bin/env python import numpy as N -from theano import Op, Apply, tensor as T, Module, Member, Method, Mode, compile +from theano import Op, Apply, tensor as T, Module, Method, Mode, compile from theano.gof import OpSub, TopoOptimizer from minimizer import make_minimizer # minimizer @@ -121,15 +121,15 @@ self.n_out = n_out #affine transformatoin x -> latent space - self.v, self.b = Member(T.dmatrix()), Member(T.dvector()) + self.v, self.b = T.dmatrix(), T.dvector() input_transform = affine(self.v, self.b) #recurrent weight matrix in latent space - self.z0 = Member(T.dvector()) - self.w = Member(T.dmatrix()) + self.z0 = T.dvector() + self.w = T.dmatrix() #affine transformation latent -> output space - self.u, self.c = Member(T.dmatrix()), Member(T.dvector()) + self.u, self.c = T.dmatrix(), T.dvector() output_transform = affine(self.u, self.c) self.params = [self.v, self.b, self.w, self.u, self.c] diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/sandbox/DAA_inputs_groups.py --- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py Thu Sep 10 10:30:50 2009 -0400 @@ -6,20 +6,77 @@ from theano.compile import module from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \ - scanmaskenc,scanmaskdec, FillMissing, mask_gradient + scanmaskenc,scanmaskdec, FillMissing, mask_gradient, blockgrad from pylearn.algorithms.logistic_regression import LogRegN +import pylearn.algorithms.cost -# used to initialize containers -class ScratchPad: +import time + +from pylearn.io import filetensor +import os + +# saving 
loading utils-------------------------------------------- +def save_mat(fname, mat, save_dir=''): + assert isinstance(mat, numpy.ndarray) + print 'save ndarray to file: ', save_dir + fname + file_handle = open(os.path.join(save_dir, fname), 'w') + filetensor.write(file_handle, mat) + writebool = False + while not writebool: + try: + file_handle.close() + writebool = True + except: + print 'save model error' + time.sleep((numpy.random.randint(10)+2)*10) + +def load_mat(fname, save_dir=''): + print 'loading ndarray from file: ', save_dir + fname + file_handle = open(os.path.join(save_dir,fname), 'r') + rval = filetensor.read(file_handle) + file_handle.close() + return rval + +# Weight initialisation utils-------------------------------------- + +# time consuming but just a test (not conclusive) +def orthogonalinit(W,axis=1): + nb = W.shape[axis] + bn = W.shape[0] if axis is 1 else W.shape[1] + if axis == 0: + W=W.T + Worto = copy.copy(W) + offset=0 + tmp=[] + for i in range(nb): + if i==bn: + offset=offset+bn + if i-offset != 0: + for j in xrange(offset,i): + orthoproj = (Worto[:,i]*Worto[:,j]).sum()*Worto[:,j]/(Worto[:,j]*Worto[:,j]).sum() + orthoproj.shape=(bn,1) + Worto[:,i:i+1] = Worto[:,i:i+1] - orthoproj + Worto[:,i:i+1] = Worto[:,i:i+1] / \ + numpy.sqrt((Worto[:,i:i+1]*Worto[:,i:i+1]).sum(0)) * numpy.sqrt((W[:,i:i+1]*W[:,i:i+1]).sum(0)) + return Worto if axis == 1 else Worto.T + +# @todo +def PCAinit(data,nhid): + pass + +#----------------------------------------------------------------- + +# Initialize containers: +class CreateContainer: pass # regularisation utils:------------------------------------------- def lnorm(param, type='l2'): if type == 'l1': - return T.sum(T.abs(param)) + return T.sum(T.abs_(param)) if type == 'l2': - return T.sum(T.pow(param,2)) + return T.sum(param*param) raise NotImplementedError('Only l1 and l2 regularization are currently implemented') def get_reg_cost(params, type): @@ -32,52 +89,83 @@ def sigmoid_act(x): return theano.tensor.nnet.sigmoid(x) +#tanh is scaled by 2 to have the same gradient than sigmoid [sigmoid(x)=(tanh(x/2.0)+1)/2.0] def tanh_act(x): + return theano.tensor.tanh(x/2.0) + +#divide per 2 is a bad idea with many layers... 
we lose the std of U*x +def tanh2_act(x): return theano.tensor.tanh(x) +def softsign_act(x): + return x/(1.0 + T.abs_(x)) + # costs utils:--------------------------------------------------- - # in order to fix numerical instability of the cost and gradient calculation for the cross entropy we calculate it # with the following functions direclty from the activation: +# XS is used to get back the KL divergence, important for doing global updates def sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis): - XE =-target * T.log(1 + T.exp(-output_act)) + (1 - target) * (- T.log(1 + T.exp(output_act))) - return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis) + XE = target * (- T.log(1 + T.exp(-output_act))) + (1 - target) * (- T.log(1 + T.exp(output_act))) + XS = T.xlogx.xlogx(target) + T.xlogx.xlogx(1-target) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) def tanh_cross_entropy(target, output_act, mean_axis, sum_axis): - XE =-(target+1)/2.0 * T.log(1 + T.exp(-2 * output_act)) + \ - (1 - (target+1)/2.0) * (- T.log(1 + T.exp(2 * output_act))) - return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis) + XE = (target+1)/2.0 * (- T.log(1 + T.exp(- output_act))) + \ + (1 - (target+1)/2.0) * (- T.log(1 + T.exp(output_act))) + XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) + +def tanh2_cross_entropy(target, output_act, mean_axis, sum_axis): + XE = (target+1)/2.0 * (- T.log(1 + T.exp(- 2*output_act))) + \ + (1 - (target+1)/2.0) * (- T.log(1 + T.exp( 2*output_act))) + XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) + +def softsign_cross_entropy(target, output_act, mean_axis, sum_axis): + newact = ((output_act/(1.0 + T.abs_(output_act)))+1)/2.0 + XE = (target+1)/2.0 * T.log(newact) + (1 - (target+1)/2.0) * T.log(1 - newact) + XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0) + return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis) def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1): if act == 'sigmoid_act': return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis) if act == 'tanh_act': return tanh_cross_entropy(target, output_act, mean_axis, sum_axis) + if act == 'softsign_act': + return softsign_cross_entropy(target, output_act, mean_axis, sum_axis) + if act == 'tanh2_act': + return tanh2_cross_entropy(target, output_act, mean_axis, sum_axis) assert False -def quadratic(target, output, act, axis = 1): - return pylearn.algorithms.cost.quadratic(target, output, axis) - - +def quadratic(target, output, act, mean_axis = 0): + return T.sum(pylearn.algorithms.cost.quadratic(target, output, mean_axis)) # DAAig module---------------------------------------------------------------- class DAAig(module.Module): - """De-noising Auto-encoder + """De-noising Auto-encoder with inputs groups and missing values """ def __init__(self, input = None, auxinput = None, in_size=None, auxin_size= None, n_hid=1, - regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', - reconstruction_cost_function='cross_entropy', interface = True, - ignore_missing=None, reconstruct_missing=False, - corruption_pattern=None, - **init): + regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act', + rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy', + interface = True, ignore_missing=None, reconstruct_missing=False, + corruption_pattern=None, blockgrad = False, 
**init): """ + :param input: WRITEME + :param auxinput: WRITEME + :param in_size: WRITEME + :param auxin_size: WRITEME + :param n_hid: WRITEME :param regularize: WRITEME :param tie_weights: WRITEME :param hid_fn: WRITEME - :param reconstruction_cost: Should return one cost per example (row) + :param rec_fn: WRITEME + :param reconstruction_cost_function: WRITEME + :param scale_cost: WRITEME + :param interface: WRITEME :param ignore_missing: if not None, the input will be scanned in order to detect missing values, and these values will be replaced. Also, the reconstruction cost's gradient will be computed only on non @@ -92,7 +180,7 @@ in the current implementation, auxiliary inputs cannot be used when this option is True. :param corruption_pattern: if not None, may specify a particular way to - corrupt the input with missing values. Valid choices are: + corrupt the input with missing values. Valid choices are: - 'by_pair': consider that features are given as pairs, and corrupt (or not) the whole pair instead of considering them independently. Elements in a pair are not consecutive, instead they are assumed to @@ -101,13 +189,6 @@ missing inputs will be backpropagated. Otherwise, it will not. :todo: Default noise level for all daa levels """ - print '\t\t**** DAAig.__init__ ****' - print '\t\tinput = ', input - print '\t\tauxinput = ', auxinput - print '\t\tin_size = ', in_size - print '\t\tauxin_size = ', auxin_size - print '\t\tn_hid = ', n_hid - super(DAAig, self).__init__() self.random = T.RandomStreams() @@ -117,20 +198,37 @@ self.n_hid = n_hid self.regularize = regularize self.tie_weights = tie_weights + self.tie_weights_aux = tie_weights_aux if tie_weights_aux is not None else tie_weights self.interface = interface self.ignore_missing = ignore_missing self.reconstruct_missing = reconstruct_missing self.corruption_pattern = corruption_pattern - + self.blockgrad = blockgrad - assert hid_fn in ('sigmoid_act','tanh_act') + assert hid_fn in ('sigmoid_act','tanh_act','softsign_act','tanh2_act') self.hid_fn = eval(hid_fn) - self.hid_name = hid_fn + + assert rec_fn in ('sigmoid_act','tanh_act','softsign_act','tanh2_act') + self.rec_fn = eval(rec_fn) + self.rec_name = rec_fn assert reconstruction_cost_function in ('cross_entropy','quadratic') self.reconstruction_cost_function = eval(reconstruction_cost_function) self.reconstruction_cost_function_name = reconstruction_cost_function + print '\t\t**** DAAig.__init__ ****' + print '\t\tinput = ', input + print '\t\tauxinput = ', auxinput + print '\t\tin_size = ', self.in_size + print '\t\tauxin_size = ', self.auxin_size + print '\t\tn_hid = ', self.n_hid + print '\t\tregularize = ', self.regularize + print '\t\ttie_weights = ', self.tie_weights + print '\t\ttie_weights_aux = ', self.tie_weights_aux + print '\t\thid_fn = ', hid_fn + print '\t\trec_fn = ', rec_fn + print '\t\treconstruction_cost_function = ', reconstruction_cost_function + ### DECLARE MODEL VARIABLES and default self.input = input if self.ignore_missing is not None and self.input is not None: @@ -139,10 +237,11 @@ self.input_missing_mask = no_missing[1] # Missingness pattern. 
else: self.input_missing_mask = None - self.noisy_input = None + self.auxinput = auxinput self.idx_list = T.ivector('idx_list') if self.auxinput is not None else None - self.noisy_idx_list, self.noisy_auxinput = None, None + + self.noisy_input, self.noisy_idx_list, self.noisy_auxinput = None , None, None #parameters self.benc = T.dvector('benc') @@ -153,7 +252,8 @@ if self.auxinput is not None: self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))] - self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))] + self.wauxdec =[ self.wauxenc[i].T if self.tie_weights_aux else T.dmatrix('wauxdec%s'%i) for i in\ + range(len(auxin_size))] self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))] #hyper-parameters @@ -161,8 +261,10 @@ self.lr = T.scalar('lr') self.noise_level = T.scalar('noise_level') self.noise_level_group = T.scalar('noise_level_group') + self.scale_cost_in = T.scalar('scale_cost_in') + self.scale_cost_aux = T.scalar('scale_cost_aux') - # leave the chance for subclasses to initialize + # leave the chance for subclasses to initialize (example convolutionnal to implement) if self.__class__ == DAAig: self.init_behavioural() print '\t\t**** end DAAig.__init__ ****' @@ -173,66 +275,76 @@ self.noisy_input = self.corrupt_input() if self.auxinput is not None: self.noisy_idx_list , self.noisy_auxinput = \ - scannoise(self.idx_list, self.auxinput,self.noise_level, - self.noise_level_group) + scannoise(self.idx_list, self.auxinput,self.noise_level, self.noise_level_group) - self.noise = ScratchPad() - self.clean = ScratchPad() + self.noise = CreateContainer() + self.clean = CreateContainer() self.define_behavioural(self.clean, self.input, self.idx_list, self.auxinput) self.define_behavioural(self.noise, self.noisy_input, self.noisy_idx_list, self.noisy_auxinput) self.define_regularization() # call before cost - self.define_cost(self.clean) - self.define_cost(self.noise) + self.define_cost(self.noise) # the cost is only needed for the noise (not used for the clean part) self.define_params() if self.interface: self.define_gradients() self.define_interface() - - def define_behavioural(self, container, input, idx_list, auxinput): - self.define_propup(container, input, idx_list , auxinput) - container.hidden = self.hid_fn(container.hidden_activation) - self.define_propdown(container, idx_list , auxinput) - container.rec = self.hid_fn(container.rec_activation) - if (self.ignore_missing is not None and self.input is not None and not - self.reconstruct_missing): - # Apply mask to gradient to ensure we do not backpropagate on the - # cost computed on missing inputs (that were replaced with zeros). 
- container.rec = mask_gradient(container.rec, - self.input_missing_mask) - - def define_propup(self, container, input, idx_list, auxinput): - if self.input is not None: - container.hidden_activation = self.filter_up(input, self.wenc, self.benc) - if self.auxinput is not None: - container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) - else: - if self.auxinput is not None: - container.hidden_activation = scandotenc(idx_list,auxinput,self.wauxenc) + self.benc - - # DEPENDENCY: define_propup - def define_propdown(self, container, idx_list, auxinput): - if self.input is not None: - rec_activation1 = self.filter_down(container.hidden,self.wdec,self.bdec) - if self.auxinput is not None: - rec_activation2 = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\ - scanbiasdec(idx_list,auxinput,self.bauxdec) - - if (self.input is not None) and (self.auxinput is not None): - container.rec_activation = T.join(1,rec_activation1,rec_activation2) - else: - if self.input is not None: - container.rec_activation = rec_activation1 - else: - container.rec_activation = rec_activation2 - + def filter_up(self, vis, w, b=None): out = T.dot(vis, w) return out + b if b else out filter_down = filter_up - # TODO: fix regularization type (outside parameter ?) + def corrupt_input(self): + if self.corruption_pattern is None: + mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) + elif self.corruption_pattern == 'by_pair': + shape = T.shape(self.input) + # Do not ask me why, but just doing "/ 2" does not work (there is + # a bug in the optimizer). + shape = T.stack(shape[0], (shape[1] * 2) / 4) + mask = self.random.binomial(shape, 1, 1 - self.noise_level) + mask = T.horizontal_stack(mask, mask) + else: + raise ValueError('Unknown value for corruption_pattern: %s' % self.corruption_pattern) + return mask * self.input + + def define_behavioural(self, container, input, idx_list, auxinput): + self.define_propup(container, input, idx_list , auxinput) + container.hidden = self.hid_fn(container.hidden_activation) + + self.define_propdown(container, idx_list , auxinput) + container.rec = self.rec_fn(container.rec_activation) + if self.input is not None: + container.rec_in = self.rec_fn(container.rec_activation_in) + if (self.auxinput is not None): + container.rec_aux = self.rec_fn(container.rec_activation_aux) + + def define_propup(self, container, input, idx_list, auxinput): + container.hidden_activation = self.benc + if self.input is not None: + container.hidden_activation += self.filter_up(input, self.wenc) + if self.auxinput is not None: + container.hidden_activation += scandotenc(idx_list,auxinput,self.wauxenc) + + def define_propdown(self, container, idx_list, auxinput): + if self.input is not None: + container.rec_activation_in = self.filter_down(container.hidden,self.wdec,self.bdec) + if self.auxinput is not None: + container.rec_activation_aux = scandotdec(idx_list,auxinput,container.hidden,self.wauxdec) +\ + scanbiasdec(idx_list,auxinput,self.bauxdec) + + if (self.ignore_missing is not None and self.input is not None and not self.reconstruct_missing): + # Apply mask to gradient to ensure we do not backpropagate on the + # cost computed on missing inputs (that have been imputed). 
+ container.rec_activation_in = mask_gradient(container.rec_activation_in, self.input_missing_mask) + + if (self.input is not None) and (self.auxinput is not None): + container.rec_activation = T.join(1,container.rec_activation_in,container.rec_activation_aux) + else: + container.rec_activation = container.rec_activation_in \ + if self.input is not None else container.rec_activation_aux + def define_regularization(self): self.reg_coef = T.scalar('reg_coef') if self.auxinput is not None: @@ -242,6 +354,7 @@ self.Maskup = [self.Maskup] if type(self.Maskdown) is not list: self.Maskdown = [self.Maskdown] + listweights = [] listweightsenc = [] if self.auxinput is not None: @@ -250,111 +363,97 @@ if self.input is not None: listweights += [self.wenc,self.wdec] listweightsenc += [self.wenc] - self.regularization = self.reg_coef * get_reg_cost(listweights,'l2') - self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l2') + + self.regularization = self.reg_coef * get_reg_cost(listweights,'l1') + self.regularizationenc = self.reg_coef * get_reg_cost(listweightsenc,'l1') - - # DEPENDENCY: define_behavioural, define_regularization def define_cost(self, container): - if self.reconstruction_cost_function_name == 'cross_entropy': - container.reconstruction_cost = self.reconstruction_costs(container.rec_activation) + tmpbool = (self.reconstruction_cost_function_name == 'cross_entropy') + if (self.input is not None): + container.reconstruction_cost_in = \ + self.reconstruction_cost_function(blockgrad(self.input) if self.blockgrad else self.input,\ + container.rec_activation_in if tmpbool else container.rec_in, self.rec_name) + if (self.auxinput is not None): + container.reconstruction_cost_aux = \ + self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), container.rec_activation_aux \ + if tmpbool else container.rec_aux, self.rec_name) + + # TOTAL COST + if (self.input is not None) and (self.auxinput is not None): + container.reconstruction_cost = self.scale_cost_in * \ + container.reconstruction_cost_in + self.scale_cost_aux*\ + container.reconstruction_cost_aux else: - container.reconstruction_cost = self.reconstruction_costs(container.rec) - # TOTAL COST + if self.input is not None: + container.reconstruction_cost = container.reconstruction_cost_in + if (self.auxinput is not None): + container.reconstruction_cost = container.reconstruction_cost_aux + if self.regularize: #if stacked don't merge regularization and cost here but in the stackeddaaig module - container.cost = container.cost + self.regularization + container.cost = container.reconstruction_cost + self.regularization else: container.cost = container.reconstruction_cost - # DEPENDENCY: define_cost def define_params(self): if not hasattr(self,'params'): self.params = [] + self.params += [self.benc] self.paramsenc = copy.copy(self.params) + if self.input is not None: self.params += [self.wenc] + [self.bdec] self.paramsenc += [self.wenc] if self.auxinput is not None: self.params += self.wauxenc + self.bauxdec self.paramsenc += self.wauxenc + if not(self.tie_weights): if self.input is not None: self.params += [self.wdec] + if not(self.tie_weights_aux): if self.auxinput is not None: self.params += self.wauxdec - # DEPENDENCY: define_cost, define_gradients def define_gradients(self): self.gradients = T.grad(self.noise.cost, self.params) - self.updates = dict((p, p - self.lr * g) for p, g in \ - zip(self.params, self.gradients)) + self.updates = dict((p, p - self.lr * g) for p, g in zip(self.params, self.gradients)) 
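# define_gradients above is a plain SGD step: take the gradient of the noisy
# reconstruction cost w.r.t. every parameter and map each parameter to its
# updated value. A minimal standalone sketch of the same pattern (illustrative
# names, outside the Module framework):
#     import theano
#     from theano import tensor as T
#     grads   = T.grad(cost, params)                        # one gradient per parameter
#     updates = dict((p, p - lr * g) for p, g in zip(params, grads))
#     step    = theano.function(inputs, cost, updates=updates)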
- - # DEPENDENCY: define_behavioural, define_regularization, define_cost, define_gradients def define_interface(self): # declare function to interface with module (if not stacked) - if self.input is None: - listin = [self.idx_list, self.auxinput] + listin = [] + listout = [] + if self.input is not None: + listin += [self.input] + listout += [self.noisy_input] if self.auxinput is None: - listin = [self.input] - if (self.input is not None) and (self.auxinput is not None): - listin =[self.input,self.idx_list, self.auxinput] + listin += [self.idx_list, self.auxinput] + listout += [self.noisy_auxinput] + self.update = theano.Method(listin, self.noise.cost, self.updates) self.compute_cost = theano.Method(listin, self.noise.cost) - if self.input is not None: - self.noisify = theano.Method(listin, self.noisy_input) - if self.auxinput is not None: - self.auxnoisify = theano.Method(listin, self.noisy_auxinput) - self.reconstruction = theano.Method(listin, self.clean.rec) + self.noisify = theano.Method(listin, listout) + self.recactivation = theano.Method(listin, self.noise.rec_activation) + self.reconstruction = theano.Method(listin, self.noise.rec) + self.activation = theano.Method(listin, self.clean.hidden_activation) self.representation = theano.Method(listin, self.clean.hidden) - self.validate = theano.Method(listin, [self.clean.cost, self.clean.rec]) - def corrupt_input(self): - if self.corruption_pattern is None: - mask = self.random.binomial(T.shape(self.input), 1, 1 - self.noise_level) - elif self.corruption_pattern == 'by_pair': - shape = T.shape(self.input) - scale = numpy.ones(2) - scale[1] = 2 - shape = shape / scale - mask = self.random.binomial(shape, 1, 1 - self.noise_level) - mask = T.hstack((mask, mask)) - else: - raise ValueError('Unknown value for corruption_pattern: %s' - % self.corruption_pattern) - return mask * self.input - - def reconstruction_costs(self, rec): - if (self.input is not None) and (self.auxinput is not None): - return self.reconstruction_cost_function(T.join(1,self.input,scaninputs(self.idx_list,self.auxinput)),\ - rec, self.hid_name) - if self.input is not None: - return self.reconstruction_cost_function(self.input, rec, self.hid_name) - if self.auxinput is not None: - return self.reconstruction_cost_function(scaninputs(self.idx_list,self.auxinput), rec, self.hid_name) - # All cases should be covered above. If not, something is wrong! - assert False - - def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, - seed=1, alloc=True, **init): + def _instance_initialize(self, obj, lr = 1 , reg_coef = 0, noise_level = 0 , noise_level_group = 0, scale_cost_in = 1, + scale_cost_aux = 1 , seed=1, orthoinit = False, tieinit = False, alloc=True, **init): super(DAAig, self)._instance_initialize(obj, **init) obj.reg_coef = reg_coef obj.noise_level = noise_level obj.noise_level_group = noise_level_group - if self. 
interface: - obj.lr = lr # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module) - else: - obj.lr = None + obj.scale_cost_in = scale_cost_in + obj.scale_cost_aux = scale_cost_aux + obj.lr = lr if self.interface else None + # if stacked useless (overriden by the sup_lr and unsup_lr of the stackeddaaig module) obj.random.initialize() - if seed is not None: - obj.random.seed(seed) + obj.random.seed(seed) self.R = numpy.random.RandomState(seed) - obj.__hide__ = ['params'] - if self.input is not None: self.inf = 1/numpy.sqrt(self.in_size) if self.auxinput is not None: @@ -363,29 +462,36 @@ self.inf = 1/numpy.sqrt(sum(self.auxin_size)+self.in_size) self.hif = 1/numpy.sqrt(self.n_hid) - if alloc: if self.input is not None: wencshp = (self.in_size, self.n_hid) wdecshp = tuple(reversed(wencshp)) + obj.bdec = numpy.zeros(self.in_size) + obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) + if not(self.tie_weights): + obj.wdec = copy.copy(obj.wenc.T) if tieinit else \ + self.R.uniform(size=wdecshp,low=-self.hif,high=self.hif) + if orthoinit: + obj.wenc = orthogonalinit(obj.wenc) + if not(self.tie_weights): + obj.wdec = orthogonalinit(obj.wdec,0) print 'wencshp = ', wencshp print 'wdecshp = ', wdecshp - - obj.wenc = self.R.uniform(size=wencshp, low = -self.inf, high = self.inf) - if not(self.tie_weights): - obj.wdec = self.R.uniform(size=wdecshp, low=-self.hif, high=self.hif) - obj.bdec = numpy.zeros(self.in_size) if self.auxinput is not None: wauxencshp = [(i, self.n_hid) for i in self.auxin_size] wauxdecshp = [tuple(reversed(i)) for i in wauxencshp] + obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size] + obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] + if not(self.tie_weights_aux): + obj.wauxdec = [copy.copy(obj.wauxenc[i].T) for i in range(len(wauxdecshp))] if tieinit else\ + [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] + if orthoinit: + obj.wauxenc = [orthogonalinit(w) for w in obj.wauxenc] + if not(self.tie_weights_aux): + obj.wauxdec = [orthogonalinit(w,0) for w in obj.wauxdec] print 'wauxencshp = ', wauxencshp print 'wauxdecshp = ', wauxdecshp - - obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp] - if not(self.tie_weights): - obj.wauxdec = [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp] - obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size] print 'self.inf = ', self.inf print 'self.hif = ', self.hif @@ -398,20 +504,21 @@ class StackedDAAig(module.Module): def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None], in_size = None, auxin_size = [None], n_hid = [1], - regularize = False, tie_weights = False, hid_fn = 'sigmoid_act', - reconstruction_cost_function='cross_entropy', + regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act', + rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy', n_out = 2, target = None, debugmethod = False, totalupdatebool=False, ignore_missing=None, reconstruct_missing=False, - corruption_pattern=None, + corruption_pattern=None, blockgrad = False, act_reg = 'sigmoid_act', **init): super(StackedDAAig, self).__init__() - print '\t**** StackedDAAig.__init__ ****' - print '\tinput = ', input - print '\tauxinput = ', auxinput - print '\tin_size = ', in_size - print '\tauxin_size = ', auxin_size - print '\tn_hid = ', n_hid + + # utils + def listify(param,depth): + if type(param) is list: + return param if 
len(param)==depth else [param[0]]*depth + else: + return [param]*depth # save parameters self.depth = depth @@ -419,11 +526,13 @@ self.auxinput = auxinput self.in_size = in_size auxin_size = auxin_size - self.n_hid = n_hid + self.n_hid = listify(n_hid,depth) self.regularize = regularize - self.tie_weights = tie_weights - self.hid_fn = hid_fn - self.reconstruction_cost_function = reconstruction_cost_function + tie_weights = listify(tie_weights,depth) + tie_weights_aux = listify(tie_weights_aux,depth) + hid_fn = listify(hid_fn,depth) + rec_fn = listify(rec_fn,depth) + reconstruction_cost_function = listify(reconstruction_cost_function,depth) self.n_out = n_out self.target = target if target is not None else T.lvector('target') self.debugmethod = debugmethod @@ -431,6 +540,27 @@ self.ignore_missing = ignore_missing self.reconstruct_missing = reconstruct_missing self.corruption_pattern = corruption_pattern + self.blockgrad = blockgrad + + assert act_reg in ('sigmoid_act','tanh_act','softsign_act','tanh2_act') + self.act_reg = eval(act_reg) + + print '\t**** StackedDAAig.__init__ ****' + print '\tdepth = ', self.depth + print '\tinput = ', self.input + print '\tauxinput = ', self.auxinput + print '\tin_size = ', self.in_size + print '\tauxin_size = ', auxin_size + print '\tn_hid = ', self.n_hid + print '\tregularize = ', self.regularize + print '\ttie_weights = ', tie_weights + print '\ttie_weights_aux = ', tie_weights_aux + print '\thid_fn = ', hid_fn + print '\trec_fn = ', rec_fn + print '\tact_reg = ', act_reg + print '\treconstruction_cost_function = ', reconstruction_cost_function + print '\tblockgrad = ', self.blockgrad + print '\tn_out = ', self.n_out # init for model construction inputprec = input @@ -446,15 +576,16 @@ self.globalupdate = [None] * (self.depth+1)#update wrt the layer cost backproped untill the input layer if self.totalupdatebool: self.totalupdate = [None] * (self.depth+1) #update wrt all the layers cost backproped untill the input layer - # - self.classify = None - #others methods + # facultative methods if self.debugmethod: + self.activation = [None] * (self.depth+1) self.representation = [None] * (self.depth) + self.recactivation = [None] * (self.depth) self.reconstruction = [None] * (self.depth) - self.validate = [None] * (self.depth) self.noisyinputs = [None] * (self.depth) + self.compute_localgradients_in = [None] * (self.depth) + self.compute_localgradients_aux = [None] * (self.depth) self.compute_localcost = [None] * (self.depth+1) self.compute_localgradients = [None] * (self.depth+1) self.compute_globalcost = [None] * (self.depth+1) @@ -462,15 +593,16 @@ if self.totalupdatebool: self.compute_totalcost = [None] * (self.depth+1) self.compute_totalgradients = [None] * (self.depth+1) - # # some theano Variables we want to keep track on - if self.regularize: - self.regularizationenccost = [None] * (self.depth) + self.localgradients_in = [None] * (self.depth) + self.localgradients_aux = [None] * (self.depth) self.localcost = [None] * (self.depth+1) self.localgradients = [None] * (self.depth+1) self.globalcost = [None] * (self.depth+1) self.globalgradients = [None] * (self.depth+1) + if self.regularize: + self.regularizationenccost = [None] * (self.depth) if self.totalupdatebool: self.totalcost = [None] * (self.depth+1) self.totalgradients = [None] * (self.depth+1) @@ -479,7 +611,6 @@ paramstot = [] paramsenc = [] self.inputs = [None] * (self.depth+1) - if self.input is not None: self.inputs[0] = [self.input] else: @@ -488,20 +619,21 @@ offset = 0 for i in 
range(self.depth): + dict_params = dict(input = inputprec, in_size = in_sizeprec, auxin_size = auxin_size[i], + n_hid = self.n_hid[i], regularize = False, tie_weights = tie_weights[i], + tie_weights_aux = tie_weights_aux[i], hid_fn = hid_fn[i], + rec_fn = rec_fn[i], reconstruction_cost_function = reconstruction_cost_function[i], + interface = False, ignore_missing = self.ignore_missing, + reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern, + blockgrad=self.blockgrad) if auxin_size[i] is None: offset +=1 - param = [inputprec, None, in_sizeprec, auxin_size[i], self.n_hid[i],\ - False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] + dict_params.update({'auxinput' : None}) else: - param = [inputprec, self.auxinput[i-offset], in_sizeprec, auxin_size[i], self.n_hid[i],\ - False, self.tie_weights, self.hid_fn, self.reconstruction_cost_function,False] - - dict_params = dict(ignore_missing = self.ignore_missing, - reconstruct_missing = self.reconstruct_missing, - corruption_pattern = self.corruption_pattern) + dict_params.update({'auxinput' : self.auxinput[i-offset]}) print '\tLayer init= ', i+1 - self.daaig[i] = DAAig(*param, **dict_params) + self.daaig[i] = DAAig(**dict_params) # method input, outputs and parameters update if i: @@ -521,31 +653,27 @@ self.localcost[i] = self.daaig[i].noise.cost self.globalcost[i] = self.daaig[i].noise.cost if self.totalupdatebool: - if i: - self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost - else: - self.totalcost[i] = self.daaig[i].noise.cost + self.totalcost[i] = self.totalcost[i-1] + self.daaig[i].noise.cost if i else self.daaig[i].noise.cost if self.regularize: - if i: - self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc - else: - self.regularizationenccost[i] = 0 - + self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc if i else 0 self.localcost[i] += self.daaig[i].regularization - self.globalcost[i] += self.regularizationenccost[i] + self.globalcost[i] += self.regularizationenccost[i] + self.daaig[i].regularization if self.totalupdatebool: self.totalcost[i] += self.daaig[i].regularization + self.localgradients_in[i] = T.grad(self.daaig[i].noise.reconstruction_cost_in, self.daaig[i].params) \ + if inputprec is not None else T.constant(0) + self.localgradients_aux[i] = T.grad(self.daaig[i].noise.reconstruction_cost_aux,self.daaig[i].params) \ + if auxin_size[i] is not None else T.constant(0) self.localgradients[i] = T.grad(self.localcost[i], self.daaig[i].params) - self.globalgradients[i] = T.grad(self.globalcost[i], self.daaig[i].params+paramsenc) + self.globalgradients[i] = T.grad(self.globalcost[i], paramsenc + self.daaig[i].params) if self.totalupdatebool: self.totalgradients[i] = T.grad(self.totalcost[i], paramstot) #create the updates dictionnaries - local_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) - global_grads = dict((j, j - self.unsup_lr * g)\ - for j,g in zip(self.daaig[i].params+paramsenc,self.globalgradients[i])) + local_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(self.daaig[i].params,self.localgradients[i])) + global_grads = dict((j,j-self.unsup_lr*g) for j,g in zip(paramsenc+self.daaig[i].params,self.globalgradients[i])) if self.totalupdatebool: total_grads = dict((j, j - self.unsup_lr * g) for j,g in zip(paramstot,self.totalgradients[i])) @@ -554,50 +682,48 @@ self.globalupdate[i] = 
theano.Method(self.inputs[i],self.globalcost[i],global_grads) if self.totalupdatebool: self.totalupdate[i] = theano.Method(self.inputs[i],self.totalcost[i],total_grads) - # + if self.debugmethod: + self.activation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden_activation) self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden) - self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec) - self.validate[i] =theano.Method(self.inputs[i], [self.daaig[i].clean.cost, self.daaig[i].clean.rec]) + self.recactivation[i] = theano.Method(self.inputs[i],self.daaig[i].noise.rec_activation) + self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].noise.rec) self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout) self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i]) self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i]) + self.compute_localgradients_in[i] = theano.Method(self.inputs[i],self.localgradients_in[i]) + self.compute_localgradients_aux[i] = theano.Method(self.inputs[i],self.localgradients_aux[i]) self.compute_globalcost[i] = theano.Method(self.inputs[i],self.globalcost[i]) self.compute_globalgradients[i] = theano.Method(self.inputs[i],self.globalgradients[i]) if self.totalupdatebool: self.compute_totalcost[i] = theano.Method(self.inputs[i],self.totalcost[i]) self.compute_totalgradients[i] = theano.Method(self.inputs[i],self.totalgradients[i]) - # paramsenc += self.daaig[i].paramsenc inputprec = self.daaig[i].clean.hidden in_sizeprec = self.n_hid[i] - # supervised layer + # supervised layer------------------------------------------------------------------------ print '\tLayer supervised init' self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target] - self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,inputprec,self.target) + self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,self.act_reg(self.daaig[-2].clean.hidden_activation),self.target) paramstot += self.daaig[-1].params - if self.regularize: - self.localcost[-1] = self.daaig[-1].regularized_cost - self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] - else: - self.localcost[-1] = self.daaig[-1].unregularized_cost - self.globalcost[-1] = self.daaig[-1].unregularized_cost + self.localcost[-1] = self.daaig[-1].regularized_cost \ + if self.regularize else self.daaig[-1].unregularized_cost + self.globalcost[-1] = self.daaig[-1].regularized_cost + self.regularizationenccost[-1] \ + if self.regularize else self.daaig[-1].unregularized_cost if self.totalupdatebool: self.totalcost[-1] = [self.totalcost[-2], self.globalcost[-1]] self.localgradients[-1] = T.grad(self.localcost[-1], self.daaig[-1].params) - self.globalgradients[-1] = T.grad(self.globalcost[-1], self.daaig[-1].params+paramsenc) + self.globalgradients[-1] = T.grad(self.globalcost[-1], paramsenc + self.daaig[-1].params) if self.totalupdatebool: - self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) ,\ - T.grad(self.globalcost[-1], paramstot) ] + self.totalgradients[-1] = [T.grad(self.totalcost[-2], paramstot) , T.grad(self.globalcost[-1],paramstot) ] - local_grads = dict((j, j - self.sup_lr * g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) - global_grads = dict((j, j - self.sup_lr * g)\ - for j,g in zip(self.daaig[-1].params+paramsenc,self.globalgradients[-1])) + local_grads = dict((j,j-self.sup_lr*g) for j,g in zip(self.daaig[-1].params,self.localgradients[-1])) + global_grads = 
dict((j,j-self.sup_lr*g) for j,g in zip(paramsenc + self.daaig[-1].params,self.globalgradients[-1])) if self.totalupdatebool: total_grads = dict((j, j - self.unsup_lr * g1 - self.sup_lr * g2)\ for j,g1,g2 in zip(paramstot,self.totalgradients[-1][0],self.totalgradients[-1][1])) @@ -606,10 +732,21 @@ self.globalupdate[-1] = theano.Method(self.inputs[-1],self.globalcost[-1],global_grads) if self.totalupdatebool: self.totalupdate[-1] = theano.Method(self.inputs[-1],self.totalcost[-1],total_grads) + # total update of each local cost [no global cost backpropagated] + totallocal_grads={} + for k in range(self.depth): + totallocal_grads.update(dict((j, j - self.unsup_lr * g) for j,g in \ + zip(self.daaig[k].params,self.localgradients[k]))) + totallocal_grads.update(dict((j, j - self.sup_lr * g) for j,g in + zip(self.daaig[-1].params,self.localgradients[-1]))) + self.totallocalupdate = theano.Method(self.inputs[-1],self.localcost,totallocal_grads) + + # interface for the user self.classify = theano.Method(self.inputs[-2],self.daaig[-1].argmax_standalone) self.NLL = theano.Method(self.inputs[-1],self.daaig[-1]._xent) if self.debugmethod: + self.activation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].linear_output) self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1]) self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1]) self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1]) @@ -619,8 +756,9 @@ self.compute_totalgradients[-1] =\ theano.Method(self.inputs[-1],self.totalgradients[-1][0]+self.totalgradients[-1][1]) - def _instance_initialize(self,inst,unsup_lr = 0.1, sup_lr = 0.01, reg_coef = 0, - noise_level = 0 , noise_level_group = 0, seed = 1, alloc = True,**init): + def _instance_initialize(self,inst,unsup_lr = 0.01, sup_lr = 0.01, reg_coef = 0, scale_cost_in = 1, scale_cost_aux = 1, + noise_level = 0 , noise_level_group = 0, seed = 1, orthoinit = False, tieinit=False, + alloc = True,**init): super(StackedDAAig, self)._instance_initialize(inst, **init) inst.unsup_lr = unsup_lr @@ -628,9 +766,189 @@ for i in range(self.depth): print '\tLayer = ', i+1 - inst.daaig[i].initialize(reg_coef = reg_coef, noise_level = noise_level,\ - noise_level_group = noise_level_group, seed = seed, alloc = alloc) + inst.daaig[i].initialize(reg_coef = reg_coef[i] if type(reg_coef) is list else reg_coef, \ + noise_level = noise_level[i] if type(noise_level) is list else noise_level, \ + scale_cost_in = scale_cost_in[i] if type(scale_cost_in) is list else scale_cost_in, \ + scale_cost_aux = scale_cost_aux[i] if type(scale_cost_aux) is list else scale_cost_aux, \ + noise_level_group = noise_level_group[i] if type(noise_level_group) is list else noise_level_group, \ + seed = seed + i, orthoinit = orthoinit, tieinit = tieinit, alloc = alloc) + print '\tLayer supervised' inst.daaig[-1].initialize() - inst.daaig[-1].l1 = 0 - inst.daaig[-1].l2 = reg_coef #only l2 norm for regularisation to be consitent with the unsup regularisation + + if alloc: + inst.daaig[-1].R = numpy.random.RandomState(seed+self.depth) + # init the logreg weights + inst.daaig[-1].w = inst.daaig[-1].R.uniform(size=inst.daaig[-1].w.shape,\ + low = -1/numpy.sqrt(inst.daaig[-2].n_hid), high = 1/numpy.sqrt(inst.daaig[-2].n_hid)) + if orthoinit: + inst.daaig[-1].w = orthogonalinit(inst.daaig[-1].w) + inst.daaig[-1].l1 = reg_coef[-1] if type(reg_coef) is list else reg_coef + inst.daaig[-1].l2 = 0 + #only l1 norm for regularisation to be consitent with the unsup 
regularisation + + def _instance_save(self,inst,save_dir=''): + + for i in range(self.depth): + save_mat('benc%s.ft'%(i) ,inst.daaig[i].benc, save_dir) + + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxenc)): + save_mat('wauxenc%s_%s.ft'%(i,j) ,inst.daaig[i].wauxenc[j], save_dir) + save_mat('bauxdec%s_%s.ft'%(i,j) ,inst.daaig[i].bauxdec[j], save_dir) + + if self.daaig[i].input is not None: + save_mat('wenc%s.ft'%(i) ,inst.daaig[i].wenc, save_dir) + save_mat('bdec%s.ft'%(i) ,inst.daaig[i].bdec, save_dir) + + if not self.daaig[i].tie_weights_aux: + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxdec)): + save_mat('wauxdec%s_%s.ft'%(i,j) ,inst.daaig[i].wauxdec[j], save_dir) + + if not self.daaig[i].tie_weights: + if self.daaig[i].input is not None: + save_mat('wdec%s.ft'%(i) ,inst.daaig[i].wdec, save_dir) + i=i+1 + save_mat('wenc%s.ft'%(i) ,inst.daaig[i].w, save_dir) + save_mat('benc%s.ft'%(i) ,inst.daaig[i].b, save_dir) + + def _instance_load(self,inst,save_dir='',coefenc = None, coefdec = None, Sup_layer = None): + + if coefenc is None: + coefenc = [1.]*self.depth + if coefdec is None: + coefdec = [1.]*self.depth + + for i in range(self.depth): + inst.daaig[i].benc = load_mat('benc%s.ft'%(i), save_dir)/coefenc[i] + + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxenc)): + inst.daaig[i].wauxenc[j] = load_mat('wauxenc%s_%s.ft'%(i,j),save_dir)/coefenc[i] + inst.daaig[i].bauxdec[j] = load_mat('bauxdec%s_%s.ft'%(i,j),save_dir)/coefdec[i] + + if self.daaig[i].input is not None: + inst.daaig[i].wenc = load_mat('wenc%s.ft'%(i),save_dir)/coefenc[i] + inst.daaig[i].bdec = load_mat('bdec%s.ft'%(i),save_dir)/coefdec[i] + + if not self.daaig[i].tie_weights_aux: + if self.daaig[i].auxinput is not None: + for j in range(len(inst.daaig[i].wauxdec)): + if 'wauxdec%s_%s.ft'%(i,j) in os.listdir(save_dir): + inst.daaig[i].wauxdec[j] = load_mat('wauxdec%s_%s.ft'%(i,j),save_dir)/coefdec[i] + else: + print "WARNING: no decoding 'wauxdec%s_%s.ft' file use 'wauxenc%s_%s.ft' instead"%(i,j,i,j) + inst.daaig[i].wauxdec[j] = numpy.transpose(load_mat('wauxenc%s_%s.ft'%(i,j),save_dir)/coefdec[i]) + + if not self.daaig[i].tie_weights: + if self.daaig[i].input is not None: + if 'wdec%s.ft'%(i) in os.listdir(save_dir): + inst.daaig[i].wdec = load_mat('wdec%s.ft'%(i),save_dir)/coefdec[i] + else: + print "WARNING: no decoding 'wdec%s.ft' file use 'wenc%s.ft' instead"%(i,i) + inst.daaig[i].wdec = numpy.transpose(load_mat('wenc%s.ft'%(i),save_dir)/coefdec[i]) + i=i+1 + if Sup_layer is None: + inst.daaig[i].w = load_mat('wenc%s.ft'%(i),save_dir) + inst.daaig[i].b = load_mat('benc%s.ft'%(i),save_dir) + else: + inst.daaig[i].w = load_mat('wenc%s.ft'%(Sup_layer),save_dir) + inst.daaig[i].b = load_mat('benc%s.ft'%(Sup_layer),save_dir) + + def _instance_hidsaturation(self,inst,layer,inputs): + return numpy.mean(numpy.median(abs(inst.activation[layer](*inputs)),1)) + + def _instance_recsaturation(self,inst,layer,inputs): + return numpy.mean(numpy.median(abs(inst.recactivation[layer](*inputs)),1)) + + def _instance_error(self,inst,inputs,target): + return numpy.sum(inst.classify(*inputs) != target) / float(len(target))*100.0 + + def _instance_nll(self,inst,inputs,target): + return numpy.sum(inst.NLL(*(inputs+[target]))) / float(len(target)) + + #try-------------------------------------------------------------------- + def _instance_rescalwsaturation(self,inst,inputs): + sat = [None]*(self.depth+1) + for i in range(self.depth+1): + sat[i] = 
inst.hidsaturation(i,inputs[min(i,self.depth-1)]) + + for i in range(self.depth-1): + if sat[i+1] > max(sat[:i+1]): + inst.daaig[i+1].wenc = inst.daaig[i+1].wenc/sat[i+1]*max(sat[:i+1]) + inst.daaig[i+1].benc = inst.daaig[i+1].benc/sat[i+1]*max(sat[:i+1]) + sat[i+1] = max(sat[:i+1]) + if sat[-1]>max(sat[:-1]): + inst.daaig[-1].w = inst.daaig[-1].w/sat[-1]*max(sat[:-1]) + inst.daaig[-1].b = inst.daaig[-1].b/sat[-1]*max(sat[:-1]) + + #----------------------------------------------------------------------- + + def _instance_unsupgrad(self,inst,inputs,layer,param_name): + inst.noiseseed(0) + gradin = inst.compute_localgradients_in[layer](*inputs) + inst.noiseseed(0) + gradaux = inst.compute_localgradients_aux[layer](*inputs) + inst.noiseseed(0) + gradtot = inst.compute_localgradients[layer](*inputs) + + for j in range(len(gradtot)): + if str(self.daaig[layer].params[j]) is param_name: + tmpin = numpy.sqrt((pow(inst.daaig[layer].scale_cost_in,2)*gradin[j]*gradin[j]).sum()) \ + if type(gradin) is list else 0 + tmpaux= numpy.sqrt((pow(inst.daaig[layer].scale_cost_aux,2)*gradaux[j]*gradaux[j]).sum())\ + if type(gradaux) is list else 0 + tmptot = numpy.sqrt((gradtot[j]*gradtot[j]).sum()) if type(gradtot) is list else 0 + + if type(gradin) is list and type(gradaux) is list and (gradin[j]*gradin[j]).sum() != 0: + projauxin =(inst.daaig[layer].scale_cost_aux*gradaux[j] * \ + inst.daaig[layer].scale_cost_in*gradin[j]).sum()/ \ + (numpy.sqrt((pow(inst.daaig[layer].scale_cost_in,2)*gradin[j]*gradin[j]).sum())) + else: + projauxin = 0 + return tmpin, tmpaux, tmptot, tmpin/(tmpaux+tmpin)*100, projauxin/tmpaux*100 if tmpaux != 0 else 0 + + def _instance_noiseseed(self,inst,seed): + scannoise.R.rand.seed(seed) + for i in range(self.depth): + inst.daaig[i].random.seed(seed+i+1) + + def _instance_unsupupdate(self,inst,data,layer='all',typeup = 'local',printcost = False): + cost = [None]*self.depth + if typeup == 'totallocal': + cost[-1] = inst.totallocalupdate(*data) + else: + if typeup == 'total': + if layer == 'all': + cost[-1] = inst.totalupdate[-2](*data[-1]) + else: + cost[layer] = inst.totalupdate[layer](*data[layer]) + else: + if layer is 'all': + for i in range(self.depth): + if typeup == 'local': + cost[i] = inst.localupdate[i](*data[i]) + if typeup == 'global': + cost[i] = inst.globalupdate[i](*data[i]) + for j in range(i): + dummy = inst.localupdate[j](*data[j]) + else: + if typeup == 'local': + cost[layer] = inst.localupdate[layer](*data[layer]) + if typeup == 'global': + cost[layer] = inst.globalupdate[layer](*data[layer]) + for j in range(layer): + dummy = inst.localupdate[j](*data[j]) + if printcost: + print cost + return cost + + def _instance_supupdate(self,inst,data,typeup = 'global',printcost = False): + if typeup == 'local': + cost = inst.localupdate[-1](*data) + if typeup == 'global': + cost = inst.globalupdate[-1](*data) + if printcost: + print cost + return cost diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/sandbox/test_cost.py --- a/pylearn/algorithms/sandbox/test_cost.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/sandbox/test_cost.py Thu Sep 10 10:30:50 2009 -0400 @@ -14,13 +14,19 @@ def test_float(self): """ - This should fail because we can't use floats in logfactorial + Ensure we cannot use floats in logfactorial. 
""" x = TT.as_tensor([0.5, 2.7]) o = cost.logfactorial(x) - f = T.function([],o) -# print repr(f()) - self.failUnless(numpy.all(f() == numpy.asarray([0., 0., 1.38629436, 3.29583687, 5.54517744, 8.04718956, 10.75055682, 13.62137104, 16.63553233, 19.7750212]))) + f = T.function([], o) + try: + f() + assert False + except TypeError, e: + if str(e).find(", must be int or long") >= 0: + pass + else: + raise class T_nlpoisson(unittest.TestCase): def test(self): diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/sgd.py --- a/pylearn/algorithms/sgd.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/sgd.py Thu Sep 10 10:30:50 2009 -0400 @@ -20,7 +20,7 @@ :param updates: extra symbolic updates to make when evating either step or step_cost (these override the gradients if necessary) - :type updatess: dict Variable -> Variable + :type updates: dict Variable -> Variable :param auxout: auxiliary outputs, list containing output symbols to compute at the same time as cost (for efficiency) :param methods: Should this module define the step and step_cost methods? diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/stacker.py --- a/pylearn/algorithms/stacker.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/stacker.py Thu Sep 10 10:30:50 2009 -0400 @@ -25,7 +25,7 @@ for i, (submodule, outname) in enumerate(submodules): layer = submodule(current, regularize = regularize) layers.append(layer) - current = layer[outname] + current = getattr(layer, outname) self.layers = layers self.input = self.layers[0].input @@ -35,16 +35,14 @@ local_update = [] global_update = [] to_update = [] - all_kits = [] for layer, (submodule, outname) in zip(layers, submodules): u = layer.update u.resolve_all() to_update += u.updates.keys() - all_kits += u.kits # the input is the whole deep model's input instead of the layer's own # input (which is previous_layer[outname]) inputs = [self.input] + u.inputs[1:] - method = theano.Method(inputs, u.outputs, u.updates, u.kits) + method = theano.Method(inputs, u.outputs, u.updates) local_update.append(method) global_update.append( theano.Method(inputs, @@ -52,9 +50,8 @@ # we update the params of the previous layers too but wrt # this layer's cost dict((param, param - layer.lr * T.grad(layer.cost, param)) - for param in to_update), - list(all_kits))) - representation.append(theano.Method(self.input, layer[outname])) + for param in to_update))) + representation.append(theano.Method(self.input, getattr(layer,outname))) # @todo: Add diagnostics # self.diagnose_from_input = Method([self.input], self.layers[0].diagnose.outputs + self.layers[1].diagnose.outputs ... 
@@ -64,12 +61,23 @@ self.representation = representation self.update = self.global_update[-1] self.compute = theano.Method(self.input, self.output) + + # takes method from last layer (usually ll.classify), copies it to self., + # while converting its input to deal with the global "model" input ll = self.layers[-1] - for name, method in ll.components_map(): + for name, method in ll.__dict__['local_attr'].iteritems(): if isinstance(method, theano.Method) and not hasattr(self, name): - m = method.dup() - m.resolve_all() - m.inputs = [self.input if x is ll.input else x for x in m.inputs] + if not isinstance(method.inputs, (list,dict)): + method.inputs = [method.inputs] + inputs = [] + for x in method.inputs: + if x is ll.input: + inputs += [self.input] + else: + inputs += [x] + #backport + #inputs = [self.input if x is ll.input else x for x in method.inputs] + m = theano.Method(inputs, method.outputs, method.updates) setattr(self, name, m) def _instance_initialize(self, obj, nunits = None, lr = 0.01, seed = None, **kwargs): diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/tests/test_daa.py --- a/pylearn/algorithms/tests/test_daa.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/tests/test_daa.py Thu Sep 10 10:30:50 2009 -0400 @@ -6,8 +6,9 @@ import time import pylearn.algorithms.logistic_regression +from theano.compile.mode import default_mode -def test_train_daa(mode = theano.Mode('c|py', 'fast_run')): +def test_train_daa(mode = default_mode): ndaa = 3 daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(models.BinRegressor, 'output')], diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/tests/test_sgd.py --- a/pylearn/algorithms/tests/test_sgd.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/tests/test_sgd.py Thu Sep 10 10:30:50 2009 -0400 @@ -14,7 +14,7 @@ c = m.step_cost(3.0) #print c[0], m.y - assert c[0] < 1.0e-5 + assert c < 1.0e-5 assert abs(m.y - (1.0 / 3)) < 1.0e-4 def test_sgd_stepsize_variable(): @@ -33,7 +33,7 @@ c = m.step_cost(3.0) # print c, m.y - assert c[0] < 1.0e-5 + assert c < 1.0e-5 assert abs(m.y - (1.0 / 3)) < 1.0e-4 @@ -63,7 +63,7 @@ c = m.step_cost(3.0) # print c, m.y - assert c[0] < 1.0e-5 + assert c < 1.0e-5 assert abs(m.y - (1.0 / 3)) < 1.0e-4 if __name__ == '__main__': diff -r e53c06901f8f -r bfc5914642ce pylearn/algorithms/tests/test_stacker.py --- a/pylearn/algorithms/tests/test_stacker.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/algorithms/tests/test_stacker.py Thu Sep 10 10:30:50 2009 -0400 @@ -5,12 +5,15 @@ import numpy import time +class StackBinRegressor(models_reg.BinRegressor): + def __init__(self, input = None, target = None, regularize = True): + super(StackBinRegressor, self).__init__(input, target, regularize) + self.build_extensions() def test_train(mode = theano.Mode('c|py', 'fast_run')): - - reg = models_stacker.Stacker([(models_reg.BinRegressor, 'output'), - (models_reg.BinRegressor, 'output')], - regularize = False) + reg = models_stacker.Stacker([(StackBinRegressor, 'output'), + (StackBinRegressor, 'output')], + regularize = False) #print reg.global_update[1].pretty(mode = mode.excluding('inplace')) model = reg.make([100, 200, 1], diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/MNIST.py --- a/pylearn/datasets/MNIST.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/MNIST.py Thu Sep 10 10:30:50 2009 -0400 @@ -6,9 +6,9 @@ import os import numpy -from ..io.amat import AMat -from .config import data_root # config -from .dataset import Dataset +from pylearn.io.pmat import PMat +from 
pylearn.datasets.config import data_root # config +from pylearn.datasets.dataset import Dataset def head(n=10, path=None): """Load the first MNIST examples. @@ -18,20 +18,20 @@ is the label of the i'th row of x. """ - path = os.path.join(data_root(), 'mnist','mnist_with_header.amat') if path is None else path + if path is None: + path = os.path.join(data_root(), 'mnist','mnist_all.pmat') - dat = AMat(path=path, head=n) + dat = PMat(fname=path) + + rows=dat.getRows(0,n) - try: - assert dat.input.shape[0] == n - assert dat.target.shape[0] == n - except Exception , e: - raise Exception("failed to read MNIST data", (dat, e)) + return rows[:,0:-1], numpy.asarray(rows[:,-1], dtype='int64') + - return dat.input, numpy.asarray(dat.target, dtype='int64').reshape(dat.target.shape[0]) - -def all(path=None): - return head(n=None, path=path) +#What is the purpose of this fct? +#If still usefull, rename it as it conflict with the python an numpy nake all. +#def all(path=None): +# return head(n=None, path=path) def train_valid_test(ntrain=50000, nvalid=10000, ntest=10000, path=None): all_x, all_targ = head(ntrain+nvalid+ntest, path=path) diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/config.py --- a/pylearn/datasets/config.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/config.py Thu Sep 10 10:30:50 2009 -0400 @@ -11,7 +11,11 @@ if os.getenv(key) is None: print >> sys.stderr, "WARNING: Environment variable", key, print >> sys.stderr, "is not set. Using default of", default - return default if os.getenv(key) is None else os.getenv(key) + if os.getenv(key) is None: + return default + else: + return os.getenv(key) + #return default if os.getenv(key) is None else os.getenv(key) def data_root(): return env_get('PYLEARN_DATA_ROOT', os.getenv('HOME')+'/data', 'DBPATH') diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/norb_small.py --- a/pylearn/datasets/norb_small.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/norb_small.py Thu Sep 10 10:30:50 2009 -0400 @@ -63,11 +63,14 @@ test = {} train['dat'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat') train['cat'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat') + train['info'] = os.path.join(dirpath, 'smallnorb-5x46789x9x18x6x2x96x96-training-info.mat') test['dat'] = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat') test['cat'] = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat') + test['info'] = os.path.join(dirpath, 'smallnorb-5x01235x9x18x6x2x96x96-testing-info.mat') path = Paths() - def __init__(self, ntrain=19440, nvalid=4860, ntest=24300, + def __init__(self, ntrain=19440, nvalid=4860, ntest=24300, + valid_variant=None, downsample_amt=1, seed=1, normalize=False, mode='stereo', dtype='int8'): @@ -78,16 +81,47 @@ self.ntrain = ntrain self.nvalid = nvalid self.ntest = ntest - self.downsample_amt = 1 + self.downsample_amt = downsample_amt self.normalize = normalize self.dtype = dtype rng = numpy.random.RandomState(seed) - self.indices = rng.permutation(self.nsamples) - self.itr = self.indices[0:ntrain] - self.ival = self.indices[ntrain:ntrain+nvalid] + if valid_variant is None: + # The validation set is just a random subset of training + self.indices = rng.permutation(self.nsamples) + self.itr = self.indices[0:ntrain] + self.ival = self.indices[ntrain:ntrain+nvalid] + elif valid_variant in (4,6,7,8,9): + # The validation set consists in an instance of each category + # In order to know which indices correspond to which instance, + # 
we need to load the 'info' files. + train_info = read(open(self.path.train['info'])) + + ordered_itrain = numpy.nonzero(train_info[:,0] != valid_variant)[0] + max_ntrain = ordered_itrain.shape[0] + ordered_ivalid = numpy.nonzero(train_info[:,0] == valid_variant)[0] + max_nvalid = ordered_ivalid.shape[0] + + if self.ntrain > max_ntrain: + print 'WARNING: ntrain is %i, but there are only %i training samples available' % (self.ntrain, max_ntrain) + self.ntrain = max_ntrain + + if self.nvalid > max_nvalid: + print 'WARNING: nvalid is %i, but there are only %i validation samples available' % (self.nvalid, max_nvalid) + self.nvalid = max_nvalid + + # Randomize + print + self.itr = ordered_itrain[rng.permutation(max_ntrain)][0:self.ntrain] + self.ival = ordered_ivalid[rng.permutation(max_nvalid)][0:self.nvalid] + self.current = None - + + def preprocess(self, x): + if not self.normalize: + return numpy.float64(x *1.0 / 255.0) + return x + def load(self, dataset='train'): if dataset == 'train' or dataset=='valid': @@ -99,7 +133,7 @@ print 'need to reload from train file' dat, cat = load_file(self.path.train, self.normalize, self.downsample_amt, self.dtype) - + x = dat[self.itr,...].reshape(self.ntrain,-1) y = cat[self.itr] self.dat1 = Dataset.Obj(x=x, y=y) # training @@ -126,7 +160,7 @@ x = dat.reshape(self.nsamples,-1) y = cat self.dat1 = Dataset.Obj(x=x, y=y) - + del dat, cat, x, y rval = self.dat1 diff -r e53c06901f8f -r bfc5914642ce pylearn/datasets/smallNorb.py --- a/pylearn/datasets/smallNorb.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/datasets/smallNorb.py Thu Sep 10 10:30:50 2009 -0400 @@ -1,7 +1,7 @@ import os import numpy -from ..io.filetensor import read -from .config import data_root +from pylearn.io.filetensor import read +from pylearn.datasets.config import data_root #Path = '/u/bergstrj/pub/data/smallnorb' #Path = '/home/fringant2/lisa/louradoj/data/smallnorb' diff -r e53c06901f8f -r bfc5914642ce pylearn/io/filetensor.py --- a/pylearn/io/filetensor.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/io/filetensor.py Thu Sep 10 10:30:50 2009 -0400 @@ -129,8 +129,19 @@ self.magic_t, self.elsize, self.ndim, self.dim, self.dim_size = _read_header(f,debug) self.f_start = f.tell() - self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim) - padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim) + if rank <= self.ndim: + self.readshape = tuple(self.dim[self.ndim-rank:]) + else: + self.readshape = tuple(self.dim) + + #self.readshape = tuple(self.dim[self.ndim-rank:]) if rank <= self.ndim else tuple(self.dim) + + if rank <= self.ndim: + padding = tuple() + else: + padding = (1,) * (rank - self.ndim) + + #padding = tuple() if rank <= self.ndim else (1,) * (rank - self.ndim) self.returnshape = padding + self.readshape self.readsize = _prod(self.readshape) if debug: print 'READ PARAM', self.readshape, self.returnshape, self.readsize diff -r e53c06901f8f -r bfc5914642ce pylearn/sandbox/scan_inputs_groups.py --- a/pylearn/sandbox/scan_inputs_groups.py Thu Sep 10 10:30:35 2009 -0400 +++ b/pylearn/sandbox/scan_inputs_groups.py Thu Sep 10 10:30:50 2009 -0400 @@ -71,6 +71,31 @@ if nbias != 1: raise TypeError('not vector', bias_list[i]) return bias_list + +# block grad Op------------------------------------ +class BlockGrad(Op): + """This Op block the gradient of a variable""" + def make_node(self, x): + x = T.as_tensor_variable(x) + if x.ndim == 1: + return Apply(self, [x], [T.dvector()]) + else: + return Apply(self, [x], [T.dmatrix()]) + + def 
perform(self, node , x ,(out,)): + out[0] = x[0].copy() + + def grad(self, x, (gx,)): + return [gx*0] + + def __hash__(self): + return hash(BlockGrad)^77612 + + def __str__(self): + return "BlockGrad" + +blockgrad=BlockGrad() + # Encoding scan dot product------------------------------------ class ScanDotEnc(Op): """This Op takes an index list (as tensor.ivector), a list of matrices representing @@ -104,12 +129,12 @@ raise NotImplementedError('size of index different of inputs list size',idx_list) if max(idx_list) >= (len(args)-2)+1 : raise NotImplementedError('index superior to weight list length',idx_list) - for i in range(len(args[1])): - if (args[1][i].shape)[0] != batchsize: - raise NotImplementedError('different batchsize in the inputs list',args[1][i].shape) - for i in range(len(args)-2): - if (args[2+i].shape)[1] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[2+i].shape) + for a in args[1]: + if (a.shape)[0] != batchsize: + raise NotImplementedError('different batchsize in the inputs list',a.shape) + for a in args[2:]: + if (a.shape)[1] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) for i in range(len(idx_list)): if idx_list[i]>0: @@ -171,12 +196,12 @@ raise NotImplementedError('size of index different of inputs list size',idx_list) if max(idx_list) >= (len(args)-3)+1 : raise NotImplementedError('index superior to weight list length',idx_list) - for i in range(len(args[1])): - if (args[1][i].shape)[0] != batchsize: - raise NotImplementedError('different batchsize in the inputs list',args[1][i].shape) - for i in range(len(args)-3): - if (args[2+i].shape)[1] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[2+i].shape) + for a in args[1]: + if (a.shape)[0] != batchsize: + raise NotImplementedError('different batchsize in the inputs list',a.shape) + for a in args[2:-1]: + if (a.shape)[1] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) zcalc = [False for i in range(len(args)-3)] @@ -237,9 +262,9 @@ raise NotImplementedError('index superior to weight list length',idx_list) if len(idx_list) != len(args[1]) : raise NotImplementedError('size of index different of inputs list size',idx_list) - for i in range(len(args)-3): - if (args[3+i].shape)[0] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[3+i].shape) + for a in args[3:]: + if (a.shape)[0] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) zcalc = [False for i in idx_list] z[0] = [None for i in idx_list] @@ -311,9 +336,9 @@ raise NotImplementedError('index superior to weight list length',idx_list) if len(idx_list) != len(args[1]) : raise NotImplementedError('size of index different of inputs list size',idx_list) - for i in range(len(args)-4): - if (args[3+i].shape)[0] != n_hid: - raise NotImplementedError('different length of hidden in the weights list',args[3+i].shape) + for a in args[3:-1]: + if a.shape[0] != n_hid: + raise NotImplementedError('different length of hidden in the weights list',a.shape) zidx=numpy.zeros((len(idx_list)+1)) @@ -538,9 +563,9 @@ if max(idx_list) >= (len(args)-1)+1 : raise NotImplementedError('index superior to weights list length',idx_listdec) - for i in range(len(args)-1): - if args[1+i].shape[dim] != n_hid: - raise NotImplementedError('different length of hidden in the encoding weights list',args[1+i].shape) + for a in args[1:]: + if 
a.shape[dim] != n_hid: + raise NotImplementedError('different length of hidden in the encoding weights list',a.shape) for i in range(len(args[1:])): z[i][0] = numpy.asarray((idx_list == i+1).sum(),dtype='int32') @@ -607,22 +632,250 @@ out[0] = input.copy() out = out[0] mask = output_storage[1] - mask[0] = numpy.ones(input.shape) + + if mask[0] is None or mask[0].shape!=input.shape: + mask[0] = numpy.ones(input.shape) + mask = mask[0] if self.fill_with_is_array: - ignore_k = len(out.shape) - len(self.fill_with.shape) - assert ignore_k >= 0 - for (idx, v) in numpy.ndenumerate(out): - if numpy.isnan(v): - if self.fill_with_is_array: - out[idx] = self.fill_with[idx[ignore_k:]] - else: - out[idx] = self.fill_with - mask[idx] = 0 + #numpy.ndenumerate is slower then a loop + #so we optimise for some number of dimension frequently used + if out.ndim==1: + assert self.fill_with.ndim==1 + for i in range(out.shape[0]): + if numpy.isnan(out[i]): + out[i] = self.fill_with[i] + mask[i] = 0 + elif out.ndim==2 and self.fill_with.ndim==1: + for i in range(out.shape[0]): + for j in range(out.shape[1]): + if numpy.isnan(out[i,j]): + out[i,j] = self.fill_with[j] + mask[i,j] = 0 + else: + ignore_k = out.ndim - self.fill_with.ndim + assert ignore_k >= 0 + for (idx, v) in numpy.ndenumerate(out): + if numpy.isnan(v): + out[idx] = self.fill_with[idx[ignore_k:]] + mask[idx] = 0 + else: + #numpy.ndenumerate is slower then a loop + #so we optimise for some number of dimension frequently used + if out.ndim==1: + for i in range(out.shape[0]): + if numpy.isnan(out[i]): + out[i] = self.fill_with + mask[i] = 0 + elif out.ndim==2: + for i in range(out.shape[0]): + for j in range(out.shape[1]): + if numpy.isnan(out[i,j]): + out[i,j] = self.fill_with + mask[i,j] = 0 + else: + for (idx, v) in numpy.ndenumerate(out): + if numpy.isnan(out[idx]): + out[idx] = self.fill_with + mask[idx] = 0 def grad(self, inputs, (out_grad, mask_grad, )): return [out_grad] +#def c(): + def c_no_compile_args(self): +#-ffast-math and "-ffinite-math-only" SHOULD NOT BE ACTIVATED as they make isnan don't work! Idem for -funsafe-math-optimizations on gcc 4.1(on gcc 4.3 it don't break isnan) + return ["-ffast-math", "-ffinite-math-only", +#for gcc 4.1 we also need '-funsafe-math-optimizations', not need for gcc 4.3. TODO find a way to return the value depending of the compiler used? + "-funsafe-math-optimizations" + ] + + def c_headers(self): + return ['"Python.h"', '"numpy/noprefix.h"', ''] + + def c_support_code(self): + return """ +using namespace std; +""" + + def c_code(self, node, name, (input,), (value, mask), sub): + if self.fill_with==None: + print "OPTIMISATION WARNING: FillMissing don't implement this case in c. We don't support fill_with=None in c. We revert to python version", self.fill_with_is_array, node.inputs[0].ndim + return super(FillMissing,self).c_code(node, name, (input,),(value,mask), sub) + if (self.fill_with_is_array and not node.inputs[0].ndim in [1,2]) or (not node.inputs[0].ndim in [1,2,3]): + print "OPTIMISATION WARNING: FillMissing don't implement this case in c. 
We revert to python version", self.fill_with_is_array, node.inputs[0].ndim + return super(FillMissing,self).c_code(node, name, (input,),(value,mask), sub) + + + d=locals() + d.update(sub) + d["self.fill_with_is_array"] = 1 if self.fill_with_is_array else 0 + d["self.fill_with"] = self.fill_with + if self.fill_with_is_array: + d["self.fill_with_length"]=str(self.fill_with.size) + s="" + for i in self.fill_with.flatten(): + s+=","+str(i) + d["self.fill_with_data"]=s[1:] + d["self.fill_with.ndim"]=str(self.fill_with.ndim) + else: + d["self.fill_with_length"]=str(1) + d["self.fill_with_data"]=str(self.fill_with) + d["self.fill_with.ndim"]=0 + if node.inputs[0].type.dtype=="float32": d["type"]="float" + elif node.inputs[0].type.dtype=="float64": d["type"]="double" + else: raise Exception("Type %s not implemented "%node.inputs[0].type.dtype) + + return """ +//This space was added to for the recompilation as we changed the compiler option. +int typenum; +PyArrayObject* input = %(input)s, *value = %(value)s, *mask = %(mask)s; +%(type)s fill_with[%(self.fill_with_length)s] = {%(self.fill_with_data)s}; + +if(!PyArray_Check(input)){ + PyErr_SetString(PyExc_ValueError, "input must be an ndarray"); + %(fail)s; + +} + +typenum = PyArray_ObjectType((PyObject*)input, 0); +if(!value || !PyArray_SAMESHAPE(value,input)){ + Py_XDECREF(value); + value = (PyArrayObject*) PyArray_ZEROS(input->nd, input->dimensions, typenum,0); + %(value)s = value; +} + +if (!mask || !PyArray_SAMESHAPE(mask,input)){ + Py_XDECREF(mask); + mask = (PyArrayObject*) PyArray_ZEROS(input->nd, input->dimensions, typenum,0); + %(mask)s = mask; +} + +if(!PyArray_ISCONTIGUOUS(input)){ + cout<<"OPTIMISATION WARNING: in FillMissing, the input is not contiguous in memory, so we create a new version that is contiguous. This can be optimized by using directly the data."<nd==value->nd==mask->nd); +#if %(self.fill_with_is_array)s + if(input->nd==1){ + %(type)s* value_ = (%(type)s*)(value->data); + %(type)s* mask_ = (%(type)s*)(mask->data); + %(type)s* input_ = (%(type)s*)(input->data); + for(int i=0;idimensions[0];i++){ + if(isnan(input_[i])){ + value_[i]=fill_with[i]; + mask_[i]=0; + }else{ + value_[i]=input_[i]; + mask_[i]=1; + + } + } + }else if(input->nd==2 && %(self.fill_with.ndim)s==1){ + for(int i=0; idimensions[0];i++){ + %(type)s* value_ = (%(type)s*) PyArray_GETPTR2(value,i,0); + %(type)s* mask_ = (%(type)s*) PyArray_GETPTR2(mask,i,0); + %(type)s* input_ = (%(type)s*) PyArray_GETPTR2(input,i,0); + for(int j=0; jdimensions[1];j++){ + if(isnan(input_[j])){ + value_[j]=fill_with[j]; + mask_[j]=0; + }else{ + value_[j]=input_[j]; + mask_[j]=1; + } + } + } + }else{//not implemented! +//SHOULD not happen as c_code should revert to the python version in that case + std:stringstream temp; + temp << "In FillMissing, we try to fill with an array and the input ndim is implemented only for 1 and 2. This case is not implemented."<
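The FillMissing perform method above (and the C implementation that follows it) is, semantically, a fill-and-mask operation: NaNs are replaced by fill_with, which may be a scalar or an array matched against the trailing dimensions of the input, and a mask records where real data was present. A vectorized NumPy sketch of that behaviour, under the assumption that the hand-unrolled loops exist purely for speed; fill_missing is an illustrative name, not pylearn's API:

import numpy as np

def fill_missing(x, fill_with=0.0):
    # NaNs in x are replaced by fill_with (a scalar, or an array that
    # broadcasts against the trailing dimensions of x, e.g. one fill
    # value per column); mask is 1 where data was present, 0 where filled.
    x = np.asarray(x, dtype=float)
    missing = np.isnan(x)
    value = np.where(missing, fill_with, x)
    mask = (~missing).astype(x.dtype)
    return value, mask

x = np.array([[1.0, np.nan],
              [np.nan, 4.0]])
value, mask = fill_missing(x, fill_with=np.array([10.0, 20.0]))
print(value)   # [[ 1. 20.], [10.  4.]]
print(mask)    # [[1. 0.], [0. 1.]]

This elementwise copy-or-fill view is consistent with the Op's grad above, which simply passes out_grad through for the value output.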