Mercurial > pylearn
changeset 692:5ca1a8e859db
merge
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Thu, 14 May 2009 17:00:22 -0400 |
parents | e69249897f89 (current diff) 0457dfa6fcad (diff) |
children | ee7026de9681 |
files | |
diffstat | 1 files changed, 502 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/sandbox/scan_inputs_groups.py Thu May 14 17:00:22 2009 -0400 @@ -0,0 +1,502 @@ +import numpy +import theano +from theano import tensor as T +from theano.gof import Op +from theano.gof import Apply +from theano import scalar as scal + +# These Ops allow us to deal efficiently with static groups of possibly missing inputs in the dense DAA framework +# (for example with multimodal data where an entire modality is sometimes missing). +# The inputs will be represented with an index list and a theano.generic variable (which will be a list of matrices +# (numpy arrays); each element will correspond to an available modality and the index list will indicate the weights +# associated with it). +# Example of index list: [1, 0, -3] +# *the 1 says that the first element of the input list will refer to the first element of the weights_list +# (auxiliary target as input) +# if indexlist[i]>0 it refers to Weightslist[indexlist[i]-1] +# *the 0 means that the second element of the input list will be neither encoded nor decoded (it is replaced by zeros) +# this is not efficient, so in this case it is better to give: [1,-3] and [inputslist[0],inputslist[2]] +# but it allows us to deal with empty lists: give indexlist = [.0] and inputlist=[[.0]] +# *when an index is negative it means that the input will not be used for encoding but we will still reconstruct it +# (auxiliary target as output) +# if indexlist[i]<0 it refers to Weightslist[-indexlist[i]-1] +# +# An entire batch should have the same available inputs configuration.
#
# Dense DAA Example:----------------------------------------------------------------------------
#
#from theano.tensor.nnet import sigmoid
#
#nb_modality = 4
#wenc = [T.dmatrix('wenc%s'%i) for i in range(nb_modality)]
#wdec = [T.dmatrix('wdec%s'%i) for i in range(nb_modality)]
#benc = T.dvector('benc')
#bdec = [T.dvector('bdec%s'%i) for i in range(nb_modality)]
#vectin = T.ivector('vectin')
#inputpart = theano.generic('inputpart')
#noise_bit = T.dscalar('noise_bit')
#noise_group = T.dscalar('noise_group')
#
#[vectin2,inputpart2] = scannoise(vectin,inputpart,noise_bit,noise_group)
#hid = scandotenc(vectin2, inputpart2, wenc)
#acthid = sigmoid(hid + benc)
#dec = sigmoid(scanbiasdec(vectin2,inputpart2,bdec) + scandotdec(vectin2, inputpart2,acthid,wdec))
#cost = T.sum(T.sum(T.sqr( scaninputs(vectin,inputpart) - dec ),1),0)

# Checking inputs in make_node methods----------------------
def Checkidx_list(idx_list):
    """Convert `idx_list` to a tensor variable and require it to be a vector.

    Raises TypeError if the converted variable does not have exactly 1 dimension.
    """
    idx_list = T.as_tensor_variable(idx_list)
    if idx_list.type.ndim != 1:
        raise TypeError('not vector', idx_list)
    return idx_list


def Checkhidd(hidd):
    """Convert `hidd` to a tensor variable and require it to have 1 or 2 dimensions.

    Raises TypeError otherwise.
    """
    hidd = T.as_tensor_variable(hidd)
    if hidd.type.ndim not in (1, 2):
        raise TypeError('not matrix or vector', hidd)
    return hidd


def Checkweights_list(weights_list):
    """Convert every element of `weights_list` to a tensor variable.

    Returns a new list; raises TypeError on the first element that does not
    have 1 or 2 dimensions.
    """
    # A list comprehension (rather than map) guarantees a real list, which the
    # callers concatenate with other lists.
    weights_list = [T.as_tensor_variable(w) for w in weights_list]
    for w in weights_list:
        if w.type.ndim not in (1, 2):
            raise TypeError('not matrix or vector', w)
    return weights_list


def Checkbias_list(bias_list):
    """Convert every element of `bias_list` to a tensor variable.

    Returns a new list; raises TypeError on the first element that is not a vector.
    """
    bias_list = [T.as_tensor_variable(b) for b in bias_list]
    for b in bias_list:
        if b.type.ndim != 1:
            raise TypeError('not vector', b)
    return bias_list


# Encoding scan dot product------------------------------------
class ScanDotEnc(Op):
    """This Op takes an index list (as tensor.ivector), a list of matrices representing
    the available inputs (as theano.generic), and all the encoding weights tensor.dmatrix of the model.
    It will select the weights corresponding to the inputs (according to index list)
    and compute only the necessary dot products.

    Only strictly positive indices contribute: index k uses the k-th weight matrix
    (1-based). The output is the sum of the selected dot products, or a zero
    matrix of shape (batchsize, n_hid) when no index is positive."""
    def __init__(self):
        #Create Theano methods to do the dot products with blas or at least in C.
        self.M=theano.Module()
        inputs = T.dmatrix('input')
        weights = T.dmatrix('weights')
        self.M.hid = T.dmatrix('hid')
        self.M.resultin = self.M.hid + T.dot(inputs,weights)
        result = T.dot(inputs,weights)

        # dotin accumulates into the module member `hid`; dot returns a fresh product.
        self.M.dotin = theano.Method([inputs,weights],None,{self.M.hid : self.M.resultin})
        self.M.dot = theano.Method([inputs,weights],result)
        self.m = self.M.make()

    def make_node(self, idx_list, inputs_list, weights_list):
        idx_list = Checkidx_list(idx_list)
        weights_list = Checkweights_list(weights_list)
        return Apply(self, [idx_list] + [inputs_list] + weights_list, [T.dmatrix()])

    def perform(self, node, args, out):
        """args = [idx_list, inputs_list, weight_0, ..., weight_{n-1}]; out = (hid,)."""
        hid, = out
        idx_list = args[0]
        inputs_list = args[1]
        weights = args[2:]

        batchsize = (inputs_list[0].shape)[0]
        n_hid = (args[2].shape)[1]
        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)
        if max(idx_list) >= len(weights)+1:
            raise NotImplementedError('index superior to weight list length',idx_list)
        for mat in inputs_list:
            if (mat.shape)[0] != batchsize:
                raise NotImplementedError('different batchsize in the inputs list',mat)
        for w in weights:
            if (w.shape)[1] != n_hid:
                raise NotImplementedError('different length of hidden in the weights list',w)

        hidcalc = False
        for i, idx in enumerate(idx_list):
            if idx > 0:
                w = weights[int(idx)-1]
                if hidcalc:
                    self.m.dotin(inputs_list[i], w)
                else:
                    self.m.hid = self.m.dot(inputs_list[i], w)
                    hidcalc = True

        if not hidcalc:
            hid[0] = numpy.zeros([batchsize,n_hid])
        else:
            hid[0] = self.m.hid

    def grad(self, args, gz):
        # No gradient for the index list nor for the generic inputs list.
        return [None, None] + ScanDotEncGrad()(args,gz)

    def __hash__(self):
        return hash(ScanDotEnc)^58994

    def __str__(self):
        return "ScanDotEnc"

scandotenc=ScanDotEnc()

class ScanDotEncGrad(Op):
    """This Op computes the gradient wrt the weights for ScanDotEnc.

    It produces one matrix per weight matrix; weights that no positive index
    refers to get a zero gradient of the weight's own shape."""
    def __init__(self):
        #Create Theano methods to do the dot products with blas or at least in C.
        self.M=theano.Module()
        input1 = T.dmatrix('input1')
        self.M.g_out = T.dmatrix('g_out')
        result = T.dmatrix('result')
        input2=T.transpose(input1)
        self.M.resultin = result + T.dot(input2,self.M.g_out)
        self.M.result = T.dot(input2,self.M.g_out)

        self.M.dotin = theano.Method([input1,result],self.M.resultin)
        self.M.dot = theano.Method([input1],self.M.result)
        self.m = self.M.make()

    def make_node(self, args, g_out):
        # Called only for validation: both raise TypeError on malformed inputs.
        Checkidx_list(args[0])
        Checkweights_list(args[2:])
        return Apply(self, args + g_out, [T.dmatrix() for i in range(2,len(args))])

    def perform(self, node, args, z):
        """args = [idx_list, inputs_list, weight_0, ..., weight_{n-1}, g_out];
        z holds one output storage cell per weight matrix."""
        idx_list = args[0]
        inputs_list = args[1]
        weights = args[2:-1]
        self.m.g_out = args[-1]

        batchsize = (inputs_list[0].shape)[0]
        n_hid = (args[2].shape)[1]
        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)
        if max(idx_list) >= len(weights)+1:
            raise NotImplementedError('index superior to weight list length',idx_list)
        for mat in inputs_list:
            if (mat.shape)[0] != batchsize:
                raise NotImplementedError('different batchsize in the inputs list',mat)
        for w in weights:
            if (w.shape)[1] != n_hid:
                raise NotImplementedError('different length of hidden in the weights list',w)

        zcalc = [False] * len(weights)

        for i, idx in enumerate(idx_list):
            if idx > 0:
                k = int(idx)-1
                if zcalc[k]:
                    # Same weight used by several inputs: accumulate.
                    z[k][0] = self.m.dotin(inputs_list[i], z[k][0])
                else:
                    z[k][0] = self.m.dot(inputs_list[i])
                    zcalc[k] = True

        for k, w in enumerate(weights):
            if not zcalc[k]:
                shp = w.shape
                z[k][0] = numpy.zeros((shp[0],shp[1]))

    def __hash__(self):
        return hash(ScanDotEncGrad)^15684

    def __str__(self):
        return "ScanDotEncGrad"

# Decoding scan dot product------------------------------------
class ScanDotDec(Op):
    """This Op takes an index list (as tensor.ivector), a list of matrices representing
    the available inputs (as theano.generic), the hidden layer of the DAA (theano.dmatrix)
    and all the decoding weights tensor.dmatrix of the model. It will select the
    weights corresponding to the available inputs (according to index list) and compute
    only the necessary dot products. The outputs will be concatenated and will represent
    the reconstruction of the different modality in the same order than the index list.

    The absolute value of each index selects the weight (1-based); an index of 0
    yields a zero block as wide as the corresponding input."""
    def __init__(self):
        #Create Theano methods to do the dot products with blas or at least in C.
        self.M=theano.Module()
        weights = T.dmatrix('weights')
        self.M.hid = T.dmatrix('hid')
        oldval = T.dmatrix('oldval')
        resultin = oldval + T.dot(self.M.hid,weights)
        result = T.dot(self.M.hid,weights)

        self.M.dotin = theano.Method([weights,oldval],resultin)
        self.M.dot = theano.Method([weights],result)
        self.m = self.M.make()

    def make_node(self, idx_list, input_list, hidd, weights_list):
        idx_list = Checkidx_list(idx_list)
        hidd = Checkhidd(hidd)
        weights_list = Checkweights_list(weights_list)
        return Apply(self, [idx_list] + [input_list] +[hidd] + weights_list,[T.dmatrix()])

    def perform(self, node, args, out):
        """args = [idx_list, inputs_list, hid, weight_0, ..., weight_{n-1}]; out = (z,)."""
        z, = out

        idx_list = abs(args[0])
        inputs_list = args[1]
        weights = args[3:]
        self.m.hid = args[2]

        batchsize = (self.m.hid.shape)[0]
        n_hid = self.m.hid.shape[1]
        if max(idx_list) >= len(weights)+1:
            raise NotImplementedError('index superior to weight list length',idx_list)
        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)
        for w in weights:
            if (w.shape)[0] != n_hid:
                raise NotImplementedError('different length of hidden in the weights list',w)

        blocks = []
        for i, idx in enumerate(idx_list):
            if idx > 0:
                blocks.append(self.m.dot(weights[int(idx)-1]))
            else:
                # Index 0: no weight is associated, so the width of the zero block
                # comes from the i-th input itself (not from inputs_list[idx-1],
                # which would silently pick the last input).
                blocks.append(numpy.zeros((batchsize, inputs_list[i].shape[1])))

        z[0] = numpy.concatenate(blocks,1)

    def grad(self, args, gz):
        # No gradient for the index list nor for the generic inputs list.
        return [None, None] + ScanDotDecGrad()(args,gz)

    def __hash__(self):
        return hash(ScanDotDec)^73568

    def __str__(self):
        return "ScanDotDec"

scandotdec=ScanDotDec()

class ScanDotDecGrad(Op):
    """This Op computes the gradient wrt the hidden layer and wrt the weights for ScanDotDec.

    Output 0 is the gradient wrt the hidden layer, output k+1 the gradient wrt
    the k-th weight matrix."""
    def __init__(self):
        self.M=theano.Module()
        gout = T.dmatrix('gout')
        self.M.hidt = T.dmatrix('hid')
        oldval = T.dmatrix('oldval')
        resultin1 = oldval + T.dot(self.M.hidt,gout)
        result1 = T.dot(self.M.hidt,gout)
        weights = T.dmatrix('weights')
        weights2 = T.transpose(weights)
        resultin2 = oldval + T.dot(gout,weights2)
        result2 = T.dot(gout,weights2)

        # dot1/dotin1: gradient wrt a weight matrix; dot2/dotin2: gradient wrt hid.
        self.M.dotin1 = theano.Method([gout,oldval],resultin1)
        self.M.dot1 = theano.Method([gout],result1)
        self.M.dotin2 = theano.Method([gout,weights,oldval],resultin2)
        self.M.dot2 = theano.Method([gout,weights],result2)
        self.m = self.M.make()

    def make_node(self, args, g_out):
        # Called only for validation: each raises TypeError on malformed inputs.
        Checkidx_list(args[0])
        Checkhidd(args[2])
        Checkweights_list(args[3:])
        return Apply(self, args + g_out, [T.dmatrix() for i in range(2,len(args))])

    def perform(self, node, args, z):
        """args = [idx_list, inputs_list, hid, weight_0, ..., weight_{n-1}, g_out];
        z holds 1 + n output storage cells (hid first, then one per weight)."""
        idx_list = abs(args[0])
        inputs_list = args[1]
        weights = args[3:-1]
        gout = args[-1]
        self.m.hidt = args[2].T

        n_hid = self.m.hidt.shape[0]
        if max(idx_list) >= len(weights)+1:
            raise NotImplementedError('index superior to weight list length',idx_list)
        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)
        for w in weights:
            if (w.shape)[0] != n_hid:
                raise NotImplementedError('different length of hidden in the weights list',w)

        # Column offsets of every block inside g_out. Integer dtype is required
        # because the offsets are used as slice bounds below.
        zidx = numpy.zeros(len(idx_list)+1, dtype=int)
        for i, idx in enumerate(idx_list):
            if idx == 0:
                zidx[i+1] = (inputs_list[i].shape)[1]
            else:
                zidx[i+1] = (weights[int(idx)-1].shape)[1]
        zidx = zidx.cumsum()

        hidcalc = False
        zcalc = [False] * len(weights)

        for i, idx in enumerate(idx_list):
            if idx > 0:
                k = int(idx)
                g_block = gout[:,zidx[i]:zidx[i+1]]
                if zcalc[k-1]:
                    z[k][0] = self.m.dotin1(g_block, z[k][0])
                else:
                    z[k][0] = self.m.dot1(g_block)
                    zcalc[k-1] = True
                if hidcalc:
                    z[0][0] = self.m.dotin2(g_block, weights[k-1], z[0][0])
                else:
                    z[0][0] = self.m.dot2(g_block, weights[k-1])
                    hidcalc = True

        if not hidcalc:
            z[0][0] = numpy.zeros((self.m.hidt.shape[1],self.m.hidt.shape[0]))

        for k, w in enumerate(weights):
            if not zcalc[k]:
                shp = w.shape
                z[k+1][0] = numpy.zeros((shp[0],shp[1]))

    def __hash__(self):
        return hash(ScanDotDecGrad)^87445

    def __str__(self):
        return "ScanDotDecGrad"

# DAA input noise------------------------------------
class ScanNoise(Op):
    """This Op takes an index list (as tensor.ivector), a list of matrices representing
    the available inputs (as theano.generic), a probability of individual bit masking and
    a probability of modality masking. It will return the inputs list with randoms zeros entry
    and the index list with some positive values changed to negative values (groups masking)"""
    def __init__(self, seed = 1):
        self.M=theano.Module()
        self.M.rand = T.RandomStreams(seed)
        self.seed = seed
        mat = T.matrix('mat')
        noise_level_bit = T.dscalar('noise_level_bit')
        noise_level_group = T.dscalar('noise_level_group')
        self.M.out1 = self.M.rand.binomial(T.shape(mat), 1, 1 - noise_level_bit) * mat
        self.M.out2 = self.M.rand.binomial((1,1), 1, 1 - noise_level_group)

        self.M.noisify_bit = theano.Method([mat,noise_level_bit],self.M.out1)
        self.M.noisify_group_bool = theano.Method([noise_level_group],self.M.out2)
        self.R = self.M.make()
        self.R.rand.initialize()

    def make_node(self, idx_list, inputs_list, noise_level_bit, noise_level_group):
        idx_list = Checkidx_list(idx_list)
        return Apply(self, [idx_list] + [inputs_list] + [noise_level_bit] + [noise_level_group],\
                [T.ivector(), theano.generic()])

    def perform(self, node, inputs, outputs):
        """inputs = (idx_list, inputs_list, noise_level_bit, noise_level_group);
        outputs = (y, z): y receives the masked index vector, z the noisy inputs list."""
        idx_list, inputs_list, noise_level_bit, noise_level_group = inputs
        y, z = outputs

        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)

        # A positive index is flipped to negative when the group draw comes out 0.
        masked_idx = [-i if (i>0 and not(self.R.noisify_group_bool(noise_level_group))) else i
                      for i in idx_list]
        # The output is declared as T.ivector(): store an int32 array, not a list,
        # so that downstream Ops can safely call abs() / max() on it.
        y[0] = numpy.asarray(masked_idx, dtype='int32')
        z[0] = [(self.R.noisify_bit(mat,noise_level_bit) if idx>0 else numpy.zeros((mat.shape)))
                for idx, mat in zip(masked_idx, inputs_list)]

    def grad(self,args,gz):
        return [None,None,None,None]

    def __hash__(self):
        return hash(ScanNoise)^hash(self.seed)^hash(self.R)^12254

    def __str__(self):
        return "ScanNoise"

scannoise=ScanNoise()

# Total input matrix construction------------------------------------
class ScanInputs(Op):
    """This Op takes an index list (as tensor.ivector) and a list of matrices representing
    the available inputs (as theano.generic). It will construct the appropriate tensor.dmatrix
    to compare to the reconstruction obtained with ScanDotDec"""
    def make_node(self, idx_list, inputs_list):
        idx_list = Checkidx_list(idx_list)
        return Apply(self, [idx_list] + [inputs_list],[T.dmatrix()])

    def perform(self, node, inputs, out):
        """inputs = (idx_list, inputs_list); out = (z,)."""
        idx_list, inputs_list = inputs
        z, = out

        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)

        # Build a new list instead of overwriting entries of inputs_list: the
        # list is an input of this Op and may be shared with other nodes.
        blocks = [0 * mat if idx == 0 else mat
                  for idx, mat in zip(idx_list, inputs_list)]

        z[0] = numpy.concatenate(blocks,1)

    def grad(self,args,gz):
        return [None,None]

    def __hash__(self):
        return hash(ScanInputs)^75902

    def __str__(self):
        return "ScanInputs"

scaninputs=ScanInputs()

# Decoding bias vector construction------------------------------------
class ScanBiasDec(Op):
    """This Op takes an index list (as tensor.ivector), a list of matrices representing
    the available inputs (as theano.generic) and the decoding bias tensor.dvector.
    It will construct the appropriate bias tensor.dvector
    to add to the reconstruction obtained with ScanDotDec"""
    def make_node(self, idx_list, input_list, bias_list):
        idx_list = Checkidx_list(idx_list)
        bias_list = Checkbias_list(bias_list)
        return Apply(self, [idx_list] + [input_list] + bias_list, [T.dvector()])

    def perform(self, node, args, out):
        """args = [idx_list, inputs_list, bias_0, ..., bias_{n-1}]; out = (z,)."""
        z, = out
        idx_list = abs(args[0])
        inputs_list = args[1]

        if max(idx_list) >= (len(args)-2)+1:
            raise NotImplementedError('index superior to bias list length',idx_list)
        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)
        # Index k (1-based) selects bias k, stored at args[k+1]; index 0 yields
        # zeros as wide as the corresponding input.
        parts = [args[int(idx)+1] if idx != 0 else numpy.zeros(inputs_list[i].shape[1])
                 for i, idx in enumerate(idx_list)]
        # The parts are 1-D vectors, so they must be joined along axis 0.
        z[0] = numpy.concatenate(parts,0)

    def __hash__(self):
        return hash(ScanBiasDec)^60056

    def grad(self,args,gz):
        # No gradient for the index list nor for the generic inputs list.
        return [None,None] + ScanBiasDecGrad()(args,gz)

    def __str__(self):
        return "ScanBiasDec"

scanbiasdec=ScanBiasDec()

class ScanBiasDecGrad(Op):
    """This Op computes the gradient wrt the bias for ScanBiasDec"""
    def make_node(self, args, g_out):
        # Called only for validation: both raise TypeError on malformed inputs.
        Checkidx_list(args[0])
        Checkbias_list(args[2:])
        return Apply(self, args + g_out, [T.dvector() for i in range(len(args)-2)])

    def perform(self, node, args, z):
        """args = [idx_list, inputs_list, bias_0, ..., bias_{n-1}, g_out];
        z holds one output storage cell per bias vector."""
        idx_list = abs(args[0])
        inputs_list = args[1]
        biases = args[2:-1]
        gout = args[-1]

        if max(idx_list) >= len(biases)+1:
            raise NotImplementedError('index superior to bias list length',idx_list)
        if len(idx_list) != len(inputs_list):
            raise NotImplementedError('size of index different of inputs list size',idx_list)

        # Offsets of every block inside g_out. Integer dtype is required because
        # the offsets are used as slice bounds below.
        zidx = numpy.zeros(len(idx_list)+1, dtype=int)
        for i, idx in enumerate(idx_list):
            if idx == 0:
                zidx[i+1] = (inputs_list[i].shape)[1]
            else:
                zidx[i+1] = (biases[int(idx)-1].size)
        zidx = zidx.cumsum()
        zcalc = [False] * len(biases)

        for i, idx in enumerate(idx_list):
            if idx > 0:
                k = int(idx)-1
                if zcalc[k]:
                    z[k][0] += gout[zidx[i]:zidx[i+1]]
                else:
                    # Copy: a bare slice is a view of g_out, and the in-place
                    # accumulation above would then write into this Op's input.
                    z[k][0] = gout[zidx[i]:zidx[i+1]].copy()
                    zcalc[k] = True

        for k, b in enumerate(biases):
            if not zcalc[k]:
                z[k][0] = numpy.zeros(b.size)

    def __hash__(self):
        return hash(ScanBiasDecGrad)^41256

    def __str__(self):
        return "ScanBiasDecGrad"