pylearn changeset 819:7dfecf11cbf4
...
author | dumitru@deepnets.mtv.corp.google.com
---|---
date | Wed, 02 Sep 2009 14:23:50 -0700
parents | f4729745bb58 (current diff) db2c26a2c97c (diff)
children | 2333cd78f574
files | pylearn/algorithms/sandbox/DAA_inputs_groups.py, pylearn/sandbox/scan_inputs_groups.py
diffstat | 2 files changed, 144 insertions(+), 54 deletions(-)
--- a/pylearn/algorithms/sandbox/DAA_inputs_groups.py  Wed Sep 02 14:22:02 2009 -0700
+++ b/pylearn/algorithms/sandbox/DAA_inputs_groups.py  Wed Sep 02 14:23:50 2009 -0700
@@ -6,9 +6,12 @@
 from theano.compile import module
 from pylearn.sandbox.scan_inputs_groups import scaninputs, scandotdec, scandotenc, scannoise, scanbiasdec, \
-        scanmaskenc,scanmaskdec, FillMissing, mask_gradient
+        scanmaskenc,scanmaskdec, FillMissing, mask_gradient, blockgrad
 from pylearn.algorithms.logistic_regression import LogRegN
+import pylearn.algorithms.cost
+
+import time
 from pylearn.io import filetensor
 import os
@@ -19,7 +22,14 @@
     print 'save ndarray to file: ', save_dir + fname
     file_handle = open(os.path.join(save_dir, fname), 'w')
     filetensor.write(file_handle, mat)
-    file_handle.close()
+    writebool = False
+    while not writebool:
+        try:
+            file_handle.close()
+            writebool = True
+        except:
+            print 'save model error'
+            time.sleep((numpy.random.randint(10)+2)*10)

 def load_mat(fname, save_dir=''):
     print 'loading ndarray from file: ', save_dir + fname
@@ -83,27 +93,42 @@
 def tanh_act(x):
     return theano.tensor.tanh(x/2.0)

+def softsign_act(x):
+    return x/(1.0 + theano.tensor.abs(x))
+
 # costs utils:---------------------------------------------------
 # in order to fix numerical instability of the cost and gradient calculation for the cross entropy we calculate it
 # with the following functions direclty from the activation:
+# XS is used to get back the KL divergence, important for doing global updates
+
 def sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis):
-    XE =-target * T.log(1 + T.exp(-output_act)) + (1 - target) * (- T.log(1 + T.exp(output_act)))
-    return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis)
+    XE = target * (- T.log(1 + T.exp(-output_act))) + (1 - target) * (- T.log(1 + T.exp(output_act)))
+    XS = T.xlogx.xlogx(target) + T.xlogx.xlogx(1-target)
+    return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)

 def tanh_cross_entropy(target, output_act, mean_axis, sum_axis):
-    XE =-(target+1)/2.0 * T.log(1 + T.exp(- output_act)) + \
+    XE = (target+1)/2.0 * (- T.log(1 + T.exp(- output_act))) + \
            (1 - (target+1)/2.0) * (- T.log(1 + T.exp(output_act)))
-    return -T.mean(T.sum(XE, axis=sum_axis),axis=mean_axis)
+    XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
+    return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)
+
+def softsign_cross_entropy(target, output_act, mean_axis, sum_axis):
+    newact = ((output_act/(1.0 + theano.tensor.abs(output_act)))+1)/2.0
+    XE = (target+1)/2.0 * T.log(newact) + (1 - (target+1)/2.0) * T.log(1 - newact)
+    XS = T.xlogx.xlogx((target+1)/2.0) + T.xlogx.xlogx(1-(target+1)/2.0)
+    return -T.mean(T.sum(XE-XS, axis=sum_axis),axis=mean_axis)

 def cross_entropy(target, output_act, act, mean_axis=0, sum_axis=1):
     if act == 'sigmoid_act':
         return sigmoid_cross_entropy(target, output_act, mean_axis, sum_axis)
     if act == 'tanh_act':
         return tanh_cross_entropy(target, output_act, mean_axis, sum_axis)
+    if act == 'softsign_act':
+        return softsign_cross_entropy(target, output_act, mean_axis, sum_axis)
     assert False

-def quadratic(target, output, act, axis = 1):
-    return pylearn.algorithms.cost.quadratic(target, output, axis)
+def quadratic(target, output, act, mean_axis = 0):
+    return T.sum(pylearn.algorithms.cost.quadratic(target, output, mean_axis))

 # DAAig module----------------------------------------------------------------
 class DAAig(module.Module):
@@ -112,10 +137,10 @@
     def __init__(self, input = None, auxinput = None,
                  in_size=None, auxin_size= None, n_hid=1,
-                 regularize = False, tie_weights = False, hid_fn = 'tanh_act',
+                 regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act',
                  rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
                  interface = True, ignore_missing=None, reconstruct_missing=False,
-                 corruption_pattern=None, **init):
+                 corruption_pattern=None, blockgrad = False, **init):
         """
        :param input: WRITEME
        :param auxinput: WRITEME
@@ -152,18 +177,6 @@
            missing inputs will be backpropagated. Otherwise, it will not.
        :todo: Default noise level for all daa levels
        """
-        print '\t\t**** DAAig.__init__ ****'
-        print '\t\tinput = ', input
-        print '\t\tauxinput = ', auxinput
-        print '\t\tin_size = ', in_size
-        print '\t\tauxin_size = ', auxin_size
-        print '\t\tn_hid = ', n_hid
-        print '\t\tregularize = ', regularize
-        print '\t\ttie_weights = ', tie_weights
-        print '\t\thid_fn = ', hid_fn
-        print '\t\trec_fn = ', rec_fn
-        print '\t\treconstruction_cost_function = ', reconstruction_cost_function
-
         super(DAAig, self).__init__()
         self.random = T.RandomStreams()
@@ -173,15 +186,17 @@
         self.n_hid = n_hid
         self.regularize = regularize
         self.tie_weights = tie_weights
+        self.tie_weights_aux = tie_weights_aux if tie_weights_aux is not None else tie_weights
         self.interface = interface
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
+        self.blockgrad = blockgrad

-        assert hid_fn in ('sigmoid_act','tanh_act')
+        assert hid_fn in ('sigmoid_act','tanh_act','softsign_act')
         self.hid_fn = eval(hid_fn)

-        assert rec_fn in ('sigmoid_act','tanh_act')
+        assert rec_fn in ('sigmoid_act','tanh_act','softsign_act')
         self.rec_fn = eval(rec_fn)
         self.rec_name = rec_fn
@@ -189,6 +204,19 @@
         self.reconstruction_cost_function = eval(reconstruction_cost_function)
         self.reconstruction_cost_function_name = reconstruction_cost_function

+        print '\t\t**** DAAig.__init__ ****'
+        print '\t\tinput = ', input
+        print '\t\tauxinput = ', auxinput
+        print '\t\tin_size = ', self.in_size
+        print '\t\tauxin_size = ', self.auxin_size
+        print '\t\tn_hid = ', self.n_hid
+        print '\t\tregularize = ', self.regularize
+        print '\t\ttie_weights = ', self.tie_weights
+        print '\t\ttie_weights_aux = ', self.tie_weights_aux
+        print '\t\thid_fn = ', hid_fn
+        print '\t\trec_fn = ', rec_fn
+        print '\t\treconstruction_cost_function = ', reconstruction_cost_function
+
         ### DECLARE MODEL VARIABLES and default
         self.input = input
         if self.ignore_missing is not None and self.input is not None:
@@ -212,7 +240,8 @@

         if self.auxinput is not None:
             self.wauxenc = [T.dmatrix('wauxenc%s'%i) for i in range(len(auxin_size))]
-            self.wauxdec = [self.wauxenc[i].T if tie_weights else T.dmatrix('wauxdec%s'%i) for i in range(len(auxin_size))]
+            self.wauxdec =[ self.wauxenc[i].T if self.tie_weights_aux else T.dmatrix('wauxdec%s'%i) for i in\
+                    range(len(auxin_size))]
             self.bauxdec = [T.dvector('bauxdec%s'%i) for i in range(len(auxin_size))]

         #hyper-parameters
@@ -330,8 +359,8 @@
         tmpbool = (self.reconstruction_cost_function_name == 'cross_entropy')
         if (self.input is not None):
             container.reconstruction_cost_in = \
-                    self.reconstruction_cost_function(self.input, container.rec_activation_in \
-                    if tmpbool else container.rec_in, self.rec_name)
+                    self.reconstruction_cost_function(blockgrad(self.input) if self.blockgrad else self.input,\
+                    container.rec_activation_in if tmpbool else container.rec_in, self.rec_name)
         if (self.auxinput is not None):
             container.reconstruction_cost_aux = \
                     self.reconstruction_cost_function(scaninputs(self.idx_list, self.auxinput), container.rec_activation_aux \
@@ -370,6 +399,7 @@
         if not(self.tie_weights):
             if self.input is not None:
                 self.params += [self.wdec]
+        if not(self.tie_weights_aux):
             if self.auxinput is not None:
                 self.params += self.wauxdec
@@ -391,8 +421,8 @@
             self.update = theano.Method(listin, self.noise.cost, self.updates)
             self.compute_cost = theano.Method(listin, self.noise.cost)
             self.noisify = theano.Method(listin, listout)
-            self.recactivation = theano.Method(listin, self.clean.rec_activation)
-            self.reconstruction = theano.Method(listin, self.clean.rec)
+            self.recactivation = theano.Method(listin, self.noise.rec_activation)
+            self.reconstruction = theano.Method(listin, self.noise.rec)
             self.activation = theano.Method(listin, self.clean.hidden_activation)
             self.representation = theano.Method(listin, self.clean.hidden)
@@ -441,12 +471,12 @@
             wauxdecshp = [tuple(reversed(i)) for i in wauxencshp]
             obj.bauxdec = [numpy.zeros(i) for i in self.auxin_size]
             obj.wauxenc = [self.R.uniform(size=i, low = -self.inf, high = self.inf) for i in wauxencshp]
-            if not(self.tie_weights):
+            if not(self.tie_weights_aux):
                 obj.wauxdec = [copy.copy(obj.wauxenc[i].T) for i in range(len(wauxdecshp))] if tieinit else\
                         [self.R.uniform(size=i, low=-self.hif, high=self.hif) for i in wauxdecshp]
             if orthoinit:
                 obj.wauxenc = [orthogonalinit(w) for w in obj.wauxenc]
-                if not(self.tie_weights):
+                if not(self.tie_weights_aux):
                     obj.wauxdec = [orthogonalinit(w,0) for w in obj.wauxdec]
             print 'wauxencshp = ', wauxencshp
             print 'wauxdecshp = ', wauxdecshp
@@ -462,11 +492,11 @@
 class StackedDAAig(module.Module):
     def __init__(self, depth = 1, input = T.dmatrix('input'), auxinput = [None],
                  in_size = None, auxin_size = [None], n_hid = [1],
-                 regularize = False, tie_weights = False, hid_fn = 'tanh_act',
+                 regularize = False, tie_weights = False, tie_weights_aux = None, hid_fn = 'tanh_act',
                  rec_fn = 'tanh_act',reconstruction_cost_function='cross_entropy',
                  n_out = 2, target = None, debugmethod = False, totalupdatebool=False,
                  ignore_missing=None, reconstruct_missing=False,
-                 corruption_pattern=None,
+                 corruption_pattern=None, blockgrad = False, act_reg = 'sigmoid_act',
                  **init):

         super(StackedDAAig, self).__init__()
@@ -487,6 +517,7 @@
         self.n_hid = listify(n_hid,depth)
         self.regularize = regularize
         tie_weights = listify(tie_weights,depth)
+        tie_weights_aux = listify(tie_weights_aux,depth)
         hid_fn = listify(hid_fn,depth)
         rec_fn = listify(rec_fn,depth)
         reconstruction_cost_function = listify(reconstruction_cost_function,depth)
@@ -497,6 +528,10 @@
         self.ignore_missing = ignore_missing
         self.reconstruct_missing = reconstruct_missing
         self.corruption_pattern = corruption_pattern
+        self.blockgrad = blockgrad
+
+        assert act_reg in ('sigmoid_act','tanh_act','softsign_act')
+        self.act_reg = eval(act_reg)

         print '\t**** StackedDAAig.__init__ ****'
         print '\tdepth = ', self.depth
@@ -507,9 +542,13 @@
         print '\tn_hid = ', self.n_hid
         print '\tregularize = ', self.regularize
         print '\ttie_weights = ', tie_weights
+        print '\ttie_weights_aux = ', tie_weights_aux
         print '\thid_fn = ', hid_fn
         print '\trec_fn = ', rec_fn
+        print '\tact_reg = ', act_reg
         print '\treconstruction_cost_function = ', reconstruction_cost_function
+        print '\tblockgrad = ', blockgrad
+        print '\tact_reg = ', act_reg
         print '\tn_out = ', self.n_out

         # init for model construction
@@ -529,7 +568,7 @@

         # facultative methods
         if self.debugmethod:
-            self.activation = [None] * (self.depth)
+            self.activation = [None] * (self.depth+1)
             self.representation = [None] * (self.depth)
             self.recactivation = [None] * (self.depth)
             self.reconstruction = [None] * (self.depth)
@@ -570,10 +609,12 @@
         for i in range(self.depth):
             dict_params = dict(input = inputprec, in_size = in_sizeprec, auxin_size = auxin_size[i],
-                    n_hid = self.n_hid[i], regularize = False, tie_weights = tie_weights[i], hid_fn = hid_fn[i],
+                    n_hid = self.n_hid[i], regularize = False, tie_weights = tie_weights[i],
+                    tie_weights_aux = tie_weights_aux[i], hid_fn = hid_fn[i],
                    rec_fn = rec_fn[i], reconstruction_cost_function = reconstruction_cost_function[i],
                    interface = False, ignore_missing = self.ignore_missing,
-                    reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern)
+                    reconstruct_missing = self.reconstruct_missing,corruption_pattern = self.corruption_pattern,
+                    blockgrad=self.blockgrad)
             if auxin_size[i] is None:
                 offset +=1
                 dict_params.update({'auxinput' : None})
@@ -606,7 +647,7 @@
             if self.regularize:
                 self.regularizationenccost[i] = self.regularizationenccost[i-1]+self.daaig[i-1].regularizationenc if i else 0
                 self.localcost[i] += self.daaig[i].regularization
-                self.globalcost[i] += self.regularizationenccost[i]
+                self.globalcost[i] += self.regularizationenccost[i] + self.daaig[i].regularization
                 if self.totalupdatebool:
                     self.totalcost[i] += self.daaig[i].regularization
@@ -634,8 +675,8 @@
             if self.debugmethod:
                 self.activation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden_activation)
                 self.representation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.hidden)
-                self.recactivation[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec_activation)
-                self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].clean.rec)
+                self.recactivation[i] = theano.Method(self.inputs[i],self.daaig[i].noise.rec_activation)
+                self.reconstruction[i] = theano.Method(self.inputs[i],self.daaig[i].noise.rec)
                 self.noisyinputs[i] =theano.Method(self.inputs[i], noisyout)
                 self.compute_localcost[i] = theano.Method(self.inputs[i],self.localcost[i])
                 self.compute_localgradients[i] = theano.Method(self.inputs[i],self.localgradients[i])
@@ -654,7 +695,7 @@
         # supervised layer------------------------------------------------------------------------
         print '\tLayer supervised init'
         self.inputs[-1] = copy.copy(self.inputs[-2])+[self.target]
-        self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,sigmoid_act(self.daaig[-2].clean.hidden_activation),self.target)
+        self.daaig[-1] = LogRegN(in_sizeprec,self.n_out,self.act_reg(self.daaig[-2].clean.hidden_activation),self.target)
         paramstot += self.daaig[-1].params

         self.localcost[-1] = self.daaig[-1].regularized_cost \
@@ -694,6 +735,7 @@
             self.NLL = theano.Method(self.inputs[-1],self.daaig[-1]._xent)

         if self.debugmethod:
+            self.activation[-1] = theano.Method(self.inputs[-2],self.daaig[-1].linear_output)
             self.compute_localcost[-1] = theano.Method(self.inputs[-1],self.localcost[-1])
             self.compute_localgradients[-1] = theano.Method(self.inputs[-1],self.localgradients[-1])
             self.compute_globalcost[-1] = theano.Method(self.inputs[-1],self.globalcost[-1])
@@ -748,11 +790,12 @@
             save_mat('wenc%s.ft'%(i) ,inst.daaig[i].wenc, save_dir)
             save_mat('bdec%s.ft'%(i) ,inst.daaig[i].bdec, save_dir)

-            if not self.daaig[i].tie_weights:
+            if not self.daaig[i].tie_weights_aux:
                 if self.daaig[i].auxinput is not None:
                     for j in range(len(inst.daaig[i].wauxdec)):
                         save_mat('wauxdec%s_%s.ft'%(i,j) ,inst.daaig[i].wauxdec[j], save_dir)
-
+
+            if not self.daaig[i].tie_weights:
                 if self.daaig[i].input is not None:
                     save_mat('wdec%s.ft'%(i) ,inst.daaig[i].wdec, save_dir)
             i=i+1
@@ -778,7 +821,7 @@
             inst.daaig[i].wenc = load_mat('wenc%s.ft'%(i),save_dir)/coefenc[i]
             inst.daaig[i].bdec = load_mat('bdec%s.ft'%(i),save_dir)/coefdec[i]

-            if not self.daaig[i].tie_weights:
+            if not self.daaig[i].tie_weights_aux:
                 if self.daaig[i].auxinput is not None:
                     for j in range(len(inst.daaig[i].wauxdec)):
                         if 'wauxdec%s_%s.ft'%(i,j) in os.listdir(save_dir):
@@ -786,7 +829,8 @@
                         else:
                             print "WARNING: no decoding 'wauxdec%s_%s.ft' file use 'wauxenc%s_%s.ft' instead"%(i,j,i,j)
                             inst.daaig[i].wauxdec[j] = numpy.transpose(load_mat('wauxenc%s_%s.ft'%(i,j),save_dir)/coefdec[i])
-
+
+            if not self.daaig[i].tie_weights:
                 if self.daaig[i].input is not None:
                     if 'wdec%s.ft'%(i) in os.listdir(save_dir):
                         inst.daaig[i].wdec = load_mat('wdec%s.ft'%(i),save_dir)/coefdec[i]
@@ -808,11 +852,28 @@
         return numpy.mean(numpy.median(abs(inst.recactivation[layer](*inputs)),1))

     def _instance_error(self,inst,inputs,target):
-        return numpy.sum(inst.classify(*inputs) != target) / float(len(target)) *100.0
+        return numpy.sum(inst.classify(*inputs) != target) / float(len(target))*100.0

     def _instance_nll(self,inst,inputs,target):
         return numpy.sum(inst.NLL(*(inputs+[target]))) / float(len(target))

+    #try--------------------------------------------------------------------
+    def _instance_rescalwsaturation(self,inst,inputs):
+        sat = [None]*(self.depth+1)
+        for i in range(self.depth+1):
+            sat[i] = inst.hidsaturation(i,inputs[min(i,self.depth-1)])
+
+        for i in range(self.depth-1):
+            if sat[i+1] > max(sat[:i+1]):
+                inst.daaig[i+1].wenc = inst.daaig[i+1].wenc/sat[i+1]*max(sat[:i+1])
+                inst.daaig[i+1].benc = inst.daaig[i+1].benc/sat[i+1]*max(sat[:i+1])
+                sat[i+1] = max(sat[:i+1])
+        if sat[-1]>max(sat[:-1]):
+            inst.daaig[-1].w = inst.daaig[-1].w/sat[-1]*max(sat[:-1])
+            inst.daaig[-1].b = inst.daaig[-1].b/sat[-1]*max(sat[:-1])
+
+    #-----------------------------------------------------------------------
+
     def _instance_unsupgrad(self,inst,inputs,layer,param_name):
         inst.noiseseed(0)
         gradin = inst.compute_localgradients_in[layer](*inputs)
@@ -844,14 +905,14 @@
     def _instance_unsupupdate(self,inst,data,layer='all',typeup = 'local',printcost = False):
         cost = [None]*self.depth
-        if typeup is 'totallocal':
+        if typeup == 'totallocal':
             cost[-1] = inst.totallocalupdate(*data)
         else:
-            if typeup is 'total':
-                if layer is 'all':
-                    cost[-1] = inst.totalupdate[-1](*data)
+            if typeup == 'total':
+                if layer == 'all':
+                    cost[-1] = inst.totalupdate[-1](*data[-1])
                 else:
-                    cost[layer] = inst.totalupdate[layer](*data)
+                    cost[layer] = inst.totalupdate[layer](*data[layer])
             else:
                 if layer is 'all':
                     for i in range(self.depth):
@@ -859,19 +920,23 @@
                             cost[i] = inst.localupdate[i](*data[i])
                         if typeup == 'global':
                             cost[i] = inst.globalupdate[i](*data[i])
+                            for j in range(i):
+                                dummy = inst.localupdate[j](*data[j])
                 else:
                     if typeup == 'local':
-                        cost[layer] = inst.localupdate[i](*data)
+                        cost[layer] = inst.localupdate[layer](*data[layer])
                     if typeup == 'global':
-                        cost[layer] = inst.globalupdate[i](*data)
+                        cost[layer] = inst.globalupdate[layer](*data[layer])
+                        for j in range(layer):
+                            dummy = inst.localupdate[j](*data[j])
         if printcost:
             print cost
         return cost

     def _instance_supupdate(self,inst,data,typeup = 'global',printcost = False):
-        if typeup is 'local':
+        if typeup == 'local':
             cost = inst.localupdate[-1](*data)
-        if typeup is 'global':
+        if typeup == 'global':
             cost = inst.globalupdate[-1](*data)
         if printcost:
             print cost
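The cross-entropy helpers changed above take the pre-activation `output_act` rather than the squashed reconstruction. A minimal NumPy sketch of the overflow this sidesteps (illustrative only, not part of the changeset; the value 40.0 is an arbitrary large activation):

```python
import numpy

a = 40.0                                  # large pre-activation of one output unit
target = 0.0                              # the unit should have been off
s = 1.0 / (1.0 + numpy.exp(-a))           # sigmoid(a) rounds to exactly 1.0 in float64

# post-activation form: log(1 - s) is log(0)
naive = target * numpy.log(s) + (1 - target) * numpy.log(1 - s)
# activation form used in the diff: algebraically equal, stays finite
stable = target * (-numpy.log1p(numpy.exp(-a))) + (1 - target) * (-numpy.log1p(numpy.exp(a)))

print naive    # -inf (plus a divide-by-zero warning)
print stable   # -40.0
```

The same argument applies to the tanh and softsign variants, whose targets and outputs are first rescaled into (0, 1).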
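The added `XS` term is the target's own (negative) entropy; subtracting it from `XE` turns the summed cost into a KL divergence, which is what the new comment about "getting back the KL divergence" for global updates refers to: the cost bottoms out at exactly zero for a perfect reconstruction. A small NumPy sketch under the simplifying assumptions that probabilities are used directly (the changeset works from activations) and that targets lie strictly inside (0, 1):

```python
import numpy

def kl_recon_cost(target, rec):
    # cross-entropy of the reconstruction and entropy of the target
    xe = target * numpy.log(rec) + (1 - target) * numpy.log(1 - rec)
    xs = target * numpy.log(target) + (1 - target) * numpy.log(1 - target)
    return numpy.sum(xs - xe)             # KL(target || rec) summed over units

target = numpy.array([0.3, 0.8])
print kl_recon_cost(target, target)                   # 0.0   : perfect reconstruction
print kl_recon_cost(target, numpy.array([0.5, 0.5]))  # ~0.275: positive otherwise
```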
--- a/pylearn/sandbox/scan_inputs_groups.py  Wed Sep 02 14:22:02 2009 -0700
+++ b/pylearn/sandbox/scan_inputs_groups.py  Wed Sep 02 14:23:50 2009 -0700
@@ -71,6 +71,31 @@
         if nbias != 1: raise TypeError('not vector', bias_list[i])
     return bias_list

+
+# block grad Op------------------------------------
+class BlockGrad(Op):
+    """This Op block the gradient of a variable"""
+    def make_node(self, x):
+        x = T.as_tensor_variable(x)
+        if x.ndim == 1:
+            return Apply(self, [x], [T.dvector()])
+        else:
+            return Apply(self, [x], [T.dmatrix()])
+
+    def perform(self, node , x ,(out,)):
+        out[0] = x[0].copy()
+
+    def grad(self, x, (gx,)):
+        return [gx*0]
+
+    def __hash__(self):
+        return hash(BlockGrad)^77612
+
+    def __str__(self):
+        return "BlockGrad"
+
+blockgrad=BlockGrad()
+
 # Encoding scan dot product------------------------------------
 class ScanDotEnc(Op):
     """This Op takes an index list (as tensor.ivector), a list of matrices representing
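The new `BlockGrad` Op behaves like a stop-gradient: `perform` copies its input through unchanged, while `grad` returns zeros, so expressions built on `blockgrad(x)` send no gradient back into `x` (this is how `DAA_inputs_groups.py` keeps the reconstruction target out of the gradient when `blockgrad=True`). A hypothetical usage sketch, assuming this changeset is applied and using plain `theano.function`/`T.grad` rather than the module system used above:

```python
import theano
import theano.tensor as T
# import path assumes this changeset's scan_inputs_groups module
from pylearn.sandbox.scan_inputs_groups import blockgrad

x = T.dvector('x')
y = T.sum(blockgrad(x) * x)     # first factor is treated as a constant
g = T.grad(y, x)                # only the second factor contributes

f = theano.function([x], g)
print f([1.0, 2.0, 3.0])        # [ 1.  2.  3.] rather than [ 2.  4.  6.]
```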