Mercurial > paraspace
view paraspace/dexfile.py @ 25:670167ed06bb
test dex_deptracker._link_dependencies()
author | Thinker K.F. Li <thinker@codemud.net> |
---|---|
date | Tue, 07 Jun 2011 00:21:17 +0800 |
parents | a57ec6a76fe3 |
children | 15cb829ac442 |
line wrap: on
line source
## \brief Manage offset
#
# An instance is initialized with an offset.  Every time the instance
# is called, it returns the current offset and then advances the
# offset by the specified size.
#
#   moff = man_off(init_off)
#   assert moff(5) == init_off
#   assert moff() == (init_off + 5)
#   assert moff() == (init_off + 5)
#
class man_off(object):
    off = None

    def __init__(self, off):
        self.off = off

    ## \brief Return the current offset, then advance it by \a sz.
    def __call__(self, sz=0):
        off = self.off
        self.off = off + sz
        return off


## \brief Decode a little-endian unsigned integer from a byte string.
#
# Every element of \a data is passed to ord(), so \a data must be a
# sequence of one-character strings.  An empty string decodes to 0.
#
def _to_uint(data):
    v = 0
    sh = 0
    for c in data:
        v = v + (ord(c) << sh)
        sh = sh + 8
    return v


## \brief Decode a little-endian two's complement integer.
def _to_int(data):
    v = _to_uint(data)
    sz = len(data)
    # The top bit of the last byte is the sign bit.
    if sz and ((1 << (sz * 8 - 1)) & v):
        v = -((1 << (sz * 8)) - v)
    return v


## \brief Decode an unsigned LEB128 value from the head of \a data.
#
# \return (value, number of bytes consumed).
#
def _uleb128(data):
    sh = 0
    v = 0
    for c in data:
        cv = ord(c)
        v = v + ((cv & 0x7f) << sh)
        sh = sh + 7
        if cv <= 0x7f:          # high bit clear: last byte of the value
            break
    # Floor division keeps the byte count an integer under Python 3 too.
    nbytes = sh // 7
    return v, nbytes


## \brief Encode a non-negative integer as unsigned LEB128.
def _to_uleb128(v):
    assert v >= 0

    data = ''
    while True:
        if v > 0x7f:
            data = data + chr((v & 0x7f) | 0x80)
        else:
            data = data + chr(v & 0x7f)
            break
        v = v >> 7
    return data


## \brief Number of bytes _to_uleb128() produces for \a v.
def _uleb128_sz(v):
    return len(_to_uleb128(v))


## \brief Decode a signed LEB128 value from the head of \a data.
#
# \return (value, number of bytes consumed).
#
def _leb128(data):
    v, nbytes = _uleb128(data)
    # Guard against empty input, where the sign-bit shift would be
    # negative; _to_int() has the same guard.
    if nbytes and (v & (1 << (nbytes * 7 - 1))):
        v = -((1 << (nbytes * 7)) - v)
    return v, nbytes


## \brief Encode an integer as signed LEB128.
def _to_leb128(v):
    data = ''
    while True:
        # One byte carries 7 bits including the sign: -64 .. 63.
        if v > 0x3f or v < ~0x3f:
            data = data + chr((v & 0x7f) | 0x80)
        else:
            data = data + chr(v & 0x7f)
            break
        v = v >> 7
    return data


## \brief Number of bytes _to_leb128() produces for \a v.
def _leb128_sz(v):
    return len(_to_leb128(v))


## \brief Refresh (when the object supports it) and return o.data_size.
def _compute_sz(o):
    if hasattr(o, 'compute_size'):
        o.compute_size()
    return o.data_size


## \brief Total data_size of the objects in \a obj_list.
def _sum_data_size(obj_list):
    return sum(_compute_sz(o) for o in obj_list)


## \brief Raw byte string field.
#
# The length is either the fixed \a size or the value of the attribute
# named by \a size_name, multiplied by \a factor.  An instance is used
# both as the field type and as the parsed value: parse() returns a
# new _rawstr holding \c data and \c data_size.
#
class _rawstr(object):
    size = None
    factor = None
    data = None
    data_size = None

    ##
    # \param size_name is dot separated attribute names from the parent.
    #
    def __init__(self, size=None, size_name=None, factor=1):
        self.size = size
        self.size_name = size_name
        self.factor = factor

    def parse(self, parent, data, off):
        obj = _rawstr(self.size, self.size_name, self.factor)
        if self.size is not None:
            size = self.size
        else:
            # Walk the dotted attribute path starting at the parent.
            size = parent
            for name in self.size_name.split('.'):
                size = getattr(size, name)

        obj.data_size = size * self.factor
        obj.data = data[off:off + obj.data_size]
        return obj

    def sizeof(self, v):
        return v.data_size

    ## \brief Return the raw bytes of a parsed value.
    #
    # Parsed values are _rawstr objects, so their \c data is returned;
    # returning the wrapper itself would break ''.join() in the
    # serializers.  A value without a \c data attribute (e.g. a plain
    # string) is passed through unchanged.
    #
    def to_str(self, v):
        return getattr(v, 'data', v)


## \brief Raw string of a fixed size.
class rawstr(_rawstr):
    def __init__(self, size, factor=1):
        super(rawstr, self).__init__(size=size, factor=factor)


## \brief Raw string whose size is read from an attribute of the parent.
class rawstr_size_name(_rawstr):
    def __init__(self, size_name, factor=1):
        super(rawstr_size_name, self).__init__(size_name=size_name,
                                               factor=factor)


## \brief Zero-sized field; parses to None and serializes to ''.
class tap(object):
    @staticmethod
    def parse(parent, data, off):
        pass

    @staticmethod
    def sizeof(v):
        return 0

    @staticmethod
    def to_str(v):
        return ''


## \brief 32-bit little-endian unsigned integer.
class uint32(object):
    @staticmethod
    def parse(parent, data, off):
        return _to_uint(data[off:off + 4])

    @staticmethod
    def sizeof(v):
        return 4

    @staticmethod
    def to_str(v):
        return chr(v & 0xff) + chr((v >> 8) & 0xff) + \
            chr((v >> 16) & 0xff) + chr((v >> 24) & 0xff)


## \brief 16-bit little-endian unsigned integer.
class uint16(object):
    @staticmethod
    def parse(parent, data, off):
        return _to_uint(data[off:off + 2])

    @staticmethod
    def sizeof(v):
        return 2

    @staticmethod
    def to_str(v):
        return chr(v & 0xff) + chr((v >> 8) & 0xff)


## \brief 8-bit unsigned integer.
class uint8(object):
    @staticmethod
    def parse(parent, data, off):
        return _to_uint(data[off:off + 1])

    @staticmethod
    def sizeof(v):
        return 1

    @staticmethod
    def to_str(v):
        return chr(v & 0xff)


## \brief 32-bit little-endian signed integer.
class int32(object):
    @staticmethod
    def parse(parent, data, off):
        return _to_int(data[off:off + 4])

    @staticmethod
    def sizeof(v):
        return 4

    @staticmethod
    def to_str(v):
        return chr(v & 0xff) + chr((v >> 8) & 0xff) + \
            chr((v >> 16) & 0xff) + chr((v >> 24) & 0xff)


## \brief 16-bit little-endian signed integer.
class int16(object):
    @staticmethod
    def parse(parent, data, off):
        return _to_int(data[off:off + 2])

    @staticmethod
    def sizeof(v):
        return 2

    @staticmethod
    def to_str(v):
        return chr(v & 0xff) + chr((v >> 8) & 0xff)


## \brief Unsigned LEB128 integer (at most 5 bytes are examined).
class uleb128(object):
    @staticmethod
    def parse(parent, data, off):
        v, nbytes = _uleb128(data[off:off + 5])
        return v

    @staticmethod
    def sizeof(v):
        return _uleb128_sz(v)

    @staticmethod
    def to_str(v):
        return _to_uleb128(v)


## \brief Signed LEB128 integer (at most 5 bytes are examined).
class leb128(object):
    @staticmethod
    def parse(parent, data, off):
        v, nbytes = _leb128(data[off:off + 5])
        return v

    @staticmethod
    def sizeof(v):
        return _leb128_sz(v)

    @staticmethod
    def to_str(v):
        return _to_leb128(v)


## \brief Padding that aligns the following field to 2**bits bytes.
#
# The parsed value is the number of padding bytes.
#
class auto_align(object):
    bits = None

    def __init__(self, bits):
        self.bits = bits

    def parse(self, parent, data, off):
        mask = (1 << self.bits) - 1
        padding_sz = ((off + mask) & ~mask) - off
        return padding_sz

    @staticmethod
    def sizeof(v):
        return v

    @staticmethod
    def to_str(v):
        return '\x00' * v


## \brief Size of an object, through data_size or its class' sizeof().
def _get_sz(o):
    if isinstance(o, relocatable):
        return o.data_size
    return o.__class__.sizeof(o)


## \brief Base class of the structured (multi-field) values.
#
# Subclasses keep their serialized size in \c data_size.
#
class relocatable(object):
    data_size = None

    @staticmethod
    def parse(parent, data, off):
        pass

    @staticmethod
    def sizeof(v):
        return v.data_size

    def to_str(self):
        pass

    def compute_size(self):
        pass

    def children(self):
        raise NotImplementedError(
            '%s: does not implement children' % (self.__class__.__name__))


## \brief Recompute and return the size of one child value.
#
# A relocatable child is asked to refresh its own data_size first.
# Fields declared through depend are skipped: depend_off.link() may
# have replaced such a field with the item it refers to, and that item
# is not part of this structure.
#
def _child_size(child_type, child):
    if isinstance(child, relocatable) and not isinstance(child_type, depend):
        child.compute_size()
    return child_type.sizeof(child)


## \brief Serialize one child value.
#
# Relocatable children serialize themselves, because their to_str()
# takes no value argument; every other value is serialized by its
# declared type.  Fields declared through depend always go through the
# declared type (see _child_size()).
#
def _child_str(child_type, child):
    if isinstance(child, relocatable) and not isinstance(child_type, depend):
        return child.to_str()
    return child_type.to_str(child)


## \brief Sequence of items of one type.
#
# \param count_name is the dot separated attribute path, from the
#        parent, of the number of items.
#
class array(relocatable):
    count_name = None
    child_type = None
    items = None

    def __init__(self, count_name, child_type):
        super(array, self).__init__()
        self.count_name = count_name
        self.child_type = child_type

    def parse(self, parent, data, off):
        nitem = parent
        for name in self.count_name.split('.'):
            nitem = getattr(nitem, name)
        return self.parse_nitem(parent, data, off, nitem)

    ## \brief Parse exactly \a nitem consecutive items starting at \a off.
    def parse_nitem(self, parent, data, off, nitem):
        moff = man_off(off)
        obj = array(self.count_name, self.child_type)

        def parse_one():
            item = obj.child_type.parse(parent, data, moff())
            moff(obj.child_type.sizeof(item))
            return item

        obj.items = [parse_one() for i in range(nitem)]
        obj.data_size = moff() - off
        return obj

    def compute_size(self):
        child_type = self.child_type
        self.data_size = sum(_child_size(child_type, item)
                             for item in self.items)

    def to_str(self):
        child_type = self.child_type
        return ''.join(_child_str(child_type, item) for item in self.items)

    def children(self):
        return ('items',)


## \brief Structure made of named fields.
#
# A subclass declares every field type as a class attribute and lists
# the field names, in serialization order, in \c child_names.  Parsed
# values are stored on the instance under the same names.
#
class composite(relocatable):
    child_names = None

    def __init__(self):
        for child_name in self.child_names:
            setattr(self, child_name, None)

    def parse_child(self, child_name, data, off):
        # The type is looked up on the class because the instance
        # attribute of the same name holds the parsed value.
        child_clazz = getattr(self.__class__, child_name)
        child = child_clazz.parse(self, data, off)
        setattr(self, child_name, child)

    @classmethod
    def parse(clazz, parent, data, off):
        moff = man_off(off)
        obj = clazz()
        for child_name in clazz.child_names:
            obj.parse_child(child_name, data, moff())
            child = getattr(obj, child_name)
            child_clazz = getattr(obj.__class__, child_name)
            moff(child_clazz.sizeof(child))

        obj.data_size = moff() - off
        return obj

    ## \brief (field type, field value) pairs in serialization order.
    def _typed_children(self):
        clazz = self.__class__
        return [(getattr(clazz, child_name), getattr(self, child_name))
                for child_name in self.child_names]

    def compute_size(self):
        self.data_size = sum(_child_size(child_clazz, child)
                             for child_clazz, child in self._typed_children())

    def to_str(self):
        return ''.join(_child_str(child_clazz, child)
                       for child_clazz, child in self._typed_children())

    def children(self):
        return self.child_names


## \brief Optional field.
#
# \param cond is called as cond(parent, data, off); the field is
#        parsed as \a child_type only when the result is true.
#
# The parsed object keeps the value (or None) in \c value.
#
class cond(relocatable):
    condition = None
    child_type = None
    value = None

    def __init__(self, cond, child_type):
        self.condition = cond
        self.child_type = child_type

    def parse(self, parent, data, off):
        if self.condition(parent, data, off):
            value = self.child_type.parse(parent, data, off)
        else:
            value = None

        obj = cond(self.condition, self.child_type)
        obj.value = value
        obj.data_size = self.sizeof(obj)
        return obj

    ## \param v is a parsed cond object, not the wrapped value.
    def sizeof(self, v):
        if v.value is None:
            return 0
        return self.child_type.sizeof(v.value)

    def compute_size(self):
        if isinstance(self.value, relocatable):
            self.value.compute_size()
        # sizeof() expects the cond object itself.
        self.data_size = self.sizeof(self)

    def to_str(self):
        if self.value is None:
            return ''
        return _child_str(self.child_type, self.value)

    def children(self):
        return ('value',)


## \brief Field whose type is selected by a value of the parent.
#
# \param selector is the dot separated attribute path, from the
#        parent, of the selecting key.
# \param map maps keys to field types.
#
class switch(relocatable):
    selector = None
    map = None
    child_type = None
    value = None
    _parent = None

    def __init__(self, selector, map):
        self.selector = selector
        self.map = map

    def switch_key(self, parent):
        sel_value = parent
        for name in self.selector.split('.'):
            sel_value = getattr(sel_value, name)
        return sel_value

    def _get_child_type(self, parent):
        return self.map[self.switch_key(parent)]

    def parse(self, parent, data, off):
        child_type = self._get_child_type(parent)
        value = child_type.parse(parent, data, off)

        obj = switch(self.selector, self.map)
        obj.value = value
        obj.child_type = child_type
        obj.data_size = self.sizeof(obj)
        obj._parent = parent
        return obj

    ## \param v is a parsed switch object, not the wrapped value.
    @staticmethod
    def sizeof(v):
        return v.child_type.sizeof(v.value)

    def compute_size(self):
        if isinstance(self.value, relocatable):
            self.value.compute_size()
        # sizeof() expects the switch object itself.
        self.data_size = self.sizeof(self)

    def to_str(self):
        return _child_str(self.child_type, self.value)

    def children(self):
        key = self.switch_key(self._parent)
        return (repr(key),)


## \brief Constant that occupies no bytes in the file.
class abs_value(relocatable):
    value = None

    def __init__(self, value):
        self.value = value

    # The second parameter is the parent object; the name is kept as
    # it was written.
    def parse(self, parse, data, off):
        return abs_value(self.value)

    def sizeof(self, v):
        return 0

    def to_str(self):
        return ''

    def children(self):
        return ('value',)


## \brief Field that refers to another item of the file.
#
# Used as depend(depend_on)(child_type): the instance is called with
# the underlying field type and returns itself.  Parsing, sizing and
# serializing are delegated to that type.
#
class depend(relocatable):
    depend_on = None
    child_type = None

    def __init__(self, depend_on):
        self.depend_on = depend_on

    def __call__(self, child_type):
        self.child_type = child_type
        return self

    def parse(self, parent, data, off):
        return self.child_type.parse(parent, data, off)

    def sizeof(self, v):
        return self.child_type.sizeof(v)

    def compute_size(self, child):
        # NOTE(review): issubclass() requires child_type to be a
        # class; every depend in this file wraps a class.
        if issubclass(self.child_type, relocatable):
            self.child_type.compute_size(child)

    def to_str(self, child):
        return self.child_type.to_str(child)

    def link(self, child, name_path, parents, markers_info):
        raise NotImplementedError('does not support link() method')


## \brief Store \a obj in \a parent under the last name of a name path.
#
# For a list or dict parent, \a name is the repr() of the key.
#
def _set_name_path_name(parent, name, obj):
    if isinstance(parent, (list, dict)):
        # NOTE(review): eval() executes arbitrary expressions.  Names
        # look like they are generated internally (e.g. repr(key) in
        # switch.children()); confirm they can never come from
        # untrusted input.
        key = eval(name)
        parent[key] = obj
        return
    setattr(parent, name, obj)


## \brief Reference expressed as an offset.
class depend_off(depend):
    ## \brief Replace the field with the item found at its offset.
    #
    # markers_info[depend_on] maps offsets to items; the item found
    # under child.data_offset is stored in the last parent.
    #
    def link(self, child, name_path, parents, markers_info):
        parent = parents[-1]
        name = name_path.split('.')[-1]
        id_item_map = markers_info[self.depend_on]
        dep_on = id_item_map[child.data_offset]
        _set_name_path_name(parent, name, dep_on)


## \brief Reference expressed as an offset relative to another item.
class depend_off_rel(depend):
    relative_to = None

    def __init__(self, relative_to, depend_on):
        super(depend_off_rel, self).__init__(depend_on)
        self.relative_to = relative_to


## \brief Reference expressed as an index.
class depend_idx(depend):
    pass


class _DEX_header(composite):
    magic = rawstr(8)
    checksum = uint32
    signature = rawstr(20)
    fileSize = uint32
    headerSize = uint32
    endianTag = uint32
    linkSize = uint32
    linkOff = uint32
    mapOff = uint32
    stringIdsSize = uint32
    stringIdsOff = uint32
    typeIdsSize = uint32
    typeIdsOff = uint32
    protoIdsSize = uint32
    protoIdsOff = uint32
    fieldIdsSize = uint32
    fieldIdsOff = uint32
    methodIdsSize = uint32
    methodIdsOff = uint32
    classDefsSize = uint32
    classDefsOff = uint32
    dataSize = uint32
    dataOff = uint32

    child_names = \
        'magic checksum signature fileSize headerSize endianTag ' \
        'linkSize linkOff mapOff stringIdsSize stringIdsOff typeIdsSize ' \
        'typeIdsOff protoIdsSize protoIdsOff fieldIdsSize fieldIdsOff ' \
        'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \
        'dataSize dataOff'.split()


class _DEX_MapItem(composite):
    type = uint16
    unused = uint16
    size = uint32
    offset = uint32

    # Names of the map item type codes.
    types = {
        0x0000: 'kDexTypeHeaderItem',
        0x0001: 'kDexTypeStringIdItem',
        0x0002: 'kDexTypeTypeIdItem',
        0x0003: 'kDexTypeProtoIdItem',
        0x0004: 'kDexTypeFieldIdItem',
        0x0005: 'kDexTypeMethodIdItem',
        0x0006: 'kDexTypeClassDefItem',
        0x1000: 'kDexTypeMapList',
        0x1001: 'kDexTypeTypeList',
        0x1002: 'kDexTypeAnnotationSetRefList',
        0x1003: 'kDexTypeAnnotationSetItem',
        0x2000: 'kDexTypeClassDataItem',
        0x2001: 'kDexTypeCodeItem',
        0x2002: 'kDexTypeStringDataItem',
        0x2003: 'kDexTypeDebugInfoItem',
        0x2004: 'kDexTypeAnnotationItem',
        0x2005: 'kDexTypeEncodedArrayItem',
        0x2006: 'kDexTypeAnnotationsDirectoryItem'
        }

    child_names = \
        'type unused size offset'.split()


class _DEX_MapItemBlock(composite):
    num = uint32
    items = array('num', _DEX_MapItem)

    child_names = 'num items'.split()


class _DEX_StringId(composite):
    stringDataOff = depend_off('_DEX_StringDataItem')(uint32)

    child_names = ('stringDataOff',)


class _DEX_TypeId(composite):
    descriptorIdx = depend_idx('DEXFile.stringIds')(uint32)

    child_names = ('descriptorIdx',)


class _DEX_ProtoId(composite):
    shortyIdx = depend_idx('DEXFile.stringIds')(uint32)
    returnTypeIdx = depend_idx('DEXFile.typeIds')(uint32)
    parametersOff = depend_off('_DEX_TypeList')(uint32)

    child_names = 'shortyIdx returnTypeIdx parametersOff'.split()


class _DEX_FieldId(composite):
    classIdx = depend_idx('DEXFile.typeIds')(uint16)
    typeIdx = depend_idx('DEXFile.typeIds')(uint16)
    nameIdx = depend_idx('DEXFile.stringIds')(uint32)

    child_names = 'classIdx typeIdx nameIdx'.split()


class _DEX_MethodId(composite):
    classIdx = depend_idx('DEXFile.typeIds')(uint16)
    protoIdx = depend_idx('DEXFile.protoIds')(uint16)
    nameIdx = depend_idx('DEXFile.stringIds')(uint32)

    child_names = 'classIdx protoIdx nameIdx'.split()


class _DEX_ClassDef(composite):
    classIdx = depend_idx('DEXFile.typeIds')(uint32)
    accessFlags = uint32
    superclassIdx = depend_idx('DEXFile.typeIds')(uint32)
    interfacesOff = depend_off('_DEX_TypeList')(uint32)
    sourceFileIdx = depend_idx('DEXFile.stringIds')(uint32)
    annotationsOff = depend_off('_DEX_AnnotationsDirectoryItem')(uint32)
    classDataOff = uint32
    staticValuesOff = depend_off('_DEX_EncodedArrayItem')(uint32)

    child_names = \
        'classIdx accessFlags superclassIdx interfacesOff ' \
        'sourceFileIdx annotationsOff classDataOff staticValuesOff'.split()


class _DEX_ClassDataHeader(composite):
    staticFieldsSize = uleb128
    instanceFieldsSize = uleb128
    directMethodsSize = uleb128
    virtualMethodsSize = uleb128

    child_names = \
        'staticFieldsSize instanceFieldsSize directMethodsSize ' \
        'virtualMethodsSize'.split()


class _DEX_Field(composite):
    fieldIdx = depend_idx('DEXFile.fieldIds')(uleb128)
    accessFlags = uleb128

    child_names = 'fieldIdx accessFlags'.split()


class _DEX_Method(composite):
    methodIdx = depend_idx('DEXFile.methodIds')(uleb128)
    accessFlags = uleb128
    codeOff = depend_off('_DEX_Code')(uleb128)

    child_names = 'methodIdx accessFlags codeOff'.split()


class _DEX_ClassData(composite):
    header = _DEX_ClassDataHeader
    staticFields = array('header.staticFieldsSize', _DEX_Field)
    instanceFields = array('header.instanceFieldsSize', _DEX_Field)
    directMethods = array('header.directMethodsSize', _DEX_Method)
    virtualMethods = array('header.virtualMethodsSize', _DEX_Method)

    child_names = \
        'header ' \
        'staticFields instanceFields directMethods virtualMethods'.split()


class _DEX_TypeList(composite):
    padding = auto_align(2)     # 2 bits alignment
    num = uint32
    typeItems = array('num', uint16)

    child_names = 'padding num typeItems'.split()


class _DEX_Try(composite):
    startAddr = uint32
    insnCount = uint16
    handlerOff = depend_off_rel('_DEX_Code.handlers_size',
                                '_DEX_Catch')(uint16)

    child_names = 'startAddr insnCount handlerOff'.split()


class _DEX_CatchHandler(composite):
    typeIdx = depend_idx('DEXFile.typeIds')(uleb128)
    address = uleb128

    child_names = 'typeIdx address'.split()


class _DEX_CatchAllHandler(composite):
    address = uleb128

    child_names = 'address'.split()


class _DEX_Catch(composite):
    size = leb128
    handlers = array('count', _DEX_CatchHandler)
    catchAllHandler = cond((lambda parent, data, off: parent.catchesAll),
                           _DEX_CatchAllHandler)

    child_names = 'size handlers catchAllHandler'.split()

    ## \brief A non-positive size marks a trailing catch-all handler.
    @property
    def catchesAll(self):
        return self.size <= 0

    ## \brief Number of typed handlers: the absolute value of size.
    @property
    def count(self):
        if self.size < 0:
            return -self.size
        return self.size


class _DEX_Code(composite):
    registersSize = uint16
    insSize = uint16
    outsSize = uint16
    triesSize = uint16
    debugInfoOff = depend_off('_DEX_DebugInfoItem')(uint32)
    insnsSize = uint32
    insns = rawstr_size_name('insnsSize', 2)
    # The try/handler fields are present only when triesSize > 0.
    _has_tries = lambda parent, data, off: parent.triesSize > 0
    padding = cond(_has_tries, auto_align(2))
    try_items = cond(_has_tries,
                     array('triesSize', _DEX_Try))
    handlers_size = cond(_has_tries, uleb128)
    catch_handler_items = cond(_has_tries,
                               array('handlers_size.value', _DEX_Catch))

    padding2 = auto_align(2)

    child_names = \
        'registersSize insSize outsSize triesSize debugInfoOff ' \
        'insnsSize insns padding try_items handlers_size ' \
        'catch_handler_items padding2'.split()


class _DEX_AnnotationSetItem(composite):
    size = uint32
    annotationOffs = array('size', depend_off('_DEX_AnnotationItem')(uint32))

    child_names = 'size annotationOffs'.split()


class _DEX_FieldAnnotationsItem(composite):
    fieldIdx = depend_idx('DEXFile.fieldIds')(uint32)
    annotationsOff = depend_off('_DEX_AnnotationSetItem')(uint32)

    child_names = 'fieldIdx annotationsOff'.split()


class _DEX_MethodAnnotationsItem(composite):
    methodIdx = depend_idx('DEXFile.methodIds')(uint32)
    annotationsOff = depend_off('_DEX_AnnotationSetItem')(uint32)

    child_names = 'methodIdx annotationsOff'.split()


class _DEX_ParameterAnnotationsItem(composite):
    methodIdx = depend_idx('DEXFile.methodIds')(uint32)
    annotationsOff = depend_off('_DEX_AnnotationSetItem')(uint32)

    child_names = 'methodIdx annotationsOff'.split()


class _DEX_AnnotationsDirectoryItem(composite):
    classAnnotationsOff = depend_off('_DEX_AnnotationSetItem')(uint32)
    fieldsSize = uint32
    methodsSize = uint32
    parametersSize = uint32

    fieldAnnotationsItems = array('fieldsSize', _DEX_FieldAnnotationsItem)
    methodAnnotationsItems = array('methodsSize', _DEX_MethodAnnotationsItem)
    parameterAnnotationsItems = array('parametersSize',
                                      _DEX_ParameterAnnotationsItem)

    child_names = 'classAnnotationsOff fieldsSize methodsSize ' \
        'parametersSize fieldAnnotationsItems methodAnnotationsItems ' \
        'parameterAnnotationsItems'.split()


class _DEX_AnnotationArray(composite):
    size = uleb128
    # The member type is defined further down, so the field is
    # attached after _DEX_AnnotationMember is declared:
    # annotations = array('size', _DEX_AnnotationMember_noname)

    child_names = 'size annotations'.split()


##
#
# \see createAnnotationMember() in dalvik/vm/reflect/Annotation.c
#
class _DEX_AnnotationMember_noname(composite):
    #
    # Constants from DexFile.h
    #
    kDexAnnotationByte = 0x00
    kDexAnnotationShort = 0x02
    kDexAnnotationChar = 0x03
    kDexAnnotationInt = 0x04
    kDexAnnotationLong = 0x06
    kDexAnnotationFloat = 0x10
    kDexAnnotationDouble = 0x11
    kDexAnnotationString = 0x17
    kDexAnnotationType = 0x18
    kDexAnnotationField = 0x19
    kDexAnnotationMethod = 0x1a
    kDexAnnotationEnum = 0x1b
    kDexAnnotationArray = 0x1c
    kDexAnnotationAnnotation = 0x1d
    kDexAnnotationNull = 0x1e
    kDexAnnotationBoolean = 0x1f

    kDexAnnotationValueTypeMask = 0x1f
    kDexAnnotationValueArgShift = 5

    valueType = uint8
    value_map = {
        kDexAnnotationByte: rawstr_size_name('value_width'),
        kDexAnnotationShort: rawstr_size_name('value_width'),
        kDexAnnotationChar: rawstr_size_name('value_width'),
        kDexAnnotationInt: rawstr_size_name('value_width'),
        kDexAnnotationLong: rawstr_size_name('value_width'),
        kDexAnnotationFloat: rawstr_size_name('value_width'),
        kDexAnnotationDouble: rawstr_size_name('value_width'),
        kDexAnnotationString: rawstr_size_name('value_width'),
        kDexAnnotationType: rawstr_size_name('value_width'),
        kDexAnnotationMethod: rawstr_size_name('value_width'),
        kDexAnnotationField: rawstr_size_name('value_width'),
        kDexAnnotationEnum: rawstr_size_name('value_width'),
        kDexAnnotationNull: abs_value(0),
        kDexAnnotationBoolean: abs_value(0),    # width != 0
        kDexAnnotationArray: _DEX_AnnotationArray,
        # kDexAnnotationAnnotation: _DEX_AnnotationItem_novisibility
        }
    value = switch('vtype', value_map)

    child_names = 'valueType value'.split()

    ## \brief Value type: the low 5 bits of valueType.
    @property
    def vtype(self):
        return self.valueType & self.kDexAnnotationValueTypeMask

    ## \brief Value argument: the bits of valueType above the type.
    @property
    def width(self):
        return self.valueType >> self.kDexAnnotationValueArgShift

    ## \brief Number of value bytes: the value argument plus one.
    @property
    def value_width(self):
        return (self.valueType >> self.kDexAnnotationValueArgShift) + 1


class _DEX_AnnotationMember(_DEX_AnnotationMember_noname):
    nameIdx = depend_idx('DEXFile.stringIds')(uleb128)

    child_names = 'nameIdx valueType value'.split()


_DEX_AnnotationArray.annotations = array('size', _DEX_AnnotationMember_noname)


## \brief Annotation item
#
# \see processEncodedAnnotation() in dalvik/vm/reflect/Annotation.c
#
class _DEX_AnnotationItem_novisibility(composite):
    typeIdx = depend_idx('DEXFile.typeIds')(uleb128)
    size = uleb128
    members = array('size', _DEX_AnnotationMember)

    child_names = 'typeIdx size members'.split()

    kDexVisibilityBuild = 0x00
    kDexVisibilityRuntime = 0x01
    kDexVisibilitySystem = 0x02


class _DEX_AnnotationItem(_DEX_AnnotationItem_novisibility):
    visibility = uint8

    child_names = 'visibility typeIdx size members'.split()


# value_map is inherited from _DEX_AnnotationMember_noname, so this
# also registers the nested annotation type for the base class.
_DEX_AnnotationMember.value_map[
    _DEX_AnnotationMember.kDexAnnotationAnnotation] = \
    _DEX_AnnotationItem_novisibility


class _DEX_EncodedArrayItem(composite):
    size = uleb128
    elements = array('size', _DEX_AnnotationMember_noname)

    child_names = 'size elements'.split()


## \brief Sequence of debug opcodes, up to and including DBG_END_SEQUENCE.
#
# \c opcodes is a tuple of tuples: (opcode, operand, ...).
#
class _DEX_DebugCodeBlock(relocatable):
    DBG_END_SEQUENCE = 0x00
    DBG_ADVANCE_PC = 0x01
    DBG_ADVANCE_LINE = 0x02
    DBG_START_LOCAL = 0x03
    DBG_START_LOCAL_EXTENDED = 0x04
    DBG_END_LOCAL = 0x05
    DBG_RESTART_LOCAL = 0x06
    DBG_SET_PROLOGUE_END = 0x07
    DBG_SET_EPILOGUE_BEGIN = 0x08
    DBG_SET_FILE = 0x09
    DBG_FIRST_SPECIAL = 0x0a
    DBG_LINE_BASE = -4
    DBG_LINE_RANGE = 15

    opcodes = None
    data_size = None

    @staticmethod
    def parse(parent, data, off):
        moff = man_off(off)
        self = _DEX_DebugCodeBlock()

        # Decode one LEB128 operand at the cursor and step over it.
        def read(decode):
            v, nbytes = decode(data[moff():moff() + 5])
            moff(nbytes)
            return v

        #
        # Parse debug opcodes
        #
        opcodes = []
        while True:
            # moff(1) yields the opcode offset and advances the
            # cursor; moff() then yields the end of the slice.
            opcode = _to_uint(data[moff(1):moff()])

            if opcode == self.DBG_END_SEQUENCE:
                opcodes.append((opcode,))
                break
            elif opcode == self.DBG_ADVANCE_PC:
                opcodes.append((opcode, read(_uleb128)))
            elif opcode == self.DBG_ADVANCE_LINE:
                opcodes.append((opcode, read(_leb128)))
            elif opcode in (self.DBG_START_LOCAL,
                            self.DBG_START_LOCAL_EXTENDED):
                reg = read(_uleb128)
                name = read(_uleb128)
                descriptor = read(_uleb128)
                if opcode == self.DBG_START_LOCAL_EXTENDED:
                    signature = read(_uleb128)
                    opcodes.append((opcode, reg, name, descriptor,
                                    signature))
                else:
                    opcodes.append((opcode, reg, name, descriptor))
            elif opcode in (self.DBG_END_LOCAL, self.DBG_RESTART_LOCAL):
                opcodes.append((opcode, read(_uleb128)))
            else:
                # DBG_SET_PROLOGUE_END, DBG_SET_EPILOGUE_BEGIN,
                # DBG_SET_FILE and the special opcodes are stored
                # without operands.
                # NOTE(review): the Dalvik format documents a
                # uleb128p1 operand for DBG_SET_FILE; none is read
                # here -- confirm.
                opcodes.append((opcode,))

        self.opcodes = tuple(opcodes)
        self.data_size = moff() - off
        return self

    ## \brief Serialized size of one decoded opcode tuple.
    def _opcode_size(self, code):
        opcode = code[0]
        if opcode == self.DBG_ADVANCE_LINE:
            return 1 + _leb128_sz(code[1])
        if opcode in (self.DBG_ADVANCE_PC, self.DBG_END_LOCAL,
                      self.DBG_RESTART_LOCAL):
            return 1 + _uleb128_sz(code[1])
        if opcode in (self.DBG_START_LOCAL, self.DBG_START_LOCAL_EXTENDED):
            # 3 operands, or 4 for the extended form.
            return 1 + sum(_uleb128_sz(arg) for arg in code[1:])
        return 1

    def compute_size(self):
        self.data_size = sum(self._opcode_size(code)
                             for code in self.opcodes)

    def to_str(self):
        uleb_ops = (self.DBG_ADVANCE_PC, self.DBG_START_LOCAL,
                    self.DBG_START_LOCAL_EXTENDED, self.DBG_END_LOCAL,
                    self.DBG_RESTART_LOCAL)
        opcodebins = []
        for code in self.opcodes:
            opcode = code[0]
            if opcode == self.DBG_ADVANCE_LINE:
                operands = _to_leb128(code[1])
            elif opcode in uleb_ops:
                operands = ''.join(_to_uleb128(arg) for arg in code[1:])
            else:
                operands = ''
            opcodebins.append(chr(opcode) + operands)
            if opcode == self.DBG_END_SEQUENCE:
                break

        return ''.join(opcodebins)

    def children(self):
        return ('opcodes',)


class _DEX_DebugInfoItem(composite):
    start_line = uleb128
    parameters_size = uleb128
    parameters = array('parameters_size', uleb128)
    opcodes = _DEX_DebugCodeBlock

    child_names = 'start_line parameters_size parameters opcodes'.split()


class _DEX_StringDataItem(composite):
    size = uleb128
    data = rawstr_size_name('size')
    padding = rawstr(1)

    child_names = 'size data padding'.split()


## \brief Hand-written variant of the string data item.
#
# parse() returns a _DEX_StringDataItem whose \c data is the plain
# string, with one trailing byte counted in data_size.
# NOTE(review): nothing in this file references this class.
#
class dummy(object):
    data_size = None

    @staticmethod
    def parse(parent, data, off):
        size, nbytes = _uleb128(data[off:off + 5])
        data = data[off + nbytes: off + nbytes + size]

        self = _DEX_StringDataItem()

        self.size = size
        self.data = data
        self.data_size = nbytes + size + 1
        return self

    def compute_size(self):
        size = len(self.data)
        self.size = size
        self.data_size = _uleb128_sz(size) + size + 1

    def to_str(self):
        size = len(self.data)
        self.size = size
        # Encode the length (not decode it), then data and terminator.
        return _to_uleb128(size) + self.data + '\x00'


## \brief A parsed DEX file.
#
# Only the header is a regular child.  The map list is parsed at
# header.mapOff, and every block listed there with a type found in
# \c block_defs replaces the attribute of that name with the parsed
# array.
#
class DEXFile(composite):
    fname = None
    data = None
    header = _DEX_header
    maps = _DEX_MapItemBlock
    stringIds = array(None, _DEX_StringId)
    typeIds = array(None, _DEX_TypeId)
    protoIds = array(None, _DEX_ProtoId)
    fieldIds = array(None, _DEX_FieldId)
    methodIds = array(None, _DEX_MethodId)
    classDefs = array(None, _DEX_ClassDef)
    classDatas = array(None, _DEX_ClassData)
    typeLists = array(None, _DEX_TypeList)
    codeItems = array(None, _DEX_Code)
    annotationSetItems = array(None, _DEX_AnnotationSetItem)
    annotationsDirectoryItems = array(None, _DEX_AnnotationsDirectoryItem)
    annotationItems = array(None, _DEX_AnnotationItem)
    encodedArrayItems = array(None, _DEX_EncodedArrayItem)
    debugInfoItems = array(None, _DEX_DebugInfoItem)
    stringDataItems = array(None, _DEX_StringDataItem)

    child_names = 'header'.split()

    # Map item type code -> name of the attribute holding the block.
    block_defs = {
        # 0x0000: 'kDexTypeHeaderItem',
        0x0001: 'stringIds',
        0x0002: 'typeIds',
        0x0003: 'protoIds',
        0x0004: 'fieldIds',
        0x0005: 'methodIds',
        0x0006: 'classDefs',
        # 0x1000: 'kDexTypeMapList',
        0x1001: 'typeLists',
        # 0x1002: 'kDexTypeAnnotationSetRefList',
        0x1003: 'annotationSetItems',
        0x2000: 'classDatas',
        0x2001: 'codeItems',
        0x2002: 'stringDataItems',
        0x2003: 'debugInfoItems',
        0x2004: 'annotationItems',
        0x2005: 'encodedArrayItems',
        0x2006: 'annotationsDirectoryItems'
        }

    ## \brief Read and parse the DEX file \a fname.
    @staticmethod
    def open(fname):
        # Inside this function `open` is the builtin, not this method.
        # DEX files are binary: read them in binary mode and close the
        # file when done.
        fo = open(fname, 'rb')
        try:
            data = fo.read()
        finally:
            fo.close()

        dex = DEXFile.parse(data)
        dex.fname = fname
        return dex

    ## \brief Parse a whole DEX image held in the string \a data.
    @classmethod
    def parse(clazz, data):
        obj = super(DEXFile, clazz).parse(None, data, 0)
        obj.data = data
        obj._parse(data)
        return obj

    def _parse_maps(self):
        self.parse_child('maps', self.data, self.header.mapOff)

    ## \brief Parse the block described by one map item, when known.
    def _parse_block(self, block_map):
        if block_map.type not in self.block_defs:
            return

        child_name = self.block_defs[block_map.type]
        child_clazz = getattr(self.__class__, child_name)
        blk = child_clazz.parse_nitem(self, self.data, block_map.offset,
                                      block_map.size)
        setattr(self, child_name, blk)

    def _parse_blocks(self):
        # self.maps.items is the array object; its .items is the list.
        for map_item in self.maps.items.items:
            if map_item.type in self.block_defs:
                self._parse_block(map_item)

    def _parse(self, data):
        self._parse_maps()
        self._parse_blocks()

    def children(self):
        return 'header stringIds typeIds protoIds fieldIds methodIds ' \
            'classDefs typeLists annotationSetItems classDatas codeItems ' \
            'stringDataItems debugInfoItems annotationItems ' \
            'encodedArrayItems annotationsDirectoryItems'.split()


if __name__ == '__main__':
    # print(...) with one parenthesised argument prints the same text
    # under Python 2 and keeps the module parseable by Python 3.
    dex = DEXFile.open('data/testdata1.dex')

    print('Header')
    h = dex.header
    for attr in h.child_names:
        print('\t%s: %s' % (attr, repr(getattr(h, attr))))

    print('')
    print('Size of stringIds is %d bytes' % (dex.stringIds.data_size))

    print('')
    print('Size of typeIds is %d bytes' % (dex.typeIds.data_size))

    print('')
    print('Size of protoIds is %d bytes' % (dex.protoIds.data_size))

    print('')
    print('Size of fieldIds is %d bytes' % (dex.fieldIds.data_size))

    print('')
    print('Size of methodIds is %d bytes' % (dex.methodIds.data_size))

    print('')
    print('Size of classDefs is %d bytes' % (dex.classDefs.data_size))

    print('')
    print('Size of classDatas is %d bytes' % (dex.classDatas.data_size))

    print('')
    print('Size of typeLists is %d bytes' % (dex.typeLists.data_size))

    print('')
    print('Size of codeItems is %d bytes' % (dex.codeItems.data_size))

    print('')
    print('Size of annotationSetItems is %d bytes' %
          (dex.annotationSetItems.data_size))

    print('')
    print('Size of annotationsDirectoryItems is %d bytes' %
          (dex.annotationsDirectoryItems.data_size))

    print('')
    print('Size of annotationItems is %d bytes' %
          (dex.annotationItems.data_size))

    print('')
    print('Size of encodedArrayItems is %d bytes' %
          (dex.encodedArrayItems.data_size))

    print('')
    print('Size of debugInfoItems is %d bytes' %
          (dex.debugInfoItems.data_size))

    print('')
    print('Size of stringDataItems is %d bytes' %
          (dex.stringDataItems.data_size))

    print('')
    print('Data maps')
    for map_item in dex.maps.items.items:
        print('\t0x%04x(%s) size=%d offset=0x%08x' %
              (map_item.type, map_item.types[map_item.type],
               map_item.size, map_item.offset))