Mercurial > paraspace
view paraspace/dexfile.py @ 13:c05fac334ab1
compute size for objects
author | Thinker K.F. Li <thinker@codemud.net> |
---|---|
date | Wed, 25 May 2011 21:47:15 +0800 |
parents | 8101024c942b |
children | f5728c6868b2 |
line wrap: on
line source
import itertools


class _DEX_header(object):
    """Fixed-size header found at the very beginning of a DEX file.

    Every attribute is None until parse() has been called.  The first
    three fields are kept as raw byte slices; the remaining ones are
    4-byte little-endian unsigned integers decoded with _to_uint().
    """
    magic = None                # 0x00, 8 bytes
    checksum = None             # 0x08, 4 bytes
    signature = None            # 0x0c, 20 bytes
    fileSize = None             # 0x20, 4 bytes
    headerSize = None           # 0x24
    endianTag = None            # 0x28
    linkSize = None             # 0x2c
    linkOff = None              # 0x30
    mapOff = None               # 0x34
    stringIdsSize = None        # 0x38
    stringIdsOff = None         # 0x3c
    typeIdsSize = None          # 0x40
    typeIdsOff = None           # 0x44
    protoIdsSize = None         # 0x48
    protoIdsOff = None          # 0x4c
    fieldIdsSize = None         # 0x50
    fieldIdsOff = None          # 0x54
    methodIdsSize = None        # 0x58
    methodIdsOff = None         # 0x5c
    classDefsSize = None        # 0x60
    classDefsOff = None         # 0x64
    dataSize = None             # 0x68
    dataOff = None              # 0x6c

    # Names of all header fields, in file order.
    header_fields = [
        'magic', 'checksum', 'signature', 'fileSize', 'headerSize',
        'endianTag', 'linkSize', 'linkOff', 'mapOff', 'stringIdsSize',
        'stringIdsOff', 'typeIdsSize', 'typeIdsOff', 'protoIdsSize',
        'protoIdsOff', 'fieldIdsSize', 'fieldIdsOff', 'methodIdsSize',
        'methodIdsOff', 'classDefsSize', 'classDefsOff', 'dataSize',
        'dataOff']

    data_size = 0x70

    def parse(self, data):
        """Fill in all header attributes from the leading bytes of data."""
        self.magic = data[:8]
        self.checksum = data[8:0x0c]
        self.signature = data[0x0c:0x20]

        # Everything after the signature is a run of 4-byte integers that
        # starts at offset 0x20.
        word_fields = [
            'fileSize', 'headerSize', 'endianTag', 'linkSize', 'linkOff',
            'mapOff', 'stringIdsSize', 'stringIdsOff', 'typeIdsSize',
            'typeIdsOff', 'protoIdsSize', 'protoIdsOff', 'fieldIdsSize',
            'fieldIdsOff', 'methodIdsSize', 'methodIdsOff',
            'classDefsSize', 'classDefsOff', 'dataSize', 'dataOff']
        for position, name in enumerate(word_fields):
            start = 0x20 + 4 * position
            setattr(self, name, _to_uint(data[start:start + 4]))


## \brief Manage offset
#
# The instances are initialized with an offset.  Every time an instance
# is called, it returns the current offset and then advances the offset
# by the specified size.
#
#     moff = man_off(init_off)
#     assert moff(5) == init_off
#     assert moff() == (init_off + 5)
#     assert moff() == (init_off + 5)
#
class man_off(object):
    """Offset cursor.

    Calling an instance returns the current offset and then advances the
    stored offset by ``sz`` bytes (``sz`` defaults to 0, a plain read).
    """
    off = None

    def __init__(self, off):
        self.off = off

    def __call__(self, sz=0):
        off = self.off
        self.off = off + sz
        return off


def _to_uint(data):
    """Decode a little-endian unsigned integer from a byte string.

    Iterating over bytearray(data) yields integers on both Python 2
    (str) and Python 3 (bytes).  An empty string decodes to 0.
    """
    v = 0
    for pos, byte in enumerate(bytearray(data)):
        v |= byte << (8 * pos)
    return v


def _to_int(data):
    """Decode a little-endian two's-complement integer of len(data) bytes."""
    v = _to_uint(data)
    bits = len(data) * 8
    if bits and v & (1 << (bits - 1)):
        v -= 1 << bits
    return v


def _uleb128(data):
    """Decode an unsigned LEB128 value from the start of data.

    Returns (value, number_of_bytes_consumed).  Decoding stops at the
    first byte without the continuation bit or at the end of data.
    """
    v = 0
    nbytes = 0
    for byte in bytearray(data):
        v |= (byte & 0x7f) << (7 * nbytes)
        nbytes += 1
        if byte <= 0x7f:
            break
    return v, nbytes


def _to_uleb128(v):
    """Encode the non-negative integer v as an unsigned LEB128 byte string."""
    assert v >= 0

    out = bytearray()
    while v > 0x7f:
        out.append((v & 0x7f) | 0x80)
        v >>= 7
    out.append(v)
    return bytes(out)


def _uleb128_sz(v):
    """Return the number of bytes of the unsigned LEB128 encoding of v."""
    return len(_to_uleb128(v))


def _leb128(data):
    """Decode a signed LEB128 value; returns (value, bytes_consumed)."""
    v, nbytes = _uleb128(data)
    bits = nbytes * 7
    # Guard on bits: empty input would otherwise shift by a negative count.
    if bits and v & (1 << (bits - 1)):
        v -= 1 << bits
    return v, nbytes


def _to_leb128(v):
    """Encode the integer v as a signed LEB128 byte string."""
    out = bytearray()
    # A final byte can hold values in [-64, 63]; anything else continues.
    while v > 0x3f or v < ~0x3f:
        out.append((v & 0x7f) | 0x80)
        v >>= 7
    out.append(v & 0x7f)
    return bytes(out)


def _leb128_sz(v):
    """Return the number of bytes of the signed LEB128 encoding of v."""
    return len(_to_leb128(v))


def _compute_sz(o):
    """Return o.data_size, refreshing it first if o has compute_size()."""
    if hasattr(o, 'compute_size'):
        o.compute_size()
    return o.data_size


def _sum_data_size(obj_list):
    """Return the total (recomputed) data size of all objects in obj_list."""
    return sum(_compute_sz(o) for o in obj_list)


def _read_uleb128(data, moff, maxlen=5):
    """Read one unsigned LEB128 at the cursor moff and advance past it."""
    v, sh = _uleb128(data[moff():moff() + maxlen])
    moff(sh)
    return v


def _parse_seq(item_class, count, data, moff, method='parse'):
    """Parse count consecutive item_class objects starting at cursor moff.

    Each item is parsed with item.<method>(data, offset) and the cursor is
    advanced by the data_size the item reports afterwards.
    """
    items = []
    for _ in range(count):
        item = item_class()
        getattr(item, method)(data, moff())
        moff(item.data_size)
        items.append(item)
    return items


class _DEX_MapItem(object):
    """One entry of the map list: section type, item count and offset."""
    type = None                 # 2 bytes
    unused = None               # 2 bytes
    size = None                 # 4 bytes
    offset = None               # 4 bytes

    data_size = 12

    types = {
        0x0000: 'kDexTypeHeaderItem',
        0x0001: 'kDexTypeStringIdItem',
        0x0002: 'kDexTypeTypeIdItem',
        0x0003: 'kDexTypeProtoIdItem',
        0x0004: 'kDexTypeFieldIdItem',
        0x0005: 'kDexTypeMethodIdItem',
        0x0006: 'kDexTypeClassDefItem',
        0x1000: 'kDexTypeMapList',
        0x1001: 'kDexTypeTypeList',
        0x1002: 'kDexTypeAnnotationSetRefList',
        0x1003: 'kDexTypeAnnotationSetItem',
        0x2000: 'kDexTypeClassDataItem',
        0x2001: 'kDexTypeCodeItem',
        0x2002: 'kDexTypeStringDataItem',
        0x2003: 'kDexTypeDebugInfoItem',
        0x2004: 'kDexTypeAnnotationItem',
        0x2005: 'kDexTypeEncodedArrayItem',
        0x2006: 'kDexTypeAnnotationsDirectoryItem'
        }

    def parse(self, data):
        """Decode type, size and offset from a 12-byte map entry."""
        self.type = _to_uint(data[:2])
        self.size = _to_uint(data[4:8])
        self.offset = _to_uint(data[8:12])

    @classmethod
    def find_type_name(cls, type_name):
        """Return the numeric type code named type_name.

        Raises IndexError when the name is not in the types table.
        """
        type_value = [v for v, name in _DEX_MapItem.types.items()
                      if name == type_name][0]
        return type_value


class _DEX_TypeId(object):
    descriptorIdx = None        # 4 bytes

    data_size = 4

    def parse(self, data):
        self.descriptorIdx = _to_uint(data[:4])


class _DEX_ProtoId(object):
    shortyIdx = None            # 4 bytes
    returnTypeIdx = None        # 4 bytes
    parametersOff = None        # 4 bytes

    data_size = 12

    def parse(self, data):
        self.shortyIdx = _to_uint(data[:4])
        self.returnTypeIdx = _to_uint(data[4:8])
        self.parametersOff = _to_uint(data[8:12])


class _DEX_FieldId(object):
    classIdx = None             # 2 bytes
    typeIdx = None              # 2 bytes
    nameIdx = None              # 4 bytes

    data_size = 8

    def parse(self, data):
        self.classIdx = _to_uint(data[:2])
        self.typeIdx = _to_uint(data[2:4])
        self.nameIdx = _to_uint(data[4:8])


class _DEX_MethodId(object):
    classIdx = None             # 2 bytes
    protoIdx = None             # 2 bytes
    nameIdx = None              # 4 bytes

    data_size = 8

    def parse(self, data):
        self.classIdx = _to_uint(data[:2])
        self.protoIdx = _to_uint(data[2:4])
        self.nameIdx = _to_uint(data[4:8])


class _DEX_ClassDef(object):
    classIdx = None             # 0x00
    accessFlags = None          # 0x04
    superclassIdx = None        # 0x08
    interfacesOff = None        # 0x0c
    sourceFileIdx = None        # 0x10
    annotationsOff = None       # 0x14
    classDataOff = None         # 0x18
    staticValuesOff = None      # 0x1c

    data_size = 0x20

    def parse(self, data):
        self.classIdx = _to_uint(data[:4])
        self.accessFlags = _to_uint(data[4:8])
        self.superclassIdx = _to_uint(data[8:0xc])
        self.interfacesOff = _to_uint(data[0xc:0x10])
        self.sourceFileIdx = _to_uint(data[0x10:0x14])
        self.annotationsOff = _to_uint(data[0x14:0x18])
        self.classDataOff = _to_uint(data[0x18:0x1c])
        self.staticValuesOff = _to_uint(data[0x1c:0x20])


class _DEX_ClassDataHeader(object):
    """The four uleb128 counters that start a class data item."""
    staticFieldsSize = None
    instanceFieldsSize = None
    directMethodsSize = None
    virtualMethodsSize = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        self.staticFieldsSize = _read_uleb128(data, moff, 10)
        self.instanceFieldsSize = _read_uleb128(data, moff, 10)
        self.directMethodsSize = _read_uleb128(data, moff, 10)
        self.virtualMethodsSize = _read_uleb128(data, moff, 10)
        self.data_size = moff() - off

    def compute_size(self):
        self.data_size = \
            _uleb128_sz(self.staticFieldsSize) + \
            _uleb128_sz(self.instanceFieldsSize) + \
            _uleb128_sz(self.directMethodsSize) + \
            _uleb128_sz(self.virtualMethodsSize)


class _DEX_Field(object):
    fieldIdx = None
    accessFlags = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        self.fieldIdx = _read_uleb128(data, moff, 10)
        self.accessFlags = _read_uleb128(data, moff, 10)
        self.data_size = moff() - off

    def compute_size(self):
        self.data_size = \
            _uleb128_sz(self.fieldIdx) + \
            _uleb128_sz(self.accessFlags)


class _DEX_Method(object):
    methodIdx = None
    accessFlags = None
    codeOff = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        self.methodIdx = _read_uleb128(data, moff, 10)
        self.accessFlags = _read_uleb128(data, moff, 10)
        self.codeOff = _read_uleb128(data, moff, 10)
        self.data_size = moff() - off

    def compute_size(self):
        self.data_size = \
            _uleb128_sz(self.methodIdx) + \
            _uleb128_sz(self.accessFlags) + \
            _uleb128_sz(self.codeOff)


class _DEX_ClassData(object):
    """Class data item: a header followed by four field/method lists.

    After parse() the fieldIdx/methodIdx of every member holds the
    absolute index; in the file each index is stored as the difference
    from the previous member of the same list.
    """
    header = None               # DexClassDataHeader
    staticFields = None         # DexField*
    instanceFields = None       # DexField*
    directMethods = None        # DexMethod*
    virtualMethods = None       # DexMethod*

    data_size = None

    # (list attribute, index attribute) for the four member lists.
    _member_lists = (('staticFields', 'fieldIdx'),
                     ('instanceFields', 'fieldIdx'),
                     ('directMethods', 'methodIdx'),
                     ('virtualMethods', 'methodIdx'))

    def parse(self, data, off):
        moff = man_off(off)

        header = _DEX_ClassDataHeader()
        header.parse(data, moff())
        self.header = header
        moff(header.data_size)

        def parse_members(item_class, count, idx_attr):
            items = _parse_seq(item_class, count, data, moff)
            #
            # An index depends on the previous one to reduce size;
            # turn the stored differences into absolute indices.
            #
            idx = 0
            for item in items:
                idx = idx + getattr(item, idx_attr)
                setattr(item, idx_attr, idx)
            return items

        self.staticFields = parse_members(
            _DEX_Field, header.staticFieldsSize, 'fieldIdx')
        self.instanceFields = parse_members(
            _DEX_Field, header.instanceFieldsSize, 'fieldIdx')
        self.directMethods = parse_members(
            _DEX_Method, header.directMethodsSize, 'methodIdx')
        self.virtualMethods = parse_members(
            _DEX_Method, header.virtualMethodsSize, 'methodIdx')

        self.data_size = moff() - off

    def compute_size(self):
        """Recompute data_size from the header and the member lists.

        Member indices are sized as differences from the previous member
        (the form in which they are stored), not as absolute values.
        Each list must therefore be in ascending index order; a negative
        difference trips the assertion in _to_uleb128().
        """
        sz = _compute_sz(self.header)
        for list_attr, idx_attr in self._member_lists:
            prev_idx = 0
            for item in getattr(self, list_attr):
                abs_idx = getattr(item, idx_attr)
                # Temporarily expose the stored difference so that the
                # item's own compute_size() measures the encoded form.
                setattr(item, idx_attr, abs_idx - prev_idx)
                try:
                    sz = sz + _compute_sz(item)
                finally:
                    setattr(item, idx_attr, abs_idx)
                prev_idx = abs_idx
        self.data_size = sz


class _DEX_TypeItem(object):
    typeIdx = None              # 2 bytes

    data_size = 2

    def parse(self, data, off):
        self.typeIdx = _to_uint(data[off:off + 2])


class _DEX_TypeList(object):
    """A 4-byte count followed by that many 2-byte type items."""
    typeItems = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        size = _to_uint(data[moff(4):moff()])
        self.typeItems = _parse_seq(_DEX_TypeItem, size, data, moff)
        self.data_size = moff() - off

    def compute_size(self):
        self.data_size = 4 + _sum_data_size(self.typeItems)


class _DEX_TypeLists(object):
    """All type lists of a file; every list starts on a 4-byte boundary."""
    typeLists = None

    data_size = None

    def parse(self, num, data, off):
        moff = man_off(off)

        typeLists = []
        for _ in range(num):
            moff.off = (moff(0) + 3) & ~0x3  # aligned for 4 bytes
            typeList = _DEX_TypeList()
            typeList.parse(data, moff())
            moff(typeList.data_size)
            typeLists.append(typeList)

        self.typeLists = typeLists
        self.data_size = moff(0) - off

    def compute_size(self):
        """Recompute data_size, padding before each list to 4 bytes.

        An empty collection has size 0.
        """
        size = 0
        for typeList in self.typeLists:
            size = ((size + 3) & ~0x3) + _compute_sz(typeList)
        self.data_size = size


class _DEX_Try(object):
    startAddr = None            # 4 bytes
    insnCount = None            # 2 bytes
    handlerOff = None           # 2 bytes

    data_size = 8

    def parse(self, data, off):
        moff = man_off(off)
        self.startAddr = _to_uint(data[moff(4):moff()])
        self.insnCount = _to_uint(data[moff(2):moff()])
        self.handlerOff = _to_uint(data[moff(2):moff()])


class _DEX_CatchHandler(object):
    """One catch handler; typeIdx stays None for a catch-all handler."""
    typeIdx = None
    address = None

    data_size = None

    def parse(self, data, off):
        """Parse a typed handler: type index followed by address."""
        moff = man_off(off)
        self.typeIdx = _read_uleb128(data, moff)
        self.address = _read_uleb128(data, moff)
        self.data_size = moff() - off

    def parse1(self, data, off):
        """Parse a catch-all handler, which has an address only."""
        self.address, sh = _uleb128(data[off:off + 5])
        self.data_size = sh

    def compute_size(self):
        if self.typeIdx is not None:
            size = _uleb128_sz(self.typeIdx)
        else:
            size = 0
        self.data_size = size + _uleb128_sz(self.address)


class _DEX_Catch(object):
    """A list of catch handlers, optionally ending with a catch-all."""
    catchesAll = None
    handlers = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)

        count, sh = _leb128(data[moff():moff() + 5])
        moff(sh)

        # A non-positive count means -count typed handlers followed by
        # one catch-all handler.
        if count > 0:
            self.catchesAll = False
        else:
            self.catchesAll = True
            count = -count

        self.handlers = _parse_seq(_DEX_CatchHandler, count, data, moff)

        if self.catchesAll:
            #
            # Additional handler for catching all
            #
            self.handlers.extend(
                _parse_seq(_DEX_CatchHandler, 1, data, moff, 'parse1'))

        self.data_size = moff() - off

    def compute_size(self):
        """Recompute data_size from handlers and the catchesAll flag."""
        count = len(self.handlers)
        if self.catchesAll:
            # The catch-all handler is not counted; the count is negated.
            count = -(count - 1)
        count_sz = _leb128_sz(count)

        handlers_size = _sum_data_size(self.handlers)

        self.data_size = count_sz + handlers_size


class _DEX_Code(object):
    """Code item.  The instruction words are skipped, not stored."""
    registersSize = None        # 2 bytes
    insSize = None              # 2 bytes
    outsSize = None             # 2 bytes
    triesSize = None            # 2 bytes
    debugInfoOff = None         # 4 bytes
    insnsSize = None            # 4 bytes
    insns = None
    try_items = None
    catch_handler_items = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        self.registersSize = _to_uint(data[moff(2):moff()])
        self.insSize = _to_uint(data[moff(2):moff()])
        self.outsSize = _to_uint(data[moff(2):moff()])
        self.triesSize = _to_uint(data[moff(2):moff()])
        self.debugInfoOff = _to_uint(data[moff(4):moff()])
        self.insnsSize = _to_uint(data[moff(4):moff()])

        moff(self.insnsSize * 2)    # skip insns

        # No tries, no catch handlers: both lists stay None in that case.
        if self.triesSize > 0:
            moff.off = (moff.off + 3) & ~0x3  # align tries to 4 bytes
            self.try_items = _parse_seq(
                _DEX_Try, self.triesSize, data, moff)

            handlersSize = _read_uleb128(data, moff)
            self.catch_handler_items = _parse_seq(
                _DEX_Catch, handlersSize, data, moff)

        moff.off = (moff() + 3) & ~0x3  # round code item to 4 bytes
        self.data_size = moff() - off

    def compute_size(self):
        size = 16 + self.insnsSize * 2
        if self.triesSize > 0:
            size = (size + 0x3) & ~0x3
            catch_handler_items = self.catch_handler_items
            size = size + \
                _sum_data_size(self.try_items) + \
                _uleb128_sz(len(catch_handler_items)) + \
                _sum_data_size(catch_handler_items)

        self.data_size = (size + 3) & ~0x3


## \brief File offset to Annotation item.
#
# This type is not in libdex of Dalvik.  We add this class to tracking
# information for layout algorithm.
#
class _DEX_AnnotationRefItem(object):
    annotationOff = None        # 4 bytes

    data_size = 4

    def parse(self, data, off):
        self.annotationOff = _to_uint(data[off:off + 4])


class _DEX_AnnotationSetItem(object):
    # size = None               # 4 bytes
    annotations = None          # 4 * size bytes

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        size = _to_uint(data[moff(4):moff()])
        self.annotations = _parse_seq(
            _DEX_AnnotationRefItem, size, data, moff)
        self.data_size = moff() - off

    def compute_size(self):
        self.data_size = 4 + _sum_data_size(self.annotations)


class _DEX_FieldAnnotationsItem(object):
    fieldIdx = None             # 4 bytes
    annotationsOff = None       # 4 bytes

    data_size = 8

    def parse(self, data, off):
        moff = man_off(off)
        self.fieldIdx = _to_uint(data[moff(4):moff()])
        self.annotationsOff = _to_uint(data[moff(4):moff()])


class _DEX_MethodAnnotationsItem(object):
    methodIdx = None            # 4 bytes
    annotationsOff = None       # 4 bytes

    data_size = 8

    def parse(self, data, off):
        moff = man_off(off)
        self.methodIdx = _to_uint(data[moff(4):moff()])
        self.annotationsOff = _to_uint(data[moff(4):moff()])


class _DEX_ParameterAnnotationsItem(object):
    methodIdx = None            # 4 bytes
    annotationsOff = None       # 4 bytes

    data_size = 8

    def parse(self, data, off):
        moff = man_off(off)
        self.methodIdx = _to_uint(data[moff(4):moff()])
        self.annotationsOff = _to_uint(data[moff(4):moff()])


class _DEX_AnnotationsDirectoryItem(object):
    classAnnotationsOff = None  # 4 bytes
    fieldAnnotationsItems = None
    methodAnnotationsItems = None
    parameterAnnotationsItems = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        self.classAnnotationsOff = _to_uint(data[moff(4):moff()])
        fieldsSize = _to_uint(data[moff(4):moff()])
        methodsSize = _to_uint(data[moff(4):moff()])
        parametersSize = _to_uint(data[moff(4):moff()])

        self.fieldAnnotationsItems = _parse_seq(
            _DEX_FieldAnnotationsItem, fieldsSize, data, moff)
        self.methodAnnotationsItems = _parse_seq(
            _DEX_MethodAnnotationsItem, methodsSize, data, moff)
        self.parameterAnnotationsItems = _parse_seq(
            _DEX_ParameterAnnotationsItem, parametersSize, data, moff)

        self.data_size = moff() - off

    def compute_size(self):
        all_items_size = \
            _sum_data_size(self.fieldAnnotationsItems) + \
            _sum_data_size(self.methodAnnotationsItems) + \
            _sum_data_size(self.parameterAnnotationsItems)
        self.data_size = 16 + all_items_size


##
#
# \see createAnnotationMember() in dalvik/vm/reflect/Annotation.c
#
class _DEX_AnnotationMember(object):
    """An encoded value, optionally preceded by a uleb128 name index.

    The value starts with one byte: the low 5 bits give the value type,
    the high 3 bits give an argument.  For scalar types the argument is
    the number of value bytes minus one.
    """
    nameIdx = None              # optional
    valueType = None            # 1 byte
    value = None

    #
    # Constants from DexFile.h
    #
    kDexAnnotationByte = 0x00
    kDexAnnotationShort = 0x02
    kDexAnnotationChar = 0x03
    kDexAnnotationInt = 0x04
    kDexAnnotationLong = 0x06
    kDexAnnotationFloat = 0x10
    kDexAnnotationDouble = 0x11
    kDexAnnotationString = 0x17
    kDexAnnotationType = 0x18
    kDexAnnotationField = 0x19
    kDexAnnotationMethod = 0x1a
    kDexAnnotationEnum = 0x1b
    kDexAnnotationArray = 0x1c
    kDexAnnotationAnnotation = 0x1d
    kDexAnnotationNull = 0x1e
    kDexAnnotationBoolean = 0x1f

    kDexAnnotationValueTypeMask = 0x1f
    kDexAnnotationValueArgShift = 5

    # Scalar types decoded as signed integers.
    _signed_types = (kDexAnnotationByte, kDexAnnotationShort,
                     kDexAnnotationInt, kDexAnnotationLong)
    # Scalar types decoded as unsigned integers.  Float and double are
    # kept as the raw integer of the stored bytes, not converted.
    _unsigned_types = (kDexAnnotationChar, kDexAnnotationFloat,
                       kDexAnnotationDouble, kDexAnnotationString,
                       kDexAnnotationType, kDexAnnotationMethod,
                       kDexAnnotationField, kDexAnnotationEnum)

    data_size = None

    def parse(self, data, off):
        """Parse a named member: uleb128 name index, then the value."""
        self.nameIdx, sh = _uleb128(data[off:off + 5])
        self.parse_noname(data, off + sh)
        self.data_size = self.data_size + sh

    def parse_noname(self, data, off):
        """Parse a bare encoded value starting at off.

        Raises ValueError for an unknown value type.
        """
        moff = man_off(off)
        valueType = _to_uint(data[moff(1):moff()])
        self.valueType = valueType
        width = valueType >> self.kDexAnnotationValueArgShift
        vtype = valueType & self.kDexAnnotationValueTypeMask

        if vtype in self._signed_types:
            # A scalar occupies width + 1 bytes.
            self.value = _to_int(data[moff(width + 1):moff()])
        elif vtype in self._unsigned_types:
            self.value = _to_uint(data[moff(width + 1):moff()])
        elif vtype == self.kDexAnnotationBoolean:
            # The value lives in the argument bits; no extra bytes.
            self.value = width != 0
        elif vtype == self.kDexAnnotationArray:
            size = _read_uleb128(data, moff)
            self.value = _parse_seq(
                _DEX_AnnotationMember, size, data, moff, 'parse_noname')
        elif vtype == self.kDexAnnotationAnnotation:
            annoitem = _DEX_AnnotationItem()
            annoitem.parse_novisibility(data, moff())
            moff(annoitem.data_size)
            self.value = annoitem
        elif vtype == self.kDexAnnotationNull:
            self.value = 0
        else:
            raise ValueError(
                'Bad annotation element value byte 0x%02x' % (valueType))

        self.data_size = moff() - off

    def compute_size(self):
        """Recompute data_size; raises ValueError for an unknown type."""
        if self.nameIdx is not None:
            nameIdx_size = _uleb128_sz(self.nameIdx)
        else:
            nameIdx_size = 0

        valueType = self.valueType
        width = valueType >> self.kDexAnnotationValueArgShift
        vtype = valueType & self.kDexAnnotationValueTypeMask

        if vtype in self._signed_types or vtype in self._unsigned_types:
            # type byte + (width + 1) value bytes
            value_size = width + 2
        elif vtype in (self.kDexAnnotationBoolean,
                       self.kDexAnnotationNull):
            value_size = 1
        elif vtype == self.kDexAnnotationArray:
            array_cnt_size = _uleb128_sz(len(self.value))
            array_size = _sum_data_size(self.value)
            value_size = 1 + array_cnt_size + array_size
        elif vtype == self.kDexAnnotationAnnotation:
            value_size = 1 + _compute_sz(self.value)
        else:
            raise ValueError(
                'Bad annotation element value byte 0x%02x' % (valueType))

        self.data_size = nameIdx_size + value_size


## \brief Annotation item
#
# \see processEncodedAnnotation() in dalvik/vm/reflect/Annotation.c
#
class _DEX_AnnotationItem(object):
    visibility = None           # 1 byte (optional)
    typeIdx = None
    members = None

    kDexVisibilityBuild = 0x00
    kDexVisibilityRuntime = 0x01
    kDexVisibilitySystem = 0x02

    data_size = None

    def parse(self, data, off):
        """Parse a visibility byte followed by the encoded annotation."""
        self.visibility = _to_uint(data[off:off + 1])
        self.parse_novisibility(data, off + 1)
        self.data_size = self.data_size + 1

    def parse_novisibility(self, data, off):
        """Parse type index, member count and the named members."""
        moff = man_off(off)
        self.typeIdx = _read_uleb128(data, moff)
        size = _read_uleb128(data, moff)
        self.members = _parse_seq(_DEX_AnnotationMember, size, data, moff)
        self.data_size = moff() - off

    def compute_size(self):
        if self.visibility is not None:
            visibility_size = 1
        else:
            visibility_size = 0

        typeIdx_size = _uleb128_sz(self.typeIdx)
        members_cnt_size = _uleb128_sz(len(self.members))
        members_size = members_cnt_size + _sum_data_size(self.members)

        self.data_size = visibility_size + typeIdx_size + members_size


class _DEX_EncodedArrayItem(object):
    elements = None

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)
        size = _read_uleb128(data, moff)
        self.elements = _parse_seq(
            _DEX_AnnotationMember, size, data, moff, 'parse_noname')
        self.data_size = moff() - off

    def compute_size(self):
        elements_cnt_size = _uleb128_sz(len(self.elements))
        self.data_size = elements_cnt_size + _sum_data_size(self.elements)


class _DEX_DebugInfoItem(object):
    """Debug info item: start line, parameter names and an opcode stream.

    Opcodes are stored as tuples (opcode, operand...).  Operands are
    kept as the raw decoded LEB128 values.
    """
    start_line = None
    parameters = None
    opcodes = None

    DBG_END_SEQUENCE = 0x00
    DBG_ADVANCE_PC = 0x01
    DBG_ADVANCE_LINE = 0x02
    DBG_START_LOCAL = 0x03
    DBG_START_LOCAL_EXTENDED = 0x04
    DBG_END_LOCAL = 0x05
    DBG_RESTART_LOCAL = 0x06
    DBG_SET_PROLOGUE_END = 0x07
    DBG_SET_EPILOGUE_BEGIN = 0x08
    DBG_SET_FILE = 0x09
    DBG_FIRST_SPECIAL = 0x0a
    DBG_LINE_BASE = -4
    DBG_LINE_RANGE = 15

    data_size = None

    def parse(self, data, off):
        moff = man_off(off)

        self.start_line = _read_uleb128(data, moff)
        parameters_size = _read_uleb128(data, moff)

        #
        # Parse parameters
        #
        self.parameters = [_read_uleb128(data, moff)
                           for i in range(parameters_size)]

        #
        # Parse debug opcodes
        #
        opcodes = []
        while True:
            opcode = _to_uint(data[moff(1):moff()])

            if opcode == self.DBG_END_SEQUENCE:
                opcodes.append((opcode,))
                break
            elif opcode == self.DBG_ADVANCE_LINE:
                adv, sh = _leb128(data[moff():moff() + 5])
                moff(sh)
                opcodes.append((opcode, adv))
            elif opcode in (self.DBG_ADVANCE_PC,
                            self.DBG_END_LOCAL,
                            self.DBG_RESTART_LOCAL,
                            self.DBG_SET_FILE):
                # One uleb128 operand.  DBG_SET_FILE carries a name
                # index that must be consumed, otherwise its bytes would
                # be misread as opcodes.
                opcodes.append((opcode, _read_uleb128(data, moff)))
            elif opcode in (self.DBG_START_LOCAL,
                            self.DBG_START_LOCAL_EXTENDED):
                reg = _read_uleb128(data, moff)
                name = _read_uleb128(data, moff)
                descriptor = _read_uleb128(data, moff)
                if opcode == self.DBG_START_LOCAL_EXTENDED:
                    signature = _read_uleb128(data, moff)
                    opcodes.append((opcode, reg, name, descriptor,
                                    signature))
                else:
                    opcodes.append((opcode, reg, name, descriptor))
            else:
                # DBG_SET_PROLOGUE_END, DBG_SET_EPILOGUE_BEGIN and the
                # special opcodes have no operands.
                opcodes.append((opcode,))
        self.opcodes = opcodes

        self.data_size = moff() - off

    def compute_size(self):
        start_line_size = _uleb128_sz(self.start_line)
        parameters_cnt_size = _uleb128_sz(len(self.parameters))
        parameters_size = parameters_cnt_size + \
            sum(_uleb128_sz(parameter) for parameter in self.parameters)

        def compute_opcode_size(code):
            opcode = code[0]
            if opcode == self.DBG_ADVANCE_LINE:
                return 1 + _leb128_sz(code[1])
            if opcode == self.DBG_END_SEQUENCE:
                return 1
            if opcode <= self.DBG_SET_FILE:
                # Every remaining operand is an unsigned LEB128.
                return 1 + sum(_uleb128_sz(arg) for arg in code[1:])
            return 1

        opcodes_size = sum(compute_opcode_size(code)
                           for code in self.opcodes)

        self.data_size = start_line_size + parameters_size + opcodes_size


class DEXFile(object):
    """Parsed representation of a DEX file.

    Sections that are absent from the file's map keep the value None.
    """
    _data = None
    _header = None
    _maps = None
    _strings = None
    _typeIds = None
    _protoIds = None
    _fieldIds = None
    _methodIds = None
    _classDefs = None
    _classDatas = None
    _typeLists = None
    _codeItems = None
    _annotationSetItems = None
    _annotationsDirectoryItems = None
    _annotationItems = None
    _encodedArrayItems = None
    _debugInfoItems = None

    def __init__(self):
        pass

    def open(self, filename):
        """Read filename as binary data and parse it."""
        with open(filename, 'rb') as fo:
            data = fo.read()
        self.parse(data)

    def _parse_fixed_items(self, item_class, num, off):
        """Parse num fixed-size item_class records starting at off."""
        data = self._data
        item_size = item_class.data_size
        items = []
        for item_off in range(off, off + item_size * num, item_size):
            item = item_class()
            item.parse(data[item_off:item_off + item_size])
            items.append(item)
        return items

    def _parse_map_items(self, type_name, item_class):
        """Parse the section named type_name as a run of item_class.

        Returns None when the map has no entry for that section.
        """
        item_map = self.find_map_item_name(type_name)
        if item_map is None:
            return None
        moff = man_off(item_map.offset)
        return _parse_seq(item_class, item_map.size, self._data, moff)

    def _parse_maps(self):
        data = self._data
        header = self._header
        off = header.mapOff

        num = _to_uint(data[off:off + 4])
        off = off + 4

        self._maps = self._parse_fixed_items(_DEX_MapItem, num, off)

    def find_map_item(self, type_value):
        """Return the first map item of type type_value, or None."""
        for map_item in self._maps:
            if map_item.type == type_value:
                return map_item
        return None

    def find_map_item_name(self, type_name):
        """Return the map item whose type is named type_name, or None."""
        type_value = _DEX_MapItem.find_type_name(type_name)
        return self.find_map_item(type_value)

    def _parse_strings(self):
        data = self._data
        header = self._header
        strings = []

        num = header.stringIdsSize
        off = header.stringIdsOff
        for i in range(num):
            str_start_off = _to_uint(data[off:off + 4])
            # Skip the uleb128 length prefix first, then look for the
            # terminating NUL byte of the string itself.
            sz, sh = _uleb128(data[str_start_off:str_start_off + 5])
            str_start_off = str_start_off + sh
            str_stop_off = data.index(b'\x00', str_start_off)
            strings.append(data[str_start_off:str_stop_off])
            off = off + 4

        self._strings = strings

    def _parse_typeIds(self):
        header = self._header
        self._typeIds = self._parse_fixed_items(
            _DEX_TypeId, header.typeIdsSize, header.typeIdsOff)

    def _parse_protoIds(self):
        header = self._header
        self._protoIds = self._parse_fixed_items(
            _DEX_ProtoId, header.protoIdsSize, header.protoIdsOff)

    def _parse_fieldIds(self):
        header = self._header
        self._fieldIds = self._parse_fixed_items(
            _DEX_FieldId, header.fieldIdsSize, header.fieldIdsOff)

    def _parse_methodIds(self):
        header = self._header
        self._methodIds = self._parse_fixed_items(
            _DEX_MethodId, header.methodIdsSize, header.methodIdsOff)

    def _parse_classDefs(self):
        header = self._header
        self._classDefs = self._parse_fixed_items(
            _DEX_ClassDef, header.classDefsSize, header.classDefsOff)

    def _parse_classDatas(self):
        self._classDatas = self._parse_map_items(
            'kDexTypeClassDataItem', _DEX_ClassData)

    def _parse_typeLists(self):
        typeList_map = self.find_map_item_name('kDexTypeTypeList')
        if typeList_map is None:
            self._typeLists = None
            return

        typeLists = _DEX_TypeLists()
        typeLists.parse(typeList_map.size, self._data, typeList_map.offset)
        self._typeLists = typeLists

    def _parse_codeItems(self):
        self._codeItems = self._parse_map_items(
            'kDexTypeCodeItem', _DEX_Code)

    def _parse_annotationSetItems(self):
        self._annotationSetItems = self._parse_map_items(
            'kDexTypeAnnotationSetItem', _DEX_AnnotationSetItem)

    def _parse_annotationsDirectoryItems(self):
        self._annotationsDirectoryItems = self._parse_map_items(
            'kDexTypeAnnotationsDirectoryItem',
            _DEX_AnnotationsDirectoryItem)

    def _parse_annotationItems(self):
        self._annotationItems = self._parse_map_items(
            'kDexTypeAnnotationItem', _DEX_AnnotationItem)

    def _parse_header(self):
        data = self._data
        header = _DEX_header()
        header.parse(data)
        self._header = header

    def _parse_encodedArrayItems(self):
        self._encodedArrayItems = self._parse_map_items(
            'kDexTypeEncodedArrayItem', _DEX_EncodedArrayItem)

    def _parse_debugInfoItems(self):
        self._debugInfoItems = self._parse_map_items(
            'kDexTypeDebugInfoItem', _DEX_DebugInfoItem)

    def parse(self, data):
        """Parse the complete content of a DEX file given as bytes."""
        self._data = data
        self._parse_header()
        self._parse_maps()
        self._parse_strings()
        self._parse_typeIds()
        self._parse_protoIds()
        self._parse_fieldIds()
        self._parse_methodIds()
        self._parse_classDefs()
        self._parse_classDatas()
        self._parse_typeLists()
        self._parse_codeItems()
        self._parse_annotationSetItems()
        self._parse_annotationsDirectoryItems()
        self._parse_annotationItems()
        self._parse_encodedArrayItems()
        self._parse_debugInfoItems()


def _main(filename='test.dex'):
    """Parse filename and print a summary of its content.

    Each "size is A/B" line shows the size measured while parsing (A)
    next to the size recomputed by compute_size() (B).
    """
    def text(s):
        # Strings are kept as raw bytes; decode them for display where
        # bytes and str are different types.
        if isinstance(s, bytes) and not isinstance(s, str):
            return s.decode('utf-8', 'replace')
        return s

    def report(label, items):
        items = items or []
        # Sum the parsed sizes first: recomputing overwrites data_size.
        parsed = sum(item.data_size for item in items)
        computed = sum(_compute_sz(item) for item in items)
        print('')
        print('%s size is %d/%d bytes' % (label, parsed, computed))

    dex = DEXFile()
    dex.open(filename)

    print('Header')
    h = dex._header
    for attr in h.header_fields:
        print('\t%s: %s' % (attr, repr(getattr(h, attr))))

    print('')
    print('Define Classes')
    strings = dex._strings
    classDefs = dex._classDefs
    typeIds = dex._typeIds
    for classDef in classDefs:
        typeId = typeIds[classDef.classIdx]
        descriptor = text(strings[typeId.descriptorIdx])
        data_off = classDef.classDataOff
        print('\t%s @0x%x' % (descriptor, data_off))

    print('')
    print('Reference Classes')
    for typeId in typeIds:
        descriptor = text(strings[typeId.descriptorIdx])
        print('\t%s' % (descriptor))

    print('')
    print('Class data')
    methodIds = dex._methodIds
    for classData in dex._classDatas or []:
        print('\tclass')
        for methods in (classData.directMethods, classData.virtualMethods):
            for method in methods:
                code_off = method.codeOff
                methodId = methodIds[method.methodIdx]
                name = text(strings[methodId.nameIdx])
                print('\t\t%s@0x%x' % (name, code_off))

    if dex._typeLists is not None:
        print('')
        print('TypeLists size is %d/%d bytes' %
              (dex._typeLists.data_size, _compute_sz(dex._typeLists)))

    report('CodeItems', dex._codeItems)
    report('AnnotationSetItems', dex._annotationSetItems)
    report('AnnotationsDirtoryItems', dex._annotationsDirectoryItems)
    report('AnnotationItems', dex._annotationItems)
    report('EncodedArrayItems', dex._encodedArrayItems)
    report('DebugInfoItems', dex._debugInfoItems)

    print('')
    print('Data maps')
    for map_item in dex._maps:
        print('\t0x%04x(%s) size=%d offset=0x%08x' %
              (map_item.type, map_item.types[map_item.type],
               map_item.size, map_item.offset))


if __name__ == '__main__':
    _main()