Mercurial > paraspace
view paraspace/dexfile.py @ 3:a78db169b0c8
parse AnnotationSetItems
author | Thinker K.F. Li <thinker@codemud.net> |
---|---|
date | Mon, 23 May 2011 21:44:58 +0800 |
parents | add64d56b0e2 |
children | b0766f1984bb |
line wrap: on
line source
# Parser for DEX files.
#
# All multi-byte integers are decoded as little-endian.  The input is
# expected to be a byte string (``str`` on Python 2, ``bytes`` on Python 3).


class _DEX_header(object):
    """File header; covers bytes 0x00-0x6f of the image."""
    magic = None                # 0x00, 8 bytes
    checksum = None             # 0x08, 4 bytes
    signature = None            # 0x0c, 20 bytes
    fileSize = None             # 0x20, 4 bytes
    headerSize = None           # 0x24
    endianTag = None            # 0x28
    linkSize = None             # 0x2c
    linkOff = None              # 0x30
    mapOff = None               # 0x34
    stringIdsSize = None        # 0x38
    stringIdsOff = None         # 0x3c
    typeIdsSize = None          # 0x40
    typeIdsOff = None           # 0x44
    protoIdsSize = None         # 0x48
    protoIdsOff = None          # 0x4c
    fieldIdsSize = None         # 0x50
    fieldIdsOff = None          # 0x54
    methodIdsSize = None        # 0x58
    methodIdsOff = None         # 0x5c
    classDefsSize = None        # 0x60
    classDefsOff = None         # 0x64
    dataSize = None             # 0x68
    dataOff = None              # 0x6c

    # Names of all header attributes, in file order.
    header_fields = \
        'magic checksum signature fileSize headerSize endianTag ' \
        'linkSize linkOff mapOff stringIdsSize stringIdsOff typeIdsSize ' \
        'typeIdsOff protoIdsSize protoIdsOff fieldIdsSize fieldIdsOff ' \
        'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \
        'dataSize dataOff'.split()

    def parse(self, data):
        """Fill the header attributes from the start of `data`.

        `magic`, `checksum` and `signature` are kept as raw byte slices;
        every later field is decoded as a 4-byte unsigned integer.
        """
        self.magic = data[:8]
        self.checksum = data[8:0x0c]
        self.signature = data[0x0c:0x20]

        # The first three names are the raw fields handled above; the rest
        # are consecutive 4-byte integers starting at 0x20.
        idx = 0x20
        for field in self.header_fields[3:]:
            setattr(self, field, _to_uint(data[idx:idx + 4]))
            idx = idx + 4


class man_off(object):
    """Running offset: calling it returns the current offset, then advances.

    `moff(sz)` returns the offset before the call and moves it forward by
    `sz`, so `moff(0)` simply reads the current offset.
    """
    off = None

    def __init__(self, off):
        self.off = off

    def __call__(self, sz):
        off = self.off
        self.off = off + sz
        return off


def _to_uint(data):
    """Decode the whole of `data` as a little-endian unsigned integer."""
    v = 0
    sh = 0
    # bytearray yields integers on both Python 2 and Python 3.
    for c in bytearray(data):
        v = v | (c << sh)
        sh = sh + 8
    return v


def _uleb128(data):
    """Decode a variable-length unsigned integer from the start of `data`.

    Each byte contributes its low 7 bits, least significant group first;
    decoding stops after the first byte whose high bit is clear, or when
    `data` runs out.

    Returns `(value, nbytes)` where `nbytes` is the number of bytes consumed
    (0 for empty input).
    """
    v = 0
    nbytes = 0
    for cv in bytearray(data):
        v = v | ((cv & 0x7f) << (7 * nbytes))
        nbytes = nbytes + 1
        if cv <= 0x7f:
            break
    return v, nbytes


def _leb128(data):
    """Signed counterpart of `_uleb128`; returns `(value, nbytes)`.

    The top bit of the last 7-bit group consumed is treated as the sign bit.
    Empty input yields `(0, 0)`.
    """
    v, nbytes = _uleb128(data)
    # Guard nbytes == 0: a negative shift count would raise ValueError.
    if nbytes and v & (1 << (nbytes * 7 - 1)):
        v = v - (1 << (nbytes * 7))
    return v, nbytes


def _parse_uleb128_fields(obj, names, data, off, window=10):
    """Read consecutive `_uleb128` values from `data` into attributes of `obj`.

    `names` gives the attribute names in file order and `window` is the
    number of bytes handed to the decoder for each value.  Returns the total
    number of bytes consumed.
    """
    cur = off
    for name in names:
        value, nbytes = _uleb128(data[cur:cur + window])
        setattr(obj, name, value)
        cur = cur + nbytes
    return cur - off


class _DEX_MapItem(object):
    """One 12-byte entry of the map list."""
    type = None                 # 2 bytes
    unused = None               # 2 bytes
    size = None                 # 4 bytes
    offset = None               # 4 bytes

    data_size = 12

    types = {
        0x0000: 'kDexTypeHeaderItem',
        0x0001: 'kDexTypeStringIdItem',
        0x0002: 'kDexTypeTypeIdItem',
        0x0003: 'kDexTypeProtoIdItem',
        0x0004: 'kDexTypeFieldIdItem',
        0x0005: 'kDexTypeMethodIdItem',
        0x0006: 'kDexTypeClassDefItem',
        0x1000: 'kDexTypeMapList',
        0x1001: 'kDexTypeTypeList',
        0x1002: 'kDexTypeAnnotationSetRefList',
        0x1003: 'kDexTypeAnnotationSetItem',
        0x2000: 'kDexTypeClassDataItem',
        0x2001: 'kDexTypeCodeItem',
        0x2002: 'kDexTypeStringDataItem',
        0x2003: 'kDexTypeDebugInfoItem',
        0x2004: 'kDexTypeAnnotationItem',
        0x2005: 'kDexTypeEncodedArrayItem',
        0x2006: 'kDexTypeAnnotationsDirectoryItem'
        }

    def parse(self, data):
        """Decode `type`, `size` and `offset`; bytes 2-3 are not read."""
        self.type = _to_uint(data[:2])
        self.size = _to_uint(data[4:8])
        self.offset = _to_uint(data[8:12])

    @classmethod
    def find_type_name(cls, type_name):
        """Return the numeric type code registered under `type_name`.

        Raises IndexError when the name is not in `types`.
        """
        for value, name in cls.types.items():
            if name == type_name:
                return value
        raise IndexError('unknown map item type name: %r' % (type_name,))


class _DEX_TypeId(object):
    descriptorIdx = None        # 4 bytes

    data_size = 4

    def parse(self, data):
        self.descriptorIdx = _to_uint(data[:4])


class _DEX_ProtoId(object):
    shortyIdx = None            # 4 bytes
    returnTypeIdx = None        # 4 bytes
    parametersOff = None        # 4 bytes

    data_size = 12

    def parse(self, data):
        self.shortyIdx = _to_uint(data[:4])
        self.returnTypeIdx = _to_uint(data[4:8])
        self.parametersOff = _to_uint(data[8:12])


class _DEX_FieldId(object):
    classIdx = None             # 2 bytes
    typeIdx = None              # 2 bytes
    nameIdx = None              # 4 bytes

    data_size = 8

    def parse(self, data):
        self.classIdx = _to_uint(data[:2])
        self.typeIdx = _to_uint(data[2:4])
        self.nameIdx = _to_uint(data[4:8])


class _DEX_MethodId(object):
    classIdx = None             # 2 bytes
    protoIdx = None             # 2 bytes
    nameIdx = None              # 4 bytes

    data_size = 8

    def parse(self, data):
        self.classIdx = _to_uint(data[:2])
        self.protoIdx = _to_uint(data[2:4])
        self.nameIdx = _to_uint(data[4:8])


class _DEX_ClassDef(object):
    classIdx = None             # 0x00
    accessFlags = None          # 0x04
    superclassIdx = None        # 0x08
    interfacesOff = None        # 0x0c
    sourceFileIdx = None        # 0x10
    annotationsOff = None       # 0x14
    classDataOff = None         # 0x18
    staticValuesOff = None      # 0x1c

    data_size = 0x20

    def parse(self, data):
        self.classIdx = _to_uint(data[:4])
        self.accessFlags = _to_uint(data[4:8])
        self.superclassIdx = _to_uint(data[8:0xc])
        self.interfacesOff = _to_uint(data[0xc:0x10])
        self.sourceFileIdx = _to_uint(data[0x10:0x14])
        self.annotationsOff = _to_uint(data[0x14:0x18])
        self.classDataOff = _to_uint(data[0x18:0x1c])
        self.staticValuesOff = _to_uint(data[0x1c:0x20])


class _DEX_ClassDataHeader(object):
    """Four variable-length counters that start a class data item."""
    staticFieldsSize = None
    instanceFieldsSize = None
    directMethodsSize = None
    virtualMethodsSize = None

    data_size = None

    def parse(self, data, off):
        names = ('staticFieldsSize', 'instanceFieldsSize',
                 'directMethodsSize', 'virtualMethodsSize')
        self.data_size = _parse_uleb128_fields(self, names, data, off)


class _DEX_Field(object):
    fieldIdx = None
    accessFlags = None

    data_size = None

    def parse(self, data, off):
        names = ('fieldIdx', 'accessFlags')
        self.data_size = _parse_uleb128_fields(self, names, data, off)


class _DEX_Method(object):
    methodIdx = None
    accessFlags = None
    codeOff = None

    data_size = None

    def parse(self, data, off):
        names = ('methodIdx', 'accessFlags', 'codeOff')
        self.data_size = _parse_uleb128_fields(self, names, data, off)


class _DEX_ClassData(object):
    """Header followed by four lists: static/instance fields, then
    direct/virtual methods."""
    header = None               # DexClassDataHeader
    staticFields = None         # DexField*
    instanceFields = None       # DexField*
    directMethods = None        # DexMethod*
    virtualMethods = None       # DexMethod*

    data_size = None

    def parse(self, data, off):
        """Parse one class data item starting at `off`.

        After parsing, `fieldIdx` / `methodIdx` of every entry hold absolute
        indexes and `data_size` is the number of bytes consumed.
        """
        header = _DEX_ClassDataHeader()
        header.parse(data, off)
        self.header = header

        def parse_list(item_class, idx_attr, count, cur_off):
            # Each stored index is a difference from the previous entry of
            # the same list (to reduce size); accumulate them from zero.
            items = []
            prev_idx = 0
            for _ in range(count):
                item = item_class()
                item.parse(data, cur_off)
                cur_off = cur_off + item.data_size
                prev_idx = prev_idx + getattr(item, idx_attr)
                setattr(item, idx_attr, prev_idx)
                items.append(item)
            return items, cur_off

        cur_off = off + header.data_size
        self.staticFields, cur_off = parse_list(
            _DEX_Field, 'fieldIdx', header.staticFieldsSize, cur_off)
        self.instanceFields, cur_off = parse_list(
            _DEX_Field, 'fieldIdx', header.instanceFieldsSize, cur_off)
        self.directMethods, cur_off = parse_list(
            _DEX_Method, 'methodIdx', header.directMethodsSize, cur_off)
        self.virtualMethods, cur_off = parse_list(
            _DEX_Method, 'methodIdx', header.virtualMethodsSize, cur_off)

        self.data_size = cur_off - off


class _DEX_TypeList(object):
    """A 4-byte count followed by that many 2-byte values."""
    typeItems = None

    data_size = None

    def parse(self, data, off):
        size = _to_uint(data[off:off + 4])
        first = off + 4
        self.typeItems = [_to_uint(data[pos:pos + 2])
                          for pos in range(first, first + size * 2, 2)]
        self.data_size = 4 + size * 2


class _DEX_TypeLists(object):
    """A run of `_DEX_TypeList` records, each starting on a 4-byte boundary."""
    typeLists = None

    data_size = None

    def parse(self, num, data, off):
        """Parse `num` type lists starting at `off`.

        `data_size` spans from `off` to the end of the last list, including
        alignment padding between lists; it is 0 when `num` is 0.
        """
        cur_off = off
        typeLists = []
        for _ in range(num):
            cur_off = (cur_off + 3) & ~0x3      # round up to multiple of 4
            typeList = _DEX_TypeList()
            typeList.parse(data, cur_off)
            cur_off = cur_off + typeList.data_size
            typeLists.append(typeList)

        self.typeLists = typeLists
        self.data_size = cur_off - off


class _DEX_Try(object):
    startAddr = None            # 4 bytes
    insnCount = None            # 2 bytes
    handlerOff = None           # 2 bytes

    data_size = 8

    def parse(self, data, off):
        self.startAddr = _to_uint(data[off:off + 4])
        self.insnCount = _to_uint(data[off + 4:off + 6])
        self.handlerOff = _to_uint(data[off + 6:off + 8])


class _DEX_CatchHandler(object):
    typeIdx = None
    address = None

    data_size = None

    def parse(self, data, off):
        """Parse a handler made of a type index and an address."""
        names = ('typeIdx', 'address')
        self.data_size = _parse_uleb128_fields(self, names, data, off,
                                               window=5)

    def parse1(self, data, off):
        """Parse a handler that only has an address (`typeIdx` is not set)."""
        self.address, sh = _uleb128(data[off:off + 5])
        self.data_size = sh


class _DEX_Catch(object):
    catchesAll = None
    handlers = None

    data_size = None

    def parse(self, data, off):
        """Parse a handler list starting at `off`.

        The list starts with a signed count.  A count of zero or less means
        that, after `-count` ordinary handlers, one extra address-only
        handler follows; `catchesAll` is then True.
        """
        moff = man_off(off)

        count, sh = _leb128(data[moff.off:moff.off + 5])
        moff(sh)

        self.catchesAll = not (count > 0)
        if self.catchesAll:
            count = -count

        handlers = []
        for _ in range(count):
            handler = _DEX_CatchHandler()
            handler.parse(data, moff.off)
            moff(handler.data_size)
            handlers.append(handler)

        if self.catchesAll:
            #
            # Additional handler for catching all
            #
            handler = _DEX_CatchHandler()
            handler.parse1(data, moff.off)
            moff(handler.data_size)
            handlers.append(handler)

        self.handlers = handlers
        self.data_size = moff.off - off


class _DEX_Code(object):
    registersSize = None        # 2 bytes
    insSize = None              # 2 bytes
    outsSize = None             # 2 bytes
    triesSize = None            # 2 bytes
    debugInfoOff = None         # 4 bytes
    insnsSize = None            # 4 bytes
    insns = None
    try_items = None
    catch_handler_items = None

    data_size = None

    def parse(self, data, off):
        """Parse one code item starting at `off`.

        The instruction array is skipped (`insns` is left untouched).
        `try_items` and `catch_handler_items` are only filled in when
        `triesSize` is positive.  `data_size` includes the padding that
        rounds the end of the item up to a multiple of 4.
        """
        moff = man_off(off)

        for name in ('registersSize', 'insSize', 'outsSize', 'triesSize'):
            start = moff(2)
            setattr(self, name, _to_uint(data[start:start + 2]))
        for name in ('debugInfoOff', 'insnsSize'):
            start = moff(4)
            setattr(self, name, _to_uint(data[start:start + 4]))

        moff(self.insnsSize * 2)        # skip insns (2 bytes per unit)

        #
        # No tries, no catch handlers
        #
        if self.triesSize > 0:
            # try items start on a 4-byte boundary
            moff.off = (moff.off + 3) & ~0x3

            try_items = []
            for _ in range(self.triesSize):
                try_item = _DEX_Try()
                try_item.parse(data, moff.off)
                moff(try_item.data_size)
                try_items.append(try_item)
            self.try_items = try_items

            handlersSize, sh = _uleb128(data[moff.off:moff.off + 5])
            moff(sh)

            catches = []
            for _ in range(handlersSize):
                catch = _DEX_Catch()
                catch.parse(data, moff.off)
                moff(catch.data_size)
                catches.append(catch)
            self.catch_handler_items = catches

        moff.off = (moff.off + 3) & ~0x3
        self.data_size = moff.off - off


class _DEX_AnnotationSetItem(object):
    size = None                 # 4 bytes
    annotations = None          # 4 * size bytes

    data_size = None

    def parse(self, data, off):
        self.size = _to_uint(data[off:off + 4])
        first = off + 4
        self.annotations = [_to_uint(data[pos:pos + 4])
                            for pos in range(first, first + 4 * self.size, 4)]
        self.data_size = 4 + 4 * self.size


class DEXFile(object):
    """In-memory view of a DEX file.

    Call `open()` with a file name or `parse()` with the raw bytes; the
    parsed sections are then available through the underscore attributes.
    Sections located through the map list (`_classDatas`, `_typeLists`,
    `_codeItems`, `_annotationSetItems`) are empty when the map list has no
    entry for them.
    """
    _data = None
    _header = None
    _maps = None
    _strings = None
    _typeIds = None
    _protoIds = None
    _fieldIds = None
    _methodIds = None
    _classDefs = None
    _classDatas = None
    _typeLists = None
    _codeItems = None
    _annotationSetItems = None

    def __init__(self):
        pass

    def open(self, filename):
        """Read `filename` as binary and parse its content."""
        # Binary mode: the content is raw bytes, not text.
        with open(filename, 'rb') as fo:
            data = fo.read()
        self.parse(data)

    def _parse_fixed_items(self, item_class, num, off):
        """Parse `num` back-to-back records of `item_class.data_size` bytes."""
        data = self._data
        item_size = item_class.data_size
        items = []
        for item_off in range(off, off + item_size * num, item_size):
            item = item_class()
            item.parse(data[item_off:item_off + item_size])
            items.append(item)
        return items

    def _lookup_map_item(self, type_name):
        """Return the first map item named `type_name`, or None if absent."""
        type_value = _DEX_MapItem.find_type_name(type_name)
        for item in self._maps:
            if item.type == type_value:
                return item
        return None

    def _parse_data_items(self, type_name, item_class):
        """Parse the variable-size records listed under map entry `type_name`.

        Records are parsed one after another, each starting where the
        previous one ended.  Returns an empty list when the map list has no
        such entry.
        """
        map_item = self._lookup_map_item(type_name)
        if map_item is None:
            return []

        data = self._data
        off = map_item.offset
        items = []
        for _ in range(map_item.size):
            item = item_class()
            item.parse(data, off)
            off = off + item.data_size
            items.append(item)
        return items

    def _parse_maps(self):
        data = self._data
        off = self._header.mapOff

        # The map list is a 4-byte count followed by the entries.
        num = _to_uint(data[off:off + 4])
        self._maps = self._parse_fixed_items(_DEX_MapItem, num, off + 4)

    def find_map_item(self, type_value):
        """Return the first map item whose type is `type_value`.

        Raises IndexError when no such item exists.
        """
        for item in self._maps:
            if item.type == type_value:
                return item
        raise IndexError('no map item of type 0x%04x' % (type_value,))

    def find_map_item_name(self, type_name):
        """Like `find_map_item`, but keyed by the type's symbolic name."""
        type_value = _DEX_MapItem.find_type_name(type_name)
        return self.find_map_item(type_value)

    def _parse_strings(self):
        data = self._data
        header = self._header

        strings = []
        off = header.stringIdsOff
        for _ in range(header.stringIdsSize):
            # Each string id is a 4-byte offset to the string data, which
            # runs up to (not including) the next zero byte.
            str_start_off = _to_uint(data[off:off + 4])
            str_stop_off = data.index(b'\x00', str_start_off)
            string = data[str_start_off:str_stop_off]

            # Drop the variable-length size prefix; keep the raw bytes.
            sz, sh = _uleb128(string)
            strings.append(string[sh:])
            off = off + 4

        self._strings = strings

    def _parse_typeIds(self):
        header = self._header
        self._typeIds = self._parse_fixed_items(
            _DEX_TypeId, header.typeIdsSize, header.typeIdsOff)

    def _parse_protoIds(self):
        header = self._header
        self._protoIds = self._parse_fixed_items(
            _DEX_ProtoId, header.protoIdsSize, header.protoIdsOff)

    def _parse_fieldIds(self):
        header = self._header
        self._fieldIds = self._parse_fixed_items(
            _DEX_FieldId, header.fieldIdsSize, header.fieldIdsOff)

    def _parse_methodIds(self):
        header = self._header
        self._methodIds = self._parse_fixed_items(
            _DEX_MethodId, header.methodIdsSize, header.methodIdsOff)

    def _parse_classDefs(self):
        header = self._header
        self._classDefs = self._parse_fixed_items(
            _DEX_ClassDef, header.classDefsSize, header.classDefsOff)

    def _parse_classDatas(self):
        self._classDatas = self._parse_data_items('kDexTypeClassDataItem',
                                                  _DEX_ClassData)

    def _parse_typeLists(self):
        typeList_map = self._lookup_map_item('kDexTypeTypeList')

        typeLists = _DEX_TypeLists()
        if typeList_map is None:
            # No type lists in this file: keep an empty container so that
            # `typeLists` / `data_size` are still usable.
            typeLists.parse(0, self._data, 0)
        else:
            typeLists.parse(typeList_map.size, self._data,
                            typeList_map.offset)
        self._typeLists = typeLists

    def _parse_codeItems(self):
        self._codeItems = self._parse_data_items('kDexTypeCodeItem',
                                                 _DEX_Code)

    def _parse_annotationSetItems(self):
        self._annotationSetItems = self._parse_data_items(
            'kDexTypeAnnotationSetItem', _DEX_AnnotationSetItem)

    def parse(self, data):
        """Parse a complete DEX image held in the byte string `data`."""
        self._data = data
        header = _DEX_header()
        header.parse(data)
        self._header = header

        # The map list must be parsed before the sections located through it.
        self._parse_maps()
        self._parse_strings()
        self._parse_typeIds()
        self._parse_protoIds()
        self._parse_fieldIds()
        self._parse_methodIds()
        self._parse_classDefs()
        self._parse_classDatas()
        self._parse_typeLists()
        self._parse_codeItems()
        self._parse_annotationSetItems()


def _main(filename='test.dex'):
    """Parse `filename` and print a summary of its content."""
    dex = DEXFile()
    dex.open(filename)

    print('Header')
    h = dex._header
    for attr in h.header_fields:
        print('\t%s: %s' % (attr, repr(getattr(h, attr))))

    print('')
    print('Define Classes')
    strings = dex._strings
    classDefs = dex._classDefs
    typeIds = dex._typeIds
    for classDef in classDefs:
        typeId = typeIds[classDef.classIdx]
        descriptor = strings[typeId.descriptorIdx]
        data_off = classDef.classDataOff
        print('\t%s @0x%x' % (descriptor, data_off))

    print('')
    print('Reference Classes')
    for typeId in typeIds:
        descriptor = strings[typeId.descriptorIdx]
        print('\t%s' % (descriptor))

    print('')
    print('Class data')
    methodIds = dex._methodIds
    classDatas = dex._classDatas
    for classData in classDatas:
        print('\tclass')
        for method in classData.directMethods + classData.virtualMethods:
            code_off = method.codeOff
            methodId = methodIds[method.methodIdx]
            name = strings[methodId.nameIdx]
            print('\t\t%s@0x%x' % (name, code_off))

    print('')
    print('TypeLists size is %d bytes' % (dex._typeLists.data_size))

    nbytes = sum([code.data_size for code in dex._codeItems])
    print('')
    print('CodeItems size is %d bytes' % (nbytes))

    nbytes = sum([annoset.data_size for annoset in dex._annotationSetItems])
    print('')
    print('AnnotationSetItems size is %d bytes' % (nbytes))

    print('')
    print('Data maps')
    for map_item in dex._maps:
        # Unknown type codes are reported instead of raising KeyError.
        type_name = map_item.types.get(map_item.type, 'unknown')
        print('\t0x%04x(%s) size=%d offset=0x%08x' %
              (map_item.type, type_name, map_item.size, map_item.offset))


if __name__ == '__main__':
    import sys

    # Optional first argument: the file to dump (defaults to test.dex).
    _main(*sys.argv[1:2])