# HG changeset patch
# User Thinker K.F. Li
# Date 1306156271 -28800
# Node ID add64d56b0e23e02637cbe36300e6a3bbddcb8a1
# Parent 05346b632adb6690d39f59f624cef4e66853267d
# Summary: adds _leb128 (sign-extends the value returned by _uleb128), a
# type-code -> name table on _DEX_MapItem, and parsers for class data, type
# lists, try items, catch handlers and code items; wires three new
# DEXFile._parse_* steps into DEXFile.parse; changes `self._maps = map` to
# `self._maps = maps`; extends the test dump output.
# NOTE(review): line breaks and leading indentation in this file were
# reconstructed from a whitespace-collapsed copy. Line counts were checked
# against every @@ header, but spacing inside lines (e.g. before trailing
# comments) could not be recovered -- apply with whitespace-insensitive
# matching and confirm against the repository.
Parse code items

diff -r 05346b632adb -r add64d56b0e2 paraspace/dexfile.py
--- a/paraspace/dexfile.py Sun May 22 02:16:20 2011 +0800
+++ b/paraspace/dexfile.py Mon May 23 21:11:11 2011 +0800
@@ -23,6 +23,13 @@
     dataSize = None # 0x68
     dataOff = None # 0x6c
 
+    header_fields = \
+        'magic checksum signature fileSize headerSize endianTag ' \
+        'linkSize linkOff mapOff stringIdsSize stringIdsOff typeIdsSize ' \
+        'typeIdsOff protoIdsSize protoIdsOff fieldIdsSize fieldIdsOff ' \
+        'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \
+        'dataSize dataOff'.split()
+
     def parse(self, data):
         self.magic = data[:8]
         self.checksum = data[8: 0x0c]
@@ -70,6 +77,14 @@
     return v, nbytes
 
 
+def _leb128(data):
+    v, sh = _uleb128(data)
+    if v & (1 << (sh * 7 - 1)):
+        v = -((1 << (sh * 7)) - v)
+        pass
+    return v, sh
+
+
 class _DEX_MapItem(object):
     type = None # 2 bytes
     unused = None # 2 bytes
@@ -77,6 +92,26 @@
     offset = None # 4 bytes
 
     data_size = 12
+    types = {
+        0x0000: 'kDexTypeHeaderItem',
+        0x0001: 'kDexTypeStringIdItem',
+        0x0002: 'kDexTypeTypeIdItem',
+        0x0003: 'kDexTypeProtoIdItem',
+        0x0004: 'kDexTypeFieldIdItem',
+        0x0005: 'kDexTypeMethodIdItem',
+        0x0006: 'kDexTypeClassDefItem',
+        0x1000: 'kDexTypeMapList',
+        0x1001: 'kDexTypeTypeList',
+        0x1002: 'kDexTypeAnnotationSetRefList',
+        0x1003: 'kDexTypeAnnotationSetItem',
+        0x2000: 'kDexTypeClassDataItem',
+        0x2001: 'kDexTypeCodeItem',
+        0x2002: 'kDexTypeStringDataItem',
+        0x2003: 'kDexTypeDebugInfoItem',
+        0x2004: 'kDexTypeAnnotationItem',
+        0x2005: 'kDexTypeEncodedArrayItem',
+        0x2006: 'kDexTypeAnnotationsDirectoryItem'
+        }
 
     def parse(self, data):
         self.type = _to_uint(data[:2])
@@ -167,6 +202,333 @@
     pass
 
 
+class _DEX_ClassDataHeader(object):
+    staticFieldsSize = None
+    instanceFieldsSize = None
+    directMethodsSize = None
+    virtualMethodsSize = None
+
+    data_size = None
+
+    def parse(self, data, off):
+        self.staticFieldsSize, sh = _uleb128(data[off:off + 10])
+        sz = sh
+        off = off + sh
+        self.instanceFieldsSize, sh = _uleb128(data[off:off + 10])
+        sz = sz + sh
+        off = off + sh
+        self.directMethodsSize, sh = _uleb128(data[off:off + 10])
+        sz = sz + sh
+        off = off + sh
+        self.virtualMethodsSize, sh = _uleb128(data[off:off + 10])
+        sz = sz + sh
+
+        self.data_size = sz
+        pass
+    pass
+
+
+class _DEX_Field(object):
+    fieldIdx = None
+    accessFlags = None
+
+    data_size = None
+
+    def parse(self, data, off):
+        self.fieldIdx, sh = _uleb128(data[off:off + 10])
+        sz = sh
+        off = off + sh
+        self.accessFlags, sh = _uleb128(data[off:off + 10])
+        sz = sz + sh
+
+        self.data_size = sz
+        pass
+    pass
+
+
+class _DEX_Method(object):
+    methodIdx = None
+    accessFlags = None
+    codeOff = None
+
+    data_size = None
+
+    def parse(self, data, off):
+        self.methodIdx, sh = _uleb128(data[off:off + 10])
+        sz = sh
+        off = off + sh
+
+        self.accessFlags, sh = _uleb128(data[off:off + 10])
+        sz = sz + sh
+        off = off + sh
+
+        self.codeOff, sh = _uleb128(data[off:off + 10])
+        sz = sz + sh
+
+        self.data_size = sz
+        pass
+    pass
+
+class _DEX_ClassData(object):
+    header = None # DexClassDataHeader
+    staticFields = None # DexField*
+    instanceFields = None # DexField*
+    directMethods = None # DexMethod*
+    virtualMethods = None # DexMethod*
+
+    data_size = None
+
+    def parse(self, data, off):
+        header = _DEX_ClassDataHeader()
+        header.parse(data, off)
+        self.header = header
+
+        cur_off = [off + header.data_size]
+
+        def parse_field():
+            field = _DEX_Field()
+            off = cur_off[0]
+            field.parse(data, off)
+            cur_off[0] = cur_off[0] + field.data_size
+
+            #
+            # field index depends previous one to reduce size
+            #
+            field.fieldIdx = field.fieldIdx + idx[0]
+            idx[0] = field.fieldIdx
+
+            return field
+
+        def parse_method():
+            method = _DEX_Method()
+            off = cur_off[0]
+            method.parse(data, off)
+            cur_off[0] = cur_off[0] + method.data_size
+
+            #
+            # method index depends previous one to reduce size
+            #
+            method.methodIdx = method.methodIdx + idx[0]
+            idx[0] = method.methodIdx
+
+            return method
+
+        idx = [0]
+        self.staticFields = [parse_field()
+                             for i in range(header.staticFieldsSize)]
+        idx = [0]
+        self.instanceFields = [parse_field()
+                               for i in range(header.instanceFieldsSize)]
+        idx = [0]
+        self.directMethods = [parse_method()
+                              for i in range(header.directMethodsSize)]
+        idx = [0]
+        self.virtualMethods = [parse_method()
+                               for i in range(header.virtualMethodsSize)]
+
+        self.data_size = cur_off[0] - off
+        pass
+    pass
+
+
+class _DEX_TypeList(object):
+    typeItems = None
+
+    data_size = None
+
+    def parse(self, data, off):
+        size = _to_uint(data[off:off + 4])
+
+        item_off = off + 4
+        typeItems = [_to_uint(data[cur_off:cur_off + 2])
+                     for cur_off in range(item_off, item_off + size * 2, 2)]
+        data_size = 4 + size * 2
+
+        self.typeItems = typeItems
+        self.data_size = data_size
+        pass
+    pass
+
+
+class _DEX_TypeLists(object):
+    typeLists = None
+
+    data_size = None
+
+    def parse(self, num, data, off):
+        def parse():
+            off = (cur_off[0] + 3) & ~0x3
+            typeList = _DEX_TypeList()
+            typeList.parse(data, off)
+            cur_off[0] = off + typeList.data_size
+
+            return typeList
+
+        cur_off = [off]
+        typeLists = [parse() for i in range(num)]
+
+        self.typeLists = typeLists
+        self.data_size = cur_off[0] - off
+        pass
+    pass
+
+
+class _DEX_Try(object):
+    startAddr = None # 4 bytes
+    insnCount = None # 2 bytes
+    handlerOff = None # 2 bytes
+
+    data_size = 8
+
+    def parse(self, data, off):
+        cur_off = off
+        self.startAddr = _to_uint(data[cur_off:cur_off + 4])
+        cur_off = cur_off + 4
+        self.insnCount = _to_uint(data[cur_off:cur_off + 2])
+        cur_off = cur_off + 2
+        self.handlerOff = _to_uint(data[cur_off:cur_off + 2])
+        pass
+    pass
+
+
+class _DEX_CatchHandler(object):
+    typeIdx = None
+    address = None
+
+    data_size = None
+
+    def parse(self, data, off):
+        cur_off = off
+        self.typeIdx, sh = _uleb128(data[cur_off:cur_off + 5])
+        cur_off = cur_off + sh
+        self.address, sh = _uleb128(data[cur_off:cur_off + 5])
+        cur_off = cur_off + sh
+
+        self.data_size = cur_off - off
+        pass
+
+    def parse1(self, data, off):
+        self.address, sh = _uleb128(data[off:off + 5])
+
+        self.data_size = sh
+        pass
+    pass
+
+
+class _DEX_Catch(object):
+    catchesAll = None
+    handlers = None
+
+    data_size = None
+
+    def parse(self, data, off):
+        def move_off(sz):
+            off = move_off.off
+            move_off.off = off + sz
+            return off
+        move_off.off = off
+        moff = move_off
+
+        count, sh = _leb128(data[moff.off:moff.off + 5])
+        moff(sh)
+
+        if count > 0:
+            self.catchesAll = False
+        else:
+            self.catchesAll = True
+            count = -count
+            pass
+
+        def parse_handler():
+            handler = _DEX_CatchHandler()
+            handler.parse(data, moff.off)
+            moff(handler.data_size)
+            return handler
+
+        self.handlers = [parse_handler() for i in range(count)]
+
+        if self.catchesAll:
+            #
+            # Additional handler for catching all
+            #
+            handler = _DEX_CatchHandler()
+            handler.parse1(data, moff.off)
+            moff(handler.data_size)
+            self.handlers.append(handler)
+            pass
+
+        self.data_size = moff.off - off
+        pass
+    pass
+
+
+class _DEX_Code(object):
+    registersSize = None # 2 bytes
+    insSize = None # 2 bytes
+    outsSize = None # 2 bytes
+    triesSize = None # 2 bytes
+    debugInfoOff = None # 4 bytes
+    insnsSize = None # 4 bytes
+    insns = None
+    try_items = None
+    catch_handler_items = None
+
+    data_size = None
+
+    def parse(self, data, off):
+        def move_off(sz):
+            off = move_off.off
+            move_off.off = off + sz
+            return off
+        move_off.off = off
+        moff = move_off
+
+        self.registersSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.insSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.outsSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.triesSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.debugInfoOff = _to_uint(data[moff.off:moff.off + 4])
+        moff(4)
+        self.insnsSize = _to_uint(data[moff.off:moff.off + 4])
+        moff(4)
+
+        moff(self.insnsSize * 2) # skip insns
+
+        if self.triesSize > 0:
+            def parse_try_item():
+                try_item = _DEX_Try()
+                try_item.parse(data, moff.off)
+                moff(try_item.data_size)
+                return try_item
+
+            moff.off = (moff.off + 3) & ~0x3
+            self.try_items = [parse_try_item() for i in range(self.triesSize)]
+
+            def parse_catch_handler():
+                catch = _DEX_Catch()
+                catch.parse(data, moff.off)
+                moff(catch.data_size)
+                return catch
+
+            #
+            # No tries, no catch handlers
+            #
+            handlersSize, sh = _uleb128(data[moff.off:moff.off + 5])
+            moff(sh)
+            self.catch_handler_items = [parse_catch_handler()
+                                        for i in range(handlersSize)]
+            pass
+
+        moff.off = (moff.off + 3) & ~0x3
+        self.data_size = moff.off - off
+        pass
+    pass
+
+
 class DEXFile(object):
     _data = None
     _header = None
@@ -177,6 +539,9 @@
     _fieldIds = None
     _methodIds = None
     _classDefs = None
+    _classDatas = None
+    _typeLists = None
+    _codeItems = None
 
     def __init__(self):
         pass
@@ -205,7 +570,7 @@
             off = off + _DEX_MapItem.data_size
             pass
 
-        self._maps = map
+        self._maps = maps
         pass
 
     def _parse_strings(self):
@@ -334,6 +699,68 @@
         self._classDefs = classDefs
         pass
 
+    def _parse_classDatas(self):
+        header = self._header
+        data = self._data
+        maps = self._maps
+
+        kDexTypeClassDataItem = [type
+                                 for type, name in _DEX_MapItem.types.items()
+                                 if name == 'kDexTypeClassDataItem'][0]
+        class_data_map = [map
+                          for map in maps
+                          if map.type == kDexTypeClassDataItem][0]
+
+        off = [class_data_map.offset]
+        def parse_class_data():
+            class_data = _DEX_ClassData()
+            class_data.parse(data, off[0])
+            off[0] = off[0] + class_data.data_size
+            return class_data
+        class_datas = [parse_class_data() for i in range(class_data_map.size)]
+
+        self._classDatas = class_datas
+        pass
+
+    def _parse_typeLists(self):
+        maps = self._maps
+        data = self._data
+
+        kDexTypeTypeList = [type
+                            for type, name in _DEX_MapItem.types.items()
+                            if name == 'kDexTypeTypeList'][0]
+        typeList_map = [map for map in maps if map.type == kDexTypeTypeList][0]
+        num_typeLists = typeList_map.size
+
+        typeLists = _DEX_TypeLists()
+        typeLists.parse(num_typeLists, data, typeList_map.offset)
+
+        self._typeLists = typeLists
+        pass
+
+    def _parse_codeItems(self):
+        maps = self._maps
+        data = self._data
+
+        kDexTypeCodeItem = [type
+                            for type, name in _DEX_MapItem.types.items()
+                            if name == 'kDexTypeCodeItem'][0]
+        codeItem_map = [map for map in maps if map.type == kDexTypeCodeItem][0]
+        num_codeItems = codeItem_map.size
+
+        cur_off = [codeItem_map.offset]
+        def parse_code():
+            off = cur_off[0]
+            code = _DEX_Code()
+            code.parse(data, off)
+            cur_off[0] = off + code.data_size
+            return code
+
+        codeItems = [parse_code() for i in range(num_codeItems)]
+
+        self._codeItems = codeItems
+        pass
+
     def parse(self, data):
         self._data = data
         header = _DEX_header()
@@ -347,6 +774,9 @@
         self._parse_fieldIds()
         self._parse_methodIds()
         self._parse_classDefs()
+        self._parse_classDatas()
+        self._parse_typeLists()
+        self._parse_codeItems()
         pass
     pass
 
@@ -354,9 +784,10 @@
     dex = DEXFile()
     dex.open('test.dex')
 
+    print 'Header'
     h = dex._header
-    for attr in h.__dict__.keys():
-        print '%s: %s' % (attr, repr(h.__dict__[attr]))
+    for attr in h.header_fields:
+        print '\t%s: %s' % (attr, repr(getattr(h, attr)))
         pass
 
     print
@@ -367,13 +798,51 @@
     for classDef in classDefs:
         typeId = typeIds[classDef.classIdx]
         descriptor = strings[typeId.descriptorIdx]
-        print descriptor
+        data_off = classDef.classDataOff
+        print '\t%s @0x%x' % (descriptor, data_off)
         pass
 
     print
     print 'Reference Classes'
     for typeId in typeIds:
         descriptor = strings[typeId.descriptorIdx]
-        print descriptor
+        print '\t%s' % (descriptor)
+        pass
+
+    print
+    print 'Class data'
+    methodIds = dex._methodIds
+    classDatas = dex._classDatas
+    for classData in classDatas:
+        print '\tclass'
+        for method in classData.directMethods:
+            code_off = method.codeOff
+            methodId = methodIds[method.methodIdx]
+            name = strings[methodId.nameIdx]
+            print '\t\t%s@0x%x' % (name, code_off)
+            pass
+        for method in classData.virtualMethods:
+            code_off = method.codeOff
+            methodId = methodIds[method.methodIdx]
+            name = strings[methodId.nameIdx]
+            print '\t\t%s@0x%x' % (name, code_off)
+            pass
+        pass
+
+    print
+    print 'TypeLists size is %d bytes' % (dex._typeLists.data_size)
+
+    bytes = sum([code.data_size for code in dex._codeItems])
+    print
+    print 'CodeItems size is %d bytes' % (bytes)
+
+    print
+    print 'Data maps'
+    maps = dex._maps
+    for map in maps:
+        print '\t0x%04x(%s) size=%d offset=0x%08x' % (map.type,
+                                                      map.types[map.type],
+                                                      map.size,
+                                                      map.offset)
         pass
     pass