changeset 2:add64d56b0e2

Parse code items
author Thinker K.F. Li <thinker@codemud.net>
date Mon, 23 May 2011 21:11:11 +0800
parents 05346b632adb
children a78db169b0c8
files paraspace/dexfile.py
diffstat 1 files changed, 474 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/paraspace/dexfile.py	Sun May 22 02:16:20 2011 +0800
+++ b/paraspace/dexfile.py	Mon May 23 21:11:11 2011 +0800
@@ -23,6 +23,13 @@
     dataSize = None             # 0x68
     dataOff = None              # 0x6c
 
+    header_fields = \
+        'magic checksum signature fileSize headerSize endianTag ' \
+        'linkSize linkOff mapOff stringIdsSize stringIdsOff typeIdsSize ' \
+        'typeIdsOff protoIdsSize protoIdsOff fieldIdsSize fieldIdsOff ' \
+        'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \
+        'dataSize dataOff'.split()
+
     def parse(self, data):
         self.magic = data[:8]
         self.checksum = data[8: 0x0c]
@@ -70,6 +77,14 @@
     return v, nbytes
 
 
+def _leb128(data):
+    # Signed LEB128: decode as unsigned, then sign-extend from the top bit.
+    value, nbytes = _uleb128(data)
+    if value & (1 << (nbytes * 7 - 1)):
+        value -= 1 << (nbytes * 7)
+    return value, nbytes
+
+
 class _DEX_MapItem(object):
     type = None                 # 2 bytes
     unused = None               # 2 bytes
@@ -77,6 +92,26 @@
     offset = None               # 4 bytes
 
     data_size = 12
+    types = {
+        0x0000: 'kDexTypeHeaderItem',
+        0x0001: 'kDexTypeStringIdItem',
+        0x0002: 'kDexTypeTypeIdItem',
+        0x0003: 'kDexTypeProtoIdItem',
+        0x0004: 'kDexTypeFieldIdItem',
+        0x0005: 'kDexTypeMethodIdItem',
+        0x0006: 'kDexTypeClassDefItem',
+        0x1000: 'kDexTypeMapList',
+        0x1001: 'kDexTypeTypeList',
+        0x1002: 'kDexTypeAnnotationSetRefList',
+        0x1003: 'kDexTypeAnnotationSetItem',
+        0x2000: 'kDexTypeClassDataItem',
+        0x2001: 'kDexTypeCodeItem',
+        0x2002: 'kDexTypeStringDataItem',
+        0x2003: 'kDexTypeDebugInfoItem',
+        0x2004: 'kDexTypeAnnotationItem',
+        0x2005: 'kDexTypeEncodedArrayItem',
+        0x2006: 'kDexTypeAnnotationsDirectoryItem'
+        }
 
     def parse(self, data):
         self.type = _to_uint(data[:2])
@@ -167,6 +202,333 @@
     pass
 
 
+class _DEX_ClassDataHeader(object):
+    # Leading part of a class-data record: four ULEB128-encoded counts
+    # (static fields, instance fields, direct methods, virtual methods).
+    staticFieldsSize = None
+    instanceFieldsSize = None
+    directMethodsSize = None
+    virtualMethodsSize = None
+
+    data_size = None            # bytes consumed by the last parse()
+
+    def parse(self, data, off):
+        # Decode the four counts stored back to back in `data`, beginning
+        # at byte offset `off`.  Each count is cut from a 10-byte window
+        # and the cursor moves on by the length _uleb128() reports.
+        cur = off
+        for name in ('staticFieldsSize', 'instanceFieldsSize',
+                     'directMethodsSize', 'virtualMethodsSize'):
+            value, nbytes = _uleb128(data[cur:cur + 10])
+            setattr(self, name, value)
+            cur = cur + nbytes
+
+        self.data_size = cur - off
+        pass
+    pass
+
+
+class _DEX_Field(object):
+    # One field entry of a class-data record: two ULEB128 numbers.
+    fieldIdx = None
+    accessFlags = None
+
+    data_size = None            # bytes consumed by the last parse()
+
+    def parse(self, data, off):
+        # fieldIdx is stored first; accessFlags follows it immediately.
+        self.fieldIdx, idx_len = _uleb128(data[off:off + 10])
+        flags_off = off + idx_len
+        flags_win = data[flags_off:flags_off + 10]
+        self.accessFlags, flags_len = _uleb128(flags_win)
+        self.data_size = idx_len + flags_len
+        pass
+    pass
+
+
+class _DEX_Method(object):
+    # One method entry of a class-data record: three ULEB128 numbers.
+    methodIdx = None
+    accessFlags = None
+    codeOff = None
+
+    data_size = None            # bytes consumed by the last parse()
+
+    def parse(self, data, off):
+        # The three numbers sit back to back starting at `off`; they are
+        # decoded in storage order: methodIdx, accessFlags, codeOff.
+        # Each is cut from a 10-byte window and the cursor advances by
+        # the length _uleb128() reports for it.
+        cur = off
+        for name in ('methodIdx', 'accessFlags', 'codeOff'):
+            value, nbytes = _uleb128(data[cur:cur + 10])
+            setattr(self, name, value)
+            cur = cur + nbytes
+
+        self.data_size = cur - off
+        pass
+    pass
+
+class _DEX_ClassData(object):     # one parsed class-data record
+    header = None               # DexClassDataHeader
+    staticFields = None         # DexField*
+    instanceFields = None       # DexField*
+    directMethods = None        # DexMethod*
+    virtualMethods = None       # DexMethod*
+
+    data_size = None            # bytes covered by the last parse()
+
+    def parse(self, data, off):     # header first, then the four member lists
+        header = _DEX_ClassDataHeader()
+        header.parse(data, off)
+        self.header = header
+        # Read position, boxed in a list so the helpers below can move it.
+        cur_off = [off + header.data_size]
+        
+        def parse_field():
+            field = _DEX_Field()
+            off = cur_off[0]
+            field.parse(data, off)
+            cur_off[0] = cur_off[0] + field.data_size
+            
+            # The stored value is the distance from the previous index of
+            # the same list; adding idx[0] turns it into the real index,
+            # which then becomes the base for the next entry.
+            field.fieldIdx = field.fieldIdx + idx[0]
+            idx[0] = field.fieldIdx
+            
+            return field
+
+        def parse_method():
+            method = _DEX_Method()
+            off = cur_off[0]
+            method.parse(data, off)
+            cur_off[0] = cur_off[0] + method.data_size
+
+            # The stored value is the distance from the previous index of
+            # the same list; adding idx[0] turns it into the real index,
+            # which then becomes the base for the next entry.
+            method.methodIdx = method.methodIdx + idx[0]
+            idx[0] = method.methodIdx
+            
+            return method
+        # Each list restarts from index 0: idx is rebound to [0] before it.
+        idx = [0]
+        self.staticFields = [parse_field()
+                             for i in range(header.staticFieldsSize)]
+        idx = [0]
+        self.instanceFields = [parse_field()
+                               for i in range(header.instanceFieldsSize)]
+        idx = [0]
+        self.directMethods = [parse_method()
+                              for i in range(header.directMethodsSize)]
+        idx = [0]
+        self.virtualMethods = [parse_method()
+                               for i in range(header.virtualMethodsSize)]
+
+        self.data_size = cur_off[0] - off   # header plus all entries
+        pass
+    pass
+
+
+class _DEX_TypeList(object):
+    # A counted list of 2-byte items: a 4-byte count, then the items.
+    typeItems = None
+
+    data_size = None            # bytes covered by the last parse()
+
+    def parse(self, data, off):
+        # The count is read from the four bytes at `off`; each item is
+        # decoded with _to_uint() from the next two bytes in turn.
+        count = _to_uint(data[off:off + 4])
+        first = off + 4
+        last = first + count * 2
+        self.typeItems = [_to_uint(data[pos:pos + 2])
+                          for pos in range(first, last, 2)]
+        self.data_size = last - off
+        pass
+    pass
+
+
+class _DEX_TypeLists(object):
+    # A run of `num` _DEX_TypeList records stored one after another.
+    typeLists = None
+
+    data_size = None            # bytes covered by the last parse()
+
+    def parse(self, num, data, off):
+        # The cursor is rounded up to a multiple of four before each
+        # record is parsed; the skipped padding is counted in data_size.
+        parsed = []
+        cursor = off
+        for _ in range(num):
+            start = (cursor + 3) & ~0x3
+            type_list = _DEX_TypeList()
+            type_list.parse(data, start)
+            parsed.append(type_list)
+            cursor = start + type_list.data_size
+        self.typeLists = parsed
+        self.data_size = cursor - off
+        pass
+    pass
+
+
+class _DEX_Try(object):
+    # Fixed-size try record; the fields are stored in this order.
+    startAddr = None            # 4 bytes
+    insnCount = None            # 2 bytes
+    handlerOff = None           # 2 bytes
+
+    data_size = 8
+
+    def parse(self, data, off):
+        count_off = off + 4             # insnCount follows startAddr
+        handler_off = off + 6           # handlerOff follows insnCount
+        self.startAddr = _to_uint(data[off:count_off])
+        self.insnCount = _to_uint(data[count_off:handler_off])
+        self.handlerOff = _to_uint(data[handler_off:off + 8])
+        pass
+    pass
+
+
+class _DEX_CatchHandler(object):
+    # One catch entry: type index + handler address (parse1: address only).
+    typeIdx = None
+    address = None
+
+    data_size = None            # bytes consumed by the last parse
+
+    def parse(self, data, off):
+        # Typed form: ULEB128 typeIdx immediately followed by address.
+        self.typeIdx, type_len = _uleb128(data[off:off + 5])
+        addr_off = off + type_len
+        self.address, addr_len = _uleb128(data[addr_off:addr_off + 5])
+        self.data_size = type_len + addr_len
+        pass
+
+    def parse1(self, data, off):
+        # Address-only form; typeIdx is left untouched.
+        address, nbytes = _uleb128(data[off:off + 5])
+        self.address = address
+        self.data_size = nbytes
+        pass
+    pass
+
+
+class _DEX_Catch(object):         # one group of catch handlers
+    catchesAll = None           # True if a catch-all handler was appended
+    handlers = None             # list of _DEX_CatchHandler
+    
+    data_size = None            # bytes covered by the last parse()
+
+    def parse(self, data, off):
+        def move_off(sz):               # advance the cursor by sz bytes
+            off = move_off.off
+            move_off.off = off + sz
+            return off                  # position before the move
+        move_off.off = off              # the cursor lives on the function object
+        moff = move_off
+        # The record opens with a signed LEB128 count.
+        count, sh = _leb128(data[moff.off:moff.off + 5])
+        moff(sh)
+        # count <= 0 stands for -count typed handlers plus a catch-all one.
+        if count > 0:
+            self.catchesAll = False
+        else:
+            self.catchesAll = True
+            count = -count
+            pass
+
+        def parse_handler():
+            handler = _DEX_CatchHandler()
+            handler.parse(data, moff.off)
+            moff(handler.data_size)
+            return handler
+        
+        self.handlers = [parse_handler() for i in range(count)]
+
+        if self.catchesAll:
+            #
+            # The catch-all handler comes last and stores an address only.
+            #
+            handler = _DEX_CatchHandler()
+            handler.parse1(data, moff.off)
+            moff(handler.data_size)
+            self.handlers.append(handler)
+            pass
+        
+        self.data_size = moff.off - off
+        pass
+    pass
+
+
+class _DEX_Code(object):          # one parsed code item
+    registersSize = None        # 2 bytes
+    insSize = None              # 2 bytes
+    outsSize = None             # 2 bytes
+    triesSize = None            # 2 bytes
+    debugInfoOff = None         # 4 bytes
+    insnsSize = None            # 4 bytes
+    insns = None                # not filled in: parse() skips the code
+    try_items = None            # list of _DEX_Try; set only if triesSize > 0
+    catch_handler_items = None  # list of _DEX_Catch; set only if triesSize > 0
+
+    data_size = None            # bytes covered, alignment padding included
+
+    def parse(self, data, off):
+        def move_off(sz):               # advance the cursor by sz bytes
+            off = move_off.off
+            move_off.off = off + sz
+            return off                  # position before the move
+        move_off.off = off              # the cursor lives on the function object
+        moff = move_off
+        # Fixed-size part: four 2-byte fields, then two 4-byte fields.
+        self.registersSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.insSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.outsSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.triesSize = _to_uint(data[moff.off:moff.off + 2])
+        moff(2)
+        self.debugInfoOff = _to_uint(data[moff.off:moff.off + 4])
+        moff(4)
+        self.insnsSize = _to_uint(data[moff.off:moff.off + 4])
+        moff(4)
+        # insnsSize counts 2-byte units, hence the factor of two.
+        moff(self.insnsSize * 2) # skip insns
+        
+        if self.triesSize > 0:
+            def parse_try_item():
+                try_item = _DEX_Try()
+                try_item.parse(data, moff.off)
+                moff(try_item.data_size)
+                return try_item
+            # Try items start at the next multiple of four.
+            moff.off = (moff.off + 3) & ~0x3
+            self.try_items = [parse_try_item() for i in range(self.triesSize)]
+            
+            def parse_catch_handler():
+                catch = _DEX_Catch()
+                catch.parse(data, moff.off)
+                moff(catch.data_size)
+                return catch
+            
+            #
+            # The handler list is present only when there are tries.
+            #
+            handlersSize, sh = _uleb128(data[moff.off:moff.off + 5])
+            moff(sh)
+            self.catch_handler_items = [parse_catch_handler()
+                                        for i in range(handlersSize)]
+            pass
+        # Round the end up to a multiple of four; data_size includes the gap.
+        moff.off = (moff.off + 3) & ~0x3
+        self.data_size = moff.off - off
+        pass
+    pass
+
+
 class DEXFile(object):
     _data = None
     _header = None
@@ -177,6 +539,9 @@
     _fieldIds = None
     _methodIds = None
     _classDefs = None
+    _classDatas = None
+    _typeLists = None
+    _codeItems = None
     
     def __init__(self):
         pass
@@ -205,7 +570,7 @@
             off = off + _DEX_MapItem.data_size
             pass
 
-        self._maps = map
+        self._maps = maps
         pass
 
     def _parse_strings(self):
@@ -334,6 +699,68 @@
         self._classDefs = classDefs
         pass
 
+    def _parse_classDatas(self):
+        # Parse the class-data records named by the map list into
+        # self._classDatas.  Records are read back to back, each one
+        # starting where the previous one ended.  A missing map entry
+        # gives an empty list, not an IndexError.
+        kDexTypeClassDataItem = [type_id
+                                 for type_id, name in _DEX_MapItem.types.items()
+                                 if name == 'kDexTypeClassDataItem'][0]
+        found = [m for m in self._maps if m.type == kDexTypeClassDataItem]
+        num_classDatas, off = 0, 0
+        if found:
+            num_classDatas, off = found[0].size, found[0].offset
+
+        class_datas = []
+        for _ in range(num_classDatas):
+            class_data = _DEX_ClassData()
+            class_data.parse(self._data, off)
+            off = off + class_data.data_size
+            class_datas.append(class_data)
+
+        self._classDatas = class_datas
+        pass
+
+    def _parse_typeLists(self):
+        # Parse the type lists named by the map list into self._typeLists; a
+        # missing map entry gives an empty _DEX_TypeLists, not an IndexError.
+        kDexTypeTypeList = [type_id
+                            for type_id, name in _DEX_MapItem.types.items()
+                            if name == 'kDexTypeTypeList'][0]
+        found = [m for m in self._maps if m.type == kDexTypeTypeList]
+        num_typeLists, off = 0, 0
+        if found:
+            num_typeLists, off = found[0].size, found[0].offset
+
+        typeLists = _DEX_TypeLists()
+        typeLists.parse(num_typeLists, self._data, off)
+        self._typeLists = typeLists
+        pass
+
+    def _parse_codeItems(self):
+        # Parse the code items named by the map list into self._codeItems.
+        # Items are read back to back (_DEX_Code.data_size includes its
+        # alignment padding).  A missing map entry gives an empty list,
+        # not an IndexError.
+        kDexTypeCodeItem = [type_id
+                            for type_id, name in _DEX_MapItem.types.items()
+                            if name == 'kDexTypeCodeItem'][0]
+        found = [m for m in self._maps if m.type == kDexTypeCodeItem]
+        num_codeItems, off = 0, 0
+        if found:
+            num_codeItems, off = found[0].size, found[0].offset
+
+        codeItems = []
+        for _ in range(num_codeItems):
+            code = _DEX_Code()
+            code.parse(self._data, off)
+            off = off + code.data_size
+            codeItems.append(code)
+
+        self._codeItems = codeItems
+        pass
+
     def parse(self, data):
         self._data = data
         header = _DEX_header()
@@ -347,6 +774,9 @@
         self._parse_fieldIds()
         self._parse_methodIds()
         self._parse_classDefs()
+        self._parse_classDatas()
+        self._parse_typeLists()
+        self._parse_codeItems()
         pass
     pass
 
@@ -354,9 +784,10 @@
     dex = DEXFile()
     dex.open('test.dex')
     
+    print 'Header'
     h = dex._header
-    for attr in h.__dict__.keys():
-        print '%s: %s' % (attr, repr(h.__dict__[attr]))
+    for attr in h.header_fields:
+        print '\t%s: %s' % (attr, repr(getattr(h, attr)))
         pass
 
     print
@@ -367,13 +798,51 @@
     for classDef in  classDefs:
         typeId = typeIds[classDef.classIdx]
         descriptor = strings[typeId.descriptorIdx]
-        print descriptor
+        data_off = classDef.classDataOff
+        print '\t%s @0x%x' % (descriptor, data_off)
         pass
 
     print
     print 'Reference Classes'
     for typeId in typeIds:
         descriptor = strings[typeId.descriptorIdx]
-        print descriptor
+        print '\t%s' % (descriptor)
+        pass
+
+    print
+    print 'Class data'
+    methodIds = dex._methodIds
+    classDatas = dex._classDatas
+    for classData in classDatas:
+        print '\tclass'
+        for method in classData.directMethods:
+            code_off = method.codeOff
+            methodId = methodIds[method.methodIdx]
+            name = strings[methodId.nameIdx]
+            print '\t\t%s@0x%x' % (name, code_off)
+            pass
+        for method in classData.virtualMethods:
+            code_off = method.codeOff
+            methodId = methodIds[method.methodIdx]
+            name = strings[methodId.nameIdx]
+            print '\t\t%s@0x%x' % (name, code_off)
+            pass
+        pass
+
+    print
+    print 'TypeLists size is %d bytes' % (dex._typeLists.data_size)
+
+    bytes = sum([code.data_size for code in dex._codeItems])
+    print
+    print 'CodeItems size is %d bytes' % (bytes)
+    
+    print
+    print 'Data maps'
+    maps = dex._maps
+    for map in maps:
+        print '\t0x%04x(%s) size=%d offset=0x%08x' % (map.type,
+                                                      map.types[map.type],
+                                                      map.size,
+                                                      map.offset)
         pass
     pass