changeset 1:05346b632adb

Parse records mentioned in file header
author Thinker K.F. Li <thinker@codemud.net>
date Sun, 22 May 2011 02:16:20 +0800
parents 31050a971b52
children add64d56b0e2
files paraspace/dexfile.py
diffstat 1 files changed, 216 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/paraspace/dexfile.py	Sun May 22 00:11:57 2011 +0800
+++ b/paraspace/dexfile.py	Sun May 22 02:16:20 2011 +0800
@@ -46,12 +46,30 @@
 
 def _to_uint(data):
     v = 0
+    sh = 0                      # bit position of the next byte
     for c in data:
-        v = (v << 8) + ord(c)
+        v = v + (ord(c) << sh)  # first byte is the least significant
+        sh = sh + 8
         pass
     return v
 
 
+def _uleb128(data):
+    """Decode the unsigned LEB128 number at the start of data.
+
+    Returns a (value, nbytes) tuple; nbytes is the number of bytes consumed.
+    """
+    sh = 0
+    v = 0
+    for c in data:
+        cv = ord(c)
+        v = v + ((cv & 0x7f) << sh)
+        sh = sh + 7
+        if cv <= 0x7f:          # high bit clear: last byte of the number
+            break
+    return v, sh // 7           # sh grew by 7 per byte; keep nbytes an int
+
+
 class _DEX_MapItem(object):
     type = None                 # 2 bytes
     unused = None               # 2 bytes
@@ -67,6 +85,7 @@
         pass
     pass
 
+
 class _DEX_TypeId(object):
     descriptorIdx = None       # 4 bytes
     
@@ -94,9 +113,9 @@
 
 
 class _DEX_FieldId(object):
-    classIdx                    # 2 bytes
-    typeIdx                     # 2 bytes
-    nameIdx                     # 4 bytes
+    classIdx = None             # 2 bytes
+    typeIdx = None              # 2 bytes
+    nameIdx = None              # 4 bytes
     
     data_size = 8
 
@@ -109,9 +128,9 @@
 
 
 class _DEX_MethodId(object):
-    classIdx                    # 2 bytes
-    protoIdx                    # 2 bytes
-    nameIdx                     # 4 bytes
+    classIdx = None             # 2 bytes
+    protoIdx = None             # 2 bytes
+    nameIdx = None              # 4 bytes
     
     data_size = 8
 
@@ -122,6 +141,7 @@
         pass
     pass
 
+
 class _DEX_ClassDef(object):
     classIdx = None             # 0x00
     accessFlags = None          # 0x04
@@ -150,22 +170,210 @@
 class DEXFile(object):
     _data = None
     _header = None
+    _maps = None
     _strings = None
+    _typeIds = None
+    _protoIds = None
+    _fieldIds = None
+    _methodIds = None
+    _classDefs = None
     
     def __init__(self):
         pass
 
     def open(self, filename):
-        fo = file(filename)
+        fo = file(filename, 'rb')   # binary mode: contents are parsed as raw bytes
         data = fo.read()
 
         self.parse(data)
         pass
 
+    def _parse_maps(self):
+        """Parse the map list found at header.mapOff into self._maps.
+
+        The list is a 4-byte item count followed by that many
+        fixed-size _DEX_MapItem records.
+        """
+        data = self._data
+        off = self._header.mapOff
+        item_size = _DEX_MapItem.data_size
+
+        num = _to_uint(data[off:off + 4])
+        off = off + 4
+        maps = []
+        for i in range(num):
+            item = _DEX_MapItem()
+            item.parse(data[off:off + item_size])
+            maps.append(item)
+            off = off + item_size
+        self._maps = maps       # the parsed list, not the builtin map()
+
+    def _parse_strings(self):
+        """Collect the string of every string id into self._strings.
+
+        Each 4-byte id holds the offset of a string: a uleb128 prefix,
+        which is dropped, followed by the bytes up to the first NUL.
+        """
+        data = self._data
+        header = self._header
+        first = header.stringIdsOff
+        last = first + 4 * header.stringIdsSize
+
+        strings = []
+        for id_off in range(first, last, 4):
+            start = _to_uint(data[id_off:id_off + 4])
+            stop = data.index('\x00', start)
+            raw = data[start:stop]
+            sz, prefix_len = _uleb128(raw)
+            strings.append(raw[prefix_len:])
+
+        self._strings = strings
+
+    def _parse_typeIds(self):
+        """Parse the type id table into self._typeIds.
+
+        The table holds header.typeIdsSize records of
+        _DEX_TypeId.data_size bytes each, starting at
+        header.typeIdsOff.
+        """
+        data = self._data
+        header = self._header
+        size = _DEX_TypeId.data_size
+        start = header.typeIdsOff
+        stop = start + size * header.typeIdsSize
+
+        typeIds = []
+        for pos in range(start, stop, size):
+            type_id = _DEX_TypeId()
+            type_id.parse(data[pos:pos + size])
+            typeIds.append(type_id)
+
+        self._typeIds = typeIds
+
+    def _parse_protoIds(self):
+        """Parse the proto id table into self._protoIds.
+
+        The table holds header.protoIdsSize records of
+        _DEX_ProtoId.data_size bytes each, starting at
+        header.protoIdsOff.
+        """
+        data = self._data
+        header = self._header
+        size = _DEX_ProtoId.data_size
+        start = header.protoIdsOff
+        stop = start + size * header.protoIdsSize
+
+        protoIds = []
+        for pos in range(start, stop, size):
+            proto_id = _DEX_ProtoId()
+            proto_id.parse(data[pos:pos + size])
+            protoIds.append(proto_id)
+
+        self._protoIds = protoIds
+
+    def _parse_fieldIds(self):
+        """Parse the field id table into self._fieldIds.
+
+        The table holds header.fieldIdsSize records of
+        _DEX_FieldId.data_size bytes each, starting at
+        header.fieldIdsOff.
+        """
+        data = self._data
+        header = self._header
+        size = _DEX_FieldId.data_size
+        start = header.fieldIdsOff
+        stop = start + size * header.fieldIdsSize
+
+        fieldIds = []
+        for pos in range(start, stop, size):
+            field_id = _DEX_FieldId()
+            field_id.parse(data[pos:pos + size])
+            fieldIds.append(field_id)
+
+        self._fieldIds = fieldIds
+
+    def _parse_methodIds(self):
+        """Parse the method id table into self._methodIds.
+
+        The table holds header.methodIdsSize records of
+        _DEX_MethodId.data_size bytes each, starting at
+        header.methodIdsOff.
+        """
+        data = self._data
+        header = self._header
+        size = _DEX_MethodId.data_size
+        start = header.methodIdsOff
+        stop = start + size * header.methodIdsSize
+
+        methodIds = []
+        for pos in range(start, stop, size):
+            method_id = _DEX_MethodId()
+            method_id.parse(data[pos:pos + size])
+            methodIds.append(method_id)
+
+        self._methodIds = methodIds
+
+    def _parse_classDefs(self):
+        """Parse the class definition table into self._classDefs.
+
+        The table holds header.classDefsSize records of
+        _DEX_ClassDef.data_size bytes each, starting at
+        header.classDefsOff.
+        """
+        data = self._data
+        header = self._header
+        size = _DEX_ClassDef.data_size
+        start = header.classDefsOff
+        stop = start + size * header.classDefsSize
+
+        classDefs = []
+        for pos in range(start, stop, size):
+            class_def = _DEX_ClassDef()
+            class_def.parse(data[pos:pos + size])
+            classDefs.append(class_def)
+
+        self._classDefs = classDefs
+
     def parse(self, data):
         self._data = data
         header = _DEX_header()
         header.parse(data)
         self._header = header
+        # Each _parse_*() step below reads self._data and self._header.
+        self._parse_maps()
+        self._parse_strings()
+        self._parse_typeIds()
+        self._parse_protoIds()
+        self._parse_fieldIds()
+        self._parse_methodIds()
+        self._parse_classDefs()
         pass
     pass
+
+if __name__ == '__main__':
+    # Smoke test: dump the header fields and the class descriptors
+    # of test.dex.
+    dex = DEXFile()
+    dex.open('test.dex')
+
+    header_fields = dex._header.__dict__
+    for attr in header_fields.keys():
+        print '%s: %s' % (attr, repr(header_fields[attr]))
+
+    strings = dex._strings
+    typeIds = dex._typeIds
+
+    def descriptor_of(typeId):
+        # A type id refers to its descriptor by string index.
+        return strings[typeId.descriptorIdx]
+
+    print
+    print 'Define Classes'
+    for classDef in dex._classDefs:
+        print descriptor_of(typeIds[classDef.classIdx])
+
+    print
+    print 'Reference Classes'
+    for typeId in typeIds:
+        print descriptor_of(typeId)
+    pass