view paraspace/dexfile.py @ 1:05346b632adb

Parse records mentioned in file header
author Thinker K.F. Li <thinker@codemud.net>
date Sun, 22 May 2011 02:16:20 +0800
parents 31050a971b52
children add64d56b0e2
line wrap: on
line source

class _DEX_header(object):
    """Header record located at the very start of the DEX data.

    The comment beside each attribute gives its byte offset inside the
    header.  ``magic``, ``checksum`` and ``signature`` are kept as raw
    slices of the input; every attribute from ``fileSize`` onward is a
    4-byte value decoded with ``_to_uint``.
    """
    magic = None                # 0x00, 8 bytes
    checksum = None             # 0x08, 4 bytes
    signature = None            # 0x0c, 20 bytes
    fileSize = None             # 0x20, 4 bytes
    headerSize = None           # 0x24
    endianTag = None            # 0x28
    linkSize = None             # 0x2c
    linkOff = None              # 0x30
    mapOff = None               # 0x34
    stringIdsSize = None        # 0x38
    stringIdsOff = None         # 0x3c
    typeIdsSize = None          # 0x40
    typeIdsOff = None           # 0x44
    protoIdsSize = None         # 0x48
    protoIdsOff = None          # 0x4c
    fieldIdsSize = None         # 0x50
    fieldIdsOff = None          # 0x54
    methodIdsSize = None        # 0x58
    methodIdsOff = None         # 0x5c
    classDefsSize = None        # 0x60
    classDefsOff = None         # 0x64
    dataSize = None             # 0x68
    dataOff = None              # 0x6c

    def parse(self, data):
        """Populate every header attribute from ``data``.

        ``data`` is the file content (or at least its first 0x70 bytes);
        it is only sliced, never modified.
        """
        # The first three fields are stored undecoded.
        self.magic = data[:8]
        self.checksum = data[8:0x0c]
        self.signature = data[0x0c:0x20]

        # From 0x20 on the header is a run of consecutive 4-byte integers,
        # listed here in file order.
        word_fields = (
            'fileSize', 'headerSize', 'endianTag', 'linkSize', 'linkOff',
            'mapOff',
            'stringIdsSize', 'stringIdsOff', 'typeIdsSize', 'typeIdsOff',
            'protoIdsSize', 'protoIdsOff', 'fieldIdsSize', 'fieldIdsOff',
            'methodIdsSize', 'methodIdsOff', 'classDefsSize', 'classDefsOff',
            'dataSize', 'dataOff',
        )
        for pos, name in enumerate(word_fields):
            start = 0x20 + 4 * pos
            setattr(self, name, _to_uint(data[start:start + 4]))


def _to_uint(data):
    v = 0
    sh = 0
    for c in data:
        v = v + (ord(c) << sh)
        sh = sh + 8
        pass
    return v


def _uleb128(data):
    sh = 0
    v = 0
    for c in data:
        cv = ord(c)
        v = v + ((cv & 0x7f) << sh)
        sh = sh + 7
        
        if cv <= 0x7f:
            break
        pass

    nbytes = sh / 7
    return v, nbytes


class _DEX_MapItem(object):
    """One 12-byte entry of the map list referenced by the header."""
    type = None                 # 2 bytes
    unused = None               # 2 bytes
    size = None                 # 4 bytes
    offset = None               # 4 bytes

    data_size = 12

    def parse(self, data):
        """Decode ``type``, ``size`` and ``offset`` from ``data``.

        Bytes 2-3 (the ``unused`` field) are skipped, so ``unused`` keeps
        its class-level default.
        """
        layout = (
            ('type', 0, 2),
            ('size', 4, 8),
            ('offset', 8, 12),
        )
        for name, start, stop in layout:
            setattr(self, name, _to_uint(data[start:stop]))


class _DEX_TypeId(object):
    """One 4-byte record of the type-id table."""
    descriptorIdx = None       # 4 bytes

    data_size = 4

    def parse(self, data):
        """Decode the record's single 4-byte field from ``data``."""
        word = data[:4]
        self.descriptorIdx = _to_uint(word)


class _DEX_ProtoId(object):
    """One 12-byte record of the proto-id table: three 4-byte fields."""
    shortyIdx = None           # 4 bytes
    returnTypeIdx = None       # 4 bytes
    parametersOff = None       # 4 bytes

    data_size = 12

    def parse(self, data):
        """Decode the three consecutive 4-byte fields from ``data``."""
        names = ('shortyIdx', 'returnTypeIdx', 'parametersOff')
        for pos, name in enumerate(names):
            start = 4 * pos
            setattr(self, name, _to_uint(data[start:start + 4]))


class _DEX_FieldId(object):
    """One 8-byte record of the field-id table."""
    classIdx = None             # 2 bytes
    typeIdx = None              # 2 bytes
    nameIdx = None              # 4 bytes

    data_size = 8

    def parse(self, data):
        """Decode the two 2-byte fields and the 4-byte field from ``data``."""
        layout = (
            ('classIdx', 0, 2),
            ('typeIdx', 2, 4),
            ('nameIdx', 4, 8),
        )
        for name, start, stop in layout:
            setattr(self, name, _to_uint(data[start:stop]))


class _DEX_MethodId(object):
    """One 8-byte record of the method-id table."""
    classIdx = None             # 2 bytes
    protoIdx = None             # 2 bytes
    nameIdx = None              # 4 bytes

    data_size = 8

    def parse(self, data):
        """Decode the two 2-byte fields and the 4-byte field from ``data``."""
        layout = (
            ('classIdx', 0, 2),
            ('protoIdx', 2, 4),
            ('nameIdx', 4, 8),
        )
        for name, start, stop in layout:
            setattr(self, name, _to_uint(data[start:stop]))


class _DEX_ClassDef(object):
    """One 0x20-byte record of the class-def table.

    The comment beside each attribute is its byte offset in the record;
    all eight fields are 4 bytes wide.
    """
    classIdx = None             # 0x00
    accessFlags = None          # 0x04
    superclassIdx = None        # 0x08
    interfacesOff = None        # 0x0c
    sourceFileIdx = None        # 0x10
    annotationsOff = None       # 0x14
    classDataOff = None         # 0x18
    staticValuesOff = None      # 0x1c

    data_size = 0x20

    def parse(self, data):
        """Decode the eight consecutive 4-byte fields from ``data``."""
        names = (
            'classIdx', 'accessFlags', 'superclassIdx', 'interfacesOff',
            'sourceFileIdx', 'annotationsOff', 'classDataOff',
            'staticValuesOff',
        )
        for pos, name in enumerate(names):
            start = 4 * pos
            setattr(self, name, _to_uint(data[start:start + 4]))


class DEXFile(object):
    """Parsed view of a DEX file: its header plus the tables it points to.

    After ``parse`` (or ``open``) the following attributes are filled in:

    * ``_data``      -- the raw file content
    * ``_header``    -- a ``_DEX_header``
    * ``_maps``      -- list of ``_DEX_MapItem``
    * ``_strings``   -- list of string payloads, indexed by string id
    * ``_typeIds``   -- list of ``_DEX_TypeId``
    * ``_protoIds``  -- list of ``_DEX_ProtoId``
    * ``_fieldIds``  -- list of ``_DEX_FieldId``
    * ``_methodIds`` -- list of ``_DEX_MethodId``
    * ``_classDefs`` -- list of ``_DEX_ClassDef``
    """
    _data = None
    _header = None
    _maps = None
    _strings = None
    _typeIds = None
    _protoIds = None
    _fieldIds = None
    _methodIds = None
    _classDefs = None

    def __init__(self):
        pass

    def open(self, filename):
        """Read the file named ``filename`` and parse its content."""
        # DEX is a binary format, so read it in binary mode; text mode may
        # translate line endings on some platforms.  try/finally makes
        # sure the handle is closed even when the read fails.
        fo = open(filename, 'rb')
        try:
            data = fo.read()
        finally:
            fo.close()

        self.parse(data)

    def _parse_table(self, item_class, num, off):
        """Parse ``num`` consecutive fixed-size records starting at ``off``.

        ``item_class`` must provide a ``data_size`` class attribute (the
        record size in bytes) and a ``parse(data)`` method.  Returns the
        list of parsed instances in file order.
        """
        data = self._data
        item_size = item_class.data_size

        items = []
        for item_off in range(off, off + item_size * num, item_size):
            item = item_class()
            item.parse(data[item_off:item_off + item_size])
            items.append(item)
        return items

    def _parse_maps(self):
        """Parse the map list at ``header.mapOff`` into ``self._maps``.

        The list is a 4-byte item count followed by that many
        ``_DEX_MapItem`` records.
        """
        off = self._header.mapOff
        num = _to_uint(self._data[off:off + 4])

        # Store the parsed list itself (not the ``map`` builtin).
        self._maps = self._parse_table(_DEX_MapItem, num, off + 4)

    def _parse_strings(self):
        """Parse the string table into ``self._strings``.

        The string-id table holds ``header.stringIdsSize`` 4-byte offsets
        starting at ``header.stringIdsOff``.  Each offset points at a
        ULEB128 length prefix followed by the string payload, which is
        taken to end just before the next NUL byte.

        Raises ``ValueError`` (from ``index``) when no NUL byte follows a
        string's start offset.
        """
        data = self._data
        header = self._header
        strings = []

        off = header.stringIdsOff
        for _ in range(header.stringIdsSize):
            str_start_off = _to_uint(data[off:off + 4])
            str_stop_off = data.index('\x00', str_start_off)
            item = data[str_start_off:str_stop_off]

            # Only the width of the length prefix is needed here; the
            # decoded length itself is not used.
            _, prefix_len = _uleb128(item)
            strings.append(item[prefix_len:])
            off = off + 4

        self._strings = strings

    def _parse_typeIds(self):
        """Parse the type-id table into ``self._typeIds``."""
        header = self._header
        self._typeIds = self._parse_table(
            _DEX_TypeId, header.typeIdsSize, header.typeIdsOff)

    def _parse_protoIds(self):
        """Parse the proto-id table into ``self._protoIds``."""
        header = self._header
        self._protoIds = self._parse_table(
            _DEX_ProtoId, header.protoIdsSize, header.protoIdsOff)

    def _parse_fieldIds(self):
        """Parse the field-id table into ``self._fieldIds``."""
        header = self._header
        self._fieldIds = self._parse_table(
            _DEX_FieldId, header.fieldIdsSize, header.fieldIdsOff)

    def _parse_methodIds(self):
        """Parse the method-id table into ``self._methodIds``."""
        header = self._header
        self._methodIds = self._parse_table(
            _DEX_MethodId, header.methodIdsSize, header.methodIdsOff)

    def _parse_classDefs(self):
        """Parse the class-def table into ``self._classDefs``."""
        header = self._header
        self._classDefs = self._parse_table(
            _DEX_ClassDef, header.classDefsSize, header.classDefsOff)

    def parse(self, data):
        """Parse ``data`` (the complete DEX file content).

        The header is decoded first because every table parser reads its
        location and record count from it.
        """
        self._data = data
        header = _DEX_header()
        header.parse(data)
        self._header = header

        self._parse_maps()
        self._parse_strings()
        self._parse_typeIds()
        self._parse_protoIds()
        self._parse_fieldIds()
        self._parse_methodIds()
        self._parse_classDefs()

if __name__ == '__main__':
    # Manual smoke test: parse 'test.dex' from the current directory, dump
    # the header fields, then list the descriptors of the defined classes
    # and of every referenced type.
    dex = DEXFile()
    dex.open('test.dex')

    header_attrs = dex._header.__dict__
    for name in header_attrs:
        print('%s: %s' % (name, repr(header_attrs[name])))

    print('')
    print('Define Classes')
    strings = dex._strings
    typeIds = dex._typeIds
    for classDef in dex._classDefs:
        # class def -> type id -> string id -> descriptor text
        print(strings[typeIds[classDef.classIdx].descriptorIdx])

    print('')
    print('Reference Classes')
    for typeId in typeIds:
        print(strings[typeId.descriptorIdx])