# HG changeset patch # User Thinker K.F. Li # Date 1306866698 -28800 # Node ID c98be35b572b01b7645a5449b8d722e1c7538379 # Parent f5728c6868b2961b93a08fd21eb896629e252cbe Rewrote dexfile.py to base on declaration diff -r f5728c6868b2 -r c98be35b572b data/testdata1.dex Binary file data/testdata1.dex has changed diff -r f5728c6868b2 -r c98be35b572b paraspace/dexfile.py --- a/paraspace/dexfile.py Fri May 27 00:40:14 2011 +0800 +++ b/paraspace/dexfile.py Wed Jun 01 02:31:38 2011 +0800 @@ -1,60 +1,3 @@ -import itertools - -class _DEX_header(object): - magic = None # 0x00, 8 bytes - checksum = None # 0x08, 4 bytes - signature = None # 0x0c, 20 bytes - fileSize = None # 0x20, 4 bytes - headerSize = None # 0x24 - endianTag = None # 0x28 - linkSize = None # 0x2c - linkOff = None # 0x30 - mapOff = None # 0x34 - stringIdsSize = None # 0x38 - stringIdsOff = None # 0x3c - typeIdsSize = None # 0x40 - typeIdsOff = None # 0x44 - protoIdsSize = None # 0x48 - protoIdsOff = None # 0x4c - fieldIdsSize = None # 0x50 - fieldIdsOff = None # 0x54 - methodIdsSize = None # 0x58 - methodIdsOff = None # 0x5c - classDefsSize = None # 0x60 - classDefsOff = None # 0x64 - dataSize = None # 0x68 - dataOff = None # 0x6c - - header_fields = \ - 'magic checksum signature fileSize headerSize endianTag ' \ - 'linkSize linkOff mapOff stringIdsSize stringIdsOff typeIdsSize ' \ - 'typeIdsOff protoIdsSize protoIdsOff fieldIdsSize fieldIdsOff ' \ - 'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \ - 'dataSize dataOff'.split() - - data_size = 0x70 - - def parse(self, data): - self.magic = data[:8] - self.checksum = data[8: 0x0c] - self.signature = data[0x0c: 0x20] - - idx = 0x20 - fields = 'fileSize headerSize endianTag linkSize linkOff mapOff ' \ - 'stringIdsSize stringIdsOff typeIdsSize typeIdsOff ' \ - 'protoIdsSize protoIdsOff fieldIdsSize fieldIdsOff ' \ - 'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \ - 'dataSize dataOff'.split() - for field in fields: - d = data[idx: idx + 4] - 
value = _to_uint(d) - setattr(self, field, value) - idx = idx + 4 - pass - pass - pass - - ## \brief Manage offset # # The instances are initialized with a offset. Every time an instance @@ -173,13 +116,486 @@ return total -class _DEX_MapItem(object): - type = None # 2 bytes - unused = None # 2 bytes - size = None # 4 bytes - offset = None # 4 bytes +class _rawstr(object): + size = None + factor = None + data = None + data_size = None + + ## + # \param size_name is dot separated attribute names from the parent. + # + def __init__(self, size=None, size_name=None, factor=1): + self.size = size + self.size_name = size_name + self.factor = factor + pass + + def parse(self, parent, data, off): + obj = _rawstr(self.size, self.size_name, self.factor) + if self.size is not None: + size = self.size + else: + size = parent + for name in self.size_name.split('.'): + size = getattr(size, name) + pass + pass + obj.data_size = size * self.factor + + obj.data = data[off:off + obj.data_size] + return obj + + def sizeof(self, v): + return v.data_size + + def to_str(self, v): + return v + pass + +class rawstr(_rawstr): + def __init__(self, size, factor=1): + super(rawstr, self).__init__(size=size, factor=factor) + pass + pass + + +class rawstr_size_name(_rawstr): + def __init__(self, size_name, factor=1): + super(rawstr_size_name, self).__init__(size_name=size_name, + factor=factor) + pass + pass + + +class tap(object): + @staticmethod + def parse(parent, data, off): + pass + + @staticmethod + def sizeof(v): + return 0 + + @staticmethod + def to_str(v): + return '' + pass + + +class uint32(object): + @staticmethod + def parse(parent, data, off): + v = _to_uint(data[off:off + 4]) + return v + + @staticmethod + def sizeof(v): + return 4 + + @staticmethod + def to_str(v): + return chr(v & 0xff) + chr((v >> 8) & 0xff) + chr((v >> 16) & 0xff) + \ + chr((v >> 24) & 0xff) + pass + + +class uint16(object): + @staticmethod + def parse(parent, data, off): + v = _to_uint(data[off:off + 2]) + 
## \brief Padding that rounds the current offset up to a 2**bits boundary.
#
# The parsed value is the number of padding bytes, not the bytes themselves.
#
class auto_align(object):
    bits = None     # log2 of the alignment boundary

    def __init__(self, bits):
        self.bits = bits
        pass

    ## \brief Compute how many bytes separate `off` from the next boundary.
    #
    # \param parent is unused.
    # \param data is unused.
    # \param off is the offset to be aligned.
    # \return the number of padding bytes (0 when off is already aligned).
    #
    def parse(self, parent, data, off):
        low_bits = (1 << self.bits) - 1
        aligned_off = (off + low_bits) & ~low_bits
        return aligned_off - off

    ## \brief The value is already the byte count.
    @staticmethod
    def sizeof(v):
        return v

    ## \brief Padding is emitted as `v` zero bytes.
    @staticmethod
    def to_str(v):
        return '\x00' * v
    pass
## \brief Declaration (and parsed value) of a counted sequence of items.
#
# An instance stored in a class body is the declaration.  parse() looks the
# item count up on the parent and returns a new array holding the items.
#
class array(relocatable):
    count_name = None   # dot separated path, from the parent, to the count
    child_type = None   # declaration used to parse/size/serialize one item
    items = None        # list of parsed items (set on parsed values only)

    ##
    # \param count_name is dot separated attribute names from the parent.
    # \param child_type is the declaration of every item.
    #
    def __init__(self, count_name, child_type):
        super(array, self).__init__()
        self.count_name = count_name
        self.child_type = child_type
        pass

    ## \brief Resolve the item count on `parent` and parse that many items.
    def parse(self, parent, data, off):
        nitem = parent
        for name in self.count_name.split('.'):
            nitem = getattr(nitem, name)
            pass
        return self.parse_nitem(parent, data, off, nitem)

    ## \brief Parse exactly `nitem` consecutive items starting at `off`.
    #
    # \return a new array with items and data_size filled in.
    #
    def parse_nitem(self, parent, data, off, nitem):
        # NOTE(review): man_off is defined outside this hunk; it is used
        # here as "call with no argument to read the offset, call with a
        # size to advance it" - confirm against its definition.
        moff = man_off(off)

        obj = array(self.count_name, self.child_type)

        def parse_item():
            item = obj.child_type.parse(parent, data, moff())
            moff(obj.child_type.sizeof(item))
            return item

        obj.items = [parse_item() for i in range(nitem)]
        obj.data_size = moff() - off
        return obj

    ## \brief Recompute data_size as the sum of the item sizes.
    def compute_size(self):
        # NOTE(review): compute_size() here is a module-level helper that
        # is not visible in this hunk - confirm it exists.
        self.data_size = sum([compute_size(item) for item in self.items])
        pass

    ## \brief Serialize the items back to back.
    #
    # composite.to_str() calls declaration.to_str(child); an array
    # declaration is an instance, so the parsed array arrives as `v`.
    # Calling parsed_array.to_str() without argument keeps working.
    #
    def to_str(self, v=None):
        if v is None:
            v = self
            pass
        item_to_str = v.child_type.to_str
        return ''.join([item_to_str(item) for item in v.items])
    pass
## \brief Declaration (and parsed value) of an optional field.
#
# The wrapped child is parsed only when condition(parent, data, off) is
# true; otherwise the field is absent, value is None and it takes 0 bytes.
#
class cond(composite):
    condition = None    # callable(parent, data, off) -> bool
    child_type = None   # declaration of the wrapped child
    value = None        # parsed child, or None when the field is absent

    ##
    # \param cond is the predicate deciding whether the child is present.
    # \param child_type is the declaration of the wrapped child.
    #
    def __init__(self, cond, child_type):
        # composite.__init__ is not called: it iterates child_names, which
        # is None for this class.
        self.condition = cond
        self.child_type = child_type
        pass

    ## \brief Parse the child if the condition holds.
    #
    # \return a new cond with value and data_size filled in.
    #
    def parse(self, parent, data, off):
        if self.condition(parent, data, off):
            value = self.child_type.parse(parent, data, off)
        else:
            value = None
            pass

        obj = cond(self.condition, self.child_type)
        obj.value = value
        obj.data_size = self.sizeof(obj)
        return obj

    ## \brief Size of the parsed cond `v` (0 when the child is absent).
    def sizeof(self, v):
        if v.value is None:
            return 0
        return self.child_type.sizeof(v.value)

    ## \brief Recompute data_size from the wrapped value.
    def compute_size(self):
        if isinstance(self.value, relocatable):
            self.value.compute_size()
            pass

        # sizeof() expects the cond object and reads .value itself;
        # handing it self.value would look up .value on the child.
        self.data_size = self.sizeof(self)
        pass

    ## \brief Serialize the wrapped value ('' when the child is absent).
    #
    # composite.to_str() calls declaration.to_str(child); a cond
    # declaration is an instance, so the parsed cond arrives as `v`.
    # Calling parsed_cond.to_str() without argument keeps working.
    #
    def to_str(self, v=None):
        if v is None:
            v = self
            pass
        if v.value is None:
            return ''
        return v.child_type.to_str(v.value)
    pass
## \brief Declaration (and parsed value) of a constant that uses no bytes.
#
# parse() reads nothing from the input; it returns a new abs_value that
# carries the constant given to the declaration.
#
class abs_value(object):
    value = None    # the constant

    ##
    # \param value is the constant carried by every parsed instance.
    #
    def __init__(self, value):
        self.value = value
        pass

    ## \brief Produce the parsed value without touching `data`.
    #
    # \param parse receives the parent object (the name is kept from the
    #        original signature); it is unused.
    # \param data is unused.
    # \param off is unused.
    # \return a new abs_value holding the same constant.
    #
    def parse(self, parse, data, off):
        return abs_value(self.value)

    ## \brief A constant occupies no bytes.
    def sizeof(self, v):
        return 0

    ## \brief A constant serializes to the empty string.
    #
    # switch.to_str() and composite.to_str() call declaration.to_str(value)
    # on the declaration instance, so the parsed value has to be accepted
    # as an argument.  It is optional so obj.to_str() keeps working.
    #
    def to_str(self, v=None):
        return ''
    pass
type_name][0] - return type_value - pass - - -class _DEX_TypeId(object): - descriptorIdx = None # 4 bytes - data_size = 4 - - def parse(self, data): - self.descriptorIdx = _to_uint(data[:4]) - pass + child_names = \ + 'type unused size offset'.split() pass -class _DEX_ProtoId(object): - shortyIdx = None # 4 bytes - returnTypeIdx = None # 4 bytes - parametersOff = None # 4 bytes - - data_size = 12 +class _DEX_MapItemBlock(composite): + num = uint32 + items = array('num', _DEX_MapItem) - def parse(self, data): - self.shortyIdx = _to_uint(data[:4]) - self.returnTypeIdx = _to_uint(data[4:8]) - self.parametersOff = _to_uint(data[8:12]) - pass + child_names = 'num items'.split() pass -class _DEX_FieldId(object): - classIdx = None # 2 bytes - typeIdx = None # 2 bytes - nameIdx = None # 4 bytes - - data_size = 8 +class _DEX_StringId(composite): + stringDataOff = uint32 - def parse(self, data): - self.classIdx = _to_uint(data[:2]) - self.typeIdx = _to_uint(data[2:4]) - self.nameIdx = _to_uint(data[4:8]) - pass + child_names = ('stringDataOff',) pass -class _DEX_MethodId(object): - classIdx = None # 2 bytes - protoIdx = None # 2 bytes - nameIdx = None # 4 bytes +class _DEX_TypeId(composite): + descriptorIdx = uint32 + + child_names = ('descriptorIdx',) + pass + + +class _DEX_ProtoId(composite): + shortyIdx = uint32 + returnTypeIdx = uint32 + parametersOff = uint32 - data_size = 8 - - def parse(self, data): - self.classIdx = _to_uint(data[:2]) - self.protoIdx = _to_uint(data[2:4]) - self.nameIdx = _to_uint(data[4:8]) - pass + child_names = 'shortyIdx returnTypeIdx parametersOff'.split() pass -class _DEX_ClassDef(object): - classIdx = None # 0x00 - accessFlags = None # 0x04 - superclassIdx = None # 0x08 - interfacesOff = None # 0x0c - sourceFileIdx = None # 0x10 - annotationsOff = None # 0x14 - classDataOff = None # 0x18 - staticValuesOff = None # 0x1c - - data_size = 0x20 +class _DEX_FieldId(composite): + classIdx = uint16 + typeIdx = uint16 + nameIdx = uint32 - def 
parse(self, data): - self.classIdx = _to_uint(data[:4]) - self.accessFlags = _to_uint(data[4:8]) - self.superclassIdx = _to_uint(data[8:0xc]) - self.interfacesOff = _to_uint(data[0xc:0x10]) - self.sourceFileIdx = _to_uint(data[0x10:0x14]) - self.annotationsOff = _to_uint(data[0x14:0x18]) - self.classDataOff = _to_uint(data[0x18:0x1c]) - self.staticValuesOff = _to_uint(data[0x1c:0x20]) - pass + child_names = 'classIdx typeIdx nameIdx'.split() pass -class _DEX_ClassDataHeader(object): - staticFieldsSize = None - instanceFieldsSize = None - directMethodsSize = None - virtualMethodsSize = None - - data_size = None +class _DEX_MethodId(composite): + classIdx = uint16 + protoIdx = uint16 + nameIdx = uint32 - def parse(self, data, off): - self.staticFieldsSize, sh = _uleb128(data[off:off + 10]) - sz = sh - off = off + sh - self.instanceFieldsSize, sh = _uleb128(data[off:off + 10]) - sz = sz + sh - off = off + sh - self.directMethodsSize, sh = _uleb128(data[off:off + 10]) - sz = sz + sh - off = off + sh - self.virtualMethodsSize, sh = _uleb128(data[off:off + 10]) - sz = sz + sh - - self.data_size = sz - pass - - def compute_size(self): - self.data_size = \ - _uleb128_sz(self.staticFieldsSize) + \ - _uleb128_sz(self.instanceFieldsSize) + \ - _uleb128_sz(self.directMethodsSize) + \ - _uleb128_sz(self.virtualMethodsSize) - pass + child_names = 'classIdx protoIdx nameIdx'.split() pass -class _DEX_Field(object): - fieldIdx = None - accessFlags = None - - data_size = None +class _DEX_ClassDef(composite): + classIdx = uint32 + accessFlags = uint32 + superclassIdx = uint32 + interfacesOff = uint32 + sourceFileIdx = uint32 + annotationsOff = uint32 + classDataOff = uint32 + staticValuesOff = uint32 - def parse(self, data, off): - self.fieldIdx, sh = _uleb128(data[off:off + 10]) - sz = sh - off = off + sh - self.accessFlags, sh = _uleb128(data[off:off + 10]) - sz = sz + sh + child_names = \ + 'classIdx accessFlags superclassIdx interfacesOff ' \ + 'sourceFileIdx annotationsOff 
classDataOff staticValuesOff'.split() + pass + - self.data_size = sz - pass +class _DEX_ClassDataHeader(composite): + staticFieldsSize = uleb128 + instanceFieldsSize = uleb128 + directMethodsSize = uleb128 + virtualMethodsSize = uleb128 - def compute_size(self): - self.data_size = \ - _uleb128_sz(self.fieldIdx) + \ - _uleb128_sz(self.accessFlags) - pass + child_names = \ + 'staticFieldsSize instanceFieldsSize directMethodsSize ' \ + 'virtualMethodsSize'.split() pass -class _DEX_Method(object): - methodIdx = None - accessFlags = None - codeOff = None #!< This is only pos depended field - - data_size = None +class _DEX_Field(composite): + fieldIdx = uleb128 + accessFlags = uleb128 - def parse(self, data, off): - self.methodIdx, sh = _uleb128(data[off:off + 10]) - sz = sh - off = off + sh - - self.accessFlags, sh = _uleb128(data[off:off + 10]) - sz = sz + sh - off = off + sh - - # - # Offset with unsigned leb128 makes its size is vary for - # position. - # - self.codeOff, sh = _uleb128(data[off:off + 10]) - sz = sz + sh - - self.data_size = sz - pass - - def compute_size(self): - self.data_size = \ - _uleb128_sz(self.methodIdx) + \ - _uleb128_sz(self.accessFlags) + \ - _uleb128_sz(self.codeOff) - pass + child_names = 'fieldIdx accessFlags'.split() pass -## \brief Class data -# -# This is only top-level (map item) structure that is position -# depended. Since _DEX_Method is position depended, its size is vary -# for position. 
-# -class _DEX_ClassData(object): - header = None # DexClassDataHeader - staticFields = None # DexField* - instanceFields = None # DexField* - directMethods = None # DexMethod* - virtualMethods = None # DexMethod* - - data_size = None - - def parse(self, data, off): - moff = man_off(off) - - header = _DEX_ClassDataHeader() - header.parse(data, moff()) - self.header = header - moff(header.data_size) - - def parse_field(): - field = _DEX_Field() - field.parse(data, moff()) - moff(field.data_size) - - # - # field index depends previous one to reduce size - # - field.fieldIdx = field.fieldIdx + idx[0] - idx[0] = field.fieldIdx - - return field - def parse_method(): - method = _DEX_Method() - method.parse(data, moff()) - moff(method.data_size) - - # - # method index depends previous one to reduce size - # - method.methodIdx = method.methodIdx + idx[0] - idx[0] = method.methodIdx - - return method +class _DEX_Method(composite): + methodIdx = uleb128 + accessFlags = uleb128 + codeOff = uleb128 - idx = [0] - self.staticFields = [parse_field() - for i in range(header.staticFieldsSize)] - idx = [0] - self.instanceFields = [parse_field() - for i in range(header.instanceFieldsSize)] - idx = [0] - self.directMethods = [parse_method() - for i in range(header.directMethodsSize)] - idx = [0] - self.virtualMethods = [parse_method() - for i in range(header.virtualMethodsSize)] - - self.data_size = moff() - off - pass - - def compute_size(self): - sz = self.header.data_size - sz = sz + _sum_data_size(itertools.chain(self.staticFields, - self.instanceFields, - self.directMethods, - self.virtualMethods)) - - self.data_size = sz - pass + child_names = 'methodIdx accessFlags codeOff'.split() pass -class _DEX_TypeItem(object): - typeIdx = None # 2 bytes +class _DEX_ClassData(composite): + header = _DEX_ClassDataHeader + staticFields = array('header.staticFieldsSize', _DEX_Field) + instanceFields = array('header.instanceFieldsSize', _DEX_Field) + directMethods = 
array('header.directMethodsSize', _DEX_Method) + virtualMethods = array('header.virtualMethodsSize', _DEX_Method) - data_size = 2 - - def parse(self, data, off): - self.typeIdx = _to_uint(data[off:off + 2]) - pass + child_names = \ + 'header ' \ + 'staticFields instanceFields directMethods virtualMethods'.split() pass -class _DEX_TypeList(object): - typeItems = None - - data_size = None - - def parse(self, data, off): - moff = man_off(off) - - size = _to_uint(data[moff(4):moff()]) +class _DEX_TypeList(composite): + padding = auto_align(2) # 2 bits alignment + num = uint32 + typeItems = array('num', uint16) - def parse_type_item(): - item = _DEX_TypeItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - typeItems = [parse_type_item() - for i in range(size)] - - self.typeItems = typeItems - self.data_size = moff() - off - pass - - def compute_size(self): - size = 4 + _sum_data_size(self.typeItems) - - self.data_size = size - pass + child_names = 'padding num typeItems'.split() pass -class _DEX_TypeLists(object): - typeLists = None - - data_size = None +class _DEX_Try(composite): + startAddr = uint32 + insnCount = uint16 + handlerOff = uint16 - def parse(self, num, data, off): - moff = man_off(off) - - def parse(): - moff.off = (moff(0) + 3) & ~0x3 # aligned for 4 bytes - typeList = _DEX_TypeList() - typeList.parse(data, moff()) - moff(typeList.data_size) - - return typeList - - typeLists = [parse() for i in range(num)] - - self.typeLists = typeLists - self.data_size = moff(0) - off - pass - - def compute_size(self): - def compute_align(prev, cur): - v = ((prev + 3) & ~0x3) + cur - return v - - sizes = itertools.imap(_compute_sz, self.typeLists) - size = reduce(compute_align, sizes) - - self.data_size = size - pass + child_names = 'startAddr insnCount handlerOff'.split() pass -class _DEX_Try(object): - startAddr = None # 4 bytes - insnCount = None # 2 bytes - handlerOff = None # 2 bytes - - data_size = 8 +class _DEX_CatchHandler(composite): + 
typeIdx = uleb128 + address = uleb128 - def parse(self, data, off): - cur_off = off - self.startAddr = _to_uint(data[cur_off:cur_off + 4]) - cur_off = cur_off + 4 - self.insnCount = _to_uint(data[cur_off:cur_off + 2]) - cur_off = cur_off + 2 - self.handlerOff = _to_uint(data[cur_off:cur_off + 2]) - pass + child_names = 'typeIdx address'.split() pass -class _DEX_CatchHandler(object): - typeIdx = None - address = None - - data_size = None - - def parse(self, data, off): - moff = man_off(off) - self.typeIdx, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - self.address, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - - self.data_size = moff() - off - pass +class _DEX_CatchAllHandler(composite): + address = uleb128 - def parse1(self, data, off): - self.address, sh = _uleb128(data[off:off + 5]) - - self.data_size = sh - pass - - def compute_size(self): - if self.typeIdx is not None: - size = _uleb128_sz(self.typeIdx) - else: - size = 0 - pass - size = size + _uleb128_sz(self.address) - - self.data_size = size - pass + child_names = 'address'.split() pass -class _DEX_Catch(object): - catchesAll = None - handlers = None - - data_size = None - - def parse(self, data, off): - moff = man_off(off) +class _DEX_Catch(composite): + size = leb128 + handlers = array('count', _DEX_CatchHandler) + catchAllHandler = cond((lambda parent, data, off: parent.catchesAll), + _DEX_CatchAllHandler) - count, sh = _leb128(data[moff():moff() + 5]) - moff(sh) - - if count > 0: - self.catchesAll = False - else: - self.catchesAll = True - count = -count - pass + child_names = 'size handlers catchAllHandler'.split() - def parse_handler(): - handler = _DEX_CatchHandler() - handler.parse(data, moff()) - moff(handler.data_size) - return handler - - self.handlers = [parse_handler() for i in range(count)] + @property + def catchesAll(self): + return self.size <= 0 - if self.catchesAll: - # - # Additional handler for catching all - # - handler = _DEX_CatchHandler() - handler.parse1(data, moff()) - 
moff(handler.data_size) - self.handlers.append(handler) - pass - - self.data_size = moff() - off - pass - - def compute_size(self): - count = len(self.handlers) - if self.catchesAll: - count = -(count - 1) - pass - count_sz = _leb128_sz(count) - - handlers_size = _sum_data_size(self.handlers) - - size = count_sz + handlers_size - pass + @property + def count(self): + if self.size < 0: + return -self.size + return self.size pass -class _DEX_Code(object): - registersSize = None # 2 bytes - insSize = None # 2 bytes - outsSize = None # 2 bytes - triesSize = None # 2 bytes - debugInfoOff = None # 4 bytes - insnsSize = None # 4 bytes - insns = None - try_items = None - catch_handler_items = None - - data_size = None +class _DEX_Code(composite): + registersSize = uint16 + insSize = uint16 + outsSize = uint16 + triesSize = uint16 + debugInfoOff = uint32 + insnsSize = uint32 + insns = rawstr_size_name('insnsSize', 2) + + _has_tries = lambda parent, data, off: parent.triesSize > 0 + + padding = cond(_has_tries, auto_align(2)) + try_items = cond(_has_tries, + array('triesSize', _DEX_Try)) + + handlers_size = cond(_has_tries, uleb128) + catch_handler_items = cond(_has_tries, + array('handlers_size.value', _DEX_Catch)) - def parse(self, data, off): - moff = man_off(off) - - self.registersSize = _to_uint(data[moff(2):moff()]) - self.insSize = _to_uint(data[moff(2):moff()]) - self.outsSize = _to_uint(data[moff(2):moff()]) - self.triesSize = _to_uint(data[moff(2):moff()]) - self.debugInfoOff = _to_uint(data[moff(4):moff()]) - self.insnsSize = _to_uint(data[moff(4):moff()]) - - moff(self.insnsSize * 2) # skip insns - - if self.triesSize > 0: - def parse_try_item(): - try_item = _DEX_Try() - try_item.parse(data, moff()) - moff(try_item.data_size) - return try_item - - moff.off = (moff.off + 3) & ~0x3 # align tries to 4 bytes - self.try_items = [parse_try_item() for i in range(self.triesSize)] - - def parse_catch_handler(): - catch = _DEX_Catch() - catch.parse(data, moff()) - 
moff(catch.data_size) - return catch - - # - # No tries, no catch handlers - # - handlersSize, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - self.catch_handler_items = [parse_catch_handler() - for i in range(handlersSize)] - pass + padding2 = auto_align(2) - moff.off = (moff() + 3) & ~0x3 # round code item to 4 bytes - self.data_size = moff() - off - pass - - def compute_size(self): - size = 16 + self.insnsSize * 2 - - if self.triesSize > 0: - size = (size + 0x3) & ~0x3 - - try_items_size = _sum_data_size(self.try_items) + child_names = \ + 'registersSize insSize outsSize triesSize debugInfoOff ' \ + 'insnsSize insns padding try_items handlers_size ' \ + 'catch_handler_items padding2'.split() + pass - catch_handler_items = self.catch_handler_items - catch_handler_items_cnt = len(catch_handler_items) - catch_handler_items_cnt_sz = _uleb128_sz(catch_handler_items_cnt) - catch_handler_items_sz = _sum_data_size(catch_handler_items) - catch_handler_items_size = \ - catch_handler_items_cnt_sz + \ - catch_handler_items_sz - - size = size + try_items_size + catch_handler_items_size - pass - self.data_size = (size + 3) & ~0x3 - pass +class _DEX_AnnotationSetItem(composite): + size = uint32 + annotationOffs = array('size', uint32) + + child_names = 'size annotationOffs'.split() pass -## \brief File offset to Annotation item. -# -# This type is not in libdex of Dalvik. We add this class to tracking -# information for layout algorithm. 
-# -class _DEX_AnnotationRefItem(object): - annotationOff = None # 4 bytes - - data_size = 4 - - def parse(self, data, off): - self.annotationOff = _to_uint(data[off:off + 4]) - pass - pass - -class _DEX_AnnotationSetItem(object): - # size = None # 4 bytes - annotations = None # 4 * size bytes - - data_size = None - - def parse(self, data, off): - moff = man_off(off) - - size = _to_uint(data[moff(4):moff()]) - - def parse_annotation_ref(): - ref = _DEX_AnnotationRefItem() - ref.parse(data, moff()) - moff(ref.data_size) - return ref - - self.annotations = [parse_annotation_ref() - for i in range(size)] - - self.data_size = moff() - off - pass - - def compute_size(self): - annotations_size = _sum_data_size(self.annotations) - size = 4 + annotations_size - - self.data_size = size - pass +class _DEX_FieldAnnotationsItem(composite): + fieldIdx = uint32 + annotationsOff = uint32 + + child_names = 'fieldIdx annotationsOff'.split() pass -class _DEX_FieldAnnotationsItem(object): - fieldIdx = None # 4 bytes - annotationsOff = None # 4 bytes - - data_size = 8 +class _DEX_MethodAnnotationsItem(composite): + methodIdx = uint32 + annotationsOff = uint32 - def parse(self, data, off): - moff = man_off(off) - - self.fieldIdx = _to_uint(data[moff(4):moff()]) - self.annotationsOff = _to_uint(data[moff(4):moff()]) - pass + child_names = 'methodIdx annotationsOff'.split() pass -class _DEX_MethodAnnotationsItem(object): - methodIdx = None # 4 bytes - annotationsOff = None # 4 bytes - - data_size = 8 - - def parse(self, data, off): - moff = man_off(off) - - self.methodIdx = _to_uint(data[moff(4):moff()]) - self.annotationsOff = _to_uint(data[moff(4):moff()]) - pass - pass +class _DEX_ParameterAnnotationsItem(composite): + methodIdx = uint32 + annotationsOff = uint32 - -class _DEX_ParameterAnnotationsItem(object): - methodIdx = None # 4 bytes - annotationsOff = None # 4 bytes - - data_size = 8 - - def parse(self, data, off): - moff = man_off(off) - - self.methodIdx = 
_to_uint(data[moff(4):moff()]) - self.annotationsOff = _to_uint(data[moff(4):moff()]) - pass + child_names = 'methodIdx annotationsOff'.split() pass -class _DEX_AnnotationsDirectoryItem(object): - classAnnotationsOff = None # 4 bytes - fieldAnnotationsItems = None - methodAnnotationsItems = None - parameterAnnotationsItems = None - - data_size = None - - def parse(self, data, off): - moff = man_off(off) - - self.classAnnotationsOff = _to_uint(data[moff(4):moff()]) - fieldsSize = _to_uint(data[moff(4):moff()]) - methodsSize = _to_uint(data[moff(4):moff()]) - parametersSize = _to_uint(data[moff(4):moff()]) +class _DEX_AnnotationsDirectoryItem(composite): + classAnnotationsOff = uint32 + fieldsSize = uint32 + methodsSize = uint32 + parametersSize = uint32 + + fieldAnnotationsItems = array('fieldsSize', _DEX_FieldAnnotationsItem) + methodAnnotationsItems = array('methodsSize', _DEX_MethodAnnotationsItem) + parameterAnnotationsItems = array('parametersSize', + _DEX_ParameterAnnotationsItem) - def parse_fieldAnnotationsItem(): - item = _DEX_FieldAnnotationsItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - def parse_methodAnnotationsItem(): - item = _DEX_MethodAnnotationsItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - def parse_parameterAnnotationsItem(): - item = _DEX_ParameterAnnotationsItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - self.fieldAnnotationsItems = [parse_fieldAnnotationsItem() - for i in range(fieldsSize)] - self.methodAnnotationsItems = [parse_methodAnnotationsItem() - for i in range(methodsSize)] - self.parameterAnnotationsItems = [parse_parameterAnnotationsItem() - for i in range(parametersSize)] - - self.data_size = moff() - off - pass + child_names = 'classAnnotationsOff fieldsSize methodsSize ' \ + 'parametersSize fieldAnnotationsItems methodAnnotationsItems ' \ + 'parameterAnnotationsItems'.split() + pass + - def compute_size(self): - field_anno_sz = 
_sum_data_size(self.fieldAnnotationsItems) - method_anno_sz = _sum_data_size(self.methodAnnotationsItems) - parameter_anno_sz = _sum_data_size(self.parameterAnnotationsItems) - - all_items_size = field_anno_sz + method_anno_sz + parameter_anno_sz - size = 16 + all_items_size - - self.data_size = size - pass +class _DEX_AnnotationArray(composite): + size = uleb128 + # annotations = array('size', _DEX_AnnotationMember_noname) + + child_names = 'size annotations'.split() pass @@ -881,11 +871,7 @@ # # \see createAnnotationMember() in dalvik/vm/reflect/Annotation.c # -class _DEX_AnnotationMember(object): - nameIdx = None # optional - valueType = None # 1 byte - value = None - +class _DEX_AnnotationMember(composite): # # Constants from DexFile.h # @@ -909,251 +895,92 @@ kDexAnnotationValueTypeMask = 0x1f kDexAnnotationValueArgShift = 5 - data_size = None + nameIdx = uleb128 + valueType = uint8 + value_map = { + kDexAnnotationByte: rawstr_size_name('value_width'), + kDexAnnotationShort: rawstr_size_name('value_width'), + kDexAnnotationChar: rawstr_size_name('value_width'), + kDexAnnotationInt: rawstr_size_name('value_width'), + kDexAnnotationLong: rawstr_size_name('value_width'), + kDexAnnotationFloat: rawstr_size_name('value_width'), + kDexAnnotationDouble: rawstr_size_name('value_width'), + kDexAnnotationString: rawstr_size_name('value_width'), + kDexAnnotationType: rawstr_size_name('value_width'), + kDexAnnotationMethod: rawstr_size_name('value_width'), + kDexAnnotationField: rawstr_size_name('value_width'), + kDexAnnotationEnum: rawstr_size_name('value_width'), + kDexAnnotationNull: abs_value(0), + kDexAnnotationBoolean: abs_value(0), # width != 0 + kDexAnnotationArray: _DEX_AnnotationArray, + # kDexAnnotationAnnotation: _DEX_AnnotationItem_novisibility + } + value = switch('vtype', value_map) - def parse(self, data, off): - self.nameIdx, sh = _uleb128(data[off:off + 5]) - - self.parse_noname(data, off + sh) - - self.data_size = self.data_size + sh - pass - - def 
parse_noname(self, data, off): - moff = man_off(off) - - valueType = _to_uint(data[moff(1):moff()]) - self.valueType = valueType - width = valueType >> self.kDexAnnotationValueArgShift - - vtype = valueType & self.kDexAnnotationValueTypeMask + child_names = 'nameIdx valueType value'.split() - if vtype == self.kDexAnnotationByte: - self.value = _to_int(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationShort: - self.value = _to_int(data[moff(width):moff()]) - pass - elif vtype == self.kDexAnnotationChar: - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationInt: - self.value = _to_int(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationLong: - self.value = _to_int(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationFloat: - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationDouble: - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationBoolean: - self.value = width != 0 - pass - elif vtype == self.kDexAnnotationString: # string index - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationType: # TypeId index - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationMethod: # MethodId index - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationField: - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationEnum: # FieldId index - self.value = _to_uint(data[moff(width):moff()]) - moff(1) - pass - elif vtype == self.kDexAnnotationArray: - size, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - - def parse_array_elm(): - elm = _DEX_AnnotationMember() - elm.parse_noname(data, moff()) - moff(elm.data_size) - return elm - - self.value = [parse_array_elm() - for i in 
range(size)] - pass - elif vtype == self.kDexAnnotationAnnotation: - annoitem = _DEX_AnnotationItem() - annoitem.parse_novisibility(data, moff()) - moff(annoitem.data_size) - self.value = annoitem - pass - elif vtype == self.kDexAnnotationNull: - self.value = 0 - pass - else: - raise ValueError, \ - 'Bad annotation element value byte 0x02x' % (valueType) - - self.data_size = moff() - off - pass + @property + def vtype(self): + vtype = self.valueType & self.kDexAnnotationValueTypeMask + return vtype + + @property + def width(self): + width = self.valueType >> self.kDexAnnotationValueArgShift + return width - def compute_size(self): - if self.nameIdx is not None: - nameIdx_size = _uleb128_sz(self.nameIdx) - else: - nameIdx_size = 0 - pass - - valueType = self.valueType - width = valueType >> self.kDexAnnotationValueArgShift - - vtype = valueType & self.kDexAnnotationValueTypeMask + @property + def value_width(self): + width = self.valueType >> self.kDexAnnotationValueArgShift + return width + 1 + pass + - if vtype in (self.kDexAnnotationByte, - self.kDexAnnotationShort, - self.kDexAnnotationChar, - self.kDexAnnotationInt, - self.kDexAnnotationLong, - self.kDexAnnotationFloat, - self.kDexAnnotationDouble, - self.kDexAnnotationString, - self.kDexAnnotationType, - self.kDexAnnotationMethod, - self.kDexAnnotationField, - self.kDexAnnotationEnum): - value_size = width + 2 - pass - elif vtype in (self.kDexAnnotationBoolean, - self.kDexAnnotationNull): - value_size = 1 - pass - elif vtype == self.kDexAnnotationArray: - array_cnt = len(self.value) - array_cnt_size = _uleb128_sz(array_cnt) - array_size = _sum_data_size(self.value) - value_size = 1 + array_cnt_size + array_size - pass - elif vtype == self.kDexAnnotationAnnotation: - value_size = 1 + _compute_sz(self.value) - pass - else: - raise ValueError, \ - 'Bad annotation element value byte 0x02x' % (valueType) - - self.data_size = nameIdx_size + value_size - pass +class 
_DEX_AnnotationMember_noname(_DEX_AnnotationMember): + child_names = 'valueType value'.split() pass +_DEX_AnnotationArray.annotations = array('size', _DEX_AnnotationMember_noname) + + ## \brief Annotation item # # \see processEncodedAnnotation() in dalvik/vm/reflect/Annotation.c # -class _DEX_AnnotationItem(object): - visibility = None # 1 byte (optional) - typeIdx = None - members = None +class _DEX_AnnotationItem(composite): + visibility = uint8 + typeIdx = uleb128 + size = uleb128 + members = array('size', _DEX_AnnotationMember) + child_names = 'visibility typeIdx size members'.split() + kDexVisibilityBuild = 0x00 kDexVisibilityRuntime = 0x01 kDexVisibilitySystem = 0x02 - - data_size = None + pass - def parse(self, data, off): - self.visibility = _to_uint(data[off:off + 1]) - - self.parse_novisibility(data, off + 1) - - self.data_size = self.data_size + 1 - pass - def parse_novisibility(self, data, off): - moff = man_off(off) - - self.typeIdx, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - size, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - - def parse_AnnotationMemmber(): - member = _DEX_AnnotationMember() - member.parse(data, moff()) - moff(member.data_size) - return member - - self.members = [parse_AnnotationMemmber() - for i in range(size)] - - self.data_size = moff() - off - pass - - def compute_size(self): - if self.visibility is not None: - visibility_size = 1 - else: - visibility_size = 0 - pass - - typeIdx_size = _uleb128_sz(self.typeIdx) - - members_cnt_size = _uleb128_sz(len(self.members)) - members_size = members_cnt_size + _sum_data_size(self.members) - - size = visibility_size + typeIdx_size + members_size - - self.data_size = size - pass +class _DEX_AnnotationItem_novisibility(_DEX_AnnotationItem): + child_names = 'typeIdx size members'.split() pass -class _DEX_EncodedArrayItem(object): - elements = None - - data_size = None +_DEX_AnnotationMember. 
\ + value_map[_DEX_AnnotationMember.kDexAnnotationAnnotation] = \ + _DEX_AnnotationItem_novisibility - def parse(self, data, off): - moff = man_off(off) - - size, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - def parse_element(): - element = _DEX_AnnotationMember() - element.parse_noname(data, moff()) - moff(element.data_size) - return element - - self.elements = [parse_element() - for i in range(size)] +class _DEX_EncodedArrayItem(composite): + size = uleb128 + elements = array('size', _DEX_AnnotationMember_noname) - self.data_size = moff() - off - pass - - def compute_size(self): - elements_cnt_size = _uleb128_sz(len(self.elements)) - size = elements_cnt_size + _sum_data_size(self.elements) - - self.data_size = size - pass + child_names = 'size elements'.split() pass -class _DEX_DebugInfoItem(object): - start_line = None - parameters = None - opcodes = None - +class _DEX_DebugCodeBlock(relocatable): DBG_END_SEQUENCE = 0x00 DBG_ADVANCE_PC = 0x01 DBG_ADVANCE_LINE = 0x02 @@ -1167,28 +994,16 @@ DBG_FIRST_SPECIAL = 0x0a DBG_LINE_BASE = -4 DBG_LINE_RANGE = 15 - + + opcodes = None data_size = None - - def parse(self, data, off): + + @staticmethod + def parse(parent, data, off): moff = man_off(off) - self.start_line, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - parameters_size, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - - # - # Parse parameters - # - def parse_parameter(): - paramter, sh = _uleb128(data[moff():moff() + 5]) - moff(sh) - return paramter + self = _DEX_DebugCodeBlock() - self.parameters = [parse_parameter() - for i in range(parameters_size)] - # # Parse debug opcodes # @@ -1245,17 +1060,12 @@ pass pass self.opcodes = opcodes - + self.data_size = moff() - off - pass + + return self def compute_size(self): - start_line_size = _uleb128_sz(self.start_line) - - parameters_cnt_size = _uleb128_sz(len(self.parameters)) - parameter_sizes = itertools.imap(_uleb128_sz, self.parameters) - parameters_size = parameters_cnt_size + sum(parameter_sizes) 
- def compute_opcode_size(code): opcode = code[0] @@ -1291,473 +1101,270 @@ opcode_sizes = [i for i in opcode_sizes] opcodes_size = sum(opcode_sizes) - size = start_line_size + parameters_size + opcodes_size + self.data_size = opcodes_size + pass + + def to_str(self): + # + # Parse debug opcodes + # + opcodes = self.opcodes + opcodebins = [] + for code in opcodes: + opcode = code[0] + + if opcode == self.DBG_END_SEQUENCE: + opcodebins.append(chr(opcode)) + break + elif opcode == self.DBG_ADVANCE_PC: + codebin = chr(opcode) + _to_uleb128(code[1]) + opcodebins.append(codebin) + pass + elif opcode == self.DBG_ADVANCE_LINE: + codebin = chr(opcode) + _to_leb128(code[1]) + opcodebins.append(codebin) + pass + elif opcode == self.DBG_START_LOCAL: + codebin = chr(opcode) + _to_uleb128(code[1]) + \ + _to_uleb128(code[2]) + _to_uleb128(code[3]) + codebins.append(codebin) + pass + elif opcode == self.DBG_START_LOCAL_EXTENDED: + codebin = chr(opcode) + _to_uleb128(code[1]) + \ + _to_uleb128(code[2]) + _to_uleb128(code[3]) + \ + _to_uleb128(code[4]) + codebins.append(codebin) + pass + elif opcode == self.DBG_END_LOCAL: + codebin = chr(opcode) + _to_uleb128(code[1]) + codebins.append(codebin) + pass + elif opcode == self.DBG_RESTART_LOCAL: + codebin = chr(opcode) + _to_uleb128(code[1]) + codebins.append(codebin) + pass + elif opcode in (self.DBG_SET_PROLOGUE_END, + self.DBG_SET_EPILOGUE_BEGIN, + self.DBG_SET_FILE): + opcodebins.append(chr(opcode)) + pass + else: + opcodebins.append(chr(opcode)) + pass + pass + + return ''.join(opcodebins) + pass + + +class _DEX_DebugInfoItem(composite): + start_line = uleb128 + parameters_size = uleb128 + parameters = array('parameters_size', uleb128) + opcodes = _DEX_DebugCodeBlock + + child_names = 'start_line parameters_size parameters opcodes'.split() + pass + + +class _DEX_StringDataItem(relocatable): + size = None + data = None + + data_size = None + + @staticmethod + def parse(parent, data, off): + size, sh = _uleb128(data[off:off + 5]) + 
data = data[off + sh: off + sh + size] + + self = _DEX_StringDataItem() + + self.size = size + self.data = data + self.data_size = sh + size + 1 + return self + + def compute_size(self): + size = len(self.data) + self.size = size + size_sz = _uleb128_sz(size) + self.data_size = size_sz + size + 1 + pass - self.data_size = size + def to_str(self): + size = len(self.data) + self.size = size + data = _uleb128(size) + self.data + '\x00' + return data + pass + + +class DEXFile(composite): + fname = None + data = None + header = _DEX_header + maps = _DEX_MapItemBlock + stringIds = array(None, _DEX_StringId) + typeIds = array(None, _DEX_TypeId) + protoIds = array(None, _DEX_ProtoId) + fieldIds = array(None, _DEX_FieldId) + methodIds = array(None, _DEX_MethodId) + classDefs = array(None, _DEX_ClassDef) + classDatas = array(None, _DEX_ClassData) + typeLists = array(None, _DEX_TypeList) + codeItems = array(None, _DEX_Code) + annotationSetItems = array(None, _DEX_AnnotationSetItem) + annotationsDirectoryItems = array(None, _DEX_AnnotationsDirectoryItem) + annotationItems = array(None, _DEX_AnnotationItem) + encodedArrayItems = array(None, _DEX_EncodedArrayItem) + debugInfoItems = array(None, _DEX_DebugInfoItem) + stringDataItems = array(None, _DEX_StringDataItem) + + child_names = 'header'.split() + + block_defs = { + # 0x0000: 'kDexTypeHeaderItem', + 0x0001: 'stringIds', + 0x0002: 'typeIds', + 0x0003: 'protoIds', + 0x0004: 'fieldIds', + 0x0005: 'methodIds', + 0x0006: 'classDefs', + # 0x1000: 'kDexTypeMapList', + 0x1001: 'typeLists', + # 0x1002: 'kDexTypeAnnotationSetRefList', + 0x1003: 'annotationSetItems', + 0x2000: 'classDatas', + 0x2001: 'codeItems', + 0x2002: 'stringDataItems', + 0x2003: 'debugInfoItems', + 0x2004: 'annotationItems', + 0x2005: 'encodedArrayItems', + 0x2006: 'annotationsDirectoryItems' + } + + @staticmethod + def open(fname): + fo = file(fname, 'r') + data = fo.read() + + dex = DEXFile.parse(data) + dex.fname = fname + return dex + + @classmethod + def 
parse(clazz, data): + obj = super(DEXFile, clazz).parse(None, data, 0) + obj.data = data + obj._parse(data) + return obj + + def _parse_maps(self): + data = self.data + header = self.header + off = header.mapOff + self.parse_child('maps', data, off) + pass + + def _parse_block(self, block_map): + if block_map.type not in self.block_defs: + return + + data = self.data + + child_name = self.block_defs[block_map.type] + off = block_map.offset + num = block_map.size + + child_clazz = getattr(self.__class__, child_name) + blk = child_clazz.parse_nitem(self, data, off, num) + setattr(self, child_name, blk) + pass + + def _parse_blocks(self): + data = self.data + maps = self.maps.items.items + for map in maps: + if map.type in self.block_defs: + self._parse_block(map) + pass + pass + pass + + def _parse(self, data): + self._parse_maps() + self._parse_blocks() pass pass -class DEXFile(object): - _data = None - _header = None - _maps = None - _strings = None - _typeIds = None - _protoIds = None - _fieldIds = None - _methodIds = None - _classDefs = None - _classDatas = None - _typeLists = None - _codeItems = None - _annotationSetItems = None - _annotationsDirectoryItems = None - _annotationItems = None - _encodedArrayItems = None - _debugInfoItems = None - - def __init__(self): - pass - - def open(self, filename): - fo = file(filename, 'r') - data = fo.read() - - self.parse(data) - pass - - def _parse_maps(self): - data = self._data - header = self._header - off = header.mapOff - - num = _to_uint(data[off:off + 4]) - off = off + 4 - - maps = [] - for i in range(num): - item_data = data[off:off + _DEX_MapItem.data_size] - item = _DEX_MapItem() - item.parse(item_data) - maps.append(item) - off = off + _DEX_MapItem.data_size - pass - - self._maps = maps - pass - - def find_map_item(self, type_value): - maps = self._maps - try: - codeItem_map = [map for map in maps if map.type == type_value][0] - except IndexError: - return None - - return codeItem_map - - def 
find_map_item_name(self, type_name): - type_value = _DEX_MapItem.find_type_name(type_name) - map = self.find_map_item(type_value) - return map - - def _parse_strings(self): - data = self._data - header = self._header - strings = [] - - num = header.stringIdsSize - off = header.stringIdsOff - for i in range(num): - str_start_off = _to_uint(data[off:off + 4]) - str_stop_off = data.index('\x00', str_start_off) - string = data[str_start_off:str_stop_off] - - sz, sh = _uleb128(string) - string = string[sh:] - strings.append(string) - off = off + 4 - pass - - self._strings = strings - pass - - def _parse_typeIds(self): - data = self._data - header = self._header - - num = header.typeIdsSize - off = header.typeIdsOff - - def parse(item_data): - type_id = _DEX_TypeId() - type_id.parse(item_data) - return type_id - - item_size = _DEX_TypeId.data_size - item_offs = range(off, off + item_size * num, item_size) - item_datas = [data[item_off:item_off + item_size] - for item_off in item_offs] - typeIds = [parse(item_data) for item_data in item_datas] - - self._typeIds = typeIds - pass - - def _parse_protoIds(self): - data = self._data - header = self._header - - num = header.protoIdsSize - off = header.protoIdsOff - - def parse(item_data): - proto_id = _DEX_ProtoId() - proto_id.parse(item_data) - return proto_id - - item_size = _DEX_ProtoId.data_size - item_offs = range(off, off + item_size * num, item_size) - item_datas = [data[item_off:item_off + item_size] - for item_off in item_offs] - protoIds = [parse(item_data) for item_data in item_datas] - - self._protoIds = protoIds - pass - - def _parse_fieldIds(self): - data = self._data - header = self._header - - num = header.fieldIdsSize - off = header.fieldIdsOff - - def parse(item_data): - field_id = _DEX_FieldId() - field_id.parse(item_data) - return field_id - - item_size = _DEX_FieldId.data_size - item_offs = range(off, off + item_size * num, item_size) - item_datas = [data[item_off:item_off + item_size] - for item_off in 
item_offs] - fieldIds = [parse(item_data) for item_data in item_datas] - - self._fieldIds = fieldIds - pass - - def _parse_methodIds(self): - data = self._data - header = self._header - - num = header.methodIdsSize - off = header.methodIdsOff - - def parse(item_data): - method_id = _DEX_MethodId() - method_id.parse(item_data) - return method_id - - item_size = _DEX_MethodId.data_size - item_offs = range(off, off + item_size * num, item_size) - item_datas = [data[item_off:item_off + item_size] - for item_off in item_offs] - methodIds = [parse(item_data) for item_data in item_datas] - - self._methodIds = methodIds - pass - - def _parse_classDefs(self): - data = self._data - header = self._header - - num = header.classDefsSize - off = header.classDefsOff - - def parse(item_data): - class_def = _DEX_ClassDef() - class_def.parse(item_data) - return class_def - - item_size = _DEX_ClassDef.data_size - item_offs = range(off, off + item_size * num, item_size) - item_datas = [data[item_off:item_off + item_size] - for item_off in item_offs] - classDefs = [parse(item_data) for item_data in item_datas] - - self._classDefs = classDefs - pass - - def _parse_classDatas(self): - header = self._header - data = self._data - - class_data_map = self.find_map_item_name('kDexTypeClassDataItem') - - moff = man_off(class_data_map.offset) - - def parse_class_data(): - class_data = _DEX_ClassData() - class_data.parse(data, moff()) - moff(class_data.data_size) - return class_data - class_datas = [parse_class_data() for i in range(class_data_map.size)] - - self._classDatas = class_datas - pass - - def _parse_typeLists(self): - data = self._data - - typeList_map = self.find_map_item_name('kDexTypeTypeList') - num_typeLists = typeList_map.size - - typeLists = _DEX_TypeLists() - typeLists.parse(num_typeLists, data, typeList_map.offset) - - self._typeLists = typeLists - pass - - def _parse_codeItems(self): - data = self._data - - codeItem_map = self.find_map_item_name('kDexTypeCodeItem') - if 
codeItem_map is None: - return - num_codeItems = codeItem_map.size - - moff = man_off(codeItem_map.offset) - - def parse_code(): - code = _DEX_Code() - code.parse(data, moff()) - moff(code.data_size) - return code - - codeItems = [parse_code() for i in range(num_codeItems)] - - self._codeItems = codeItems - pass - - def _parse_annotationSetItems(self): - data = self._data - - annoset_map = self.find_map_item_name('kDexTypeAnnotationSetItem') - if annoset_map is None: - return - - moff = man_off(annoset_map.offset) - - def parse_annotationSetItem(): - item = _DEX_AnnotationSetItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - self._annotationSetItems = [parse_annotationSetItem() - for i in range(annoset_map.size)] - pass - - def _parse_annotationsDirectoryItems(self): - data = self._data - - annodir_map = \ - self.find_map_item_name('kDexTypeAnnotationsDirectoryItem') - if annodir_map is None: - return - - moff = man_off(annodir_map.offset) - - def parse_annotationDirItem(): - item = _DEX_AnnotationsDirectoryItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - self._annotationsDirectoryItems = [parse_annotationDirItem() - for i in range(annodir_map.size)] - pass - - def _parse_annotationItems(self): - data = self._data - - annoitem_map = self.find_map_item_name('kDexTypeAnnotationItem') - if annoitem_map is None: - return - - moff = man_off(annoitem_map.offset) - - def parse_annotationItem(): - item = _DEX_AnnotationItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - self._annotationItems = [parse_annotationItem() - for i in range(annoitem_map.size)] - pass - - def _parse_header(self): - data = self._data - header = _DEX_header() - header.parse(data) - self._header = header - pass - - def _parse_encodedArrayItems(self): - data = self._data - - encodedArrayItem_map = \ - self.find_map_item_name('kDexTypeEncodedArrayItem') - - moff = man_off(encodedArrayItem_map.offset) - - def 
parse_encodedArrayItem(): - item = _DEX_EncodedArrayItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - self._encodedArrayItems = [parse_encodedArrayItem() - for i in range(encodedArrayItem_map.size)] - pass - - def _parse_debugInfoItems(self): - data = self._data - - debugInfoItem_map = \ - self.find_map_item_name('kDexTypeDebugInfoItem') - - moff = man_off(debugInfoItem_map.offset) - - def parse_debugInfoItem(): - item = _DEX_DebugInfoItem() - item.parse(data, moff()) - moff(item.data_size) - return item - - self._debugInfoItems = [parse_debugInfoItem() - for i in range(debugInfoItem_map.size)] - pass - - def parse(self, data): - self._data = data - self._parse_header() - self._parse_maps() - self._parse_strings() - self._parse_typeIds() - self._parse_protoIds() - self._parse_fieldIds() - self._parse_methodIds() - self._parse_classDefs() - self._parse_classDatas() - self._parse_typeLists() - self._parse_codeItems() - self._parse_annotationSetItems() - self._parse_annotationsDirectoryItems() - self._parse_annotationItems() - self._parse_encodedArrayItems() - self._parse_debugInfoItems() - pass - pass - if __name__ == '__main__': - dex = DEXFile() - dex.open('test.dex') + dex = DEXFile.open('../data/testdata1.dex') print 'Header' - h = dex._header - for attr in h.header_fields: + h = dex.header + for attr in h.child_names: print '\t%s: %s' % (attr, repr(getattr(h, attr))) pass print - print 'Define Classes' - strings = dex._strings - classDefs = dex._classDefs - typeIds = dex._typeIds - for classDef in classDefs: - typeId = typeIds[classDef.classIdx] - descriptor = strings[typeId.descriptorIdx] - data_off = classDef.classDataOff - print '\t%s @0x%x' % (descriptor, data_off) - pass + print 'Size of stringIds is %d bytes' % (dex.stringIds.data_size) print - print 'Reference Classes' - for typeId in typeIds: - descriptor = strings[typeId.descriptorIdx] - print '\t%s' % (descriptor) - pass + print 'Size of typeIds is %d bytes' % 
(dex.typeIds.data_size) + + print + print 'Size of protoIds is %d bytes' % (dex.protoIds.data_size) + + print + print 'Size of fieldIds is %d bytes' % (dex.fieldIds.data_size) print - print 'Class data' - methodIds = dex._methodIds - classDatas = dex._classDatas - for classData in classDatas: - print '\tclass' - for method in classData.directMethods: - code_off = method.codeOff - methodId = methodIds[method.methodIdx] - name = strings[methodId.nameIdx] - print '\t\t%s@0x%x' % (name, code_off) - pass - for method in classData.virtualMethods: - code_off = method.codeOff - methodId = methodIds[method.methodIdx] - name = strings[methodId.nameIdx] - print '\t\t%s@0x%x' % (name, code_off) - pass - pass + print 'Size of methodIds is %d bytes' % (dex.methodIds.data_size) + + print + print 'Size of classDefs is %d bytes' % (dex.classDefs.data_size) + + print + print 'Size of classDatas is %d bytes' % (dex.classDatas.data_size) + + print + print 'Size of typeLists is %d bytes' % (dex.typeLists.data_size) + + print + print 'Size of codeItems is %d bytes' % (dex.codeItems.data_size) print - print 'TypeLists size is %d/%d bytes' % (dex._typeLists.data_size, - _compute_sz(dex._typeLists)) + print 'Size of annotationSetItems is %d bytes' % \ + (dex.annotationSetItems.data_size) - bytes = sum([code.data_size for code in dex._codeItems]) - rbytes = sum([_compute_sz(code) for code in dex._codeItems]) print - print 'CodeItems size is %d/%d bytes' % (bytes, rbytes) - - bytes = sum([annoset.data_size for annoset in dex._annotationSetItems]) - rbytes = sum([_compute_sz(annoset) for annoset in dex._annotationSetItems]) + print 'Size of annotationsDirectoryItems is %d bytes' % \ + (dex.annotationsDirectoryItems.data_size) + print - print 'AnnotationSetItems size is %d/%d bytes' % (bytes, rbytes) - - bytes = sum([annodir.data_size - for annodir in dex._annotationsDirectoryItems]) - rbytes = sum([_compute_sz(annodir) - for annodir in dex._annotationsDirectoryItems]) + print 'Size of 
annotationItems is %d bytes' % \ + (dex.annotationItems.data_size) + print - print 'AnnotationsDirtoryItems size is %d/%d bytes' % (bytes, rbytes) - - bytes = sum([annoitem.data_size - for annoitem in dex._annotationItems]) - rbytes = sum([_compute_sz(annoitem) - for annoitem in dex._annotationItems]) + print 'Size of encodedArrayItems is %d bytes' % \ + (dex.encodedArrayItems.data_size) + print - print 'AnnotationItems size is %d/%d bytes' % (bytes, rbytes) - - bytes = sum([encodeditem.data_size - for encodeditem in dex._encodedArrayItems]) - rbytes = sum([_compute_sz(encodeditem) - for encodeditem in dex._encodedArrayItems]) + print 'Size of debugInfoItems is %d bytes' % \ + (dex.debugInfoItems.data_size) + print - print 'EncodedArrayItems size is %d/%d bytes' % (bytes, rbytes) - - bytes = sum([debuginfoitem.data_size - for debuginfoitem in dex._debugInfoItems]) - rbytes = sum([_compute_sz(debuginfoitem) - for debuginfoitem in dex._debugInfoItems]) - print - print 'DebugInfoItems size is %d/%d bytes' % (bytes, rbytes) - + print 'Size of stringDataItems is %d bytes' % \ + (dex.stringDataItems.data_size) + print print 'Data maps' - maps = dex._maps + maps = dex.maps.items.items for map in maps: print '\t0x%04x(%s) size=%d offset=0x%08x' % (map.type, map.types[map.type], map.size, map.offset) - pass pass