# HG changeset patch # User Thinker K.F. Li # Date 1307377277 -28800 # Node ID 670167ed06bba475ab5d7c5ebeba9ac00a507cf0 # Parent a57ec6a76fe386e8ef289398dc9ee72f1931bed5 test dex_deptracker._link_dependencies() diff -r a57ec6a76fe3 -r 670167ed06bb paraspace/dex_deptracker.py --- a/paraspace/dex_deptracker.py Thu Jun 02 21:50:49 2011 +0800 +++ b/paraspace/dex_deptracker.py Tue Jun 07 00:21:17 2011 +0800 @@ -6,12 +6,44 @@ dexfile.switch) -def _dig_clazz(name_path, clazz, dex_types): - deps = {} +def _resolve_name_path(name_path): + obj = dexfile + parent = None + for name in name_path.split('.'): + if isinstance(parent, dexfile.array) and obj == list: + # array.items. + obj = parent.child_type + parent = list + continue + + parent = obj + if isinstance(parent, dexfile.switch): + key = eval(name) + obj = parent.map[key] + continue + + if isinstance(parent, dexfile.array) and name == 'items': + obj = list + continue + + if isinstance(parent, dexfile.cond) and name == 'value': + obj = obj.child_type + continue + + obj = getattr(parent, name) + pass + return obj, parent + + +def _find_dep_decls_from_clazz(name_path, clazz, dex_types): + dep_decls = {} for attr in dir(clazz): namelist = [name_path, attr] + # + # Find dependency + # digged_flag = False value_type = getattr(clazz, attr) @@ -20,6 +52,7 @@ issubclass(value_type, _nest_types)): if isinstance(value_type, dexfile.array): namelist.append('items') + namelist.append('*') value_type = value_type.child_type elif isinstance(value_type, dexfile.cond): namelist.append('value') @@ -29,9 +62,15 @@ if child_type in dex_types.values(): continue child_name_path = '.'.join(namelist) + '.' + repr(key) - child_deps = \ - _dig_clazz(child_name_path, child_type, dex_types) - deps.update(child_deps) + child_dep_decls = \ + _find_dep_decls_from_clazz(child_name_path, + child_type, + dex_types) + + if child_dep_decls: + raise ValueError, \ + 'can not depend on elements of a switch (%s)' \ + % (child_name_path) pass digged_flag = True break @@ -39,28 +78,81 @@ if digged_flag: continue - + + # + # Record dependency + # if isinstance(value_type, dexfile.depend): from_name = '.'.join(namelist) if isinstance(value_type, dexfile.depend_off): depend_name = value_type.depend_on - deps[from_name] = (dexfile.depend_off, depend_name) + dep_decls[from_name] = (dexfile.depend_off, depend_name) elif isinstance(value_type, dexfile.depend_off_rel): depend_name = value_type.depend_on relative_to = value_type.relative_to - deps[from_name] = (dexfile.depend_off_rel, + dep_decls[from_name] = (dexfile.depend_off_rel, depend_name, relative_to) elif isinstance(value_type, dexfile.depend_idx): depend_name = value_type.depend_on - deps[from_name] = (dexfile.depend_idx, depend_name) + dep_decls[from_name] = (dexfile.depend_idx, depend_name) pass pass pass - return deps + return dep_decls + + +def _dex_tree_get_child(obj, child_name): + if isinstance(obj, list): + idx = int(child_name) + return obj[idx] + + if isinstance(obj, dexfile.switch): + assert obj.map[eval(child_name)] == obj.child_type + return obj.value + + return getattr(obj, child_name) -def all_dex_types(): +def _dex_tree_set_child(obj, child_name, value): + if isinstance(obj, list): + idx = int(child_name) + obj[idx] = value + elif isinstance(obj, dexfile.switch): + assert obj.map[eval(child_name)] == obj.child_type + obj.value = value + else: + setattr(obj, child_name, value) + pass + pass + + +def _travel_dex_relocatable(root_obj, parents=[]): + stk = [(root_obj, parents, root_obj.__class__.__name__)] + + while stk: + obj, parents, obj_name = stk.pop(0) + yield (obj, parents, obj_name) + + if isinstance(obj, list): + children = [(value, parents + [obj], obj_name + '.' + repr(idx)) + for idx, value in enumerate(obj)] + stk.extend(children) + continue + + if not isinstance(obj, dexfile.relocatable): + continue + + child_parents = parents + [obj] + children = [(_dex_tree_get_child(obj, child_name), + child_parents, obj_name + '.' + child_name) + for child_name in obj.children()] + stk.extend(children) + pass + pass + + +def _all_dex_types(): dex_types = dict([(name, value) for name, value in dexfile.__dict__.items() if name.startswith('_DEX_')]) @@ -68,30 +160,264 @@ return dex_types -def collect_dependencies(): - dex_types = all_dex_types() +def collect_all_dep_decls(): + dex_types = _all_dex_types() - all_deps = {} + all_dep_decls = {} for name_path, clazz in dex_types.items(): - deps = _dig_clazz(name_path, clazz, dex_types) - all_deps.update(deps) + dep_decls = _find_dep_decls_from_clazz(name_path, clazz, dex_types) + all_dep_decls.update(dep_decls) + pass + + return all_dep_decls + + +class _uid_marker(dexfile.relocatable): + uid_seq = 0 + back_type = None + + def __init__(self, back_type, name_path): + self.back_type = back_type + self.name_path = name_path pass - return all_deps + def parse(self, parent, data, off): + value = self.back_type.parse(parent, data, off) + try: + value.data_uid = _uid_marker.uid_seq + except AttributeError: + raise AttributeError, \ + 'can not depend on non-instance (%s)' % (self.name_path) + _uid_marker.uid_seq = _uid_marker.uid_seq + 1 + + return value + + def sizeof(self, value): + sz = self.back_type.sizeof(value) + return sz + + def compute_size(self, back_obj): + self.back_type.compute_size(back_obj) + pass + + def to_str(self, back_obj): + return self.back_type.to_str(back_obj) + + def link_prepare(self, obj, name_path, parents, markers_info): + try: + id_item_map = markers_info[name_path] + except KeyError: + id_item_map = {} + markers_info[name_path] = id_item_map + pass + assert obj.data_uid not in id_item_map + id_item_map[obj.data_uid] = obj + pass + + def __getattr__(self, name): + return getattr(self.back_type, name) + + def __call__(self, *args, **kws): + return self.back_type(*args, **kws) + pass -def _install_uid_marker(all_deps): - for from_path, dep in all_deps.items(): - dep_type = dep[0] - if issubclass(dep_type, dexfile.depend_off): - # install offset marker +class _offset_marker(_uid_marker): + def parse(self, parent, data, off): + super(_offset_marker, self).parse(parent, data, off) + + value = self.back_type.parse(parent, data, off) + value.data_offset = off + return value + + def link_prepare(self, obj, name_path, parents, markers_info): + try: + id_item_map = markers_info[name_path] + except KeyError: + id_item_map = {} + markers_info[name_path] = id_item_map pass - # install uid marker + assert obj.data_offset not in id_item_map + id_item_map[obj.data_offset] = obj pass pass -def _link_dependencies(): +class _rel_offset_marker(_offset_marker): + def link_prepare(self, obj, name_path, parents, markers_info): + rev_parents = list(parents) + rev_parents.reverse() + + for parent in rev_parents: + rel_marker_info = getattr(parent, 'rel_marker_info', {}) + items = getattr(parent, name_path, []) + items.append(obj) + pass + pass + + @staticmethod + def find_depon(name_path, parents): + rev_parents = list(parents) + rev_parents.reverse() + + for parent in rev_parents: + try: + rel_marker_info = parent.rel_marker_info + except: + continue + if name_path in rel_marker_info: + depons = rel_marker_info[name_path] + assert len(depons) == 1 + depon = depons[0] + return depon + pass + + raise RuntimeError, 'can not find relative offset depend' + pass + + +class _idx_marker(_uid_marker): + def parse(self, parent, data, off): + assert isinstance(self.back_type, dexfile.array) + array = self.back_type.parse(parent, data, off) + for item in array.items: + item.data_uid = _uid_marker.uid_seq + _uid_marker.uid_seq = _uid_marker.uid_seq + 1 + pass + return array + + def link_prepare(self, obj, name_path, parents, markers_info): + pass + pass + + +def _install_offset_marker(name_path): + obj, parent = _resolve_name_path(name_path) + marker = _offset_marker(obj, name_path) + name = name_path.split('.')[-1] + _dex_tree_set_child(parent, name, marker) + pass + + +def _install_rel_offset_marker(name_path): + obj, parent = _resolve_name_path(name_path) + marker = _rel_offset_marker(obj, name_path) + name = name_path.split('.')[-1] + _dex_tree_set_child(parent, name, marker) + pass + + +def _install_uid_marker(name_path): + obj, parent = _resolve_name_path(name_path) + marker = _uid_marker(obj, name_path) + name = name_path.split('.')[-1] + _dex_tree_set_child(parent, name, marker) + pass + + +def _install_idx_marker(name_path): + obj, parent = _resolve_name_path(name_path) + marker = _idx_marker(obj, name_path) + name = name_path.split('.')[-1] + _dex_tree_set_child(parent, name, marker) + pass + + +def _install_markers(all_dep_decls): + all_markers = set() + + for from_path, dep in all_dep_decls.items(): + dep_type = dep[0] + if issubclass(dep_type, dexfile.depend_off_rel): + name_path1 = dep[1] + if name_path1 not in all_markers: + all_markers.add(name_path1) + _install_rel_offset_marker(name_path1) + pass + + name_path2 = dep[2] + if name_path2 not in all_markers: + all_markers.add(name_path2) + _install_rel_offset_marker(name_path2) + pass + pass + elif dep_type == dexfile.depend_off: + name_path = dep[1] + if name_path not in all_markers: + all_markers.add(name_path) + _install_offset_marker(name_path) + pass + pass + elif dep_type == dexfile.depend_idx: + name_path = dep[1] + if name_path not in all_markers: + all_markers.add(name_path) + _install_idx_marker(name_path) + pass + pass + else: + raise TypeError, 'Invalid type of depend %s' % (repr(dep_type)) + pass + pass + + +def _link_dependencies(root_obj, all_dep_decls): + markers_info = {} + depon_src_map = {} + for dep_src, depon in all_dep_decls.items(): + for tgt in depon[1:]: + markers_info[tgt] = {} + depon_src_map[depon] = dep_src + pass + pass + + # + # Collect marked objects + # + for obj, parents, name_path in \ + _travel_dex_relocatable(root_obj): + if name_path not in markers_info: + continue + + marker, dummy_parent = _resolve_name_path(name_path) + marker.link_prepare(obj, name_path, parents, markers_info) + pass + + # + # Link depend source to marked target + # + for obj, parents, name_path in \ + _travel_dex_relocatable(root_obj): + if name_path not in all_dep_decls: + continue + + rev_parents = list(parents) + rev_parents.reverse() + + dep = all_dep_decls[name_path] + dep_type = dep[0] + if isinstance(dep_type, dexfile.depend_off_rel): + depon1 = _rel_offset_marker.find_depon(dep[1]) + depon2 = _rel_offset_marker.find_depon(dep[2]) + + parent = parents[-1] + name = name_path.split('.')[-1] + _dex_tree_set_child(parent, name, (depon1, depon2)) + elif isinstance(dep_type, dexfile.depend_off): + depon_name_path = dep[1] + depon = markers_info[depon_name_path] + parent = parents[-1] + name = name_path.split('.')[-1] + _dex_tree_set_child(parent, name, depon) + elif isinstance(dep_type, dexfile.depend_idx): + depon_name_path = dep[1] + depon = markers_info[depon_name_path] + parent = parents[-1] + name = name_path.split('.')[-1] + _dex_tree_set_child(parent, name, depon) + else: + raise TypeError, 'invalid depend type %s' % (repr(dep_type)) + pass pass @@ -100,9 +426,9 @@ if __name__ == '__main__': - dex = dexfile.DEXFile.open('../data/testdata1.dex') + dex = dexfile.DEXFile.open('data/testdata1.dex') import pprint print print 'Dependencies' - pprint.pprint(collect_dependencies()) + pprint.pprint(collect_all_dep_decls()) diff -r a57ec6a76fe3 -r 670167ed06bb paraspace/dexfile.py --- a/paraspace/dexfile.py Thu Jun 02 21:50:49 2011 +0800 +++ b/paraspace/dexfile.py Tue Jun 07 00:21:17 2011 +0800 @@ -341,6 +341,10 @@ def compute_size(self): pass + + def children(self): + raise NotImplementedError, \ + '%s: does not implement children' % (self.__class__.__name__) pass @@ -390,6 +394,9 @@ to_str = self.child_type.to_str strs = [to_str(item) for item in self.items] return ''.join(strs) + + def children(self): + return ('items',) pass @@ -441,6 +448,9 @@ child_clazz.to_str(child), child_clazzs, children) return ''.join(child_strs) + + def children(self): + return self.child_names pass @@ -453,7 +463,7 @@ self.condition = cond self.child_type = child_type pass - + def parse(self, parent, data, off): if self.condition(parent, data, off): value = self.child_type.parse(parent, data, off) @@ -485,6 +495,9 @@ data = self.child_type.to_str(self.value) return data + + def children(self): + return ('value',) pass @@ -494,17 +507,23 @@ child_type = None value = None + _parent = None + def __init__(self, selector, map): self.selector = selector self.map = map pass - def _get_child_type(self, parent): + def switch_key(self, parent): selector = self.selector sel_value = parent for name in selector.split('.'): sel_value = getattr(sel_value, name) pass + return sel_value + + def _get_child_type(self, parent): + sel_value = self.switch_key(parent) child_type = self.map[sel_value] return child_type @@ -517,6 +536,7 @@ obj.value = value obj.child_type = child_type obj.data_size = self.sizeof(obj) + obj._parent = parent return obj @staticmethod @@ -534,10 +554,14 @@ def to_str(self): data = self.child_type.to_str(self.value) return data + + def children(self): + key = self.switch_key(self._parent) + return (repr(key),) pass -class abs_value(object): +class abs_value(relocatable): value = None def __init__(self, value): @@ -553,6 +577,9 @@ def to_str(self): return '' + + def children(self): + return ('value',) pass @@ -584,10 +611,32 @@ def to_str(self, child): return self.child_type.to_str(child) + + def link(self, child, name_path, parents, markers_info): + raise NotImplementedError, 'does not support link() method' + pass + + +def _set_name_path_name(parent, name, obj): + if isinstance(parent, (list, dict)): + key = eval(name) + parent[key] = obj + return + setattr(parent, name, obj) pass class depend_off(depend): + def link(self, child, name_path, parents, markers_info): + parent = parents[-1] + name = name_path.split('.')[-1] + + dep_on_name_path = self.depend_on + id_item_map = markers_info[dep_on_name_path] + dep_on = id_item_map[child.data_offset] + + _set_name_path_name(parent, name, dep_on) + pass pass @@ -596,6 +645,7 @@ def __init__(self, relative_to, depend_on): super(depend_off_rel, self).__init__(depend_on) + self.relative_to = relative_to pass pass @@ -1110,7 +1160,7 @@ opcodes.append((opcode,)) pass pass - self.opcodes = opcodes + self.opcodes = tuple(opcodes) self.data_size = moff() - off @@ -1205,6 +1255,9 @@ pass return ''.join(opcodebins) + + def children(self): + return ('opcodes',) pass @@ -1218,10 +1271,16 @@ pass -class _DEX_StringDataItem(relocatable): - size = None - data = None +class _DEX_StringDataItem(composite): + size = uleb128 + data = rawstr_size_name('size') + padding = rawstr(1) + + child_names = 'size data padding'.split() + pass + +class dummy(object): data_size = None @staticmethod @@ -1347,11 +1406,17 @@ self._parse_maps() self._parse_blocks() pass + + def children(self): + return 'header stringIds typeIds protoIds fieldIds methodIds ' \ + 'classDefs typeLists annotationSetItems classDatas codeItems ' \ + 'stringDataItems debugInfoItems annotationItems ' \ + 'encodedArrayItems annotationsDirectoryItems'.split() pass if __name__ == '__main__': - dex = DEXFile.open('../data/testdata1.dex') + dex = DEXFile.open('data/testdata1.dex') print 'Header' h = dex.header diff -r a57ec6a76fe3 -r 670167ed06bb paraspace/tests/dexfile_test.py --- a/paraspace/tests/dexfile_test.py Thu Jun 02 21:50:49 2011 +0800 +++ b/paraspace/tests/dexfile_test.py Tue Jun 07 00:21:17 2011 +0800 @@ -25,9 +25,9 @@ def dependencies_test(): - import dex_deptracker + from paraspace import dex_deptracker - deps = dex_deptracker.collect_dependencies() + deps = dex_deptracker.collect_all_dep_decls() assert deps['_DEX_AnnotationItem.typeIdx'][0] == dexfile.depend_idx assert deps['_DEX_AnnotationItem.typeIdx'][1] == 'DEXFile.typeIds' assert deps['_DEX_FieldId.typeIdx'][0] == dexfile.depend_idx @@ -36,5 +36,131 @@ assert deps['_DEX_ClassDef.staticValuesOff'][1] == '_DEX_EncodedArrayItem' assert deps['_DEX_Try.handlerOff'][0] == dexfile.depend_off_rel assert deps['_DEX_Try.handlerOff'][1] == '_DEX_Catch' - assert deps['_DEX_Try.handlerOff'][2] == None + assert deps['_DEX_Try.handlerOff'][2] == '_DEX_Code.handlers_size' + pass + + +def resolve_name_path_test(): + from paraspace.dex_deptracker import _resolve_name_path + + obj, parent = _resolve_name_path('_DEX_ClassData.staticFields.items') + assert obj == list + obj, parent = _resolve_name_path('_DEX_ClassData.staticFields.items.*') + assert obj == dexfile._DEX_Field + obj, parent = _resolve_name_path('_DEX_ClassData.staticFields.items.1') + assert obj == dexfile._DEX_Field + + obj, parent = _resolve_name_path('_DEX_Catch.catchAllHandler.value') + assert obj == dexfile._DEX_CatchAllHandler + + key = dexfile._DEX_AnnotationMember_noname.kDexAnnotationNull + obj, parent = _resolve_name_path('_DEX_AnnotationMember_noname.value.' + + repr(key)) + assert isinstance(obj, dexfile.abs_value) + pass + + +def find_dep_decls_from_clazz_test(): + from paraspace.dex_deptracker import _find_dep_decls_from_clazz + + dex_types = dict([(dex_type_name, getattr(dexfile, dex_type_name)) + for dex_type_name in dir(dexfile) + if dex_type_name.startswith('_DEX_')]) + deps = _find_dep_decls_from_clazz('_DEX_ProtoId', dexfile._DEX_ProtoId, + dex_types) + assert len(deps) == 3 + assert '_DEX_ProtoId.shortyIdx' in deps + assert deps['_DEX_ProtoId.shortyIdx'][0] == dexfile.depend_idx + assert deps['_DEX_ProtoId.shortyIdx'][1] == 'DEXFile.stringIds' + + assert deps['_DEX_ProtoId.parametersOff'][1] == '_DEX_TypeList' + pass + + +def find_dep_decls_from_clazz__array_test(): + from paraspace.dex_deptracker import _find_dep_decls_from_clazz + + dex_types = dict([(dex_type_name, getattr(dexfile, dex_type_name)) + for dex_type_name in dir(dexfile) + if dex_type_name.startswith('_DEX_')]) + deps = _find_dep_decls_from_clazz('_DEX_AnnotationSetItem', + dexfile._DEX_AnnotationSetItem, + dex_types) + assert len(deps) == 1 + name_path = '_DEX_AnnotationSetItem.annotationOffs.items.*' + assert name_path in deps + assert deps[name_path][0] == dexfile.depend_off + assert deps[name_path][1] == '_DEX_AnnotationItem' pass + + +def travel_dex_relocatable__array_test(): + from paraspace.dex_deptracker import _travel_dex_relocatable + + srcdir = os.path.dirname(__file__) + srcroot = os.path.join(srcdir, '..', '..') + testdatapath = os.path.join(srcroot, 'data', 'testdata1.dex') + dex = dexfile.DEXFile.open(testdatapath) + dexroot = dex.typeLists.items[0] + + itr = _travel_dex_relocatable(dexroot) + pathes = [v[2] for v in itr] + assert len(pathes) == 6 + assert '_DEX_TypeList' in pathes + assert '_DEX_TypeList.padding' in pathes + assert '_DEX_TypeList.num' in pathes + assert '_DEX_TypeList.typeItems' in pathes + assert '_DEX_TypeList.typeItems.items' in pathes + assert '_DEX_TypeList.typeItems.items.0' in pathes + pass + + +def _install_dexfile_4_deptracker(): + global dexfile + import imp + from paraspace import dex_deptracker + + try: + new_dexfile = imp.load_compiled('dexfile', dexfile.__file__) + except ImportError: + new_dexfile = imp.load_source('dexfile', dexfile.__file__) + pass + dex_deptracker.dexfile = new_dexfile + dexfile = new_dexfile + pass + + +def install_markers_test(): + from paraspace.dex_deptracker import collect_all_dep_decls + from paraspace.dex_deptracker import _install_markers, _idx_marker + from paraspace.dex_deptracker import _offset_marker, _rel_offset_marker + + _install_dexfile_4_deptracker() + + all_dep_decls = collect_all_dep_decls() + _install_markers(all_dep_decls) + assert isinstance(dexfile.DEXFile.typeIds, _idx_marker) + assert isinstance(dexfile._DEX_Code.back_type.handlers_size, + _rel_offset_marker) + assert dexfile._DEX_TypeList.__class__ == _offset_marker + pass + +def link_dependencies_test(): + from paraspace.dex_deptracker import collect_all_dep_decls + from paraspace.dex_deptracker import _link_dependencies + from paraspace.dex_deptracker import _install_markers, _idx_marker + from paraspace.dex_deptracker import _offset_marker, _rel_offset_marker + + _install_dexfile_4_deptracker() + + all_dep_decls = collect_all_dep_decls() + _install_markers(all_dep_decls) + assert isinstance(dexfile.DEXFile.typeIds, _idx_marker) + + srcdir = os.path.dirname(__file__) + srcroot = os.path.join(srcdir, '..', '..') + testdatapath = os.path.join(srcroot, 'data', 'testdata1.dex') + dex = dexfile.DEXFile.open(testdatapath) + + _link_dependencies(dex, all_dep_decls) + pass