Mercurial > paraspace
view paraspace/dex_deptracker.py @ 61:3e34c7d33182
Fix and test restore_depedencies()
- _travel_dex_relocatable() is fixed to travel a linked tree.
- aware that depend attributes are replaced by depend-ons.
- fix restore_depedencies() for changes of depend_off_rel()
- It was a pair of depon, relative
- It is only depon.
author | Thinker K.F. Li <thinker@codemud.net> |
---|---|
date | Tue, 28 Jun 2011 10:25:54 +0800 |
parents | c2a4921ec83b |
children | bbe8d5cbe368 |
line wrap: on
line source
from paraspace import dexfile _nest_types = (dexfile.array, dexfile.cond, dexfile.switch) def _skip_marker_clazz(marker): while isinstance(marker, _marker): marker = marker.back_type pass return marker def _resolve_name_path(name_path): obj = dexfile parent = None for name in name_path.split('.'): obj = _skip_marker_clazz(obj) if isinstance(parent, dexfile.array) and obj == list: # array.items.<num> obj = parent.child_type parent = list continue parent = obj if isinstance(parent, dexfile.switch): key = eval(name) obj = parent.map[key] continue if isinstance(parent, dexfile.array) and name == 'items': obj = list continue if isinstance(parent, dexfile.cond) and name == 'value': obj = obj.child_type continue try: obj = getattr(parent, name) except: print obj, parent, name_path raise pass return obj, parent def _travel_dex_type(clazz, name_path): def travelable(attr, attr_name): if attr_name.startswith('_'): return False if callable(attr) and ((not isinstance(attr, dexfile._dex_type)) and (not (type(attr) == type and issubclass(attr, dexfile._dex_type)))): return False return True clazz = _skip_marker_clazz(clazz) travel_queue = [(getattr(clazz, attr_name), name_path + '.' + attr_name) for attr_name in dir(clazz) if travelable(getattr(clazz, attr_name), attr_name)] while travel_queue: attr, name_path = travel_queue.pop(0) if isinstance(attr, _marker): # # transparent. Enqueue back_type with the same name again. # child = attr.back_type travel_queue.append((child, name_path)) continue yield attr, name_path if isinstance(attr, _nest_types): if isinstance(attr, dexfile.array): child_name = name_path + '.items.*' child = attr.child_type travel_queue.append((child, child_name)) elif isinstance(attr, dexfile.cond): child_name = name_path + '.value' child = attr.child_type travel_queue.append((child, child_name)) elif isinstance(attr, dexfile.switch): for key in attr.map.keys(): child_name = name_path + '.' + repr(key) child = attr.map[key] travel_queue.append((child, child_name)) pass pass pass pass pass def _find_dep_decls_from_clazz(name_path, clazz, dex_types): # XXX: implements the loop with _travel_dex_type() dep_decls = {} for attr in dir(clazz): namelist = [name_path, attr] # # Find dependency # digged_flag = False value_type = getattr(clazz, attr) while isinstance(value_type, _nest_types) or \ (type(value_type) == type and issubclass(value_type, _nest_types)): if isinstance(value_type, dexfile.array): namelist.append('items') namelist.append('*') value_type = value_type.child_type elif isinstance(value_type, dexfile.cond): namelist.append('value') value_type = value_type.child_type elif isinstance(value_type, dexfile.switch): for key, child_type in value_type.map.items(): if child_type in dex_types.values(): continue child_name_path = '.'.join(namelist) + '.' + repr(key) child_dep_decls = \ _find_dep_decls_from_clazz(child_name_path, child_type, dex_types) if child_dep_decls: raise ValueError, \ 'can not depend on elements of a switch (%s)' \ % (child_name_path) pass digged_flag = True break pass if digged_flag: continue # # Record dependency # if isinstance(value_type, dexfile.depend): from_name = '.'.join(namelist) if isinstance(value_type, dexfile.depend_off): depend_name = value_type.depend_on dep_decls[from_name] = (dexfile.depend_off, depend_name) elif isinstance(value_type, dexfile.depend_off_rel): depend_name = value_type.depend_on relative_to = value_type.relative_to dep_decls[from_name] = (dexfile.depend_off_rel, depend_name, relative_to) elif isinstance(value_type, dexfile.depend_idx): depend_name = value_type.depend_on dep_decls[from_name] = (dexfile.depend_idx, depend_name) pass pass pass return dep_decls def _dex_tree_get_child(obj, child_name): child_parts = child_name.split('.') last_child_part = '' for child_part in child_parts: if isinstance(obj, dexfile.cond) and not obj.is_true: raise AttributeError, \ 'dexfile.cond is not true for %s of %s' % \ (last_child_part, child_name) last_child_part = child_part if isinstance(obj, list): idx = int(child_part) obj = obj[idx] continue if isinstance(obj, dexfile.switch): assert obj.map[eval(child_part)] == obj.child_type obj = obj.value continue obj = getattr(obj, child_part) pass return obj def _dex_tree_set_child(obj, child_name, value): child_parts = child_name.split('.') for child_part in child_parts[:-1]: if isinstance(obj, list): idx = int(child_part) obj = obj[idx] continue if isinstance(obj, dexfile.switch): assert obj.map[eval(child_part)] == obj.child_type obj = obj.value continue obj = getattr(obj, child_part) pass last_part = child_parts[-1] if isinstance(obj, list): idx = int(last_part) obj[idx] = value elif isinstance(obj, dexfile.switch): assert obj.map[eval(last_part)] == obj.child_type obj.value = value else: setattr(obj, last_part, value) pass pass ## \brief Travel the tree of relocatable objects and their attributes. # # \param skip_func is a function that returns true for skip a subtree. # # \ref skip_func is called for every relocatable and their attributes # to determines if visiting the object and subtree. # def _travel_dex_relocatable(root_obj, parents=[], skip_func=None): stk = [(root_obj, parents, root_obj.__class__.__name__)] def make_travel_info(obj, obj_name, child_name, parents): child_parents = parents + [obj] child_obj = _dex_tree_get_child(obj, child_name) child_path = obj_name + '.' + child_name child_clazz, dummy = _resolve_name_path(child_path) if not isinstance(child_clazz, dexfile.depend): if isinstance(child_obj, dexfile.composite): child_path = child_obj.__class__.__name__ pass pass return (child_obj, child_parents, child_path) while stk: obj, parents, obj_name = stk.pop(0) if skip_func and skip_func(obj, parents, obj_name): continue yield (obj, parents, obj_name) if isinstance(obj, list): children = [make_travel_info(obj, obj_name, repr(idx), parents) for idx in range(len(obj))] stk.extend(children) continue obj_clazz, dummy = _resolve_name_path(obj_name) if isinstance(obj_clazz, dexfile.depend): continue if not isinstance(obj, dexfile.relocatable): continue children = [make_travel_info(obj, obj_name, child_name, parents) for child_name in obj.children()] stk.extend(children) pass pass def _all_dex_types(): dex_types = dict([(name, value) for name, value in dexfile.__dict__.items() if name.startswith('_DEX_')]) dex_types['DEXFile'] = dexfile.DEXFile return dex_types def _all_dex_type_to_names(): def check_marker(value, name): value = _skip_marker_clazz(value) return value, name dex_types = dict([check_marker(value, name) for name, value in dexfile.__dict__.items() if name.startswith('_DEX_')]) dex_types[dexfile.DEXFile] = 'DEXFile' return dex_types def collect_all_dep_decls(): dex_types = _all_dex_types() all_dep_decls = {} for name_path, clazz in dex_types.items(): dep_decls = _find_dep_decls_from_clazz(name_path, clazz, dex_types) all_dep_decls.update(dep_decls) pass return all_dep_decls class _marker(dexfile.null_relocatable): back_type = None def set_marker(self, obj, off): raise NotImplementedError, \ 'The marker does not implement set_marker()' pass class _uid_marker(_marker): uid_seq = 0 def __init__(self, back_type, name_path): self.back_type = back_type self.name_path = name_path pass def parse(self, parent, data, off): value = self.back_type.parse(parent, data, off) try: value.data_uid = _uid_marker.uid_seq except AttributeError: raise AttributeError, \ 'can not depend on non-instance (%s/%s)' % \ (self.name_path, repr(value)) _uid_marker.uid_seq = _uid_marker.uid_seq + 1 return value def sizeof(self, value): sz = self.back_type.sizeof(value) return sz def compute_size(self, back_obj): self.back_type.compute_size(back_obj) pass def to_str(self, back_obj): return self.back_type.to_str(back_obj) def link_prepare(self, obj, name_path, parents, markers_info): try: id_item_map = markers_info[name_path] except KeyError: id_item_map = {} markers_info[name_path] = id_item_map pass assert obj.data_uid not in id_item_map id_item_map[obj.data_uid] = obj pass def __getattr__(self, name): return getattr(self.back_type, name) def __call__(self, *args, **kws): return self.back_type(*args, **kws) def set_marker(self, obj, off): pass pass class _offset_marker(_uid_marker): def parse(self, parent, data, off): super(_offset_marker, self).parse(parent, data, off) value = self.back_type.parse(parent, data, off) value.data_offset = off return value def link_prepare(self, obj, name_path, parents, markers_info): try: id_item_map = markers_info[name_path] except KeyError: id_item_map = {} markers_info[name_path] = id_item_map pass assert obj.data_offset not in id_item_map id_item_map[obj.data_offset] = obj pass def set_marker(self, obj, off): obj.data_offset = off pass pass class _rel_offset_marker(_offset_marker): def link_prepare(self, obj, name_path, parents, markers_info): rev_parents = list(parents) rev_parents.reverse() for parent in rev_parents: try: rel_marker_info = parent.rel_marker_info except AttributeError: try: rel_marker_info = {} parent.rel_marker_info = rel_marker_info except AttributeError: continue pass depons = rel_marker_info.setdefault(name_path, []) depons.append(obj) pass pass @staticmethod def find_depon(name_path, parents): name_parts = name_path.split('.') dex_types = _all_dex_types() comp_type_name = name_parts[0] comp_type = dex_types[comp_type_name] rev_parents = list(parents) rev_parents.reverse() for parent in rev_parents: if isinstance(parent, comp_type): attr_name = '.'.join(name_parts[1:]) depon = _dex_tree_get_child(parent, attr_name) return depon try: rel_marker_info = parent.rel_marker_info except: continue if name_path in rel_marker_info: depons = rel_marker_info[name_path] assert len(depons) == 1 depon = depons[0] return depon pass raise RuntimeError, 'can not find relative offset depend' def set_marker(self, obj, off): obj.data_offset = off pass pass class _idx_marker(_uid_marker): def parse(self, parent, data, off): assert isinstance(self.back_type, dexfile.array) array = self.back_type.parse(parent, data, off) for item in array.items: item.data_uid = _uid_marker.uid_seq _uid_marker.uid_seq = _uid_marker.uid_seq + 1 pass return array def link_prepare(self, obj, name_path, parents, markers_info): try: id_item_map = markers_info[name_path] except KeyError: id_item_map = [] markers_info[name_path] = id_item_map pass for idx, item in enumerate(obj.items): id_item_map[idx] = item item.data_idx = idx pass pass def set_marker(self, obj, off): pass pass def _install_offset_marker(name_path): obj, parent = _resolve_name_path(name_path) while isinstance(obj, dexfile.ref): name_path = obj.target_path obj, parent = _resolve_name_path(name_path) pass marker = _offset_marker(obj, name_path) name = name_path.split('.')[-1] _dex_tree_set_child(parent, name, marker) pass def _install_rel_offset_marker(name_path): obj, parent = _resolve_name_path(name_path) while isinstance(obj, dexfile.ref): name_path = obj.target_path obj, parent = _resolve_name_path(name_path) pass marker = _rel_offset_marker(obj, name_path) name = name_path.split('.')[-1] _dex_tree_set_child(parent, name, marker) pass def _install_uid_marker(name_path): obj, parent = _resolve_name_path(name_path) while isinstance(obj, dexfile.ref): name_path = obj.target_path obj, parent = _resolve_name_path(name_path) pass marker = _uid_marker(obj, name_path) name = name_path.split('.')[-1] _dex_tree_set_child(parent, name, marker) pass def _install_idx_marker(name_path): obj, parent = _resolve_name_path(name_path) while isinstance(obj, dexfile.ref): name_path = obj.target_path obj, parent = _resolve_name_path(name_path) pass marker = _idx_marker(obj, name_path) name = name_path.split('.')[-1] _dex_tree_set_child(parent, name, marker) pass def _install_markers(all_dep_decls): all_markers = set() for from_path, dep in all_dep_decls.items(): dep_type = dep[0] if issubclass(dep_type, dexfile.depend_off_rel): name_path1 = dep[1] if name_path1 not in all_markers: all_markers.add(name_path1) _install_offset_marker(name_path1) pass name_path2 = dep[2] if name_path2 not in all_markers: all_markers.add(name_path2) _install_rel_offset_marker(name_path2) pass pass elif dep_type == dexfile.depend_off: name_path = dep[1] if name_path not in all_markers: all_markers.add(name_path) _install_offset_marker(name_path) pass pass elif dep_type == dexfile.depend_idx: name_path = dep[1] if name_path not in all_markers: all_markers.add(name_path) _install_idx_marker(name_path) pass pass else: raise TypeError, 'Invalid type of depend %s' % (repr(dep_type)) pass pass def _patch_dex_type_markers(all_dep_decls): import itertools marked_types = dict([(marker.back_type, name) for name, marker in _all_dex_types().items() if isinstance(marker, _marker)]) travel_iters = [_travel_dex_type(dex_type, name_path) for name_path, dex_type in _all_dex_types().items()] marked_type_refs = [(name_path, marked_types[attr]) for attr, name_path in itertools.chain(*travel_iters) if type(attr) == type and issubclass(attr, dexfile.composite) and attr in marked_types] def patch_ref(name_path, depon_path): depon, depon_parent = _resolve_name_path(depon_path) path_elms = name_path.split('.') parent_path = '.'.join(path_elms[:-1]) parent, grand = _resolve_name_path(parent_path) if isinstance(grand, _nest_types): if isinstance(grand, (dexfile.array, dexfile.cond)): grand.child_type = depon elif isinstance(grand, dexfile.switch): key = eval(path_elms[-1]) grand.map[key] = depon else: raise RuntimeError, 'should not be here' pass else: name = path_elms[-1] setattr(parent, name, depon) pass pass for name_path, depon_path in marked_type_refs: patch_ref(name_path, depon_path) pass pass def _build_associations(root_obj): def get_elts(parent, path): parts = path.split('.') obj = parent for part in parts: obj = _dex_tree_get_child(obj, part) if isinstance(obj, dexfile.cond) and not obj.is_true: return () pass return obj for obj, parents, name_path in \ _travel_dex_relocatable(root_obj): if isinstance(obj, dexfile._objs_asso): rev_parents = list(parents) rev_parents.reverse() for parent in rev_parents: if isinstance(parent, dexfile.composite): break pass left_elts = get_elts(parent, obj.left) right_elts = get_elts(parent, obj.right) obj.build_associations(left_elts, right_elts) pass pass pass ## \brief Find a parent that is an instance of given clazz from a list. # def _find_parent_of_clazz(clazz, parents): rev_parents = list(parents) rev_parents.reverse() for parent in rev_parents: if isinstance(parent, clazz): return parent pass raise TypeError, 'can not find a prent of %s type' % (repr(clazz)) def _split_name_path_clazz_attr(name_path): idx = name_path.index('.') if idx >= 0: clazz = name_path[:idx] attr = name_path[idx + 1:] else: clazz = name_path attr = None pass return clazz, attr ## \brief Setup value of refs. # def _build_refs(root_obj): for obj, parents, name_path in \ _travel_dex_relocatable(root_obj): if not parents: continue imm_parent = parents[-1] if isinstance(imm_parent, dexfile.cond) and not imm_parent.is_true: continue clazz, pclazz = _resolve_name_path(name_path) while isinstance(clazz, dexfile.depend): clazz = clazz.back_type pass if isinstance(clazz, dexfile.ref): pclazz_name, attr_name = _split_name_path_clazz_attr(name_path) if not attr_name: raise ValueError, \ 'not attribute name in name path (%s)' % (name_path) parent = _find_parent_of_clazz(dexfile.composite, parents) value = clazz.get_value(parents) _dex_tree_set_child(parent, attr_name, value) pass pass pass def _link_dependencies(root_obj, all_dep_decls): markers_info = {} depon_src_map = {} for dep_src, depon in all_dep_decls.items(): for tgt in depon[1:]: markers_info[tgt] = {} depon_src_map[depon] = dep_src pass pass # # Collect marked objects # for obj, parents, name_path in \ _travel_dex_relocatable(root_obj): if name_path not in markers_info: continue marker, dummy_parent = _resolve_name_path(name_path) while isinstance(marker, dexfile.ref): marker, dummy = _resolve_name_path(marker.target_path) pass marker.link_prepare(obj, name_path, parents, markers_info) pass # # Link depend source to marked target # for obj, parents, name_path in \ _travel_dex_relocatable(root_obj): if name_path not in all_dep_decls: continue imm_parent = parents[-1] if isinstance(imm_parent, dexfile.cond) and not imm_parent.is_true: continue dep = all_dep_decls[name_path] dep_type = dep[0] if dep_type == dexfile.depend_off_rel: depon2 = _rel_offset_marker.find_depon(dep[2], parents) offset = depon2.data_offset + obj depon1 = markers_info[dep[1]][offset] name = name_path.split('.')[-1] _dex_tree_set_child(imm_parent, name, depon1) elif dep_type == dexfile.depend_off: depon_name_path = dep[1] depon = markers_info[depon_name_path][obj] name = name_path.split('.')[-1] _dex_tree_set_child(imm_parent, name, depon) elif dep_type == dexfile.depend_idx: depon_name_path = dep[1] depon = markers_info[depon_name_path][obj] name = name_path.split('.')[-1] _dex_tree_set_child(imm_parent, name, depon) else: raise TypeError, 'invalid depend type %s' % (repr(dep_type)) pass pass def build_dependencies(dexroot, all_dep_decls): _build_associations(dexroot) _build_refs(dexroot) _link_dependencies(dexroot, all_dep_decls) pass def _build_depon_dep_map(all_dep_decls): from itertools import chain def _build_sub_depon_dep(from_path, dep): depon1 = dep[1] sub = [(depon1, from_path)] dep_type = dep[0] if dep_type == dexfile.depend_off_rel: depon2 = dep[2] sub.append((depon2, from_path)) pass return sub depon_dep_lsts = [_build_sub_depon_dep(from_path, dep) for from_path, dep in all_dep_decls.items()] depon_dep_lst = chain(*depon_dep_lsts) depon_dep_map = dict(depon_dep_lst) return depon_dep_map def update_offset(dexroot, all_dep_decls): from dexfile import man_off depon_dep_map = _build_depon_dep_map(all_dep_decls) dex_type_names = _all_dex_type_to_names() def make_path(obj, parent_path, obj_name): name_path = parent_path + '.' + obj_name if isinstance(obj, dexfile.composite): if name_path not in all_dep_decls: type_name = dex_type_names[obj.__class__] return type_name pass return name_path moff = man_off(0) queue = [(dexroot, 'DEXFile')] while queue: obj, name_path = queue.pop() obj_clazz, parent_clazz = _resolve_name_path(name_path) if isinstance(obj_clazz, dexfile.depend): moff(obj_clazz.sizeof(obj)) continue if name_path in depon_dep_map: marker, dummy = _resolve_name_path(name_path) while isinstance(marker, dexfile.ref): marker, dummy = _resolve_name_path(marker.target_path) try: obj = obj[0] except: print name_path raise pass marker.set_marker(obj, moff()) pass if isinstance(obj, (tuple, list)): attrs = [(elt, make_path(elt, name_path, str(idx))) for idx, elt in enumerate(obj)] attrs.reverse() queue = queue + attrs continue if isinstance(obj, dexfile.ref): continue if not isinstance(obj, dexfile.relocatable): clazz, parent = _resolve_name_path(name_path) moff(clazz.sizeof(obj)) continue children = obj.children() attr_n_names = [(_dex_tree_get_child(obj, child_name), child_name) for child_name in children] attrs = [(attr, make_path(attr, name_path, attr_name)) for attr, attr_name in attr_n_names] attrs.reverse() queue = queue + attrs pass pass ## \brief Restore to raw value before linking for dependencies. # def restore_dependencies(dexroot, all_dep_decls): for obj, parents, name_path in \ _travel_dex_relocatable(dexroot): if name_path not in all_dep_decls: continue imm_parent = parents[-1] if isinstance(imm_parent, dexfile.cond) and not imm_parent.is_true: continue dep = all_dep_decls[name_path] dep_type = dep[0] if dep_type == dexfile.depend_off_rel: relative_to = _rel_offset_marker.find_depon(dep[2], parents) depon = obj rel_off = depon.data_offset - relative_to.data_offset name = name_path.split('.')[-1] _dex_tree_set_child(imm_parent, name, rel_off) elif dep_type == dexfile.depend_off: depon = obj name = name_path.split('.')[-1] _dex_tree_set_child(imm_parent, name, depon.data_offset) elif dep_type == dexfile.depend_idx: depon = obj name = name_path.split('.')[-1] _dex_tree_set_child(imm_parent, name, depon.data_idx) else: raise TypeError, 'invalid depend type %s' % (repr(dep_type)) pass pass def _sync_dependencies(): pass if __name__ == '__main__': dex = dexfile.DEXFile.open('data/testdata1.dex') import pprint print print 'Dependencies' pprint.pprint(collect_all_dep_decls())