view paraspace/dex_deptracker.py @ 46:94e80f7a61b5

Add dex_deptracker.restore_dependencies()
author Thinker K.F. Li <thinker@codemud.net>
date Sun, 19 Jun 2011 21:08:31 +0800
parents 5cea19126a11
children cd4e3584ed7f
line wrap: on
line source

from paraspace import dexfile


_nest_types = (dexfile.array,
               dexfile.cond,
               dexfile.switch)


def _resolve_name_path(name_path):
    obj = dexfile
    parent = None
    for name in name_path.split('.'):
        if isinstance(parent, dexfile.array) and obj == list:
            # array.items.<num>
            obj = parent.child_type
            parent = list
            continue
        
        parent = obj
        if isinstance(parent, dexfile.switch):
            key = eval(name)
            obj = parent.map[key]
            continue
        
        if isinstance(parent, dexfile.array) and name == 'items':
            obj = list
            continue

        if isinstance(parent, dexfile.cond) and name == 'value':
            obj = obj.child_type
            continue

        obj = getattr(parent, name)
        pass
    return obj, parent


def _travel_dex_type(clazz, name_path):
    def travelable(attr, attr_name):
        if attr_name.startswith('_'):
            return False
        if callable(attr) and ((not isinstance(attr, dexfile._dex_type)) and
                               (not (type(attr) == type and
                                     issubclass(attr, dexfile._dex_type)))):
            return False
        return True
    
    #
    # pass throught markers
    #
    while isinstance(clazz, _marker):
        clazz = clazz.back_type
        pass
    
    travel_queue = [(getattr(clazz, attr_name), name_path + '.' + attr_name)
                    for attr_name in dir(clazz)
                    if travelable(getattr(clazz, attr_name), attr_name)]
    while travel_queue:
        attr, name_path = travel_queue.pop(0)
        if isinstance(attr, _marker):
            #
            # transparent.  Enqueue back_type with the same name again.
            #
            child = attr.back_type
            travel_queue.append((child, name_path))
            continue
        
        yield attr, name_path

        if isinstance(attr, _nest_types):
            if isinstance(attr, dexfile.array):
                child_name = name_path + '.items.*'
                child = attr.child_type
                travel_queue.append((child, child_name))
            elif isinstance(attr, dexfile.cond):
                child_name = name_path + '.value'
                child = attr.child_type
                travel_queue.append((child, child_name))
            elif isinstance(attr, dexfile.switch):
                for key in attr.map.keys():
                    child_name = name_path + '.' + repr(key)
                    child = attr.map[key]
                    travel_queue.append((child, child_name))
                    pass
                pass
            pass
        pass
    pass


def _find_dep_decls_from_clazz(name_path, clazz, dex_types):
    # XXX: implements the loop with _travel_dex_type()
    dep_decls = {}

    for attr in dir(clazz):
        namelist = [name_path, attr]
        
        #
        # Find dependency
        #
        digged_flag = False
        
        value_type = getattr(clazz, attr)
        while isinstance(value_type, _nest_types) or \
                (type(value_type) == type and
                 issubclass(value_type, _nest_types)):
            if isinstance(value_type, dexfile.array):
                namelist.append('items')
                namelist.append('*')
                value_type = value_type.child_type
            elif isinstance(value_type, dexfile.cond):
                namelist.append('value')
                value_type = value_type.child_type
            elif isinstance(value_type, dexfile.switch):
                for key, child_type in value_type.map.items():
                    if child_type in dex_types.values():
                        continue
                    child_name_path = '.'.join(namelist) + '.' + repr(key)
                    child_dep_decls = \
                        _find_dep_decls_from_clazz(child_name_path,
                                                   child_type,
                                                   dex_types)
                    
                    if child_dep_decls:
                        raise ValueError, \
                            'can not depend on elements of a switch (%s)' \
                            % (child_name_path)
                    pass
                digged_flag = True
                break
            pass

        if digged_flag:
            continue

        #
        # Record dependency
        #
        if isinstance(value_type, dexfile.depend):
            from_name = '.'.join(namelist)
            if isinstance(value_type, dexfile.depend_off):
                depend_name = value_type.depend_on
                dep_decls[from_name] = (dexfile.depend_off, depend_name)
            elif isinstance(value_type, dexfile.depend_off_rel):
                depend_name = value_type.depend_on
                relative_to = value_type.relative_to
                dep_decls[from_name] = (dexfile.depend_off_rel,
                                   depend_name,
                                   relative_to)
            elif isinstance(value_type, dexfile.depend_idx):
                depend_name = value_type.depend_on
                dep_decls[from_name] = (dexfile.depend_idx, depend_name)
                pass
            pass
        pass
    return dep_decls


def _dex_tree_get_child(obj, child_name):
    child_parts = child_name.split('.')
    for child_part in child_parts:
        if isinstance(obj, list):
            idx = int(child_part)
            obj = obj[idx]
            continue
        
        if isinstance(obj, dexfile.switch):
            assert obj.map[eval(child_part)] == obj.child_type
            obj = obj.value
            continue
        
        obj = getattr(obj, child_part)
        pass
    return obj


def _dex_tree_set_child(obj, child_name, value):
    child_parts = child_name.split('.')
    for child_part in child_parts[:-1]:
        if isinstance(obj, list):
            idx = int(child_part)
            obj = obj[idx]
            continue
        
        if isinstance(obj, dexfile.switch):
            assert obj.map[eval(child_part)] == obj.child_type
            obj = obj.value
            continue
        
        obj = getattr(obj, child_part)
        pass
    
    last_part = child_parts[-1]
    if isinstance(obj, list):
        idx = int(last_part)
        obj[idx] = value
    elif isinstance(obj, dexfile.switch):
        assert obj.map[eval(last_part)] == obj.child_type
        obj.value = value
    else:
        setattr(obj, last_part, value)
        pass
    pass


def _travel_dex_relocatable(root_obj, parents=[]):
    stk = [(root_obj, parents, root_obj.__class__.__name__)]
    
    def make_travel_info(obj, obj_name, child_name, parents):
        child_parents = parents + [obj]
        child_obj = _dex_tree_get_child(obj, child_name)
        if isinstance(child_obj, dexfile.composite):
            child_path = child_obj.__class__.__name__
        else:
            child_path = obj_name + '.' + child_name
            pass
        return (child_obj, child_parents, child_path)
    
    while stk:
        obj, parents, obj_name = stk.pop(0)
        yield (obj, parents, obj_name)
        
        if isinstance(obj, list):
            children = [make_travel_info(obj, obj_name, repr(idx), parents)
                        for idx in range(len(obj))]
            stk.extend(children)
            continue
        
        if not isinstance(obj, dexfile.relocatable):
            continue

        children = [make_travel_info(obj, obj_name, child_name, parents)
                    for child_name in obj.children()]
        stk.extend(children)
        pass
    pass


def _all_dex_types():
    dex_types = dict([(name, value)
                       for name, value in dexfile.__dict__.items()
                       if name.startswith('_DEX_')])
    dex_types['DEXFile'] = dexfile.DEXFile
    return dex_types


def collect_all_dep_decls():
    dex_types = _all_dex_types()

    all_dep_decls = {}
    for name_path, clazz in dex_types.items():
        dep_decls = _find_dep_decls_from_clazz(name_path, clazz, dex_types)
        all_dep_decls.update(dep_decls)
        pass
    
    return all_dep_decls


class _marker(dexfile.relocatable):
    back_type = None
    pass

class _uid_marker(_marker):
    uid_seq = 0
    
    def __init__(self, back_type, name_path):
        self.back_type = back_type
        self.name_path = name_path
        pass
    
    def parse(self, parent, data, off):
        value = self.back_type.parse(parent, data, off)
        try:
            value.data_uid = _uid_marker.uid_seq
        except AttributeError:
            raise AttributeError, \
                'can not depend on non-instance (%s)' % (self.name_path)
        _uid_marker.uid_seq = _uid_marker.uid_seq + 1
        
        return value

    def sizeof(self, value):
        sz = self.back_type.sizeof(value)
        return sz

    def compute_size(self, back_obj):
        self.back_type.compute_size(back_obj)
        pass

    def to_str(self, back_obj):
        return self.back_type.to_str(back_obj)

    def link_prepare(self, obj, name_path, parents, markers_info):
        try:
            id_item_map = markers_info[name_path]
        except KeyError:
            id_item_map = {}
            markers_info[name_path] = id_item_map
            pass
        assert obj.data_uid not in id_item_map
        id_item_map[obj.data_uid] = obj
        pass

    def __getattr__(self, name):
        return getattr(self.back_type, name)

    def __call__(self, *args, **kws):
        return self.back_type(*args, **kws)
    pass


class _offset_marker(_uid_marker):
    def parse(self, parent, data, off):
        super(_offset_marker, self).parse(parent, data, off)
        
        value = self.back_type.parse(parent, data, off)
        value.data_offset = off
        return value
    
    def link_prepare(self, obj, name_path, parents, markers_info):
        try:
            id_item_map = markers_info[name_path]
        except KeyError:
            id_item_map = {}
            markers_info[name_path] = id_item_map
            pass
        assert obj.data_offset not in id_item_map
        id_item_map[obj.data_offset] = obj
        pass
    pass


class _rel_offset_marker(_offset_marker):
    def link_prepare(self, obj, name_path, parents, markers_info):
        rev_parents = list(parents)
        rev_parents.reverse()

        for parent in rev_parents:
            try:
                rel_marker_info = parent.rel_marker_info
            except AttributeError:
                try:
                    rel_marker_info = {}
                    parent.rel_marker_info = rel_marker_info
                except AttributeError:
                    continue
                pass
            depons = rel_marker_info.setdefault(name_path, [])
            depons.append(obj)
            pass
        pass

    @staticmethod
    def find_depon(name_path, parents):
        name_parts = name_path.split('.')
        dex_types = _all_dex_types()
        comp_type_name = name_parts[0]
        comp_type = dex_types[comp_type_name]
        
        rev_parents = list(parents)
        rev_parents.reverse()
        
        for parent in rev_parents:
            if isinstance(parent, comp_type):
                attr_name = '.'.join(name_parts[1:])
                depon = _dex_tree_get_child(parent, attr_name)
                return depon

            try:
                rel_marker_info = parent.rel_marker_info
            except:
                continue
            if name_path in rel_marker_info:
                depons = rel_marker_info[name_path]
                assert len(depons) == 1
                depon = depons[0]
                return depon
            pass
        
        raise RuntimeError, 'can not find relative offset depend'
    pass


class _idx_marker(_uid_marker):
    def parse(self, parent, data, off):
        assert isinstance(self.back_type, dexfile.array)
        array = self.back_type.parse(parent, data, off)
        for item in array.items:
            item.data_uid = _uid_marker.uid_seq
            _uid_marker.uid_seq = _uid_marker.uid_seq + 1
            pass
        return array
    
    def link_prepare(self, obj, name_path, parents, markers_info):
        try:
            id_item_map = markers_info[name_path]
        except KeyError:
            id_item_map = []
            markers_info[name_path] = id_item_map
            pass
        for idx, item in enumerate(obj.items):
            id_item_map[idx] = item
            pass
        pass
    pass


def _install_offset_marker(name_path):
    obj, parent = _resolve_name_path(name_path)
    while isinstance(parent, dexfile.ref):
        obj, parent = _resolve_name_path(parent.target_path)
        pass
    marker = _offset_marker(obj, name_path)
    name = name_path.split('.')[-1]
    _dex_tree_set_child(parent, name, marker)
    pass


def _install_rel_offset_marker(name_path):
    obj, parent = _resolve_name_path(name_path)
    while isinstance(parent, dexfile.ref):
        obj, parent = _resolve_name_path(parent.target_path)
        pass
    marker = _rel_offset_marker(obj, name_path)
    name = name_path.split('.')[-1]
    _dex_tree_set_child(parent, name, marker)
    pass


def _install_uid_marker(name_path):
    obj, parent = _resolve_name_path(name_path)
    while isinstance(parent, dexfile.ref):
        obj, parent = _resolve_name_path(parent.target_path)
        pass
    marker = _uid_marker(obj, name_path)
    name = name_path.split('.')[-1]
    _dex_tree_set_child(parent, name, marker)
    pass


def _install_idx_marker(name_path):
    obj, parent = _resolve_name_path(name_path)
    while isinstance(parent, dexfile.ref):
        obj, parent = _resolve_name_path(parent.target_path)
        pass
    marker = _idx_marker(obj, name_path)
    name = name_path.split('.')[-1]
    _dex_tree_set_child(parent, name, marker)
    pass


def _install_markers(all_dep_decls):
    all_markers = set()
    
    for from_path, dep in all_dep_decls.items():
        dep_type = dep[0]
        if issubclass(dep_type, dexfile.depend_off_rel):
            name_path1 = dep[1]
            if name_path1 not in all_markers:
                all_markers.add(name_path1)
                _install_rel_offset_marker(name_path1)
                pass
            
            name_path2 = dep[2]
            if name_path2 not in all_markers:
                all_markers.add(name_path2)
                _install_rel_offset_marker(name_path2)
                pass
            pass
        elif dep_type == dexfile.depend_off:
            name_path = dep[1]
            if name_path not in all_markers:
                all_markers.add(name_path)
                _install_offset_marker(name_path)
                pass
            pass
        elif dep_type == dexfile.depend_idx:
            name_path = dep[1]
            if name_path not in all_markers:
                all_markers.add(name_path)
                _install_idx_marker(name_path)
                pass
            pass
        else:
            raise TypeError, 'Invalid type of depend %s' % (repr(dep_type))
        pass
    pass


def _patch_dex_type_markers(all_dep_decls):
    import itertools
    marked_types = dict([(marker.back_type, name)
                         for name, marker in _all_dex_types().items()
                         if isinstance(marker, _marker)])

    travel_iters = [_travel_dex_type(dex_type, name_path)
                    for name_path, dex_type in _all_dex_types().items()]
    marked_type_refs = [(name_path, marked_types[attr])
                        for attr, name_path in itertools.chain(*travel_iters)
                        if type(attr) == type and
                        issubclass(attr, dexfile.composite) and
                        attr in marked_types]
    
    def patch_ref(name_path, depon_path):
        depon, depon_parent = _resolve_name_path(depon_path)
        
        path_elms = name_path.split('.')
        parent_path = '.'.join(path_elms[:-1])
        parent, grand = _resolve_name_path(parent_path)

        if isinstance(grand, _nest_types):
            if isinstance(grand, (dexfile.array, dexfile.cond)):
                grand.child_type = depon
            elif isinstance(grand, dexfile.switch):
                key = eval(path_elms[-1])
                grand.map[key] = depon
            else:
                raise RuntimeError, 'should not be here'
            pass
        else:
            name = path_elms[-1]
            setattr(parent, name, depon)
            pass
        pass
    
    for name_path, depon_path in marked_type_refs:
        patch_ref(name_path, depon_path)
        pass
    pass


def _build_associations(root_obj):
    for obj, parents, name_path in \
            _travel_dex_relocatable(root_obj):
        if isinstance(obj, dexfile._objs_asso):
            for parent in parents:
                if isinstance(parent, dexfile.composite):
                    break
                pass
            
            left_elts = _dex_tree_get_child(parent, obj.left)
            right_elts = _dex_tree_get_child(parent, obj.right)
            obj.build_associations()
            pass
        pass
    pass


## \brief Find a parent that is an instance of given clazz from a list.
#
def _find_parent_of_clazz(clazz, parents):
    rev_parents = list(parents)
    rev_parents.reverse()
    for parent in rev_parents:
        if isinstance(parent, clazz):
            return parent
        pass
    raise TypeError, 'can not find a prent of %s type' % (repr(clazz))


def _split_name_path_clazz_attr(name_path):
    idx = name_path.index('.')
    if idx >= 0:
        clazz = name_path[:idx]
        attr = name_path[idx + 1:]
    else:
        clazz = name_path
        attr = None
        pass
    return clazz, attr


## \brief Setup value of refs.
#
def _build_refs(root_obj):
    for obj, parents, name_path in \
            _travel_dex_relocatable(root_obj):
        if not parents:
            continue

        imm_parent = parents[-1]
        if isinstance(imm_parent, dexfile.cond) and not imm_parent.is_true:
            continue
        
        clazz, pclazz = _resolve_name_path(name_path)
        while isinstance(clazz, dexfile.depend):
            clazz = clazz.child_type
            pass
        
        if isinstance(clazz, dexfile.ref):
            pclazz_name, attr_name = _split_name_path_clazz_attr(name_path)
            if not attr_name:
                raise ValueError, \
                    'not attribute name in name path (%s)' % (name_path)
            
            parent = _find_parent_of_clazz(dexfile.composite, parents)
            
            value = clazz.get_value(parents)
            _dex_tree_set_child(parent, attr_name, value)
            pass
        pass
    pass


def _link_dependencies(root_obj, all_dep_decls):
    markers_info = {}
    depon_src_map = {}
    for dep_src, depon in all_dep_decls.items():
        for tgt in depon[1:]:
            markers_info[tgt] = {}
            depon_src_map[depon] = dep_src
            pass
        pass

    #
    # Collect marked objects
    #
    for obj, parents, name_path in \
            _travel_dex_relocatable(root_obj):
        if name_path not in markers_info:
            continue
        
        marker, dummy_parent = _resolve_name_path(name_path)
        marker.link_prepare(obj, name_path, parents, markers_info)
        pass

    #
    # Link depend source to marked target
    #
    for obj, parents, name_path in \
            _travel_dex_relocatable(root_obj):
        if name_path not in all_dep_decls:
            continue

        imm_parent = parents[-1]
        if isinstance(imm_parent, dexfile.cond) and not imm_parent.is_true:
            continue

        dep = all_dep_decls[name_path]
        dep_type = dep[0]
        if dep_type == dexfile.depend_off_rel:
            depon1 = _rel_offset_marker.find_depon(dep[1], parents)
            depon2 = _rel_offset_marker.find_depon(dep[2], parents)

            name = name_path.split('.')[-1]
            _dex_tree_set_child(imm_parent, name, (depon1, depon2))
        elif dep_type == dexfile.depend_off:
            depon_name_path = dep[1]
            depon = markers_info[depon_name_path][obj]
            name = name_path.split('.')[-1]
            _dex_tree_set_child(imm_parent, name, depon)
        elif dep_type == dexfile.depend_idx:
            depon_name_path = dep[1]
            depon = markers_info[depon_name_path][obj]
            name = name_path.split('.')[-1]
            _dex_tree_set_child(imm_parent, name, depon)
        else:
            raise TypeError, 'invalid depend type %s' % (repr(dep_type))
        pass
    pass


def build_dependencies(dexroot, all_dep_decls):
    _build_associations(dexroot)
    _build_refs(dexroot)
    _link_dependencies(dexroot, all_dep_decls)
    pass


## \brief Restore to raw value before linking for dependencies.
#
def restore_dependencies(dexroot, all_dep_decls):
    for obj, parents, name_path in \
            _travel_dex_relocatable(dexroot):
        if name_path not in all_dep_decls:
            continue

        imm_parent = parents[-1]
        if isinstance(imm_parent, dexfile.cond) and not imm_parent.is_true:
            continue

        dep = all_dep_decls[name_path]
        dep_type = dep[0]
        if dep_type == dexfile.depend_off_rel:
            depon1 = _rel_offset_marker.find_depon(dep[1], parents)
            depon2 = _rel_offset_marker.find_depon(dep[2], parents)

            name = name_path.split('.')[-1]
            off = depon2.data_offset - depon1.data_offset
            _dex_tree_set_child(imm_parent, name, off)
        elif dep_type == dexfile.depend_off:
            depon_name_path = dep[1]
            depon = markers_info[depon_name_path][obj]
            name = name_path.split('.')[-1]
            _dex_tree_set_child(imm_parent, name, depon.data_offset)
        elif dep_type == dexfile.depend_idx:
            depon_name_path = dep[1]
            depon = markers_info[depon_name_path][obj]
            name = name_path.split('.')[-1]
            _dex_tree_set_child(imm_parent, name, depon.data_idx)
        else:
            raise TypeError, 'invalid depend type %s' % (repr(dep_type))
        pass
    pass


def _sync_dependencies():
    pass


if __name__ == '__main__':
    dex = dexfile.DEXFile.open('data/testdata1.dex')
    
    import pprint
    print
    print 'Dependencies'
    pprint.pprint(collect_all_dep_decls())