view paraspace/structpath.py @ 39:aa05cc7ccd0d

Serious implementation for parsing predicates in structpath.py
author Thinker K.F. Li <thinker@codemud.net>
date Thu, 16 Jun 2011 08:03:50 +0800
parents 0766bd54c9d3
children
line wrap: on
line source

## \brief An XPath-like query language
#
# Implements an XPath-like query language.
#
# Structpath uses Python syntax for predicates.  That means you must use
# '==' instead of '='.
#
# You need to create a context before querying.
# \code
# ctx = parent_context(None)
# ctx.root = car()
# root = ctx.root
#
# root.wheels = [wheel(), wheel(), wheel(), wheel()]
# root.handle = handle()
#
# ctx.all_classes = {'car': car, 'wheel': wheel, 'handle': handle}
# ctx.class_instances = {
#     'car': [root],
#     'wheel': root.wheels,
#     'handle': [root.handle]
#     }
#
# objs = find_objs_path(ctx, '/car/wheels')
# \endcode
#
import re

# Matches every character the path scanners have to react to: single and
# double quotes, '[' and ']', the '/' separator and the backslash.
_reo_nest_chars = re.compile(r'''['"\[\]/\\]''')

## \brief State carried from one step of a structpath query to the next.
#
# A context bundles the objects selected so far (`objs`) with the data
# shared by the whole query (`all_classes`, `class_instances`, `root`).
# Every query step creates a new context from the previous one.
#
class context(object):
    # Copied from context to context; not read anywhere else in this file.
    all_classes = None
    # Mapping of class name to the instances of that class; used to resolve
    # '.classname' path parts.
    class_instances = None
    # Object that absolute paths (and contexts without objects) start from.
    root = None
    # Objects selected by the query so far.
    objs = None

    ## \brief Create a context, optionally inheriting the shared data.
    #
    # \param objs is the list of currently selected objects.
    # \param ctx is an existing context to copy `all_classes`,
    #        `class_instances` and `root` from, or None to leave them at
    #        their class defaults.
    #
    def __init__(self, objs=None, ctx=None):
        # Test against None explicitly so that a context type defining
        # its own truth value is still copied from.
        if ctx is not None:
            self.all_classes = ctx.all_classes
            self.class_instances = ctx.class_instances
            self.root = ctx.root
        self.objs = objs

    ## \brief Return the parent of an object (used for '..' path parts).
    #
    # This base class has no way to find a parent and always raises.
    # NOTE(review): presumably overridden by subclasses such as the
    # parent_context used in the module example -- confirm.
    #
    # \exception NotImplementedError always.
    #
    def get_parent(self, obj):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise NotImplementedError('get_parent() is not implemented')


# Scanner states used by _path_split() and _path_parse_preds().
(
    STATE_START,    # outside of any predicate or string
    STATE_PRED,     # inside a '[...]' predicate
    STATE_STR_SQ,   # inside a single-quoted string
    STATE_STR_DQ,   # inside a double-quoted string
    STATE_BKSP,     # right after a backslash inside a string
    STATE_STOP,     # after a closed predicate (only _path_parse_preds())
) = range(6)

## \brief Split a structpath string into its '/'-separated parts.
#
# Only a '/' found outside of '[...]' predicates and outside of quoted
# strings separates parts.  Inside a quoted string a backslash escapes
# the character that follows it.  No validation is done here: an
# unterminated predicate or string simply ends up in the last part.
#
# \param path is the structpath string.
# \return the list of parts; an absolute path yields '' as first part.
#
def _path_split(path):
    # State entered by each opening character.
    openers = {'[': STATE_PRED, '\'': STATE_STR_SQ, '"': STATE_STR_DQ}
    # Character that closes each string state.
    closers = {STATE_STR_SQ: '\'', STATE_STR_DQ: '"'}

    pending = []            # states to return to
    cursor = 0              # where the next search starts
    part_start = 0          # start of the part being collected
    cur = STATE_START
    pieces = []
    while True:
        hit = _reo_nest_chars.search(path, cursor)
        if hit is None:
            # No special character left: the rest is the last part.
            pieces.append(path[part_start:])
            return pieces

        sym = hit.group()
        cursor = hit.end()

        if cur == STATE_BKSP:
            if cursor != esc_end + 1:
                # The escaped character was an ordinary one; this hit
                # has to be looked at again in the string state.
                cursor = hit.start()
            cur = pending.pop()
        elif cur in closers:
            if sym == closers[cur]:
                cur = pending.pop()
            elif sym == '\\':
                pending.append(cur)
                cur = STATE_BKSP
                esc_end = cursor
        elif cur in (STATE_START, STATE_PRED):
            if sym in openers:
                pending.append(cur)
                cur = openers[sym]
            elif sym == '/' and cur == STATE_START:
                pieces.append(path[part_start:cursor - 1])
                part_start = cursor
            elif sym == ']' and cur == STATE_PRED:
                cur = pending.pop()


## \brief Split one path part into its name and its predicates.
#
# A part has the form `name[pred][pred]...`; predicates are optional.
# Brackets may nest inside a predicate, and brackets inside quoted
# strings are ignored.  Inside a quoted string a backslash escapes the
# character that follows it.
#
# \param path is a single part of a structpath string.
# \return a tuple (name, preds) where `name` is the text before the
#         first predicate (the whole part if there is none) and `preds`
#         is the list of predicate texts without their outer brackets.
# \exception ValueError if the part has unbalanced brackets or quotes, a
#            stray ']', '/' or backslash outside of predicates and
#            strings, or plain text after the last predicate.
#
def _path_parse_preds(path):
    stk = []
    idx = 0
    state = STATE_START
    name = None
    preds = []
    while True:
        mo = _reo_nest_chars.search(path, idx)

        if mo is None:
            break

        ch = mo.group()
        idx = mo.end()
        if state in (STATE_START, STATE_STOP):
            if ch == '[':
                if state == STATE_START:
                    # Only the first predicate ends the name; taking it
                    # again for later predicates would include the
                    # earlier predicates in the name.
                    name = path[:idx - 1]
                left = idx
                stk.append(STATE_STOP)
                state = STATE_PRED
            elif ch == '\'':
                stk.append(state)
                state = STATE_STR_SQ
            elif ch == '"':
                stk.append(state)
                state = STATE_STR_DQ
            else:
                raise ValueError('invalid structpath string')
        elif state == STATE_STR_SQ:
            if ch == '\'':
                state = stk.pop()
            elif ch == '\\':
                stk.append(state)
                state = STATE_BKSP
                bksp_idx = idx
        elif state == STATE_STR_DQ:
            if ch == '"':
                state = stk.pop()
            elif ch == '\\':
                stk.append(state)
                state = STATE_BKSP
                bksp_idx = idx
        elif state == STATE_PRED:
            if ch == '[':
                stk.append(state)
                state = STATE_PRED
            elif ch == '\'':
                stk.append(state)
                state = STATE_STR_SQ
            elif ch == '"':
                stk.append(state)
                state = STATE_STR_DQ
            elif ch == ']':
                state = stk.pop()
                if state == STATE_STOP:
                    # Outermost bracket closed: one predicate is done.
                    preds.append(path[left:idx - 1])
        elif state == STATE_BKSP:
            if idx != (bksp_idx + 1):
                # The escaped character was an ordinary one; look at
                # this match again in the string state.
                idx = mo.start()
            state = stk.pop()

    if state not in (STATE_START, STATE_STOP):
        # Ended inside a predicate or a string.
        raise ValueError('invalid structpath string')
    if state == STATE_STOP and idx != len(path):
        # Plain text after the last predicate.
        raise ValueError('invalid structpath string')
    if state == STATE_START:
        name = path

    return name, preds


## \brief Tell whether split path parts describe an absolute path.
#
# \param path_parts is a list of parts as returned by _path_split().
# \return True if there is at least one part and the first one is ''
#         (the path started with '/'), False otherwise.
#
def _is_abs(path_parts):
    return len(path_parts) != 0 and path_parts[0] == ''


## \brief Drop the leading part of an absolute path.
#
# \param path_parts is a list of parts as returned by _path_split().
# \return every part except the first one.
#
def _rel_of_abs(path_parts):
    without_first = path_parts[1:]
    return without_first


## \brief Tell whether a part name starts with '.' (a class reference).
#
# \param part is the name of a path part.
# \return True if the first character of `part` is '.'.
#
def _is_class(part):
    return part[:1] == '.'
        

## \brief Extract the class name from a '.classname' part name.
#
# \param part is a part name for which _is_class() is true.
# \return `part` without its first character.
#
def _class_name(part):
    prefix = '.'
    return part[len(prefix):]


## \brief Tell whether a part name is the parent reference '..'.
#
# \param part is the name of a path part.
# \return True if `part` is exactly '..'.
#
def _is_parent_name(part):
    parent_name = '..'
    return part == parent_name


## \brief Evaluate one predicate expression against an object.
#
# Every attribute of `obj` whose name does not start with '_' is made
# available to the expression as a global name.
#
# NOTE(review): `pred` is run through eval(); structpath strings must
# come from a trusted source.
#
# \param ctx is the query context (not used here).
# \param obj is the object the predicate is tested against.
# \param pred is a Python expression, e.g. 'size == 3'.
# \return the value of the expression.
#
def _eval_obj_pred(ctx, obj, pred):
    # The names come from dir(obj), so they are attribute names and are
    # read with getattr().  Reading them as list indexes or dict key
    # expressions made every predicate on a list or dict object fail.
    ns_global = dict((attr, getattr(obj, attr))
                     for attr in dir(obj)
                     if not attr.startswith('_'))
    return eval(pred, ns_global)


## \brief Check an object against a list of predicates.
#
# Empty predicates are skipped.  Evaluation stops at the first predicate
# that is not satisfied.
#
# \param ctx is the query context.
# \param obj is the object to test.
# \param preds is a list of predicate expressions.
# \return True if every non-empty predicate holds for `obj` (also when
#         there is none), False otherwise.
#
def _eval_obj_preds(ctx, obj, preds):
    return all(_eval_obj_pred(ctx, obj, expr) for expr in preds if expr)


## \brief Look up one child of an object by the name used in a path.
#
# - For a list, the name is converted with int() and used as an index.
# - For a dict, the name is evaluated as a Python expression and the
#   result is used as the key.
# - For anything else, the name is an attribute name.
#
# NOTE(review): dict keys go through eval(); structpath strings must
# come from a trusted source.
#
# \param obj is the object to look into.
# \param attrname is the name part of a path part.
# \return the selected item or attribute.
#
def _obj_attr(obj, attrname):
    if isinstance(obj, list):
        return obj[int(attrname)]
    if isinstance(obj, dict):
        return obj[eval(attrname)]
    return getattr(obj, attrname)


## \brief Return the children of an object selected by a part name.
#
# The name '*' selects every item of a list or every value of a dict.
# Any other name selects the single child found by _obj_attr().
#
# \param obj is the object to look into.
# \param attrname is the name part of a path part.
# \return a list of selected objects; for '*' on a list this is the
#         list itself, not a copy.
# \exception ValueError if '*' is used on something that is neither a
#            list nor a dict.
#
def _obj_attr_objs(obj, attrname):
    if attrname != '*':
        return [_obj_attr(obj, attrname)]

    if isinstance(obj, list):
        return obj
    if isinstance(obj, dict):
        return list(obj.values())
    # Call form of raise: valid on both Python 2 and Python 3.
    raise ValueError('\'*\' is invalid here')


## \brief Apply one path part to a single object.
#
# The part name selects candidate objects:
# - '..' selects the parent of `obj`, as given by ctx.get_parent();
# - '.classname' selects ctx.class_instances['classname'];
# - anything else selects children of `obj` through _obj_attr_objs().
# The candidates are then filtered by the predicates of the part.
#
# \param ctx is the query context.
# \param part is one part of a structpath string.
# \param obj is the object the part is applied to.
# \return a new list of the candidates that satisfy all predicates; an
#         empty list if `obj` has no attribute with the given name.
#
def _handle_path_part_obj(ctx, part, obj):
    attr, preds = _path_parse_preds(part)

    if _is_parent_name(attr):
        candidates = [ctx.get_parent(obj)]
    elif _is_class(attr):
        candidates = ctx.class_instances[_class_name(attr)]
    else:
        try:
            candidates = _obj_attr_objs(obj, attr)
        except AttributeError:
            # No such attribute on this object: nothing matches.
            return []

    # Build a real list: filter() gives a one-shot iterator on Python 3
    # and mirrors tuple/str inputs on Python 2.
    return [cand for cand in candidates
            if _eval_obj_preds(ctx, cand, preds)]


## \brief Apply one path part to every object of a context.
#
# \param ctx is the context holding the currently selected objects.
# \param part is one part of a structpath string.
# \return a new context of the same class as `ctx` whose objects are the
#         results for all objects of `ctx`, concatenated in order.
#
def _handle_path_part(ctx, part):
    # NOTE(review): this also restarts from the root when the previous
    # step matched nothing (empty list), not only when no objects were
    # given (None) -- confirm that this is intended.
    if not ctx.objs:
        ctx = ctx.__class__([ctx.root], ctx)

    matched = []
    for obj in ctx.objs:
        matched.extend(_handle_path_part_obj(ctx, part, obj))
    return ctx.__class__(matched, ctx)


## \brief Apply all parts of a split path, one after another.
#
# If the parts describe an absolute path, the walk starts from a new
# context holding only ctx.root and the leading empty part is dropped.
# A single empty part left over (as for the path '/') selects the
# starting context itself.
#
# \param ctx is the context to start from.
# \param path_parts is a list of parts as returned by _path_split().
# \return the context produced by the last part.
#
def _handle_path_parts(ctx, path_parts):
    current = ctx
    remaining = path_parts
    if _is_abs(remaining):
        current = current.__class__([current.root], current)
        remaining = _rel_of_abs(remaining)

    nothing_to_walk = (len(remaining) == 1 and remaining[0] == '')
    if not nothing_to_walk:
        for step in remaining:
            current = _handle_path_part(current, step)
    return current


## \brief Find the objects selected by a structpath string.
#
# \param ctx is the context to query; see the example at the top of
#        this file for how to set one up.
# \param path is the structpath string, e.g. '/car/wheels'.
# \return the `objs` of the context reached after applying every part
#         of `path`.
#
def find_objs_path(ctx, path):
    result_ctx = _handle_path_parts(ctx, _path_split(path))
    return result_ctx.objs