Mercurial > paraspace
view paraspace/structpath.py @ 39:aa05cc7ccd0d
Serious implementation for parsing predicates in structpath.py
author | Thinker K.F. Li <thinker@codemud.net> |
---|---|
date | Thu, 16 Jun 2011 08:03:50 +0800 |
parents | 0766bd54c9d3 |
children |
line wrap: on
line source
## \brief An XPath-like query language
#
# Implement an XPath-like query language.
#
# Structpath uses the syntax of Python for predicates.  That means you must
# use '==' instead of '='.
#
# You need to create a context before querying.
# \code
# ctx = parent_context(None)
# ctx.root = car()
# root = ctx.root
#
# root.wheels = [wheel(), wheel(), wheel(), wheel()]
# root.handle = handle()
#
# ctx.all_classes = {'car': car, 'wheel': wheel, 'handle': handle}
# ctx.class_instances = {
#     'car': [root],
#     'wheel': root.wheels,
#     'handle': [root.handle]
#     }
#
# objs = find_objs_path(ctx, '/car/wheels')
# \endcode
#
import re

# Characters that can change the scanner state: both quote characters, both
# square brackets, the path separator and the backslash.
_reo_nest_chars = re.compile('[\'"\\[\\]/\\\\]')


## \brief Query context: the data being searched plus the current object set.
#
# A new context is created for every step of a path; the shared fields are
# copied from the previous context and only `objs` changes.
#
class context(object):
    # Mapping of class name -> class (see the example in the file header).
    all_classes = None
    # Mapping of class name -> list of instances of that class.
    class_instances = None
    # Object that an absolute path starts from.
    root = None
    # Objects selected so far; None means no step has been evaluated yet.
    objs = None

    ## \brief Create a context.
    #
    # \param objs is the list of currently selected objects.
    # \param ctx is an optional context to copy all_classes,
    #        class_instances and root from.
    #
    def __init__(self, objs=None, ctx=None):
        if ctx:
            self.all_classes = ctx.all_classes
            self.class_instances = ctx.class_instances
            self.root = ctx.root
        self.objs = objs

    ## \brief Return the parent of an object; used for the '..' path part.
    #
    # Subclasses must override this method.
    #
    # \exception NotImplementedError is always raised by this base class.
    #
    def get_parent(self, obj):
        raise NotImplementedError('get_parent() is not implemented')


# States of the path scanners below.
STATE_START = 0                 # outside of any predicate or string
STATE_PRED = 1                  # inside a '[...]' predicate
STATE_STR_SQ = 2                # inside a single quoted string
STATE_STR_DQ = 3                # inside a double quoted string
STATE_BKSP = 4                  # right after a backslash inside a string
STATE_STOP = 5                  # right after a complete predicate

# Quote character -> string state it opens, and the reverse mapping.
_QUOTE_STATE = {'\'': STATE_STR_SQ, '"': STATE_STR_DQ}
_STATE_QUOTE = {STATE_STR_SQ: '\'', STATE_STR_DQ: '"'}


## \brief Split a path into its parts at every top-level '/'.
#
# A '/' inside a predicate ('[...]') or inside a quoted string does not
# split the path.  An absolute path yields an empty first part.
#
# \param path is the structpath string.
# \return a list of strings, one for every part of the path.
#
def _path_split(path):
    stk = []                    # states to return to when a nesting ends
    idx = 0
    left = 0                    # start of the part being collected
    bksp_idx = 0                # index right after the last backslash
    state = STATE_START
    parts = []
    while True:
        mo = _reo_nest_chars.search(path, idx)
        if mo is None:
            parts.append(path[left:])
            break

        ch = mo.group()
        idx = mo.end()

        if state == STATE_START:
            if ch == '/':
                parts.append(path[left:idx - 1])
                left = idx
            elif ch == '[':
                stk.append(state)
                state = STATE_PRED
            elif ch in _QUOTE_STATE:
                stk.append(state)
                state = _QUOTE_STATE[ch]
        elif state in _STATE_QUOTE:
            if ch == _STATE_QUOTE[state]:
                state = stk.pop()
            elif ch == '\\':
                stk.append(state)
                state = STATE_BKSP
                bksp_idx = idx
        elif state == STATE_PRED:
            if ch == '[':
                stk.append(state)
            elif ch in _QUOTE_STATE:
                stk.append(state)
                state = _QUOTE_STATE[ch]
            elif ch == ']':
                state = stk.pop()
        elif state == STATE_BKSP:
            # Only a special character directly after the backslash is
            # escaped.  Otherwise rewind so that this character is scanned
            # again in the state of the enclosing string.
            if idx != bksp_idx + 1:
                idx = mo.start()
            state = stk.pop()

    return parts


## \brief Split one path part into its name and its predicates.
#
# 'wheels[size == 3][idx < 2]' gives ('wheels', ['size == 3', 'idx < 2']).
# The name is everything before the first predicate; it is kept when more
# predicates follow.  Predicates must follow each other directly, and
# nothing may follow the last one.
#
# \param path is one part of a structpath string (without any top-level '/').
# \return a tuple (name, preds) where preds is a list of predicate strings.
# \exception ValueError if the part is not well formed.
#
def _path_parse_preds(path):
    stk = []                    # states to return to when a nesting ends
    idx = 0
    left = 0                    # start of the predicate being collected
    bksp_idx = 0                # index right after the last backslash
    stop_idx = 0                # index right after the last predicate
    state = STATE_START
    name = None
    preds = []
    while True:
        mo = _reo_nest_chars.search(path, idx)
        if mo is None:
            break

        ch = mo.group()
        idx = mo.end()

        if state == STATE_START:
            if ch == '[':
                name = path[:mo.start()]
                left = idx
                stk.append(STATE_STOP)
                state = STATE_PRED
            elif ch in _QUOTE_STATE:
                stk.append(state)
                state = _QUOTE_STATE[ch]
            else:
                raise ValueError('invalid structpath string')
        elif state == STATE_STOP:
            # After a predicate only another, directly adjacent, predicate
            # is valid.  The name found before the first predicate is kept.
            if ch != '[' or mo.start() != stop_idx:
                raise ValueError('invalid structpath string')
            left = idx
            stk.append(STATE_STOP)
            state = STATE_PRED
        elif state in _STATE_QUOTE:
            if ch == _STATE_QUOTE[state]:
                state = stk.pop()
            elif ch == '\\':
                stk.append(state)
                state = STATE_BKSP
                bksp_idx = idx
        elif state == STATE_PRED:
            if ch == '[':
                stk.append(state)
            elif ch in _QUOTE_STATE:
                stk.append(state)
                state = _QUOTE_STATE[ch]
            elif ch == ']':
                state = stk.pop()
                if state == STATE_STOP:
                    # The outermost bracket was closed.
                    preds.append(path[left:idx - 1])
                    stop_idx = idx
        elif state == STATE_BKSP:
            # See _path_split() for the handling of backslashes.
            if idx != bksp_idx + 1:
                idx = mo.start()
            state = stk.pop()

    if state not in (STATE_START, STATE_STOP):
        # Unterminated predicate or string.
        raise ValueError('invalid structpath string')

    if state == STATE_STOP and idx != len(path):
        # Something follows the last predicate.
        raise ValueError('invalid structpath string')

    if state == STATE_START:
        # No predicate at all; the whole part is the name.
        name = path

    return name, preds


## \brief Tell whether the split path is absolute (starts with '/').
#
def _is_abs(path_parts):
    return bool(path_parts) and path_parts[0] == ''


## \brief Drop the empty leading part of an absolute path.
#
def _rel_of_abs(path_parts):
    return path_parts[1:]


## \brief Tell whether a part names a class, i.e. starts with '.'.
#
def _is_class(part):
    return part.startswith('.')


## \brief Return the class name of a class part (strip the leading '.').
#
def _class_name(part):
    return part[1:]


## \brief Tell whether a part refers to the parent ('..').
#
def _is_parent_name(part):
    return part == '..'
## \brief Evaluate one predicate against an object.
#
# The predicate is a Python expression.  Every attribute of the object
# whose name does not start with '_' is available to it as a variable.
#
# NOTE(review): the predicate is passed to eval(); never feed path strings
# from an untrusted source to this module.
#
# \param ctx is the current context (not used by this function).
# \param obj is the object to test.
# \param pred is the predicate, a string holding a Python expression.
# \return the value of the expression.
#
def _eval_obj_pred(ctx, obj, pred):
    # The names come from dir(), so they are always attribute names.  Read
    # them with getattr(); _obj_attr() would treat them as an index or a key
    # when the object is a list or a dictionary.
    namespace = {}
    for attr in dir(obj):
        if attr.startswith('_'):
            continue
        namespace[attr] = getattr(obj, attr)

    return eval(pred, namespace)


## \brief Tell whether an object satisfies all of the given predicates.
#
# Empty predicates ('[]') are ignored.
#
# \param ctx is the current context.
# \param obj is the object to test.
# \param preds is a list of predicate strings.
# \return True if no predicate evaluates to a false value.
#
def _eval_obj_preds(ctx, obj, preds):
    for pred in preds:
        if not pred:
            continue
        if not _eval_obj_pred(ctx, obj, pred):
            return False
    return True


## \brief Get a member of an object by the name used in a path.
#
# For a list the name is an index, for a dictionary it is a Python
# expression giving the key (e.g. a quoted string), for anything else it
# is an attribute name.
#
# NOTE(review): the key of a dictionary is computed with eval(); never
# feed path strings from an untrusted source to this module.
#
# \param obj is the object to read from.
# \param attrname is the name as it appears in the path.
# \return the member.
#
def _obj_attr(obj, attrname):
    if isinstance(obj, list):
        return obj[int(attrname)]
    if isinstance(obj, dict):
        return obj[eval(attrname)]
    return getattr(obj, attrname)


## \brief Get the objects that a name selects from an object.
#
# '*' selects all items of a list or all values of a dictionary; every
# other name selects the single member returned by _obj_attr().
#
# \param obj is the object to read from.
# \param attrname is the name as it appears in the path.
# \return a list of objects.
# \exception ValueError if '*' is used on something other than a list or
#            a dictionary.
#
def _obj_attr_objs(obj, attrname):
    if attrname != '*':
        return [_obj_attr(obj, attrname)]

    if isinstance(obj, list):
        return obj
    if isinstance(obj, dict):
        return list(obj.values())
    raise ValueError('\'*\' is invliad here')


## \brief Apply one part of a path to a single object.
#
# \param ctx is the current context.
# \param part is the path part, a name with optional predicates.
# \param obj is the object the part is applied to.
# \return the list of objects selected by the part that satisfy all of
#         its predicates.  The list is empty if the object does not have
#         the named attribute.
#
def _handle_path_part_obj(ctx, part, obj):
    attr, preds = _path_parse_preds(part)

    if _is_parent_name(attr):
        candidates = [ctx.get_parent(obj)]
    elif _is_class(attr):
        candidates = ctx.class_instances[_class_name(attr)]
    else:
        try:
            candidates = _obj_attr_objs(obj, attr)
        except AttributeError:
            return []

    # Build a real list; filter() is lazy in Python 3.
    return [o for o in candidates if _eval_obj_preds(ctx, o, preds)]


## \brief Apply one part of a path to all objects of a context.
#
# The search starts from the root if the context has no object set yet
# (objs is None).  An empty object set stays empty: a step that matched
# nothing must not restart the search from the root.
#
# \param ctx is the current context.
# \param part is the path part to apply.
# \return a new context, of the same class as ctx, holding the result.
#
def _handle_path_part(ctx, part):
    from itertools import chain

    if ctx.objs is None:
        ctx = ctx.__class__([ctx.root], ctx)

    objss = [_handle_path_part_obj(ctx, part, obj) for obj in ctx.objs]
    objs = list(chain.from_iterable(objss))
    return ctx.__class__(objs, ctx)


## \brief Apply all parts of a split path, one after another.
#
# \param ctx is the context to start from.
# \param path_parts is the list returned by _path_split().
# \return the context holding the objects selected by the whole path.
#
def _handle_path_parts(ctx, path_parts):
    if _is_abs(path_parts):
        ctx = ctx.__class__([ctx.root], ctx)
        path_parts = _rel_of_abs(path_parts)

    if len(path_parts) == 1 and path_parts[0] == '':
        # The path is just '/'; it selects the root.
        return ctx

    for path_part in path_parts:
        ctx = _handle_path_part(ctx, path_part)
    return ctx


## \brief Find the objects selected by a structpath string.
#
# \param ctx is the context to search in.
# \param path is the structpath string.
# \return the list of selected objects.
#
def find_objs_path(ctx, path):
    path_parts = _path_split(path)
    rctx = _handle_path_parts(ctx, path_parts)
    return rctx.objs