Mercurial > lcfOS
diff python/baselex.py @ 319:8d07a4254f04
Work on burg
author | Windel Bouwman |
---|---|
date | Sat, 18 Jan 2014 18:58:43 +0100 |
parents | |
children | 818be710e13d |
line wrap: on
line diff
import re
from ppci import Token


def tokenize(tok_spec, txt):
    """Lazily split ``txt`` into tokens as described by ``tok_spec``.

    All patterns in ``tok_spec`` are combined into one regular expression of
    named alternatives, which is matched repeatedly starting at offset 0.
    Alternatives are tried in the order they appear in ``tok_spec``.

    Args:
        tok_spec: Iterable of indexable entries where item 0 is the token
            type (used as the regex group name, so it must be a valid group
            name), item 1 is the regular expression for that token, and
            item 2 is either a falsy value or a callable
            ``func(typ, val)`` returning a ``(typ, val)`` pair that replaces
            the matched type and text. The iterable is walked twice, so it
            must be re-iterable (e.g. a list).
        txt: The text to scan.

    Yields:
        ``Token(typ, val)`` for every successive match.

    Raises:
        ValueError: If a token pattern matches the empty string. The scan
            position could then never advance and the generator would
            produce empty tokens forever.
        ParserException: If no pattern matches before the end of ``txt``
            is reached.
    """
    tok_re = '|'.join(
        '(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
    gettok = re.compile(tok_re).match
    func_map = {pair[0]: pair[2] for pair in tok_spec}

    pos = 0
    mo = gettok(txt)
    while mo:
        typ = mo.lastgroup
        val = mo.group(typ)
        if mo.end() == pos:
            # A zero-width match leaves pos unchanged, so the next attempt
            # would return this very same match again: report the faulty
            # token specification instead of looping endlessly.
            raise ValueError(
                'Token pattern {!r} matched the empty string at offset {}'
                .format(typ, pos))
        func = func_map[typ]
        if func:
            # Optional per-token hook may rewrite both type and value.
            typ, val = func(typ, val)
        yield Token(typ, val)
        pos = mo.end()
        mo = gettok(txt, pos)

    if len(txt) != pos:
        # NOTE(review): ParserException is not imported in the visible
        # source (only `re` and `Token` are); as written this raise would
        # fail with NameError. Confirm where it is defined and import it.
        raise ParserException('Lex fault at {}'.format(txt[pos:]))