diff python/baselex.py @ 319:8d07a4254f04

Work on burg
author Windel Bouwman
date Sat, 18 Jan 2014 18:58:43 +0100
parents
children 818be710e13d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/baselex.py	Sat Jan 18 18:58:43 2014 +0100
@@ -0,0 +1,24 @@
+
+import re
+from ppci import Token
+
+def tokenize(tok_spec, txt):
+    tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
+    gettok = re.compile(tok_re).match
+    func_map = {pair[0]: pair[2] for pair in tok_spec}
+
+    # Parse line:
+    line = txt
+    mo = gettok(line)
+    pos = 0
+    while mo:
+        typ = mo.lastgroup
+        val = mo.group(typ)
+        func = func_map[typ]
+        if func:
+            typ, val = func(typ, val)
+            yield Token(typ, val)
+        pos = mo.end()
+        mo = gettok(line, pos)
+    if len(line) != pos:
+        raise ParserException('Lex fault at {}'.format(line[pos:]))