import re

from ppci import Token

def tokenize(tok_spec, txt):
    """Tokenize *txt* according to *tok_spec*, yielding Token objects.

    :param tok_spec: iterable of ``(name, regex, func)`` triples. ``name``
        becomes the token type, ``regex`` its pattern, and ``func`` is either
        ``None`` or a callable ``func(typ, val) -> (typ, val)`` used to
        post-process a raw match (e.g. convert a digit string to int).
    :param txt: the single line of text to tokenize.
    :raises ParserException: when a stretch of input matches no pattern,
        or when a pattern produces a zero-width match (which would
        otherwise loop forever).
    """
    # Materialize once: tok_spec may be a generator, and we iterate it twice.
    tok_spec = list(tok_spec)
    tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
    gettok = re.compile(tok_re).match
    func_map = {pair[0]: pair[2] for pair in tok_spec}

    # Parse line:
    line = txt
    mo = gettok(line)
    pos = 0
    while mo:
        typ = mo.lastgroup
        val = mo.group(typ)
        func = func_map[typ]
        if func:
            # Let the spec's callback rewrite the token type/value.
            typ, val = func(typ, val)
        yield Token(typ, val)
        new_pos = mo.end()
        if new_pos == pos:
            # Zero-width match: the scanner cannot advance, so the original
            # loop would yield empty tokens forever. Treat it as a lex fault.
            raise ParserException('Lex fault at {}'.format(line[pos:]))
        pos = new_pos
        mo = gettok(line, pos)
    if len(line) != pos:
        # Leftover text that matched no pattern.
        raise ParserException('Lex fault at {}'.format(line[pos:]))