annotate python/baselex.py @ 319:8d07a4254f04

Work on burg
author Windel Bouwman
date Sat, 18 Jan 2014 18:58:43 +0100
parents
children 818be710e13d
rev   line source
319
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
1
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
2 import re
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
3 from ppci import Token
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
4
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
def tokenize(tok_spec, txt):
    """Lazily split *txt* into tokens as described by *tok_spec*.

    This is a generator: nothing (not even compiling the regular
    expression) happens until the result is iterated.

    Args:
        tok_spec: Iterable of entries indexed as ``(name, pattern, func)``.
            ``name`` is used as a named group in the combined regular
            expression, ``pattern`` is the regex source for that group and
            ``func`` is either a falsy value or a callable invoked as
            ``func(typ, val)`` that returns the ``(typ, val)`` pair to emit.
            Alternatives are tried in the order given.
        txt: The text to scan.

    Yields:
        ``Token(typ, val)`` for each successive match, in input order.

    Raises:
        ParserException: if scanning stops before the end of *txt*, i.e.
            no pattern produces a non-empty match at the current position.
    """
    # tok_spec is walked twice below; materialise it so that a one-shot
    # iterable (e.g. a generator) does not leave the handler map empty.
    spec = list(tok_spec)
    tok_re = '|'.join(
        '(?P<{}>{})'.format(entry[0], entry[1]) for entry in spec)
    gettok = re.compile(tok_re).match
    func_map = {entry[0]: entry[2] for entry in spec}

    # Scan the text from left to right, one match at a time.
    pos = 0
    mo = gettok(txt, pos)
    while mo is not None:
        end = mo.end()
        if end == pos:
            # A zero-width match makes no progress; looping on it would
            # yield empty tokens forever. Stop here and let the check
            # below decide whether unconsumed text remains.
            break
        typ = mo.lastgroup
        val = mo.group(typ)
        func = func_map[typ]
        if func:
            # Optional per-token hook may rewrite both type and value.
            typ, val = func(typ, val)
        yield Token(typ, val)
        pos = end
        mo = gettok(txt, pos)

    if pos != len(txt):
        # NOTE(review): ParserException is not imported in the visible part
        # of this file (only `re` and `Token` are) -- confirm where it is
        # defined and import it, otherwise this line raises NameError.
        raise ParserException('Lex fault at {}'.format(txt[pos:]))