annotate python/baselex.py @ 335:582a1aaa3983

Added long branch format
author Windel Bouwman
date Mon, 17 Feb 2014 20:41:30 +0100
parents 8d07a4254f04
children 818be710e13d
rev   line source
319
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
1
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
2 import re
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
3 from ppci import Token
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
4
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
def tokenize(tok_spec, txt):
    """Generate tokens from *txt* using the regex rules in *tok_spec*.

    Args:
        tok_spec: Sequence of rules, each indexable as
            ``(name, pattern, func)``. ``name`` is used as a named regex
            group, ``pattern`` is the regex source for that group, and
            ``func`` is either falsy or a callable ``func(typ, val)``
            returning the ``(typ, val)`` pair to emit instead.
        txt: The text to tokenize.

    Yields:
        ``Token(typ, val)`` for each consecutive match, starting at the
        beginning of *txt*.

    Raises:
        ParserException: If matching stops before the end of *txt*; the
            message contains the unmatched remainder.
    """
    # Combine all rules into one alternation of named groups; the regex
    # engine tries the alternatives left to right, i.e. in tok_spec order.
    tok_re = '|'.join(
        '(?P<{}>{})'.format(spec[0], spec[1]) for spec in tok_spec)
    gettok = re.compile(tok_re).match
    func_map = {spec[0]: spec[2] for spec in tok_spec}

    pos = 0
    mo = gettok(txt, pos)
    # A match that consumes no text would never advance ``pos`` and would
    # loop forever, so it is treated the same as "no match".
    while mo and mo.end() > pos:
        typ = mo.lastgroup
        val = mo.group(typ)
        func = func_map[typ]
        if func:
            typ, val = func(typ, val)
        yield Token(typ, val)
        pos = mo.end()
        mo = gettok(txt, pos)

    if len(txt) != pos:
        # NOTE(review): ParserException is not imported in the visible part
        # of this module -- confirm where it is supposed to come from.
        raise ParserException('Lex fault at {}'.format(txt[pos:]))