Mercurial > lcfOS
comparison python/baselex.py @ 382:0c44e494ef58
Made lexer more generic
author | Windel Bouwman |
---|---|
date | Sun, 27 Apr 2014 12:24:21 +0200 |
parents | 818be710e13d |
children | fb3c1f029b30 |
comparison
equal
deleted
inserted
replaced
381:6df89163e114 | 382:0c44e494ef58 |
---|---|
1 | 1 |
2 import re | 2 import re |
3 from ppci import Token, CompilerError | 3 from ppci import Token, CompilerError |
4 from pyyacc import EOF | |
4 | 5 |
5 def tokenize(tok_spec, txt): | |
6 tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec) | |
7 gettok = re.compile(tok_re).match | |
8 func_map = {pair[0]: pair[2] for pair in tok_spec} | |
9 | 6 |
10 # Parse line: | 7 class BaseLexer: |
11 line = txt | 8 """ Base class for a lexer """ |
12 mo = gettok(line) | 9 def __init__(self, tok_spec): |
13 pos = 0 | 10 tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec) |
14 while mo: | 11 self.gettok = re.compile(tok_re).match |
15 typ = mo.lastgroup | 12 self.func_map = {pair[0]: pair[2] for pair in tok_spec} |
16 val = mo.group(typ) | 13 |
17 func = func_map[typ] | 14 def feed(self, txt): |
18 if func: | 15 """ Feeds the lexer with extra input """ |
19 typ, val = func(typ, val) | 16 self.tokens = self.tokenize(txt) |
20 yield Token(typ, val) | 17 |
21 pos = mo.end() | 18 def tokenize(self, txt): |
22 mo = gettok(line, pos) | 19 """ Generator that generates tokens from text """ |
23 if len(line) != pos: | 20 mo = self.gettok(txt) |
24 raise CompilerError('Lex fault at {}'.format(line[pos:])) | 21 pos = 0 |
22 while mo: | |
23 typ = mo.lastgroup | |
24 val = mo.group(typ) | |
25 func = self.func_map[typ] | |
26 if func: | |
27 typ, val = func(typ, val) | |
28 yield Token(typ, val) | |
29 pos = mo.end() | |
30 mo = self.gettok(txt, pos) | |
31 if len(txt) != pos: | |
32 raise CompilerError('Lex fault at {}'.format(txt[pos:])) | |
33 | |
34 def next_token(self): | |
35 try: | |
36 return self.tokens.__next__() | |
37 except StopIteration: | |
38 return Token(EOF, EOF) |