import re

from ppci import Token, CompilerError
from pyyacc import EOF


class BaseLexer:
    """ Base class for a lexer """
    def __init__(self, tok_spec):
        # Combine the per-token regexes into a single pattern with one named
        # group per token type, and remember the handler for each type.
        tok_re = '|'.join(
            '(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
        self.gettok = re.compile(tok_re).match
        self.func_map = {pair[0]: pair[2] for pair in tok_spec}

    def feed(self, txt):
        """ Feeds the lexer with extra input """
        self.tokens = self.tokenize(txt)

    def tokenize(self, txt):
        """ Generator that generates tokens from text """
        mo = self.gettok(txt)
        pos = 0
        while mo:
            typ = mo.lastgroup
            val = mo.group(typ)
            func = self.func_map[typ]
            if func:
                typ, val = func(typ, val)
            yield Token(typ, val)
            pos = mo.end()
            mo = self.gettok(txt, pos)
        if len(txt) != pos:
            raise CompilerError('Lex fault at {}'.format(txt[pos:]))

    def next_token(self):
        """ Returns the next token, or an EOF token when input is exhausted """
        try:
            return next(self.tokens)
        except StopIteration:
            return Token(EOF, EOF)
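A minimal usage sketch of the class above. The token spec names, the handle_number handler, and the sample input are made up for illustration; only the (name, regex, handler) triple format is taken from __init__. Each regex becomes a named group, the name of the matching group selects the handler, and every match is yielded as a Token, so even a whitespace pattern produces tokens in this base class:

def handle_number(typ, val):
    # Handlers receive (type, value) and may return an adjusted pair.
    return typ, int(val)

tok_spec = [
    ('NUMBER', r'\d+', handle_number),
    ('ID', r'[A-Za-z_][A-Za-z_0-9]*', None),
    ('SKIP', r'[ \t]+', None),      # matched and yielded like any other token
]

lexer = BaseLexer(tok_spec)
lexer.feed('answer 42')
print(lexer.next_token())   # Token for 'answer' (ID)
print(lexer.next_token())   # Token for ' ' (SKIP)
print(lexer.next_token())   # Token for 42 (NUMBER, converted by the handler)
print(lexer.next_token())   # EOF token once the generator is exhausted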