319
|
1
|
|
2 import re
|
396
|
3 from ppci import Token, CompilerError, SourceLocation
|
382
|
4 from pyyacc import EOF
|
319
|
5
|
382
|
6
|
|
class BaseLexer:
    """ Base class for a regular-expression driven lexer.

    This class can be overridden to create a lexer. It handles the
    combined regular expression generation and source position
    (line / column) accounting while scanning.
    """
    def __init__(self, tok_spec):
        """ Build the lexer from a token specification.

        tok_spec is an iterable of (name, regex, handler) triples.
        handler may be None, in which case matched text of that kind is
        silently dropped (e.g. whitespace), or a callable
        handler(typ, val) returning either None (drop the match) or a
        (typ, val) pair to emit as a token.
        """
        # One alternation with a named group per token kind; the group
        # name that matched identifies the token type.
        tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
        # Bind the compiled pattern's match method once for fast repeated calls.
        self.gettok = re.compile(tok_re).match
        self.func_map = {pair[0]: pair[2] for pair in tok_spec}
        self.filename = None

    def feed(self, txt):
        """ Feeds the lexer with extra input """
        self.tokens = self.tokenize(txt)

    def tokenize(self, txt):
        """ Generator that generates tokens from text.

        It does not yield the EOF token. Raises CompilerError when the
        input contains text that matches no token pattern.
        """
        self.line = 1
        self.line_start = 0
        self.pos = 0
        mo = self.gettok(txt)
        while mo:
            typ = mo.lastgroup
            val = mo.group(typ)
            column = mo.start() - self.line_start
            length = mo.end() - mo.start()
            loc = SourceLocation(self.filename, self.line, column, length)
            func = self.func_map[typ]
            if func:
                res = func(typ, val)
                # A handler may veto the token by returning None;
                # token kinds without a handler are dropped entirely.
                if res:
                    typ, val = res
                    yield Token(typ, val, loc)
            self.pos = mo.end()
            mo = self.gettok(txt, self.pos)
        # The regex stopped matching before the end of input: lex error.
        if len(txt) != self.pos:
            raise CompilerError('Lex fault at {}'.format(txt[self.pos:]))

    def newline(self):
        """ Enters a new line """
        self.line_start = self.pos
        self.line += 1

    def next_token(self):
        """ Return the next token from the fed input, or an EOF token
        once the input is exhausted. """
        try:
            return next(self.tokens)
        except StopIteration:
            loc = SourceLocation(self.filename, self.line, 0, 0)
            return Token(EOF, EOF, loc)