comparison python/baselex.py @ 396:fb3c1f029b30
Added baselexer into c3 lexer
| author | Windel Bouwman |
|---|---|
| date | Tue, 27 May 2014 22:19:32 +0200 |
| parents | 0c44e494ef58 |
| children | |
```diff
--- python/baselex.py  395:3b0c495e3008
+++ python/baselex.py  396:fb3c1f029b30
 
 import re
-from ppci import Token, CompilerError
+from ppci import Token, CompilerError, SourceLocation
 from pyyacc import EOF
 
 
 class BaseLexer:
-    """ Base class for a lexer """
+    """ Base class for a lexer. This class can be overridden to create a
+    lexer. This class handles the regular expression generation and
+    source position accounting.
+    """
     def __init__(self, tok_spec):
         tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
         self.gettok = re.compile(tok_re).match
         self.func_map = {pair[0]: pair[2] for pair in tok_spec}
+        self.filename = None
 
     def feed(self, txt):
         """ Feeds the lexer with extra input """
         self.tokens = self.tokenize(txt)
 
     def tokenize(self, txt):
-        """ Generator that generates tokens from text """
+        """ Generator that generates tokens from text
+        It does not yield the EOF token.
+        """
+        self.line = 1
+        self.line_start = 0
+        self.pos = 0
         mo = self.gettok(txt)
-        pos = 0
         while mo:
             typ = mo.lastgroup
             val = mo.group(typ)
+            column = mo.start() - self.line_start
+            length = mo.end() - mo.start()
+            loc = SourceLocation(self.filename, self.line, column, length)
             func = self.func_map[typ]
             if func:
-                typ, val = func(typ, val)
-            yield Token(typ, val)
-            pos = mo.end()
-            mo = self.gettok(txt, pos)
-        if len(txt) != pos:
-            raise CompilerError('Lex fault at {}'.format(txt[pos:]))
+                res = func(typ, val)
+                if res:
+                    typ, val = res
+            yield Token(typ, val, loc)
+            self.pos = mo.end()
+            mo = self.gettok(txt, self.pos)
+        if len(txt) != self.pos:
+            raise CompilerError('Lex fault at {}'.format(txt[self.pos:]))
+
+    def newline(self):
+        """ Enters a new line """
+        self.line_start = self.pos
+        self.line = self.line + 1
 
     def next_token(self):
         try:
             return self.tokens.__next__()
         except StopIteration:
-            return Token(EOF, EOF)
+            loc = SourceLocation(self.filename, self.line, 0, 0)
+            return Token(EOF, EOF, loc)
```
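The `__init__` in the new revision folds the whole token specification into one master regular expression, with one named group per token type, and dispatches on `match.lastgroup`. A runnable sketch of that trick in isolation; the token spec below is made up for illustration and is not part of the commit:

```python
import re

# Illustrative spec in the (name, regex, handler) shape BaseLexer expects;
# a handler of None leaves the matched (typ, val) pair unchanged.
tok_spec = [
    ('NUMBER', r'\d+', lambda typ, val: (typ, int(val))),
    ('ID', r'[A-Za-z_][A-Za-z_0-9]*', None),
    ('SKIP', r'[ \t]+', None),
]

# Same construction as BaseLexer.__init__: one alternation, one named group
# per token type, compiled once and probed with match().
tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
print(tok_re)
# (?P<NUMBER>\d+)|(?P<ID>[A-Za-z_][A-Za-z_0-9]*)|(?P<SKIP>[ \t]+)

mo = re.compile(tok_re).match('42 answer')
print(mo.lastgroup, mo.group(mo.lastgroup))  # NUMBER 42
```

Python's `re` alternation is leftmost-preference, so earlier entries in `tok_spec` win when two patterns match at the same position; keyword patterns, for example, would need to be listed before the identifier pattern.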
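End to end, a subclass supplies the token spec and hooks `newline()` into a handler so the base class keeps its `line`/`line_start` accounting straight. A minimal sketch under assumptions: the module is importable as `baselex` next to `pyyacc` in the repository, `Token` exposes `typ`/`val`/`loc` attributes as in `ppci` at this revision, and `C3StyleLexer`, its token names, and `handle_newline` are hypothetical:

```python
from baselex import BaseLexer
from pyyacc import EOF

class C3StyleLexer(BaseLexer):
    """ Hypothetical subclass in the style the c3 lexer could use. """
    def __init__(self):
        tok_spec = [
            ('NUMBER', r'\d+', lambda typ, val: (typ, int(val))),
            ('ID', r'[A-Za-z_][A-Za-z_0-9]*', None),
            ('NEWLINE', r'\n', self.handle_newline),
            ('SKIP', r'[ \t]+', None),
        ]
        super().__init__(tok_spec)
        self.filename = 'example.c3'  # fed into each SourceLocation

    def handle_newline(self, typ, val):
        # Bump line / line_start so later columns are measured from the
        # new line. Returning None keeps (typ, val) as matched, which this
        # revision tolerates via its 'if res:' check.
        self.newline()

lexer = C3StyleLexer()
lexer.feed('x 42\ny 7')
tok = lexer.next_token()
while tok.typ != EOF:
    # tokenize yields every match at this revision, so skip layout tokens here.
    if tok.typ not in ('SKIP', 'NEWLINE'):
        print(tok.typ, tok.val, tok.loc)
    tok = lexer.next_token()
```

Note how `next_token` turns generator exhaustion into an explicit `EOF` token that now carries a `SourceLocation`, so a parser can point error messages at the end of the input instead of at nothing.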