comparison python/baselex.py @ 382:0c44e494ef58

Made lexer more generic
author Windel Bouwman
date Sun, 27 Apr 2014 12:24:21 +0200
parents 818be710e13d
children fb3c1f029b30
comparison
equal deleted inserted replaced
381:6df89163e114 382:0c44e494ef58
1 1
2 import re 2 import re
3 from ppci import Token, CompilerError 3 from ppci import Token, CompilerError
4 from pyyacc import EOF
4 5
5 def tokenize(tok_spec, txt):
6 tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
7 gettok = re.compile(tok_re).match
8 func_map = {pair[0]: pair[2] for pair in tok_spec}
9 6
10 # Parse line: 7 class BaseLexer:
11 line = txt 8 """ Base class for a lexer """
12 mo = gettok(line) 9 def __init__(self, tok_spec):
13 pos = 0 10 tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
14 while mo: 11 self.gettok = re.compile(tok_re).match
15 typ = mo.lastgroup 12 self.func_map = {pair[0]: pair[2] for pair in tok_spec}
16 val = mo.group(typ) 13
17 func = func_map[typ] 14 def feed(self, txt):
18 if func: 15 """ Feeds the lexer with extra input """
19 typ, val = func(typ, val) 16 self.tokens = self.tokenize(txt)
20 yield Token(typ, val) 17
21 pos = mo.end() 18 def tokenize(self, txt):
22 mo = gettok(line, pos) 19 """ Generator that generates tokens from text """
23 if len(line) != pos: 20 mo = self.gettok(txt)
24 raise CompilerError('Lex fault at {}'.format(line[pos:])) 21 pos = 0
22 while mo:
23 typ = mo.lastgroup
24 val = mo.group(typ)
25 func = self.func_map[typ]
26 if func:
27 typ, val = func(typ, val)
28 yield Token(typ, val)
29 pos = mo.end()
30 mo = self.gettok(txt, pos)
31 if len(txt) != pos:
32 raise CompilerError('Lex fault at {}'.format(txt[pos:]))
33
34 def next_token(self):
35 try:
36 return self.tokens.__next__()
37 except StopIteration:
38 return Token(EOF, EOF)