comparison python/baselex.py @ 396:fb3c1f029b30

Added baselexer into c3 lexer
author Windel Bouwman
date Tue, 27 May 2014 22:19:32 +0200
parents 0c44e494ef58
children
comparison
equal deleted inserted replaced
395:3b0c495e3008 396:fb3c1f029b30
1 1
2 import re 2 import re
3 from ppci import Token, CompilerError 3 from ppci import Token, CompilerError, SourceLocation
4 from pyyacc import EOF 4 from pyyacc import EOF
5 5
6 6
class BaseLexer:
    """ Base class for a lexer. This class can be overridden to create a
    lexer. This class handles the regular expression generation and
    source position accounting.
    """
    def __init__(self, tok_spec):
        """ Prepare the lexer from a token specification.

        tok_spec is a sequence of (name, regex, func) triples. The
        regexes are combined into one pattern with named groups; func is
        an optional callback applied to each matched (typ, val) pair, or
        None when the token needs no post-processing.
        """
        tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1])
                          for pair in tok_spec)
        # Bind the compiled pattern's match method directly; called in a
        # tight loop in tokenize().
        self.gettok = re.compile(tok_re).match
        self.func_map = {pair[0]: pair[2] for pair in tok_spec}
        # Filename used in SourceLocation objects; callers may set it.
        self.filename = None

    def feed(self, txt):
        """ Feeds the lexer with extra input """
        self.tokens = self.tokenize(txt)

    def tokenize(self, txt):
        """ Generator that generates tokens from text.
        It does not yield the EOF token.

        Raises CompilerError when some part of the input cannot be
        matched by any token regex.
        """
        self.line = 1
        self.line_start = 0
        self.pos = 0
        mo = self.gettok(txt)
        while mo:
            typ = mo.lastgroup
            val = mo.group(typ)
            column = mo.start() - self.line_start
            length = mo.end() - mo.start()
            loc = SourceLocation(self.filename, self.line, column, length)
            func = self.func_map[typ]
            if func:
                res = func(typ, val)
                # A callback returning None means "skip this token"
                # (e.g. whitespace or comments).
                if res:
                    typ, val = res
                    yield Token(typ, val, loc)
            else:
                # BUGFIX: no callback registered means the token is
                # passed through unmodified. Previously tokens with
                # func=None were silently dropped here.
                yield Token(typ, val, loc)
            self.pos = mo.end()
            mo = self.gettok(txt, self.pos)
        if len(txt) != self.pos:
            # Some input remained that matched no token regex.
            raise CompilerError('Lex fault at {}'.format(txt[self.pos:]))

    def newline(self):
        """ Enters a new line """
        # Remember where this line starts so column numbers can be
        # computed as offset - line_start.
        self.line_start = self.pos
        self.line = self.line + 1

    def next_token(self):
        """ Return the next token, or an EOF token when input is
        exhausted. """
        try:
            return self.tokens.__next__()
        except StopIteration:
            loc = SourceLocation(self.filename, self.line, 0, 0)
            return Token(EOF, EOF, loc)