annotate python/baselex.py @ 396:fb3c1f029b30

Added baselexer into c3 lexer
author Windel Bouwman
date Tue, 27 May 2014 22:19:32 +0200
parents 0c44e494ef58
children
rev   line source
319
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
1
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
2 import re
396
fb3c1f029b30 Added baselexer into c3 lexer
Windel Bouwman
parents: 382
diff changeset
3 from ppci import Token, CompilerError, SourceLocation
382
0c44e494ef58 Made lexer more generic
Windel Bouwman
parents: 357
diff changeset
4 from pyyacc import EOF
319
8d07a4254f04 Work on burg
Windel Bouwman
parents:
diff changeset
5
382
0c44e494ef58 Made lexer more generic
Windel Bouwman
parents: 357
diff changeset
6
0c44e494ef58 Made lexer more generic
Windel Bouwman
parents: 357
diff changeset
class BaseLexer:
    """ Base class for a lexer. This class can be overridden to create a
        lexer. This class handles the regular expression generation and
        source position accounting.

        tok_spec is an iterable of entries whose first three items are:
        the token name (used as regex group name), the regular expression
        for the token, and a handler (or None).

        A handler is called as handler(typ, val). When it returns a truthy
        (typ, val) pair, a token is emitted. When the handler is None or
        returns a falsy value, the matched text is skipped.
    """
    def __init__(self, tok_spec):
        # The spec is traversed twice below. Materialize it first, so that a
        # one-shot iterator does not silently result in an empty func_map.
        tok_spec = list(tok_spec)
        tok_re = '|'.join(
            '(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
        self.gettok = re.compile(tok_re).match
        self.func_map = {pair[0]: pair[2] for pair in tok_spec}
        self.filename = None
        # Start with an exhausted token stream and fresh position accounting,
        # so next_token() and newline() are usable before the first feed().
        self.tokens = iter(())
        self.line = 1
        self.line_start = 0
        self.pos = 0

    def feed(self, txt):
        """ Feeds the lexer with new input.

            The pending token stream is replaced by a generator over txt.
            Nothing is tokenized until tokens are requested.
        """
        self.tokens = self.tokenize(txt)

    def tokenize(self, txt):
        """ Generator that generates tokens from text
            It does not yield the EOF token.

            Position accounting (line, line_start, pos) is reset when the
            generator starts running. A CompilerError is raised when the
            text cannot be matched completely, or when a token rule matches
            the empty string.
        """
        self.line = 1
        self.line_start = 0
        self.pos = 0
        mo = self.gettok(txt)
        while mo:
            typ = mo.lastgroup
            if mo.end() == mo.start():
                # An empty match never advances pos, so the same match
                # would be found over and over again.
                raise CompilerError(
                    'Token {} matched the empty string'.format(typ))
            val = mo.group(typ)
            column = mo.start() - self.line_start
            length = mo.end() - mo.start()
            # Capture the location before the handler runs, since the
            # handler may call newline() and change the line accounting.
            loc_args = (self.filename, self.line, column, length)
            func = self.func_map[typ]
            if func:
                res = func(typ, val)
                if res:
                    typ, val = res
                    # Only emitted tokens need a location object.
                    yield Token(typ, val, SourceLocation(*loc_args))
            # NOTE(review): pos is advanced after the handler ran, so a
            # handler that calls newline() records the start offset of its
            # own token as line_start. Columns on the first line hence
            # start at 0, while on later lines they are relative to the
            # newline token itself. Confirm what SourceLocation expects
            # before changing this.
            self.pos = mo.end()
            mo = self.gettok(txt, self.pos)
        if len(txt) != self.pos:
            raise CompilerError('Lex fault at {}'.format(txt[self.pos:]))

    def newline(self):
        """ Enters a new line.

            Intended to be called by token handlers: the current value of
            pos becomes the start of the line and the line number is
            incremented.
        """
        self.line_start = self.pos
        self.line += 1

    def next_token(self):
        """ Returns the next token from the fed input, or an EOF token
            located at the current line when the input is exhausted.
        """
        try:
            return next(self.tokens)
        except StopIteration:
            loc = SourceLocation(self.filename, self.line, 0, 0)
            return Token(EOF, EOF, loc)