"""
Lexical analyzer part. Splits the input character stream into tokens.
"""

import re

from ppci import CompilerError, SourceLocation, Token, make_num
from baselex import BaseLexer

keywords = ['and', 'or', 'not', 'true', 'false',
            'else', 'if', 'while', 'for', 'return',
            'switch', 'case', 'default',
            'function', 'var', 'type', 'const',
            'struct', 'cast', 'sizeof',
            'import', 'module']
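# Identifiers that match an entry in this list are emitted with their literal
# value as token type (see Lexer.handle_id below), so a parser can match
# keywords directly rather than inspecting the value of ID tokens.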


class Lexer(BaseLexer):
    """ Generates a sequence of tokens from an input stream """
    def __init__(self, diag):
        self.diag = diag
        tok_spec = [
            ('REAL', r'\d+\.\d+', lambda typ, val: (typ, float(val))),
            ('HEXNUMBER', r'0x[\da-fA-F]+', lambda typ, val: ('NUMBER', make_num(val))),
            ('NUMBER', r'\d+', lambda typ, val: (typ, int(val))),
            ('ID', r'[A-Za-z][A-Za-z\d_]*', self.handle_id),
            ('NEWLINE', r'\n', lambda typ, val: self.newline()),
            ('SKIP', r'[ \t]', None),
            ('COMMENTS', r'//.*', None),
            ('LONGCOMMENTBEGIN', r'\/\*', self.handle_comment_start),
            ('LONGCOMMENTEND', r'\*\/', self.handle_comment_stop),
            # 'leesteken' is Dutch for punctuation mark: operators and delimiters.
            ('LEESTEKEN', r'==|->|<<|>>|!=|\+\+|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|', lambda typ, val: (val, val)),
            ('STRING', r'".*?"', lambda typ, val: (typ, val[1:-1]))
        ]
        super().__init__(tok_spec)

    def lex(self, input_file):
        """ Read the input file and tokenize its contents """
        filename = input_file.name if hasattr(input_file, 'name') else ''
        s = input_file.read()
        input_file.close()
        self.diag.addSource(filename, s)
        self.filename = filename
        return self.tokenize(s)

    # Handlers for /* */ block comments: they only toggle a flag that
    # tokenize() uses to suppress tokens while inside the comment.
    def handle_comment_start(self, typ, val):
        self.incomment = True

    def handle_comment_stop(self, typ, val):
        self.incomment = False

    def tokenize(self, text):
        """ Tokenize the text while keeping track of /* */ comments """
        self.incomment = False
        for token in super().tokenize(text):
            # Suppress tokens until the long comment has ended:
            if not self.incomment:
                yield token
        loc = SourceLocation(self.filename, self.line, 0, 0)
        yield Token('EOF', 'EOF', loc)

    def handle_id(self, typ, val):
        if val in keywords:
            typ = val
        return typ, val
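

# Minimal usage sketch, not part of the original module: it assumes only a
# diagnostics object exposing addSource() (as used by Lexer.lex above) and a
# file-like input. The _DemoDiag stub below is hypothetical, for illustration.
if __name__ == '__main__':
    import io

    class _DemoDiag:
        """ Stand-in diagnostics object; only addSource() is needed here. """
        def addSource(self, filename, source):
            pass

    example = 'var int x;\n/* a block comment */\nfunction void main()\n'
    lexer = Lexer(_DemoDiag())
    for tok in lexer.lex(io.StringIO(example)):
        print(tok)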