annotate python/ppci/c3/lexer.py @ 396:fb3c1f029b30

Added baselexer into c3 lexer
author Windel Bouwman
date Tue, 27 May 2014 22:19:32 +0200
parents 6ae782a085e0
children 5d03c10fe19d
rev   line source
288
a747a45dcd78 Various styling work
Windel Bouwman
parents: 287
diff changeset
1 import re
396
fb3c1f029b30 Added baselexer into c3 lexer
Windel Bouwman
parents: 393
diff changeset
2 from ppci import CompilerError, SourceLocation, Token, make_num
383
173e20a47fda Added linker description loader
Windel Bouwman
parents: 353
diff changeset
3 from baselex import BaseLexer
148
e5263f74b287 Added c3 language frontend initial parser
Windel Bouwman
parents:
diff changeset
4
e5263f74b287 Added c3 language frontend initial parser
Windel Bouwman
parents:
diff changeset
5 """
e5263f74b287 Added c3 language frontend initial parser
Windel Bouwman
parents:
diff changeset
6 Lexical analyzer part. Splits the input character stream into tokens.
e5263f74b287 Added c3 language frontend initial parser
Windel Bouwman
parents:
diff changeset
7 """
e5263f74b287 Added c3 language frontend initial parser
Windel Bouwman
parents:
diff changeset
8
288
a747a45dcd78 Various styling work
Windel Bouwman
parents: 287
diff changeset
# Reserved words. Lexer.handle_id gives an identifier found in this list a
# token type equal to the word itself instead of the generic 'ID' type.
keywords = [
    'and', 'or', 'not', 'true', 'false',
    'else', 'if', 'while', 'for', 'return',
    'switch', 'case', 'default',
    'function', 'var', 'type', 'const',
    'struct', 'cast', 'sizeof',
    'import', 'module',
]
148
e5263f74b287 Added c3 language frontend initial parser
Windel Bouwman
parents:
diff changeset
15
293
6aa721e7b10b Try to improve build sequence
Windel Bouwman
parents: 288
diff changeset
16
396
fb3c1f029b30 Added baselexer into c3 lexer
Windel Bouwman
parents: 393
diff changeset
class Lexer(BaseLexer):
    """ Generates a sequence of tokens from an input stream.

    Long comments (``/* ... */``) are tracked with the ``incomment`` flag:
    while it is set, tokenize() drops every token produced by the base
    lexer. The token stream always ends with one 'EOF' token.
    """
    def __init__(self, diag):
        """ Create a lexer.

        diag: diagnostics object; lex() passes every source text it reads
        to diag.addSource().
        """
        self.diag = diag
        # Defaults, assigned before the base initializer runs, so that
        # tokenize() can build the EOF location even when it is called
        # directly instead of through lex().
        self.filename = ''
        self.incomment = False
        # Each entry is (name, regex, handler). A handler receives
        # (typ, val); the handlers that produce a token return a
        # (type, value) pair.
        # NOTE(review): the handling of a None handler and of handlers
        # that return None is up to BaseLexer, which is not visible here;
        # the entry order is assumed to be the matching priority
        # (e.g. REAL before NUMBER, comment markers before LEESTEKEN).
        tok_spec = [
            ('REAL', r'\d+\.\d+', lambda typ, val: (typ, float(val))),
            # Hexadecimal literals are emitted with the 'NUMBER' type
            ('HEXNUMBER', r'0x[\da-fA-F]+', lambda typ, val: ('NUMBER', make_num(val))),
            ('NUMBER', r'\d+', lambda typ, val: (typ, int(val))),
            ('ID', r'[A-Za-z][A-Za-z\d_]*', self.handle_id),
            ('NEWLINE', r'\n', lambda typ, val: self.newline()),
            ('SKIP', r'[ \t]', None),
            ('COMMENTS', r'//.*', None),
            ('LONGCOMMENTBEGIN', r'\/\*', self.handle_comment_start),
            ('LONGCOMMENTEND', r'\*\/', self.handle_comment_stop),
            # Punctuation and operators: the token type is the matched text
            ('LEESTEKEN', r'==|->|<<|>>|!=|\+\+|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|', lambda typ, val: (val, val)),
            # The surrounding double quotes are stripped from the value
            ('STRING', r'".*?"', lambda typ, val: (typ, val[1:-1]))
            ]
        super().__init__(tok_spec)

    def lex(self, input_file):
        """ Read input_file completely and return a generator of tokens.

        input_file: object providing read() and close(). Its ``name``
        attribute, when present, is used as the filename; otherwise the
        filename is ''.

        The file is always closed here, also when reading it fails. The
        text is registered with the diagnostics object before tokenizing.
        """
        filename = getattr(input_file, 'name', '')
        try:
            s = input_file.read()
        finally:
            # Close even when read() raises, so the handle does not leak
            input_file.close()
        self.diag.addSource(filename, s)
        self.filename = filename
        return self.tokenize(s)

    def handle_comment_start(self, typ, val):
        """ Enter long comment mode; returns nothing """
        self.incomment = True

    def handle_comment_stop(self, typ, val):
        """ Leave long comment mode; returns nothing """
        self.incomment = False

    def tokenize(self, text):
        """ Keeps track of the long comments.

        Yields the tokens of the base lexer that are seen while not inside
        a long comment, followed by one final 'EOF' token.
        """
        self.incomment = False
        for token in super().tokenize(text):
            # Everything seen while inside a long comment is dropped
            if not self.incomment:
                yield token
        # NOTE(review): a long comment that is still open at this point is
        # not reported; the input simply ends with the EOF token.
        loc = SourceLocation(self.filename, self.line, 0, 0)
        yield Token('EOF', 'EOF', loc)

    def handle_id(self, typ, val):
        """ Return (typ, val); for a keyword the type is the word itself """
        if val in keywords:
            typ = val
        return typ, val