lcfOS: comparison of python/c3/lexer.py @ 293:6aa721e7b10b
Try to improve build sequence
| field | value |
|---|---|
| author | Windel Bouwman |
| date | Thu, 28 Nov 2013 20:39:37 +0100 |
| parents | a747a45dcd78 |
| children | |
```diff
--- a/python/c3/lexer.py  292:534b94b40aa8
+++ b/python/c3/lexer.py  293:6aa721e7b10b
@@ -11,77 +11,83 @@
     'else', 'if', 'while', 'return',
     'function', 'var', 'type', 'const',
     'struct', 'cast',
     'import', 'module']
 
-def tokenize(input_file):
-    """
-    Tokenizer, generates an iterator that
-    returns tokens!
-
-    Input is a file like object.
-
-    This GREAT example was taken from python re doc page!
-    """
-    filename = input_file.name if hasattr(input_file, 'name') else ''
-    s = input_file.read()
-    input_file.close()
-    tok_spec = [
-        ('REAL', r'\d+\.\d+'),
-        ('HEXNUMBER', r'0x[\da-fA-F]+'),
-        ('NUMBER', r'\d+'),
-        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-        ('NEWLINE', r'\n'),
-        ('SKIP', r'[ \t]'),
-        ('COMMENTS', r'//.*'),
-        ('LONGCOMMENTBEGIN', r'\/\*'),
-        ('LONGCOMMENTEND', r'\*\/'),
-        ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
-        ('STRING', r"'.*?'")
-    ]
-    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-    gettok = re.compile(tok_re).match
-    line = 1
-    pos = line_start = 0
-    mo = gettok(s)
-    incomment = False
-    while mo is not None:
-        typ = mo.lastgroup
-        val = mo.group(typ)
-        if typ == 'NEWLINE':
-            line_start = pos
-            line += 1
-        elif typ == 'COMMENTS':
-            pass
-        elif typ == 'LONGCOMMENTBEGIN':
-            incomment = True
-        elif typ == 'LONGCOMMENTEND':
-            incomment = False
-        elif typ == 'SKIP':
-            pass
-        elif incomment:
-            pass # Wait until we are not in a comment section
-        else:
-            if typ == 'ID':
-                if val in keywords:
-                    typ = val
-            elif typ == 'LEESTEKEN':
-                typ = val
-            elif typ == 'NUMBER':
-                val = int(val)
-            elif typ == 'HEXNUMBER':
-                val = int(val[2:], 16)
-                typ = 'NUMBER'
-            elif typ == 'REAL':
-                val = float(val)
-            elif typ == 'STRING':
-                val = val[1:-1]
-            loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start())
-            yield Token(typ, val, loc)
-        pos = mo.end()
-        mo = gettok(s, pos)
-    if pos != len(s):
-        col = pos - line_start
-        loc = SourceLocation(filename, line, col, 1)
-        raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc)
-    loc = SourceLocation(filename, line, 0, 0)
-    yield Token('END', '', loc)
+
+class Lexer:
+    def __init__(self, diag):
+        self.diag = diag
+
+    def tokenize(self, input_file):
+        """
+        Tokenizer, generates an iterator that
+        returns tokens!
+
+        Input is a file like object.
+
+        This GREAT example was taken from python re doc page!
+        """
+        filename = input_file.name if hasattr(input_file, 'name') else ''
+        s = input_file.read()
+        input_file.close()
+        self.diag.addSource(filename, s)
+        tok_spec = [
+            ('REAL', r'\d+\.\d+'),
+            ('HEXNUMBER', r'0x[\da-fA-F]+'),
+            ('NUMBER', r'\d+'),
+            ('ID', r'[A-Za-z][A-Za-z\d_]*'),
+            ('NEWLINE', r'\n'),
+            ('SKIP', r'[ \t]'),
+            ('COMMENTS', r'//.*'),
+            ('LONGCOMMENTBEGIN', r'\/\*'),
+            ('LONGCOMMENTEND', r'\*\/'),
+            ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
+            ('STRING', r"'.*?'")
+        ]
+        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+        gettok = re.compile(tok_re).match
+        line = 1
+        pos = line_start = 0
+        mo = gettok(s)
+        incomment = False
+        while mo is not None:
+            typ = mo.lastgroup
+            val = mo.group(typ)
+            if typ == 'NEWLINE':
+                line_start = pos
+                line += 1
+            elif typ == 'COMMENTS':
+                pass
+            elif typ == 'LONGCOMMENTBEGIN':
+                incomment = True
+            elif typ == 'LONGCOMMENTEND':
+                incomment = False
+            elif typ == 'SKIP':
+                pass
+            elif incomment:
+                pass # Wait until we are not in a comment section
+            else:
+                if typ == 'ID':
+                    if val in keywords:
+                        typ = val
+                elif typ == 'LEESTEKEN':
+                    typ = val
+                elif typ == 'NUMBER':
+                    val = int(val)
+                elif typ == 'HEXNUMBER':
+                    val = int(val[2:], 16)
+                    typ = 'NUMBER'
+                elif typ == 'REAL':
+                    val = float(val)
+                elif typ == 'STRING':
+                    val = val[1:-1]
+                loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start())
+                yield Token(typ, val, loc)
+            pos = mo.end()
+            mo = gettok(s, pos)
+        if pos != len(s):
+            col = pos - line_start
+            loc = SourceLocation(filename, line, col, 1)
+            raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc)
+        loc = SourceLocation(filename, line, 0, 0)
+        yield Token('END', '', loc)
```
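The change wraps the former module-level tokenize() function in a new Lexer class that is constructed with a diagnostics object and registers the source text via self.diag.addSource(filename, s) before scanning. The scanner itself uses the named-group regular-expression technique from the Python re documentation, as its docstring notes. The sketch below is a minimal, self-contained illustration of that technique only; the Tok tuple, KEYWORDS set, and reduced token spec are placeholders for this example, not the project's Token, SourceLocation, or CompilerError classes.

```python
import re
from collections import namedtuple

# Hypothetical stand-in for the project's Token/SourceLocation pair.
Tok = namedtuple('Tok', ['typ', 'val', 'row', 'col'])

KEYWORDS = {'if', 'else', 'while', 'return', 'var'}

# One named group per token rule; mo.lastgroup tells us which rule matched.
TOK_SPEC = [
    ('REAL', r'\d+\.\d+'),
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z][A-Za-z\d_]*'),
    ('NEWLINE', r'\n'),
    ('SKIP', r'[ \t]+'),
    ('OP', r'==|[+\-*/=<>();]'),
]
TOK_RE = re.compile('|'.join('(?P<{}>{})'.format(name, pat) for name, pat in TOK_SPEC))

def tokenize(text):
    """Yield Tok tuples by repeatedly matching the combined regex at the current position."""
    row, line_start, pos = 1, 0, 0
    mo = TOK_RE.match(text)
    while mo is not None:
        typ, val = mo.lastgroup, mo.group()
        if typ == 'NEWLINE':
            row += 1
            line_start = mo.end()
        elif typ != 'SKIP':
            if typ == 'ID' and val in KEYWORDS:
                typ = val            # keywords become their own token type
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'REAL':
                val = float(val)
            yield Tok(typ, val, row, mo.start() - line_start)
        pos = mo.end()
        mo = TOK_RE.match(text, pos)
    if pos != len(text):
        # Nothing matched at pos: report the offending character.
        raise ValueError('Unexpected character {!r} at offset {}'.format(text[pos], pos))

if __name__ == '__main__':
    for tok in tokenize("var x = 3 + 4.5\nif x == 7 return x\n"):
        print(tok)
```

With the refactored class, the tokenizer would presumably be driven as lex = Lexer(diag); tokens = lex.tokenize(input_file), so the diagnostics object holds the source text for later error reporting.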