comparison python/c3/lexer.py @ 293:6aa721e7b10b

Try to improve build sequence
author Windel Bouwman
date Thu, 28 Nov 2013 20:39:37 +0100
parents a747a45dcd78
children
comparison
equal deleted inserted replaced
292:534b94b40aa8 293:6aa721e7b10b
11 'else', 'if', 'while', 'return', 11 'else', 'if', 'while', 'return',
12 'function', 'var', 'type', 'const', 12 'function', 'var', 'type', 'const',
13 'struct', 'cast', 13 'struct', 'cast',
14 'import', 'module'] 14 'import', 'module']
15 15
class Lexer:
    """Tokenizer for the c3 language: turns a source file into a token stream."""

    def __init__(self, diag):
        # Diagnostics collector; receives the source text for error reporting.
        self.diag = diag

    def tokenize(self, input_file):
        """Generate Token objects from a file-like object.

        Reads the whole file, registers it with the diagnostics system,
        then scans it with a single combined regular expression
        (the technique from the Python `re` documentation).  Yields a
        Token per lexeme and a final 'END' token; raises CompilerError
        on a character no pattern matches.
        """
        filename = input_file.name if hasattr(input_file, 'name') else ''
        text = input_file.read()
        input_file.close()
        self.diag.addSource(filename, text)
        token_spec = [
            ('REAL', r'\d+\.\d+'),
            ('HEXNUMBER', r'0x[\da-fA-F]+'),
            ('NUMBER', r'\d+'),
            ('ID', r'[A-Za-z][A-Za-z\d_]*'),
            ('NEWLINE', r'\n'),
            ('SKIP', r'[ \t]'),
            ('COMMENTS', r'//.*'),
            ('LONGCOMMENTBEGIN', r'\/\*'),
            ('LONGCOMMENTEND', r'\*\/'),
            ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
            ('STRING', r"'.*?'")
        ]
        combined = '|'.join('(?P<%s>%s)' % pair for pair in token_spec)
        find_match = re.compile(combined).match
        line_num = 1
        position = line_begin = 0
        in_comment = False
        match = find_match(text)
        while match is not None:
            kind = match.lastgroup
            value = match.group(kind)
            if kind == 'NEWLINE':
                # Remember where this line starts so columns can be computed.
                line_begin = position
                line_num += 1
            elif kind in ('COMMENTS', 'SKIP'):
                pass  # line comments and whitespace produce no tokens
            elif kind == 'LONGCOMMENTBEGIN':
                in_comment = True
            elif kind == 'LONGCOMMENTEND':
                in_comment = False
            elif in_comment:
                pass  # swallow everything until the comment closes
            else:
                # Normalize the token type/value before emitting.
                if kind == 'ID' and value in keywords:
                    kind = value  # keywords are their own token type
                elif kind == 'LEESTEKEN':
                    kind = value  # punctuation tokens are typed by their text
                elif kind == 'NUMBER':
                    value = int(value)
                elif kind == 'HEXNUMBER':
                    value = int(value[2:], 16)
                    kind = 'NUMBER'
                elif kind == 'REAL':
                    value = float(value)
                elif kind == 'STRING':
                    value = value[1:-1]  # strip the surrounding quotes
                loc = SourceLocation(filename, line_num, match.start() - line_begin, match.end() - match.start())
                yield Token(kind, value, loc)
            position = match.end()
            match = find_match(text, position)
        if position != len(text):
            # Scanning stopped before the end: no pattern matched here.
            col = position - line_begin
            loc = SourceLocation(filename, line_num, col, 1)
            raise CompilerError('Unexpected character "{0}"'.format(text[position]), loc)
        loc = SourceLocation(filename, line_num, 0, 0)
        yield Token('END', '', loc)