comparison python/c3/lexer.py @ 287:1c7c1e619be8

File movage
author Windel Bouwman
date Thu, 21 Nov 2013 11:57:27 +0100
parents 05184b95fa16
children a747a45dcd78
comparing 286:d9df72971cbf with 287:1c7c1e619be8
@@ -10,18 +10,22 @@
     'else', 'if', 'while', 'return', \
     'function', 'var', 'type', 'const', \
     'struct', 'cast', \
     'import', 'module' ]
 
-def tokenize(s):
+def tokenize(input_file):
     """
     Tokenizer, generates an iterator that
     returns tokens!
 
+    Input is a file like object.
+
     This GREAT example was taken from python re doc page!
     """
-    tok_spec = [
+    filename = input_file.name if hasattr(input_file, 'name') else ''
+    s = input_file.read()
+    tok_spec = [
         ('REAL', r'\d+\.\d+'),
         ('HEXNUMBER', r'0x[\da-fA-F]+'),
         ('NUMBER', r'\d+'),
         ('ID', r'[A-Za-z][A-Za-z\d_]*'),
         ('NEWLINE', r'\n'),
@@ -30,17 +34,17 @@
         ('LONGCOMMENTBEGIN', r'\/\*'),
         ('LONGCOMMENTEND', r'\*\/'),
         ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
         ('STRING', r"'.*?'")
         ]
     tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
     gettok = re.compile(tok_re).match
     line = 1
     pos = line_start = 0
     mo = gettok(s)
     incomment = False
     while mo is not None:
         typ = mo.lastgroup
         val = mo.group(typ)
         if typ == 'NEWLINE':
             line_start = pos
             line += 1
@@ -67,16 +71,16 @@
             typ = 'NUMBER'
         elif typ == 'REAL':
             val = float(val)
         elif typ == 'STRING':
             val = val[1:-1]
-        loc = SourceLocation(line, mo.start()-line_start, mo.end() - mo.start())
+        loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start())
         yield Token(typ, val, loc)
         pos = mo.end()
         mo = gettok(s, pos)
     if pos != len(s):
         col = pos - line_start
-        loc = SourceLocation(line, col, 1)
+        loc = SourceLocation(filename, line, col, 1)
         raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc)
-    loc = SourceLocation(line, 0, 0)
+    loc = SourceLocation(filename, line, 0, 0)
     yield Token('END', '', loc)
 
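
The tokenizer follows the scanner recipe from the Python re documentation that the docstring refers to: each token kind becomes a named group, the groups are joined into one alternation, and the compiled pattern's match() method is re-applied at the position where the previous match ended. Below is a minimal, self-contained sketch of that technique which also mirrors the file-like input handling introduced in this changeset; the token spec is trimmed, the plain tuples it yields stand in for the repository's Token and SourceLocation classes, and names such as tokenize_sketch, SKIP and OP are illustrative only.

import io
import re

def tokenize_sketch(input_file):
    # Read the whole input, as the new tokenize(input_file) does;
    # a real file object also carries its name for location reporting.
    filename = input_file.name if hasattr(input_file, 'name') else ''
    text = input_file.read()
    # Each token kind is a named group; order gives priority, so REAL
    # is tried before NUMBER and '3.14' is not split into '3' '.' '14'.
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('NEWLINE', r'\n'),
        ('SKIP', r'[ \t]+'),
        ('OP', r'[=+\-*/;]'),
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(text)
    while mo is not None:
        typ = mo.lastgroup
        if typ == 'NEWLINE':
            line_start = mo.end()
            line += 1
        elif typ != 'SKIP':
            col = mo.start() - line_start
            yield (typ, mo.group(typ), filename, line, col)
        pos = mo.end()
        mo = gettok(text, pos)
    if pos != len(text):
        raise ValueError('Unexpected character %r at line %d' % (text[pos], line))
    yield ('END', '', filename, line, 0)

# Any object with a read() method works as input, e.g. an in-memory buffer:
src = io.StringIO("x = 3.14;\ny = x + 2\n")
for tok in tokenize_sketch(src):
    print(tok)

As in the changed tokenize(), passing an open file instead of io.StringIO would additionally supply a name attribute, so every emitted location carries the source filename.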