Mercurial > lcfOS
comparison python/c3/lexer.py @ 287:1c7c1e619be8
File movage
author | Windel Bouwman |
---|---|
date | Thu, 21 Nov 2013 11:57:27 +0100 |
parents | 05184b95fa16 |
children | a747a45dcd78 |
comparison
equal
deleted
inserted
replaced
286:d9df72971cbf | 287:1c7c1e619be8 |
---|---|
10 'else', 'if', 'while', 'return', \ | 10 'else', 'if', 'while', 'return', \ |
11 'function', 'var', 'type', 'const', \ | 11 'function', 'var', 'type', 'const', \ |
12 'struct', 'cast', \ | 12 'struct', 'cast', \ |
13 'import', 'module' ] | 13 'import', 'module' ] |
14 | 14 |
15 def tokenize(s): | 15 def tokenize(input_file): |
16 """ | 16 """ |
17 Tokenizer, generates an iterator that | 17 Tokenizer, generates an iterator that |
18 returns tokens! | 18 returns tokens! |
19 | 19 |
20 Input is a file like object. | |
21 | |
20 This GREAT example was taken from python re doc page! | 22 This GREAT example was taken from python re doc page! |
21 """ | 23 """ |
22 tok_spec = [ | 24 filename = input_file.name if hasattr(input_file, 'name') else '' |
25 s = input_file.read() | |
26 tok_spec = [ | |
23 ('REAL', r'\d+\.\d+'), | 27 ('REAL', r'\d+\.\d+'), |
24 ('HEXNUMBER', r'0x[\da-fA-F]+'), | 28 ('HEXNUMBER', r'0x[\da-fA-F]+'), |
25 ('NUMBER', r'\d+'), | 29 ('NUMBER', r'\d+'), |
26 ('ID', r'[A-Za-z][A-Za-z\d_]*'), | 30 ('ID', r'[A-Za-z][A-Za-z\d_]*'), |
27 ('NEWLINE', r'\n'), | 31 ('NEWLINE', r'\n'), |
30 ('LONGCOMMENTBEGIN', r'\/\*'), | 34 ('LONGCOMMENTBEGIN', r'\/\*'), |
31 ('LONGCOMMENTEND', r'\*\/'), | 35 ('LONGCOMMENTEND', r'\*\/'), |
32 ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'), | 36 ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'), |
33 ('STRING', r"'.*?'") | 37 ('STRING', r"'.*?'") |
34 ] | 38 ] |
35 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) | 39 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) |
36 gettok = re.compile(tok_re).match | 40 gettok = re.compile(tok_re).match |
37 line = 1 | 41 line = 1 |
38 pos = line_start = 0 | 42 pos = line_start = 0 |
39 mo = gettok(s) | 43 mo = gettok(s) |
40 incomment = False | 44 incomment = False |
41 while mo is not None: | 45 while mo is not None: |
42 typ = mo.lastgroup | 46 typ = mo.lastgroup |
43 val = mo.group(typ) | 47 val = mo.group(typ) |
44 if typ == 'NEWLINE': | 48 if typ == 'NEWLINE': |
45 line_start = pos | 49 line_start = pos |
46 line += 1 | 50 line += 1 |
67 typ = 'NUMBER' | 71 typ = 'NUMBER' |
68 elif typ == 'REAL': | 72 elif typ == 'REAL': |
69 val = float(val) | 73 val = float(val) |
70 elif typ == 'STRING': | 74 elif typ == 'STRING': |
71 val = val[1:-1] | 75 val = val[1:-1] |
72 loc = SourceLocation(line, mo.start()-line_start, mo.end() - mo.start()) | 76 loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start()) |
73 yield Token(typ, val, loc) | 77 yield Token(typ, val, loc) |
74 pos = mo.end() | 78 pos = mo.end() |
75 mo = gettok(s, pos) | 79 mo = gettok(s, pos) |
76 if pos != len(s): | 80 if pos != len(s): |
77 col = pos - line_start | 81 col = pos - line_start |
78 loc = SourceLocation(line, col, 1) | 82 loc = SourceLocation(filename, line, col, 1) |
79 raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc) | 83 raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc) |
80 loc = SourceLocation(line, 0, 0) | 84 loc = SourceLocation(filename, line, 0, 0) |
81 yield Token('END', '', loc) | 85 yield Token('END', '', loc) |
82 | 86 |