Mercurial > lcfOS
comparison python/c3/lexer.py @ 148:e5263f74b287
Added c3 language frontend initial parser
author | Windel Bouwman |
---|---|
date | Fri, 01 Mar 2013 10:24:01 +0100 |
parents | |
children | 74241ca312cc |
comparison
equal
deleted
inserted
replaced
147:4e79484a9d47 | 148:e5263f74b287 |
---|---|
1 import collections, re | |
2 from ppci.errors import CompilerException, SourceLocation | |
3 | |
4 """ | |
5 Lexical analyzer part. Splits the input character stream into tokens. | |
6 """ | |
7 | |
# A lexer token: its type tag (typ), its converted value (val) and the
# source location where it was found (loc).
Token = collections.namedtuple('Token', ['typ', 'val', 'loc'])

# Reserved words: an identifier spelled like one of these is emitted with
# the word itself as its token type instead of 'ID'.
keywords = [
    'and', 'or', 'not', 'true', 'false',
    'else', 'if', 'while', 'return',
    'public', 'function', 'var', 'type',
    'import', 'package',
]
15 | |
def tokenize(s):
    """
    Split the source text ``s`` into tokens.

    This is a generator. It yields ``Token(typ, val, loc)`` tuples, where
    ``loc`` is a ``SourceLocation(line, column)``; lines count from 1 and
    columns from 0. Whitespace, newlines and ``//`` comments produce no
    token. A final ``Token('END', '', loc)`` marks the end of the input.

    Token types and values:
      * keywords and punctuation: ``typ`` is the matched text itself
      * ``'ID'``: ``val`` is the identifier text
      * ``'NUMBER'``: ``val`` is an int (decimal or ``0x`` hexadecimal)
      * ``'REAL'``: ``val`` is a float
      * ``'STRING'``: ``val`` is the text between the single quotes

    Raises CompilerException when a character matches no token pattern.

    The structure follows the tokenizer example in the Python ``re``
    module documentation.
    """
    # Order matters: the alternatives are tried left to right, so REAL and
    # HEXNUMBER must precede NUMBER, and COMMENTS must precede LEESTEKEN
    # (which contains a lone '/').
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('NEWLINE', r'\n'),
        ('SKIP', r'[ \t]'),
        ('COMMENTS', r'//.*'),
        # LEESTEKEN is Dutch for "punctuation mark".
        ('LEESTEKEN', r'==|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}'),
        ('STRING', r"'.*?'"),
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # The next line starts just after the newline character, so
            # columns are 0-based on every line, not only on the first.
            line_start = mo.end()
            line += 1
        elif typ not in ('COMMENTS', 'SKIP'):
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Drop the '0x' prefix; hex literals are plain NUMBER tokens.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                # Strip the surrounding quotes.
                val = val[1:-1]
            loc = SourceLocation(line, mo.start() - line_start)
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # Matching stopped before the end of the input: s[pos] is the
        # offending character. Index with the character offset, not with
        # the line number.
        # NOTE(review): the second argument is kept as the line number,
        # which is the value the previous code passed; confirm whether
        # CompilerException would rather take a SourceLocation.
        raise CompilerException('Unexpected character {0}'.format(s[pos]), line)
    # Give the END marker the same kind of location as every other token.
    yield Token('END', '', SourceLocation(line, pos - line_start))
73 |