Mercurial > lcfOS
comparison python/pyburg.py @ 318:e84047f29c78
Add burg and yacc initial attempts
author | Windel Bouwman |
---|---|
date | Tue, 31 Dec 2013 12:38:15 +0100 |
parents | |
children | 8d07a4254f04 |
comparison
equal
deleted
inserted
replaced
317:e30a77ae359b | 318:e84047f29c78 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 """ Bottom up rewrite generator in python """ | |
4 import sys | |
5 import re | |
6 import argparse | |
7 from ppci import Token | |
8 import burg_parser | |
9 | |
10 | |
class ParseError(Exception):
    """ Raised when a line of the specification cannot be tokenized """


class BurgLexer:
    """ Splits a burg specification into tokens.

    Call feed() with the text of the specification; afterwards the
    current token is available as self.token and next_token() hands the
    tokens out one by one, ending with an 'eof' token.
    """

    # Token kinds for a single line; alternatives are tried in this order.
    _TOKEN_SPEC = (
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('STRING', r"'[^']*'"),
        ('BRACEDCODE', r"\{[^\}]*\}"),
        ('OTHER', r'[:;\|]'),
        # Tabs separate tokens just like spaces do
        ('SKIP', r'[ \t]'),
    )
    # Compiled once for the class instead of on every feed() call.
    _TOKEN_RE = re.compile(
        '|'.join('(?P<%s>%s)' % pair for pair in _TOKEN_SPEC))

    def feed(self, txt):
        """ Start tokenizing txt and load the first token into self.token.

        Raises ParseError when the first token cannot be produced because
        of a lex fault; later faults surface from next_token().
        """
        self.tokens = self._tokenize(txt.split('\n'))
        self.token = next(self.tokens)

    def next_token(self):
        """ Return the current token and advance to the next one.

        Once the 'eof' token is reached it is returned on every call.
        """
        t = self.token
        if t.typ != 'eof':
            self.token = next(self.tokens)
        return t

    def _tokenize_line(self, line):
        """ Generator that splits up a line into tokens.

        Raises ParseError when some part of the line matches no token kind.
        """
        gettok = self._TOKEN_RE.match
        pos = 0
        mo = gettok(line)
        while mo:
            kind = mo.lastgroup
            text = mo.group(kind)
            if kind == 'STRING':
                # A quoted name becomes an ID token without its quotes
                yield Token('ID', text[1:-1])
            elif kind == 'OTHER':
                # Punctuation uses the character itself as token type
                yield Token(text, text)
            elif kind in ('ID', 'BRACEDCODE'):
                yield Token(kind, text)
            elif kind != 'SKIP':
                raise NotImplementedError(str(kind))
            pos = mo.end()
            mo = gettok(line, pos)
        # Matching stopped before the end of the line: unknown character
        if len(line) != pos:
            raise ParseError('Lex fault at {}'.format(line))

    def _tokenize(self, lines):
        """ Generator that yields the tokens of all lines, then 'eof'.

        Every '%%' line yields a '%%' token and starts the next section.
        Section 0 holds '%tokens' declarations and HEADER lines, section 1
        is tokenized line by line; lines in later sections are ignored.
        """
        section = 0
        for line in lines:
            line = line.strip()
            if not line:
                continue  # Skip empty lines
            if line == '%%':
                section += 1
                yield Token('%%', '%%')
            elif section == 0:
                if line.startswith('%tokens'):
                    yield Token('%tokens', '%tokens')
                    yield from self._tokenize_line(line[len('%tokens'):])
                else:
                    yield Token('HEADER', line)
            elif section == 1:
                yield from self._tokenize_line(line)
        yield Token('eof', 'eof')
77 | |
78 | |
class BurgParser(burg_parser.Parser):
    """ Parser for burg specifications, built on the generated parser """

    def add_rule(self, *rule):
        # For now the received arguments are only printed.
        # NOTE(review): presumably invoked from the generated parser's
        # rule actions -- confirm against burg_parser.
        print(rule)
83 | |
84 | |
def main():
    """ Command line entry point: read a specification file and parse it.

    Takes the 'source' file and the optional '-o/--output' file from the
    command line, feeds the source text to a BurgLexer and runs a
    BurgParser over the resulting tokens.
    """
    # Parse arguments:
    parser = argparse.ArgumentParser(
        description='pyburg bottom up rewrite system generator compiler compiler')
    parser.add_argument(
        'source', type=argparse.FileType('r'),
        help='the parser specification')
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'),
        default=sys.stdout)
    args = parser.parse_args()

    # The with block closes the source file even when read() raises
    with args.source as source:
        src = source.read()

    # NOTE: args.output is opened by argparse but not written to here.
    lexer = BurgLexer()
    burg = BurgParser()
    lexer.feed(src)
    burg.parse(lexer)
100 | |
# Run the command line tool only when executed as a script, not on import
if __name__ == '__main__':
    main()