318
|
1 #!/usr/bin/python
|
|
2
|
|
3 """ Bottom up rewrite generator in python """
|
|
4 import sys
|
|
5 import re
|
|
6 import argparse
|
|
7 from ppci import Token
|
|
8 import burg_parser
|
|
9
|
|
10
|
|
class ParseError(Exception):
    """ Raised when a line of the specification cannot be tokenized """


class BurgLexer:
    """ Lexer that turns the text of a burg specification into tokens.

    Call feed() with the complete text first, then pull tokens one at a
    time with next_token(). Tokens are created as Token(typ, val).
    """
    def feed(self, txt):
        """ Prepare the token stream for the specification text txt.

        The text is processed line by line. Lines are stripped and empty
        lines are skipped. A line that is exactly '%%' yields a '%%' token
        and starts the next section:

        - section 0: a line starting with '%tokens' yields a '%tokens'
          token followed by the tokens of the rest of that line, any other
          line yields a single 'HEADER' token holding the whole line.
        - section 1: every line is split into tokens.
        - later sections: lines are ignored.

        After the last line an 'eof' token is produced.

        Tokenization is lazy: only the first token is fetched here, so a
        ParseError for a malformed line surfaces from the next_token() call
        that reaches that line.
        """
        tok_spec = [
            ('ID', r'[A-Za-z][A-Za-z\d_]*'),
            ('STRING', r"'[^']*'"),
            ('BRACEDCODE', r"\{[^\}]*\}"),
            ('OTHER', r'[:;\|]'),
            ('SKIP', r'[ ]')
        ]
        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
        gettok = re.compile(tok_re).match

        lines = txt.split('\n')

        def tokenize_line(line):
            """ Generator that splits up a line into tokens """
            pos = 0
            mo = gettok(line)
            while mo:
                typ = mo.lastgroup
                val = mo.group(typ)
                if typ == 'ID':
                    yield Token(typ, val)
                elif typ == 'STRING':
                    # A quoted string is handed out as an ID, minus quotes
                    yield Token('ID', val[1:-1])
                elif typ == 'OTHER':
                    # Punctuation uses the character itself as token type
                    yield Token(val, val)
                elif typ == 'BRACEDCODE':
                    yield Token(typ, val)
                elif typ == 'SKIP':
                    pass
                else:
                    raise NotImplementedError(str(typ))
                pos = mo.end()
                mo = gettok(line, pos)
            # Matching stopped before the end of the line: unknown input
            if len(line) != pos:
                raise ParseError('Lex fault at {}'.format(line))

        def tokenize():
            """ Generator that yields the tokens of all lines """
            section = 0
            for line in lines:
                line = line.strip()
                if not line:
                    continue  # Skip empty lines
                if line == '%%':
                    section += 1
                    yield Token('%%', '%%')
                    continue
                if section == 0:
                    if line.startswith('%tokens'):
                        yield Token('%tokens', '%tokens')
                        # Tokenize what follows the 7 character keyword
                        yield from tokenize_line(line[7:])
                    else:
                        yield Token('HEADER', line)
                elif section == 1:
                    yield from tokenize_line(line)
            yield Token('eof', 'eof')

        self.tokens = tokenize()
        # Keep one token of lookahead in self.token
        self.token = next(self.tokens)

    def next_token(self):
        """ Return the current token and advance to the next one.

        Once the 'eof' token is reached it is returned on every call and
        the token stream is not advanced any further.
        """
        t = self.token
        if t.typ != 'eof':
            self.token = next(self.tokens)
        return t
|
|
77
|
|
78
|
|
class BurgParser(burg_parser.Parser):
    """ Parser for burg specifications, built on the generated parser """

    def add_rule(self, *fields):
        """ Print the received positional arguments as one tuple.

        NOTE(review): presumably called by the generated parser when a
        rule has been recognized -- confirm against burg_parser.
        """
        print(fields)
|
|
83
|
|
84
|
|
def main():
    """ Command line entry point: lex and parse a burg specification.

    Command line arguments:
    source -- file containing the specification, read completely.
    -o / --output -- file opened for writing, defaults to sys.stdout.
        Nothing in this function writes to it.

    The text of the source file is fed into a BurgLexer, which is then
    handed to BurgParser.parse().
    """
    # Parse arguments:
    parser = argparse.ArgumentParser(
        description='pyburg bottom up rewrite system generator '
                    'compiler compiler')
    parser.add_argument(
        'source', type=argparse.FileType('r'),
        help='the parser specification')
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'),
        default=sys.stdout)
    args = parser.parse_args()

    try:
        # The with statement closes the source even when read() fails
        with args.source as source:
            src = source.read()

        lexer = BurgLexer()
        burg = BurgParser()
        lexer.feed(src)
        burg.parse(lexer)
    finally:
        # argparse already opened the output file, so release it again,
        # but leave the interpreter's own stdout alone.
        if args.output is not sys.stdout:
            args.output.close()
|
|
100
|
|
# Only run the command line front end when executed as a script.
if __name__ == "__main__":
    main()
|