annotate python/pyburg.py @ 318:e84047f29c78

Add burg and yacc initial attempts
author Windel Bouwman
date Tue, 31 Dec 2013 12:38:15 +0100
parents
children 8d07a4254f04
rev   line source
318
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
1 #!/usr/bin/python
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
2
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
3 """ Bottom up rewrite generator in python """
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
4 import sys
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
5 import re
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
6 import argparse
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
7 from ppci import Token
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
8 import burg_parser
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
9
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
10
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
class ParseError(Exception):
    """ Raised when a line of the burg specification cannot be tokenized """


class BurgLexer:
    """ Lexer for burg specification files.

    The text handed to feed() is processed line by line. Lines consisting
    of '%%' separate sections: before the first '%%' only '%tokens' lines
    are tokenized (every other non-empty line becomes a single HEADER
    token), between the first and second '%%' every line is tokenized,
    and lines after a second '%%' produce no tokens. The token stream is
    always terminated by an 'eof' token.
    """

    # Token kinds and their patterns. Order matters: the alternatives are
    # tried left to right and the first one that matches wins.
    _TOK_SPEC = [
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('STRING', r"'[^']*'"),
        ('BRACEDCODE', r"\{[^\}]*\}"),
        ('OTHER', r'[:;\|]'),
        ('SKIP', r'[ \t]'),  # blanks and tabs between tokens
    ]

    # Compiled once for the class instead of on every feed() call.
    _TOK_RE = re.compile(
        '|'.join('(?P<%s>%s)' % pair for pair in _TOK_SPEC))

    def feed(self, txt):
        """ Start lexing txt and load the first token into self.token.

        Tokens are produced lazily, so a ParseError for a malformed line
        is raised when that line is reached, not necessarily here.
        """
        self.tokens = self._tokenize(txt)
        self.token = next(self.tokens)

    def next_token(self):
        """ Return the current token and advance to the next one.

        Once the 'eof' token is reached it is returned on every call and
        the underlying generator is not advanced any further.
        """
        t = self.token
        if t.typ != 'eof':
            self.token = next(self.tokens)
        return t

    def _tokenize_line(self, line):
        """ Generator that splits up a line into tokens.

        Raises ParseError when some part of the line matches none of the
        token patterns.
        """
        match = self._TOK_RE.match
        pos = 0
        mo = match(line)
        while mo:
            typ = mo.lastgroup
            val = mo.group(typ)
            if typ in ('ID', 'BRACEDCODE'):
                yield Token(typ, val)
            elif typ == 'STRING':
                # A quoted string is an identifier without its quotes
                yield Token('ID', val[1:-1])
            elif typ == 'OTHER':
                # Punctuation uses its own text as the token type
                yield Token(val, val)
            elif typ != 'SKIP':
                raise NotImplementedError(str(typ))
            pos = mo.end()
            mo = match(line, pos)
        # Matching stopped before the end of the line: unknown input
        if len(line) != pos:
            raise ParseError('Lex fault at {}'.format(line))

    def _tokenize(self, txt):
        """ Generator that yields all tokens of txt, followed by 'eof' """
        directive = '%tokens'
        section = 0
        for line in txt.split('\n'):
            line = line.strip()
            if not line:
                continue  # Skip empty lines
            if line == '%%':
                section += 1
                yield Token('%%', '%%')
                continue
            if section == 0:
                if line.startswith(directive):
                    yield Token(directive, directive)
                    yield from self._tokenize_line(line[len(directive):])
                else:
                    yield Token('HEADER', line)
            elif section == 1:
                yield from self._tokenize_line(line)
        yield Token('eof', 'eof')
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
77
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
78
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
class BurgParser(burg_parser.Parser):
    """ Parser for burg specifications.

    Subclass of the automatically generated burg_parser.Parser that
    supplies the add_rule hook.
    """

    def add_rule(self, *args):
        """ Print the tuple of received arguments to standard output.

        NOTE(review): presumably called by the generated parser for each
        rule it recognizes -- confirm against burg_parser.
        """
        print(args)
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
83
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
84
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
def main():
    """ Command line entry point.

    Reads the burg specification named on the command line, feeds it to
    a BurgLexer and lets a BurgParser parse the resulting token stream.
    The -o/--output option is accepted (default: standard output) but is
    not used by this function.
    """
    # Parse arguments:
    parser = argparse.ArgumentParser(
        description='pyburg bottom up rewrite system generator compiler compiler')
    parser.add_argument(
        'source', type=argparse.FileType('r'),
        help='the parser specification')
    parser.add_argument(
        '-o', '--output', type=argparse.FileType('w'),
        default=sys.stdout)
    args = parser.parse_args()

    # The with-block closes the source file even when reading fails.
    with args.source as source:
        src = source.read()

    lexer = BurgLexer()
    burg = BurgParser()
    lexer.feed(src)
    burg.parse(lexer)
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
100
e84047f29c78 Add burg and yacc initial attempts
Windel Bouwman
parents:
diff changeset
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()