comparison python/pyburg.py @ 319:8d07a4254f04

Work on burg
author Windel Bouwman
date Sat, 18 Jan 2014 18:58:43 +0100
parents e84047f29c78
children 84d67cce67b7
comparison
equal deleted inserted replaced
318:e84047f29c78 319:8d07a4254f04
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 """ Bottom up rewrite generator in python """ 3 """
4 Bottom up rewrite generator.
5
6 This script takes as input a description of patterns and outputs a
7 matcher class that can match trees given the patterns.
8
9 Patterns are specified as follows:
10 reg -> ADDI32(reg, reg) 2 (. add $1 $2 .)
11 reg -> MULI32(reg, reg) 3 (. .)
12 or a multiply add:
13 reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .)
14 The general specification pattern is:
15 [result] -> [tree] [cost] [template code]
16
17 A tree is described using parenthesis notation. For example a node X with
18 three
19 child nodes is described as:
20 X(a, b, b)
21 Trees can be nested:
22 X(Y(a, a), a)
23 The 'a' in the example above indicates an open connection to a next tree
24 pattern.
25
26 The generated matcher uses dynamic programming to find the best match of the
27 tree. This strategy consists of two steps:
28 - label: During this phase the given tree is traversed in a bottom up way.
29 each node is labelled with a possible matching rule and the corresponding cost.
30 - select: In this step, the tree is traversed again, selecting at each point
31 the cheapest way to get to the goal.
32
33 """
34
4 import sys 35 import sys
5 import re
6 import argparse 36 import argparse
7 from ppci import Token 37 from ppci import Token
8 import burg_parser 38 import burg_parser # Automatically generated
39 from pyyacc import ParserException, EOF
40 import baselex
41 from tree import Tree
9 42
10 43
11 class BurgLexer: 44 class BurgLexer:
12 def feed(self, txt): 45 def feed(self, txt):
13 tok_spec = [ 46 tok_spec = [
14 ('ID', r'[A-Za-z][A-Za-z\d_]*'), 47 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
15 ('STRING', r"'[^']*'"), 48 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)),
16 ('BRACEDCODE', r"\{[^\}]*\}"), 49 ('number', r'\d+', lambda typ, val: (typ, int(val))),
17 ('OTHER', r'[:;\|]'), 50 ('STRING', r"'[^']*'", lambda typ, val: ('id', val[1:-1])),
18 ('SKIP', r'[ ]') 51 ('template', r"\(\..*\.\)", lambda typ, val: (typ, val)),
52 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)),
53 ('SKIP', r'[ ]', None)
19 ] 54 ]
20 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
21 gettok = re.compile(tok_re).match
22 55
23 lines = txt.split('\n') 56 lines = txt.split('\n')
24 def tokenize_line(line):
25 """ Generator that splits up a line into tokens """
26 mo = gettok(line)
27 pos = 0
28 while mo:
29 typ = mo.lastgroup
30 val = mo.group(typ)
31 if typ == 'ID':
32 yield Token(typ, val)
33 elif typ == 'STRING':
34 typ = 'ID'
35 yield Token(typ, val[1:-1])
36 elif typ == 'OTHER':
37 typ = val
38 yield Token(typ, val)
39 elif typ == 'BRACEDCODE':
40 yield Token(typ, val)
41 elif typ == 'SKIP':
42 pass
43 else:
44 raise NotImplementedError(str(typ))
45 pos = mo.end()
46 mo = gettok(line, pos)
47 if len(line) != pos:
48 raise ParseError('Lex fault at {}'.format(line))
49 57
50 def tokenize(): 58 def tokenize():
51 section = 0
52 for line in lines: 59 for line in lines:
53 line = line.strip() 60 line = line.strip()
54 if not line: 61 if not line:
55 continue # Skip empty lines 62 continue # Skip empty lines
56 if line == '%%': 63 elif line == '%%':
57 section += 1
58 yield Token('%%', '%%') 64 yield Token('%%', '%%')
59 continue 65 else:
60 if section == 0: 66 yield from baselex.tokenize(tok_spec, line)
61 if line.startswith('%tokens'): 67 yield Token(EOF, EOF)
62 yield Token('%tokens', '%tokens')
63 yield from tokenize_line(line[7:])
64 else:
65 yield Token('HEADER', line)
66 elif section == 1:
67 yield from tokenize_line(line)
68 yield Token('eof', 'eof')
69 self.tokens = tokenize() 68 self.tokens = tokenize()
70 self.token = self.tokens.__next__() 69 self.token = self.tokens.__next__()
71 70
72 def next_token(self): 71 def next_token(self):
73 t = self.token 72 t = self.token
74 if t.typ != 'eof': 73 if t.typ != EOF:
75 self.token = self.tokens.__next__() 74 self.token = self.tokens.__next__()
76 return t 75 return t
77 76
78 77
78 class Rule:
79 def __init__(self, non_term, tree, cost, template):
80 self.non_term = non_term
81 self.tree = tree
82 self.cost = cost
83 self.template = template
84 self.nr = 0
85
86 def __repr__(self):
87 return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost)
88
89
90 class Symbol:
91 def __init__(self, name):
92 self.name = name
93
94
95 class Term(Symbol):
96 pass
97
98
99 class Nonterm(Symbol):
100 def __init__(self, name):
101 super().__init__(name)
102 self.rules = []
103
104
105 class BurgSystem:
106 def __init__(self):
107 self.rules = []
108 self.symbols = {}
109 self.goal = None
110
111 def symType(self, t):
112 return (s.name for s in self.symbols.values() if type(s) is t)
113
114 terminals = property(lambda s: s.symType(Term))
115
116 non_terminals = property(lambda s: s.symType(Nonterm))
117
118 def add_rule(self, non_term, tree, cost, template):
119 rule = Rule(non_term, tree, cost, template)
120 self.non_term(rule.non_term)
121 self.rules.append(rule)
122 rule.nr = len(self.rules)
123
124 def non_term(self, name):
125 if name in self.terminals:
126 raise BurgError('Cannot redefine terminal')
127 if not self.goal:
128 self.goal = name
129 self.install(name, Nonterm)
130
131 def tree(self, name, *args):
132 return Tree(name, *args)
133
134 def install(self, name, t):
135 assert type(name) is str
136 if name in self.symbols:
137 assert type(self.symbols[name]) is t
138 return self.symbols[name]
139 else:
140 self.symbols[name] = t(name)
141
142 def add_terminal(self, terminal):
143 self.install(terminal, Term)
144
145
146 class BurgError(Exception):
147 pass
148
149
79 class BurgParser(burg_parser.Parser): 150 class BurgParser(burg_parser.Parser):
80 """ Derive from automatically generated parser """ 151 """ Derived from automatically generated parser """
81 def add_rule(self, *args): 152 def parse(self, l):
82 print(args) 153 self.system = BurgSystem()
154 super().parse(l)
155 return self.system
156
157
158 class BurgGenerator:
159 def print(self, *args):
160 """ Print helper function that prints to output file """
161 print(*args, file=self.output_file)
162
163 def generate(self, system, output_file):
164 """ Generate script that implements the burg spec """
165 self.output_file = output_file
166 self.system = system
167
168 self.print('#!/usr/bin/python')
169 self.print('from tree import Tree, BaseMatcher, State')
170 self.print()
171 self.print('class Matcher(BaseMatcher):')
172 self.print(' def __init__(self):')
173 self.print(' self.kid_functions = {}')
174 self.print(' self.nts_map = {}')
175 for rule in self.system.rules:
176 kids, dummy = self.compute_kids(rule.tree, 't')
177 lf = 'lambda t: [{}]'.format(', '.join(kids), rule)
178 self.print(' # {}'.format(rule))
179 self.print(' self.kid_functions[{}] = {}'.format(rule.nr, lf))
180 self.print(' self.nts_map[{}] = {}'.format(rule.nr, dummy))
181 self.print('')
182 self.emit_state()
183 self.print()
184 self.print(' def gen(self, tree):')
185 self.print(' self.burm_label(tree)')
186 self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal))
187 self.print(' raise Exception("Tree not covered")')
188 self.print(' self.apply_rules(tree, "{}")'.format(self.system.goal))
189
190 def emit_record(self, rule, state_var):
191 # TODO: check for rules fullfilled (by not using 999999)
192 self.print(' nts = self.nts({})'.format(rule.nr))
193 self.print(' kids = self.kids(tree, {})'.format(rule.nr))
194 self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost))
195 self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr))
196
197 def emit_state(self):
198 """ Emit a function that assigns a new state to a node """
199 self.print(' def burm_state(self, tree):')
200 self.print(' tree.state = State()')
201 for term in self.system.terminals:
202 self.emitcase(term)
203 self.print()
204
205 def emitcase(self, term):
206 rules = [rule for rule in self.system.rules if rule.tree.name == term]
207 for rule in rules:
208 condition = self.emittest(rule.tree, 'tree')
209 self.print(' if {}:'.format(condition))
210 self.emit_record(rule, 'state')
211
212 def emit_closure(self):
213 for non_terminal in self.system.non_terminals:
214 self.print('def closure_{}(self, c):'.format(non_terminal))
215 self.print(' pass')
216 self.print()
217
218 def compute_kids(self, t, root_name):
219 """ Compute of a pattern the blanks that must be provided from below in the tree """
220 if t.name in self.system.non_terminals:
221 return [root_name], [t.name]
222 else:
223 k = []
224 nts = []
225 for i, c in enumerate(t.children):
226 pfx = root_name + '.children[{}]'.format(i)
227 kf, dummy = self.compute_kids(c, pfx)
228 nts.extend(dummy)
229 k.extend(kf)
230 return k, nts
231
232
233 def emittest(self, tree, prefix):
234 """ Generate condition for a tree pattern """
235 ct = (c for c in tree.children if c.name not in self.system.non_terminals)
236 child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i)) for i, c in enumerate(ct))
237 child_tests = ('({})'.format(ct) for ct in child_tests)
238 child_tests = ' and '.join(child_tests)
239 child_tests = ' and ' + child_tests if child_tests else ''
240 tst = '{}.name == "{}"'.format(prefix, tree.name)
241 return tst + child_tests
83 242
84 243
85 def main(): 244 def main():
86 # Parse arguments: 245 # Parse arguments:
87 parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler') 246 parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler')
91 default=sys.stdout) 250 default=sys.stdout)
92 args = parser.parse_args() 251 args = parser.parse_args()
93 src = args.source.read() 252 src = args.source.read()
94 args.source.close() 253 args.source.close()
95 254
255 # Parse specification into burgsystem:
96 l = BurgLexer() 256 l = BurgLexer()
97 p = BurgParser() 257 p = BurgParser()
98 l.feed(src) 258 l.feed(src)
99 p.parse(l) 259 burg_system = p.parse(l)
260
261 # Generate matcher:
262 generator = BurgGenerator()
263 generator.generate(burg_system, args.output)
100 264
101 if __name__ == '__main__': 265 if __name__ == '__main__':
102 main() 266 main()