318
|
1 #!/usr/bin/python
|
|
2
|
319
|
3 """
|
|
4 Bottom up rewrite generator.
|
|
5
|
|
6 This script takes as input a description of patterns and outputs a
|
|
7 matcher class that can match trees given the patterns.
|
|
8
|
|
9 Patterns are specified as follows:
|
|
10 reg -> ADDI32(reg, reg) 2 (. add $1 $2 .)
|
|
11 reg -> MULI32(reg, reg) 3 (. .)
|
|
12 or a multiply add:
|
|
13 reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .)
|
|
14 The general specification pattern is:
|
|
15 [result] -> [tree] [cost] [template code]
|
|
16
|
|
17 A tree is described using parenthesis notation. For example a node X with
|
|
18 three
|
|
19 child nodes is described as:
|
|
20 X(a, b, b)
|
|
21 Trees can be nested:
|
|
22 X(Y(a, a), a)
|
|
23 The 'a' in the example above indicates an open connection to a next tree
|
|
24 pattern.
|
|
25
|
|
26 The generated matcher uses dynamic programming to find the best match of the
|
|
27 tree. This strategy consists of two steps:
|
|
28 - label: During this phase the given tree is traversed in a bottom up way.
|
|
29 each node is labelled with a possible matching rule and the corresponding cost.
|
|
30 - select: In this step, the tree is traversed again, selecting at each point
|
|
31 the cheapest way to get to the goal.
|
|
32
|
|
33 """
|
|
34
|
318
|
35 import sys
|
|
36 import argparse
|
|
37 from ppci import Token
|
319
|
38 import burg_parser # Automatically generated
|
|
39 from pyyacc import ParserException, EOF
|
|
40 import baselex
|
|
41 from tree import Tree
|
318
|
42
|
|
43
|
|
44 class BurgLexer:
|
|
45 def feed(self, txt):
|
|
46 tok_spec = [
|
319
|
47 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
|
|
48 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)),
|
|
49 ('number', r'\d+', lambda typ, val: (typ, int(val))),
|
|
50 ('STRING', r"'[^']*'", lambda typ, val: ('id', val[1:-1])),
|
|
51 ('template', r"\(\..*\.\)", lambda typ, val: (typ, val)),
|
|
52 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)),
|
|
53 ('SKIP', r'[ ]', None)
|
318
|
54 ]
|
|
55
|
|
56 lines = txt.split('\n')
|
|
57
|
|
58 def tokenize():
|
|
59 for line in lines:
|
|
60 line = line.strip()
|
|
61 if not line:
|
|
62 continue # Skip empty lines
|
319
|
63 elif line == '%%':
|
318
|
64 yield Token('%%', '%%')
|
319
|
65 else:
|
|
66 yield from baselex.tokenize(tok_spec, line)
|
|
67 yield Token(EOF, EOF)
|
318
|
68 self.tokens = tokenize()
|
|
69 self.token = self.tokens.__next__()
|
|
70
|
|
71 def next_token(self):
|
|
72 t = self.token
|
319
|
73 if t.typ != EOF:
|
318
|
74 self.token = self.tokens.__next__()
|
|
75 return t
|
|
76
|
|
77
|
319
|
78 class Rule:
|
|
79 def __init__(self, non_term, tree, cost, template):
|
|
80 self.non_term = non_term
|
|
81 self.tree = tree
|
|
82 self.cost = cost
|
|
83 self.template = template
|
|
84 self.nr = 0
|
|
85
|
|
86 def __repr__(self):
|
|
87 return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost)
|
|
88
|
|
89
|
|
90 class Symbol:
|
|
91 def __init__(self, name):
|
|
92 self.name = name
|
|
93
|
|
94
|
|
95 class Term(Symbol):
|
|
96 pass
|
|
97
|
|
98
|
|
99 class Nonterm(Symbol):
|
|
100 def __init__(self, name):
|
|
101 super().__init__(name)
|
|
102 self.rules = []
|
|
103
|
|
104
|
|
105 class BurgSystem:
|
|
106 def __init__(self):
|
|
107 self.rules = []
|
|
108 self.symbols = {}
|
|
109 self.goal = None
|
|
110
|
|
111 def symType(self, t):
|
|
112 return (s.name for s in self.symbols.values() if type(s) is t)
|
|
113
|
|
114 terminals = property(lambda s: s.symType(Term))
|
|
115
|
|
116 non_terminals = property(lambda s: s.symType(Nonterm))
|
|
117
|
|
118 def add_rule(self, non_term, tree, cost, template):
|
|
119 rule = Rule(non_term, tree, cost, template)
|
|
120 self.non_term(rule.non_term)
|
|
121 self.rules.append(rule)
|
|
122 rule.nr = len(self.rules)
|
|
123
|
|
124 def non_term(self, name):
|
|
125 if name in self.terminals:
|
|
126 raise BurgError('Cannot redefine terminal')
|
|
127 if not self.goal:
|
|
128 self.goal = name
|
|
129 self.install(name, Nonterm)
|
|
130
|
|
131 def tree(self, name, *args):
|
|
132 return Tree(name, *args)
|
|
133
|
|
134 def install(self, name, t):
|
|
135 assert type(name) is str
|
|
136 if name in self.symbols:
|
|
137 assert type(self.symbols[name]) is t
|
|
138 return self.symbols[name]
|
|
139 else:
|
|
140 self.symbols[name] = t(name)
|
|
141
|
|
142 def add_terminal(self, terminal):
|
|
143 self.install(terminal, Term)
|
|
144
|
|
145
|
|
146 class BurgError(Exception):
|
|
147 pass
|
|
148
|
|
149
|
318
|
150 class BurgParser(burg_parser.Parser):
|
319
|
151 """ Derived from automatically generated parser """
|
|
152 def parse(self, l):
|
|
153 self.system = BurgSystem()
|
|
154 super().parse(l)
|
|
155 return self.system
|
|
156
|
|
157
|
|
158 class BurgGenerator:
|
|
159 def print(self, *args):
|
|
160 """ Print helper function that prints to output file """
|
|
161 print(*args, file=self.output_file)
|
|
162
|
|
163 def generate(self, system, output_file):
|
|
164 """ Generate script that implements the burg spec """
|
|
165 self.output_file = output_file
|
|
166 self.system = system
|
|
167
|
|
168 self.print('#!/usr/bin/python')
|
|
169 self.print('from tree import Tree, BaseMatcher, State')
|
|
170 self.print()
|
|
171 self.print('class Matcher(BaseMatcher):')
|
|
172 self.print(' def __init__(self):')
|
|
173 self.print(' self.kid_functions = {}')
|
|
174 self.print(' self.nts_map = {}')
|
|
175 for rule in self.system.rules:
|
|
176 kids, dummy = self.compute_kids(rule.tree, 't')
|
|
177 lf = 'lambda t: [{}]'.format(', '.join(kids), rule)
|
|
178 self.print(' # {}'.format(rule))
|
|
179 self.print(' self.kid_functions[{}] = {}'.format(rule.nr, lf))
|
|
180 self.print(' self.nts_map[{}] = {}'.format(rule.nr, dummy))
|
|
181 self.print('')
|
|
182 self.emit_state()
|
|
183 self.print()
|
|
184 self.print(' def gen(self, tree):')
|
|
185 self.print(' self.burm_label(tree)')
|
|
186 self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal))
|
|
187 self.print(' raise Exception("Tree not covered")')
|
|
188 self.print(' self.apply_rules(tree, "{}")'.format(self.system.goal))
|
|
189
|
|
190 def emit_record(self, rule, state_var):
|
|
191 # TODO: check for rules fullfilled (by not using 999999)
|
|
192 self.print(' nts = self.nts({})'.format(rule.nr))
|
|
193 self.print(' kids = self.kids(tree, {})'.format(rule.nr))
|
|
194 self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost))
|
|
195 self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr))
|
|
196
|
|
197 def emit_state(self):
|
|
198 """ Emit a function that assigns a new state to a node """
|
|
199 self.print(' def burm_state(self, tree):')
|
|
200 self.print(' tree.state = State()')
|
|
201 for term in self.system.terminals:
|
|
202 self.emitcase(term)
|
|
203 self.print()
|
|
204
|
|
205 def emitcase(self, term):
|
|
206 rules = [rule for rule in self.system.rules if rule.tree.name == term]
|
|
207 for rule in rules:
|
|
208 condition = self.emittest(rule.tree, 'tree')
|
|
209 self.print(' if {}:'.format(condition))
|
|
210 self.emit_record(rule, 'state')
|
|
211
|
|
212 def emit_closure(self):
|
|
213 for non_terminal in self.system.non_terminals:
|
|
214 self.print('def closure_{}(self, c):'.format(non_terminal))
|
|
215 self.print(' pass')
|
|
216 self.print()
|
|
217
|
|
218 def compute_kids(self, t, root_name):
|
|
219 """ Compute of a pattern the blanks that must be provided from below in the tree """
|
|
220 if t.name in self.system.non_terminals:
|
|
221 return [root_name], [t.name]
|
|
222 else:
|
|
223 k = []
|
|
224 nts = []
|
|
225 for i, c in enumerate(t.children):
|
|
226 pfx = root_name + '.children[{}]'.format(i)
|
|
227 kf, dummy = self.compute_kids(c, pfx)
|
|
228 nts.extend(dummy)
|
|
229 k.extend(kf)
|
|
230 return k, nts
|
|
231
|
|
232
|
|
233 def emittest(self, tree, prefix):
|
|
234 """ Generate condition for a tree pattern """
|
|
235 ct = (c for c in tree.children if c.name not in self.system.non_terminals)
|
|
236 child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i)) for i, c in enumerate(ct))
|
|
237 child_tests = ('({})'.format(ct) for ct in child_tests)
|
|
238 child_tests = ' and '.join(child_tests)
|
|
239 child_tests = ' and ' + child_tests if child_tests else ''
|
|
240 tst = '{}.name == "{}"'.format(prefix, tree.name)
|
|
241 return tst + child_tests
|
318
|
242
|
|
243
|
|
244 def main():
|
|
245 # Parse arguments:
|
|
246 parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler')
|
|
247 parser.add_argument('source', type=argparse.FileType('r'), \
|
|
248 help='the parser specification')
|
|
249 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \
|
|
250 default=sys.stdout)
|
|
251 args = parser.parse_args()
|
|
252 src = args.source.read()
|
|
253 args.source.close()
|
|
254
|
319
|
255 # Parse specification into burgsystem:
|
318
|
256 l = BurgLexer()
|
|
257 p = BurgParser()
|
|
258 l.feed(src)
|
319
|
259 burg_system = p.parse(l)
|
|
260
|
|
261 # Generate matcher:
|
|
262 generator = BurgGenerator()
|
|
263 generator.generate(burg_system, args.output)
|
318
|
264
|
|
265 if __name__ == '__main__':
|
|
266 main()
|