318
|
1 #!/usr/bin/python
|
|
2
|
319
|
3 """
|
322
|
4 Bottom up rewrite generator
|
|
5 ---------------------------
|
319
|
6
|
321
|
7 This script takes as input a description of patterns and outputs a
|
|
8 matcher class that can match trees given the patterns.
|
319
|
9
|
322
|
10 Patterns are specified as follows::
|
|
11
|
321
|
12 reg -> ADDI32(reg, reg) 2 (. add NT0 NT1 .)
|
319
|
13 reg -> MULI32(reg, reg) 3 (. .)
|
322
|
14
|
|
15 or a multiply add::
|
|
16
|
|
17 reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .)
|
|
18
|
|
19 The general specification pattern is::
|
|
20
|
|
21 [result] -> [tree] [cost] [template code]
|
319
|
22
|
321
|
23 Trees
|
|
24 -----
|
|
25
|
|
26 A tree is described using parenthesis notation. For example a node X with
|
|
27 three child nodes is described as:
|
322
|
28
|
319
|
29 X(a, b, b)
|
322
|
30
|
321
|
31 Trees can be nested:
|
322
|
32
|
319
|
33 X(Y(a, a), a)
|
322
|
34
|
321
|
35 The 'a' in the example above indicates an open connection to a next tree
|
|
36 pattern.
|
|
37
|
319
|
38
|
321
|
39 In the example above 'reg' is a non-terminal. ADDI32 is a terminal. non-terminals
|
|
40 cannot have child nodes. A special case occurs in this case:
|
322
|
41
|
|
42 reg -> rc
|
|
43
|
321
|
44 where 'rc' is a non-terminal. This is an example of a chain rule. Chain rules
|
|
45 can be used to allow several variants of non-terminals.
|
320
|
46
|
321
|
47 The generated matcher uses dynamic programming to find the best match of the
|
|
48 tree. This strategy consists of two steps:
|
322
|
49
|
321
|
50 - label: During this phase the given tree is traversed in a bottom up way.
|
|
51 each node is labelled with a possible matching rule and the corresponding cost.
|
|
52 - select: In this step, the tree is traversed again, selecting at each point
|
|
53 the cheapest way to get to the goal.
|
319
|
54
|
|
55 """
|
|
56
|
318
|
57 import sys
|
321
|
58 import os
|
322
|
59 import io
|
|
60 import types
|
318
|
61 import argparse
|
396
|
62 from ppci import Token, SourceLocation
|
382
|
63 from pyyacc import ParserException
|
321
|
64 import yacc
|
319
|
65 import baselex
|
|
66 from tree import Tree
|
318
|
67
|
321
|
68 # Generate parser on the fly:
|
|
69 spec_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'burg.x')
|
|
70 burg_parser = yacc.load_as_module(spec_file)
|
|
71
|
318
|
72
|
382
|
73 class BurgLexer(baselex.BaseLexer):
|
|
74 def __init__(self):
|
318
|
75 tok_spec = [
|
319
|
76 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
|
|
77 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)),
|
|
78 ('number', r'\d+', lambda typ, val: (typ, int(val))),
|
357
|
79 ('STRING', r"'[^']*'", lambda typ, val: ('string', val[1:-1])),
|
319
|
80 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)),
|
|
81 ('SKIP', r'[ ]', None)
|
318
|
82 ]
|
382
|
83 super().__init__(tok_spec)
|
318
|
84
|
382
|
85 def tokenize(self, txt):
|
318
|
86 lines = txt.split('\n')
|
322
|
87 header_lines = []
|
382
|
88 section = 0
|
|
89 for line in lines:
|
396
|
90 loc = SourceLocation(self.filename, 0, 0, 0)
|
382
|
91 line = line.strip()
|
|
92 if not line:
|
|
93 continue # Skip empty lines
|
|
94 elif line == '%%':
|
|
95 section += 1
|
|
96 if section == 1:
|
396
|
97 yield Token('header', header_lines, loc)
|
|
98 yield Token('%%', '%%', loc)
|
382
|
99 else:
|
|
100 if section == 0:
|
|
101 header_lines.append(line)
|
319
|
102 else:
|
382
|
103 # we could use yield from below, but python 3.2 does not work then:
|
|
104 for tk in super().tokenize(line):
|
|
105 yield tk
|
318
|
106
|
|
107
|
319
|
108 class Rule:
|
320
|
109 """ A rewrite rule. Specifies a tree that can be rewritten into a result
|
|
110 at a specific cost """
|
357
|
111 def __init__(self, non_term, tree, cost, acceptance, template):
|
319
|
112 self.non_term = non_term
|
|
113 self.tree = tree
|
|
114 self.cost = cost
|
357
|
115 self.acceptance = acceptance
|
319
|
116 self.template = template
|
|
117 self.nr = 0
|
|
118
|
|
119 def __repr__(self):
|
|
120 return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost)
|
|
121
|
|
122
|
|
123 class Symbol:
|
|
124 def __init__(self, name):
|
|
125 self.name = name
|
|
126
|
|
127
|
|
128 class Term(Symbol):
|
|
129 pass
|
|
130
|
|
131
|
|
132 class Nonterm(Symbol):
|
|
133 def __init__(self, name):
|
|
134 super().__init__(name)
|
320
|
135 self.chain_rules = []
|
319
|
136
|
|
137
|
|
138 class BurgSystem:
|
|
139 def __init__(self):
|
|
140 self.rules = []
|
|
141 self.symbols = {}
|
|
142 self.goal = None
|
|
143
|
|
144 def symType(self, t):
|
|
145 return (s.name for s in self.symbols.values() if type(s) is t)
|
|
146
|
|
147 terminals = property(lambda s: s.symType(Term))
|
|
148
|
|
149 non_terminals = property(lambda s: s.symType(Nonterm))
|
|
150
|
357
|
151 def add_rule(self, non_term, tree, cost, acceptance, template):
|
|
152 template = template.strip()
|
320
|
153 if not template:
|
|
154 template = 'pass'
|
357
|
155 rule = Rule(non_term, tree, cost, acceptance, template)
|
320
|
156 if len(tree.children) == 0 and tree.name not in self.terminals:
|
|
157 self.non_term(tree.name).chain_rules.append(rule)
|
319
|
158 self.non_term(rule.non_term)
|
|
159 self.rules.append(rule)
|
|
160 rule.nr = len(self.rules)
|
|
161
|
|
162 def non_term(self, name):
|
|
163 if name in self.terminals:
|
|
164 raise BurgError('Cannot redefine terminal')
|
|
165 if not self.goal:
|
|
166 self.goal = name
|
320
|
167 return self.install(name, Nonterm)
|
319
|
168
|
|
169 def tree(self, name, *args):
|
|
170 return Tree(name, *args)
|
|
171
|
|
172 def install(self, name, t):
|
|
173 assert type(name) is str
|
|
174 if name in self.symbols:
|
|
175 assert type(self.symbols[name]) is t
|
|
176 else:
|
|
177 self.symbols[name] = t(name)
|
320
|
178 return self.symbols[name]
|
319
|
179
|
|
180 def add_terminal(self, terminal):
|
|
181 self.install(terminal, Term)
|
|
182
|
|
183
|
|
184 class BurgError(Exception):
|
|
185 pass
|
|
186
|
|
187
|
318
|
188 class BurgParser(burg_parser.Parser):
|
319
|
189 """ Derived from automatically generated parser """
|
|
190 def parse(self, l):
|
|
191 self.system = BurgSystem()
|
|
192 super().parse(l)
|
|
193 return self.system
|
|
194
|
|
195
|
|
196 class BurgGenerator:
|
|
197 def print(self, *args):
|
|
198 """ Print helper function that prints to output file """
|
|
199 print(*args, file=self.output_file)
|
|
200
|
|
201 def generate(self, system, output_file):
|
|
202 """ Generate script that implements the burg spec """
|
|
203 self.output_file = output_file
|
|
204 self.system = system
|
|
205
|
|
206 self.print('#!/usr/bin/python')
|
|
207 self.print('from tree import Tree, BaseMatcher, State')
|
322
|
208 for header in self.system.header_lines:
|
|
209 self.print(header)
|
319
|
210 self.print()
|
|
211 self.print('class Matcher(BaseMatcher):')
|
320
|
212 self.print(' def __init__(self):')
|
|
213 self.print(' self.kid_functions = {}')
|
|
214 self.print(' self.nts_map = {}')
|
|
215 self.print(' self.pat_f = {}')
|
319
|
216 for rule in self.system.rules:
|
|
217 kids, dummy = self.compute_kids(rule.tree, 't')
|
320
|
218 rule.num_nts = len(dummy)
|
319
|
219 lf = 'lambda t: [{}]'.format(', '.join(kids), rule)
|
320
|
220 pf = 'self.P{}'.format(rule.nr)
|
|
221 self.print(' # {}: {}'.format(rule.nr, rule))
|
|
222 self.print(' self.kid_functions[{}] = {}'.format(rule.nr, lf))
|
|
223 self.print(' self.nts_map[{}] = {}'.format(rule.nr, dummy))
|
|
224 self.print(' self.pat_f[{}] = {}'.format(rule.nr, pf))
|
|
225 self.print()
|
|
226 for rule in self.system.rules:
|
|
227 if rule.num_nts > 0:
|
357
|
228 args = ', '.join('c{}'.format(x) for x in range(rule.num_nts))
|
|
229 args = ', ' + args
|
320
|
230 else:
|
|
231 args = ''
|
357
|
232 # Create template function:
|
322
|
233 self.print(' def P{}(self, tree{}):'.format(rule.nr, args))
|
|
234 template = rule.template
|
|
235 for t in template.split(';'):
|
|
236 self.print(' {}'.format(t.strip()))
|
357
|
237 # Create acceptance function:
|
|
238 if rule.acceptance:
|
|
239 self.print(' def A{}(self, tree):'.format(rule.nr))
|
|
240 for t in rule.acceptance.split(';'):
|
|
241 self.print(' {}'.format(t.strip()))
|
319
|
242 self.emit_state()
|
320
|
243 self.print(' def gen(self, tree):')
|
|
244 self.print(' self.burm_label(tree)')
|
|
245 self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal))
|
357
|
246 self.print(' raise Exception("Tree {} not covered".format(tree))')
|
323
|
247 self.print(' return self.apply_rules(tree, "{}")'.format(self.system.goal))
|
319
|
248
|
|
249 def emit_record(self, rule, state_var):
|
|
250 # TODO: check for rules fullfilled (by not using 999999)
|
357
|
251 acc = ''
|
|
252 if rule.acceptance:
|
|
253 acc = ' and self.A{}(tree)'.format(rule.nr)
|
322
|
254 self.print(' nts = self.nts({})'.format(rule.nr))
|
|
255 self.print(' kids = self.kids(tree, {})'.format(rule.nr))
|
357
|
256 self.print(' if all(x.state.has_goal(y) for x, y in zip(kids, nts)){}:'.format(acc))
|
354
|
257 self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost))
|
|
258 self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr))
|
320
|
259 for cr in self.system.symbols[rule.non_term].chain_rules:
|
354
|
260 self.print(' # Chain rule: {}'.format(cr))
|
|
261 self.print(' tree.state.set_cost("{}", c + {}, {})'.format(cr.non_term, cr.cost, cr.nr))
|
319
|
262
|
|
263 def emit_state(self):
|
|
264 """ Emit a function that assigns a new state to a node """
|
320
|
265 self.print(' def burm_state(self, tree):')
|
322
|
266 self.print(' tree.state = State()')
|
319
|
267 for term in self.system.terminals:
|
|
268 self.emitcase(term)
|
|
269 self.print()
|
|
270
|
|
271 def emitcase(self, term):
|
|
272 rules = [rule for rule in self.system.rules if rule.tree.name == term]
|
|
273 for rule in rules:
|
|
274 condition = self.emittest(rule.tree, 'tree')
|
322
|
275 self.print(' if {}:'.format(condition))
|
319
|
276 self.emit_record(rule, 'state')
|
|
277
|
|
278 def compute_kids(self, t, root_name):
|
|
279 """ Compute of a pattern the blanks that must be provided from below in the tree """
|
|
280 if t.name in self.system.non_terminals:
|
|
281 return [root_name], [t.name]
|
|
282 else:
|
|
283 k = []
|
|
284 nts = []
|
|
285 for i, c in enumerate(t.children):
|
|
286 pfx = root_name + '.children[{}]'.format(i)
|
|
287 kf, dummy = self.compute_kids(c, pfx)
|
|
288 nts.extend(dummy)
|
|
289 k.extend(kf)
|
|
290 return k, nts
|
|
291
|
|
292
|
|
293 def emittest(self, tree, prefix):
|
|
294 """ Generate condition for a tree pattern """
|
|
295 ct = (c for c in tree.children if c.name not in self.system.non_terminals)
|
|
296 child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i)) for i, c in enumerate(ct))
|
|
297 child_tests = ('({})'.format(ct) for ct in child_tests)
|
|
298 child_tests = ' and '.join(child_tests)
|
|
299 child_tests = ' and ' + child_tests if child_tests else ''
|
|
300 tst = '{}.name == "{}"'.format(prefix, tree.name)
|
|
301 return tst + child_tests
|
318
|
302
|
|
303
|
321
|
304 def make_argument_parser():
|
|
305 """ Constructs an argument parser """
|
318
|
306 parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler')
|
|
307 parser.add_argument('source', type=argparse.FileType('r'), \
|
|
308 help='the parser specification')
|
|
309 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \
|
|
310 default=sys.stdout)
|
321
|
311 return parser
|
|
312
|
382
|
313
|
322
|
314 def load_as_module(filename):
|
|
315 """ Load a parser spec file, generate LR tables and create module """
|
|
316 ob = io.StringIO()
|
|
317 args = argparse.Namespace(source=open(filename), output=ob)
|
|
318 main(args)
|
|
319
|
|
320 matcher_mod = types.ModuleType('generated_matcher')
|
|
321 exec(ob.getvalue(), matcher_mod.__dict__)
|
|
322 return matcher_mod
|
|
323
|
321
|
324
|
|
325 def main(args):
|
318
|
326 src = args.source.read()
|
|
327 args.source.close()
|
|
328
|
319
|
329 # Parse specification into burgsystem:
|
318
|
330 l = BurgLexer()
|
|
331 p = BurgParser()
|
|
332 l.feed(src)
|
319
|
333 burg_system = p.parse(l)
|
|
334
|
|
335 # Generate matcher:
|
|
336 generator = BurgGenerator()
|
|
337 generator.generate(burg_system, args.output)
|
318
|
338
|
321
|
339
|
318
|
340 if __name__ == '__main__':
|
321
|
341 # Parse arguments:
|
|
342 args = make_argument_parser().parse_args()
|
|
343 main(args)
|