comparison python/pyburg.py @ 322:44f336460c2a

Half of use of burg spec for arm
author Windel Bouwman
date Mon, 27 Jan 2014 19:58:07 +0100
parents 8c569fbe60e4
children e9fe6988497c
comparison
equal deleted inserted replaced
321:8c569fbe60e4 322:44f336460c2a
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 """ 3 """
4 Bottom up rewrite generator. 4 Bottom up rewrite generator
5 ---------------------------
5 6
6 This script takes as input a description of patterns and outputs a 7 This script takes as input a description of patterns and outputs a
7 matcher class that can match trees given the patterns. 8 matcher class that can match trees given the patterns.
8 9
9 Patterns are specified as follows: 10 Patterns are specified as follows::
11
10 reg -> ADDI32(reg, reg) 2 (. add NT0 NT1 .) 12 reg -> ADDI32(reg, reg) 2 (. add NT0 NT1 .)
11 reg -> MULI32(reg, reg) 3 (. .) 13 reg -> MULI32(reg, reg) 3 (. .)
12 or a multiply add: 14
13 reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .) 15 or a multiply add::
14 The general specification pattern is: 16
15 [result] -> [tree] [cost] [template code] 17 reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .)
18
19 The general specification pattern is::
20
21 [result] -> [tree] [cost] [template code]
16 22
17 Trees 23 Trees
18 ----- 24 -----
19 25
20 A tree is described using parenthesis notation. For example a node X with 26 A tree is described using parenthesis notation. For example a node X with
21 three child nodes is described as: 27 three child nodes is described as:
28
22 X(a, b, b) 29 X(a, b, b)
30
23 Trees can be nested: 31 Trees can be nested:
32
24 X(Y(a, a), a) 33 X(Y(a, a), a)
34
25 The 'a' in the example above indicates an open connection to a next tree 35 The 'a' in the example above indicates an open connection to a next tree
26 pattern. 36 pattern.
27 37
28 38
29 In the example above 'reg' is a non-terminal. ADDI32 is a terminal. non-terminals 39 In the example above 'reg' is a non-terminal. ADDI32 is a terminal. non-terminals
30 cannot have child nodes. A special case occurs in this case: 40 cannot have child nodes. A special case occurs in this case:
31 reg -> rc 41
42 reg -> rc
43
32 where 'rc' is a non-terminal. This is an example of a chain rule. Chain rules 44 where 'rc' is a non-terminal. This is an example of a chain rule. Chain rules
33 can be used to allow several variants of non-terminals. 45 can be used to allow several variants of non-terminals.
34 46
35 The generated matcher uses dynamic programming to find the best match of the 47 The generated matcher uses dynamic programming to find the best match of the
36 tree. This strategy consists of two steps: 48 tree. This strategy consists of two steps:
49
37 - label: During this phase the given tree is traversed in a bottom up way. 50 - label: During this phase the given tree is traversed in a bottom up way.
38 each node is labelled with a possible matching rule and the corresponding cost. 51 each node is labelled with a possible matching rule and the corresponding cost.
39 - select: In this step, the tree is traversed again, selecting at each point 52 - select: In this step, the tree is traversed again, selecting at each point
40 the cheapest way to get to the goal. 53 the cheapest way to get to the goal.
41 54
42 """ 55 """
43 56
44 import sys 57 import sys
45 import os 58 import os
59 import io
60 import types
46 import argparse 61 import argparse
47 from ppci import Token 62 from ppci import Token
48 from pyyacc import ParserException, EOF 63 from pyyacc import ParserException, EOF
49 import yacc 64 import yacc
50 import baselex 65 import baselex
66 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)), 81 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)),
67 ('SKIP', r'[ ]', None) 82 ('SKIP', r'[ ]', None)
68 ] 83 ]
69 84
70 lines = txt.split('\n') 85 lines = txt.split('\n')
86 header_lines = []
71 87
72 def tokenize(): 88 def tokenize():
89 section = 0
73 for line in lines: 90 for line in lines:
74 line = line.strip() 91 line = line.strip()
75 if not line: 92 if not line:
76 continue # Skip empty lines 93 continue # Skip empty lines
77 elif line == '%%': 94 elif line == '%%':
95 section += 1
96 if section == 1:
97 yield Token('header', header_lines)
78 yield Token('%%', '%%') 98 yield Token('%%', '%%')
79 else: 99 else:
80 yield from baselex.tokenize(tok_spec, line) 100 if section == 0:
101 header_lines.append(line)
102 else:
103 yield from baselex.tokenize(tok_spec, line)
81 yield Token(EOF, EOF) 104 yield Token(EOF, EOF)
82 self.tokens = tokenize() 105 self.tokens = tokenize()
83 self.token = self.tokens.__next__() 106 self.token = self.tokens.__next__()
84 107
85 def next_token(self): 108 def next_token(self):
186 self.output_file = output_file 209 self.output_file = output_file
187 self.system = system 210 self.system = system
188 211
189 self.print('#!/usr/bin/python') 212 self.print('#!/usr/bin/python')
190 self.print('from tree import Tree, BaseMatcher, State') 213 self.print('from tree import Tree, BaseMatcher, State')
214 for header in self.system.header_lines:
215 self.print(header)
191 self.print() 216 self.print()
192 self.print('class Matcher(BaseMatcher):') 217 self.print('class Matcher(BaseMatcher):')
193 self.print(' def __init__(self):') 218 self.print(' def __init__(self):')
194 self.print(' self.kid_functions = {}') 219 self.print(' self.kid_functions = {}')
195 self.print(' self.nts_map = {}') 220 self.print(' self.nts_map = {}')
207 for rule in self.system.rules: 232 for rule in self.system.rules:
208 if rule.num_nts > 0: 233 if rule.num_nts > 0:
209 args = ', ' + ', '.join('nt{}'.format(x) for x in range(rule.num_nts)) 234 args = ', ' + ', '.join('nt{}'.format(x) for x in range(rule.num_nts))
210 else: 235 else:
211 args = '' 236 args = ''
212 self.print(' def P{}(self{}):'.format(rule.nr, args)) 237 self.print(' def P{}(self, tree{}):'.format(rule.nr, args))
213 self.print(' {}'.format(rule.template)) 238 template = rule.template
239 template = template.replace('$$', 'tree')
240 for i in range(rule.num_nts):
241 template = template.replace('${}'.format(i+1), 'nt{}'.format(i))
242 for t in template.split(';'):
243 self.print(' {}'.format(t.strip()))
214 self.emit_state() 244 self.emit_state()
215 self.print(' def gen(self, tree):') 245 self.print(' def gen(self, tree):')
216 self.print(' self.burm_label(tree)') 246 self.print(' self.burm_label(tree)')
217 self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal)) 247 self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal))
218 self.print(' raise Exception("Tree not covered")') 248 self.print(' raise Exception("Tree not covered")')
219 self.print(' self.apply_rules(tree, "{}")'.format(self.system.goal)) 249 self.print(' self.apply_rules(tree, "{}")'.format(self.system.goal))
220 250
221 def emit_record(self, rule, state_var): 251 def emit_record(self, rule, state_var):
222 # TODO: check for rules fullfilled (by not using 999999) 252 # TODO: check for rules fullfilled (by not using 999999)
223 self.print(' nts = self.nts({})'.format(rule.nr)) 253 self.print(' nts = self.nts({})'.format(rule.nr))
224 self.print(' kids = self.kids(tree, {})'.format(rule.nr)) 254 self.print(' kids = self.kids(tree, {})'.format(rule.nr))
225 self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost)) 255 self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost))
226 self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr)) 256 self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr))
227 for cr in self.system.symbols[rule.non_term].chain_rules: 257 for cr in self.system.symbols[rule.non_term].chain_rules:
228 self.print(' # Chain rule: {}'.format(cr)) 258 self.print(' # Chain rule: {}'.format(cr))
229 self.print(' tree.state.set_cost("{}", c + {}, {})'.format(cr.non_term, cr.cost, cr.nr)) 259 self.print(' tree.state.set_cost("{}", c + {}, {})'.format(cr.non_term, cr.cost, cr.nr))
230 260
231 def emit_state(self): 261 def emit_state(self):
232 """ Emit a function that assigns a new state to a node """ 262 """ Emit a function that assigns a new state to a node """
233 self.print(' def burm_state(self, tree):') 263 self.print(' def burm_state(self, tree):')
234 self.print(' tree.state = State()') 264 self.print(' tree.state = State()')
235 for term in self.system.terminals: 265 for term in self.system.terminals:
236 self.emitcase(term) 266 self.emitcase(term)
237 self.print() 267 self.print()
238 268
239 def emitcase(self, term): 269 def emitcase(self, term):
240 rules = [rule for rule in self.system.rules if rule.tree.name == term] 270 rules = [rule for rule in self.system.rules if rule.tree.name == term]
241 for rule in rules: 271 for rule in rules:
242 condition = self.emittest(rule.tree, 'tree') 272 condition = self.emittest(rule.tree, 'tree')
243 self.print(' if {}:'.format(condition)) 273 self.print(' if {}:'.format(condition))
244 self.emit_record(rule, 'state') 274 self.emit_record(rule, 'state')
245 275
246 def compute_kids(self, t, root_name): 276 def compute_kids(self, t, root_name):
247 """ Compute of a pattern the blanks that must be provided from below in the tree """ 277 """ Compute of a pattern the blanks that must be provided from below in the tree """
248 if t.name in self.system.non_terminals: 278 if t.name in self.system.non_terminals:
276 help='the parser specification') 306 help='the parser specification')
277 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ 307 parser.add_argument('-o', '--output', type=argparse.FileType('w'), \
278 default=sys.stdout) 308 default=sys.stdout)
279 return parser 309 return parser
280 310
311 def load_as_module(filename):
312 """ Load a parser spec file, generate LR tables and create module """
313 ob = io.StringIO()
314 args = argparse.Namespace(source=open(filename), output=ob)
315 main(args)
316
317 matcher_mod = types.ModuleType('generated_matcher')
318 exec(ob.getvalue(), matcher_mod.__dict__)
319 return matcher_mod
320
281 321
282 def main(args): 322 def main(args):
283 src = args.source.read() 323 src = args.source.read()
284 args.source.close() 324 args.source.close()
285 325