comparison python/pyburg.py @ 319:8d07a4254f04
Work on burg
author | Windel Bouwman
date | Sat, 18 Jan 2014 18:58:43 +0100
parents | e84047f29c78
children | 84d67cce67b7
318:e84047f29c78 | 319:8d07a4254f04
---|---
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 """ Bottom up rewrite generator in python """ | 3 """ |
4 Bottom up rewrite generator. | |
5 | |
6 This script takes as input a description of patterns and outputs a | |
7 matcher class that can match trees given the patterns. | |
8 | |
9 Patterns are specified as follows: | |
10 reg -> ADDI32(reg, reg) 2 (. add $1 $2 .) | |
11 reg -> MULI32(reg, reg) 3 (. .) | |
12 or a multiply add: | |
13 reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .) | |
14 The general specification pattern is: | |
15 [result] -> [tree] [cost] [template code] | |
16 | |
17 A tree is described using parenthesis notation. For example a node X with | |
18 three | |
19 child nodes is described as: | |
20 X(a, b, b) | |
21 Trees can be nested: | |
22 X(Y(a, a), a) | |
23 The 'a' in the example above indicates an open connection to a next tree | |
24 pattern. | |
25 | |
26 The generated matcher uses dynamic programming to find the best match of the | |
27 tree. This strategy consists of two steps: | |
28 - label: During this phase the given tree is traversed in a bottom-up way; | |
29 each node is labelled with a possible matching rule and the corresponding cost. | |
30 - select: In this step, the tree is traversed again, selecting at each point | |
31 the cheapest way to get to the goal. | |
32 | |
33 """ | |
34 | |
4 import sys | 35 import sys |
5 import re | |
6 import argparse | 36 import argparse |
7 from ppci import Token | 37 from ppci import Token |
8 import burg_parser | 38 import burg_parser # Automatically generated |
39 from pyyacc import ParserException, EOF | |
40 import baselex | |
41 from tree import Tree | |
9 | 42 |
10 | 43 |
11 class BurgLexer: | 44 class BurgLexer: |
12 def feed(self, txt): | 45 def feed(self, txt): |
13 tok_spec = [ | 46 tok_spec = [ |
14 ('ID', r'[A-Za-z][A-Za-z\d_]*'), | 47 ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), |
15 ('STRING', r"'[^']*'"), | 48 ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)), |
16 ('BRACEDCODE', r"\{[^\}]*\}"), | 49 ('number', r'\d+', lambda typ, val: (typ, int(val))), |
17 ('OTHER', r'[:;\|]'), | 50 ('STRING', r"'[^']*'", lambda typ, val: ('id', val[1:-1])), |
18 ('SKIP', r'[ ]') | 51 ('template', r"\(\..*\.\)", lambda typ, val: (typ, val)), |
52 ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)), | |
53 ('SKIP', r'[ ]', None) | |
19 ] | 54 ] |
20 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) | |
21 gettok = re.compile(tok_re).match | |
22 | 55 |
23 lines = txt.split('\n') | 56 lines = txt.split('\n') |
24 def tokenize_line(line): | |
25 """ Generator that splits up a line into tokens """ | |
26 mo = gettok(line) | |
27 pos = 0 | |
28 while mo: | |
29 typ = mo.lastgroup | |
30 val = mo.group(typ) | |
31 if typ == 'ID': | |
32 yield Token(typ, val) | |
33 elif typ == 'STRING': | |
34 typ = 'ID' | |
35 yield Token(typ, val[1:-1]) | |
36 elif typ == 'OTHER': | |
37 typ = val | |
38 yield Token(typ, val) | |
39 elif typ == 'BRACEDCODE': | |
40 yield Token(typ, val) | |
41 elif typ == 'SKIP': | |
42 pass | |
43 else: | |
44 raise NotImplementedError(str(typ)) | |
45 pos = mo.end() | |
46 mo = gettok(line, pos) | |
47 if len(line) != pos: | |
48 raise ParseError('Lex fault at {}'.format(line)) | |
49 | 57 |
50 def tokenize(): | 58 def tokenize(): |
51 section = 0 | |
52 for line in lines: | 59 for line in lines: |
53 line = line.strip() | 60 line = line.strip() |
54 if not line: | 61 if not line: |
55 continue # Skip empty lines | 62 continue # Skip empty lines |
56 if line == '%%': | 63 elif line == '%%': |
57 section += 1 | |
58 yield Token('%%', '%%') | 64 yield Token('%%', '%%') |
59 continue | 65 else: |
60 if section == 0: | 66 yield from baselex.tokenize(tok_spec, line) |
61 if line.startswith('%tokens'): | 67 yield Token(EOF, EOF) |
62 yield Token('%tokens', '%tokens') | |
63 yield from tokenize_line(line[7:]) | |
64 else: | |
65 yield Token('HEADER', line) | |
66 elif section == 1: | |
67 yield from tokenize_line(line) | |
68 yield Token('eof', 'eof') | |
69 self.tokens = tokenize() | 68 self.tokens = tokenize() |
70 self.token = self.tokens.__next__() | 69 self.token = self.tokens.__next__() |
71 | 70 |
72 def next_token(self): | 71 def next_token(self): |
73 t = self.token | 72 t = self.token |
74 if t.typ != 'eof': | 73 if t.typ != EOF: |
75 self.token = self.tokens.__next__() | 74 self.token = self.tokens.__next__() |
76 return t | 75 return t |
77 | 76 |
78 | 77 |
78 class Rule: | |
79 def __init__(self, non_term, tree, cost, template): | |
80 self.non_term = non_term | |
81 self.tree = tree | |
82 self.cost = cost | |
83 self.template = template | |
84 self.nr = 0 | |
85 | |
86 def __repr__(self): | |
87 return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost) | |
88 | |
89 | |
90 class Symbol: | |
91 def __init__(self, name): | |
92 self.name = name | |
93 | |
94 | |
95 class Term(Symbol): | |
96 pass | |
97 | |
98 | |
99 class Nonterm(Symbol): | |
100 def __init__(self, name): | |
101 super().__init__(name) | |
102 self.rules = [] | |
103 | |
104 | |
105 class BurgSystem: | |
106 def __init__(self): | |
107 self.rules = [] | |
108 self.symbols = {} | |
109 self.goal = None | |
110 | |
111 def symType(self, t): | |
112 return (s.name for s in self.symbols.values() if type(s) is t) | |
113 | |
114 terminals = property(lambda s: s.symType(Term)) | |
115 | |
116 non_terminals = property(lambda s: s.symType(Nonterm)) | |
117 | |
118 def add_rule(self, non_term, tree, cost, template): | |
119 rule = Rule(non_term, tree, cost, template) | |
120 self.non_term(rule.non_term) | |
121 self.rules.append(rule) | |
122 rule.nr = len(self.rules) | |
123 | |
124 def non_term(self, name): | |
125 if name in self.terminals: | |
126 raise BurgError('Cannot redefine terminal') | |
127 if not self.goal: | |
128 self.goal = name | |
129 self.install(name, Nonterm) | |
130 | |
131 def tree(self, name, *args): | |
132 return Tree(name, *args) | |
133 | |
134 def install(self, name, t): | |
135 assert type(name) is str | |
136 if name in self.symbols: | |
137 assert type(self.symbols[name]) is t | |
138 return self.symbols[name] | |
139 else: | |
140 self.symbols[name] = t(name) | |
141 | |
142 def add_terminal(self, terminal): | |
143 self.install(terminal, Term) | |
144 | |
145 | |
146 class BurgError(Exception): | |
147 pass | |
148 | |
149 | |
79 class BurgParser(burg_parser.Parser): | 150 class BurgParser(burg_parser.Parser): |
80 """ Derive from automatically generated parser """ | 151 """ Derived from automatically generated parser """ |
81 def add_rule(self, *args): | 152 def parse(self, l): |
82 print(args) | 153 self.system = BurgSystem() |
154 super().parse(l) | |
155 return self.system | |
156 | |
157 | |
158 class BurgGenerator: | |
159 def print(self, *args): | |
160 """ Print helper function that prints to output file """ | |
161 print(*args, file=self.output_file) | |
162 | |
163 def generate(self, system, output_file): | |
164 """ Generate script that implements the burg spec """ | |
165 self.output_file = output_file | |
166 self.system = system | |
167 | |
168 self.print('#!/usr/bin/python') | |
169 self.print('from tree import Tree, BaseMatcher, State') | |
170 self.print() | |
171 self.print('class Matcher(BaseMatcher):') | |
172 self.print(' def __init__(self):') | |
173 self.print(' self.kid_functions = {}') | |
174 self.print(' self.nts_map = {}') | |
175 for rule in self.system.rules: | |
176 kids, dummy = self.compute_kids(rule.tree, 't') | |
177 lf = 'lambda t: [{}]'.format(', '.join(kids)) | |
178 self.print(' # {}'.format(rule)) | |
179 self.print(' self.kid_functions[{}] = {}'.format(rule.nr, lf)) | |
180 self.print(' self.nts_map[{}] = {}'.format(rule.nr, dummy)) | |
181 self.print('') | |
182 self.emit_state() | |
183 self.print() | |
184 self.print(' def gen(self, tree):') | |
185 self.print(' self.burm_label(tree)') | |
186 self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal)) | |
187 self.print(' raise Exception("Tree not covered")') | |
188 self.print(' self.apply_rules(tree, "{}")'.format(self.system.goal)) | |
189 | |
190 def emit_record(self, rule, state_var): | |
191 # TODO: check for rules fulfilled (by not using 999999) | |
192 self.print(' nts = self.nts({})'.format(rule.nr)) | |
193 self.print(' kids = self.kids(tree, {})'.format(rule.nr)) | |
194 self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost)) | |
195 self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr)) | |
196 | |
197 def emit_state(self): | |
198 """ Emit a function that assigns a new state to a node """ | |
199 self.print(' def burm_state(self, tree):') | |
200 self.print(' tree.state = State()') | |
201 for term in self.system.terminals: | |
202 self.emitcase(term) | |
203 self.print() | |
204 | |
205 def emitcase(self, term): | |
206 rules = [rule for rule in self.system.rules if rule.tree.name == term] | |
207 for rule in rules: | |
208 condition = self.emittest(rule.tree, 'tree') | |
209 self.print(' if {}:'.format(condition)) | |
210 self.emit_record(rule, 'state') | |
211 | |
212 def emit_closure(self): | |
213 for non_terminal in self.system.non_terminals: | |
214 self.print('def closure_{}(self, c):'.format(non_terminal)) | |
215 self.print(' pass') | |
216 self.print() | |
217 | |
218 def compute_kids(self, t, root_name): | |
219 """ Compute of a pattern the blanks that must be provided from below in the tree """ | |
220 if t.name in self.system.non_terminals: | |
221 return [root_name], [t.name] | |
222 else: | |
223 k = [] | |
224 nts = [] | |
225 for i, c in enumerate(t.children): | |
226 pfx = root_name + '.children[{}]'.format(i) | |
227 kf, dummy = self.compute_kids(c, pfx) | |
228 nts.extend(dummy) | |
229 k.extend(kf) | |
230 return k, nts | |
231 | |
232 | |
233 def emittest(self, tree, prefix): | |
234 """ Generate condition for a tree pattern """ | |
235 ct = ((i, c) for i, c in enumerate(tree.children) if c.name not in self.system.non_terminals) | |
236 child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i)) for i, c in ct) | |
237 child_tests = ('({})'.format(ct) for ct in child_tests) | |
238 child_tests = ' and '.join(child_tests) | |
239 child_tests = ' and ' + child_tests if child_tests else '' | |
240 tst = '{}.name == "{}"'.format(prefix, tree.name) | |
241 return tst + child_tests | |
83 | 242 |
84 | 243 |
85 def main(): | 244 def main(): |
86 # Parse arguments: | 245 # Parse arguments: |
87 parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler') | 246 parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler') |
91 default=sys.stdout) | 250 default=sys.stdout) |
92 args = parser.parse_args() | 251 args = parser.parse_args() |
93 src = args.source.read() | 252 src = args.source.read() |
94 args.source.close() | 253 args.source.close() |
95 | 254 |
255 # Parse specification into burgsystem: | |
96 l = BurgLexer() | 256 l = BurgLexer() |
97 p = BurgParser() | 257 p = BurgParser() |
98 l.feed(src) | 258 l.feed(src) |
99 p.parse(l) | 259 burg_system = p.parse(l) |
260 | |
261 # Generate matcher: | |
262 generator = BurgGenerator() | |
263 generator.generate(burg_system, args.output) | |
100 | 264 |
101 if __name__ == '__main__': | 265 if __name__ == '__main__': |
102 main() | 266 main() |
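
The module docstring above describes the matcher's two-phase dynamic programming (label bottom-up, then select top-down). Below is a minimal self-contained sketch of that idea; the names (RULES, Node, label, select) are hypothetical and independent of pyburg.py, and nested patterns and chain rules are deliberately left out:

```python
RULES = [
    # (rule number, result nonterminal, operator, operand nonterminals, cost)
    (1, 'reg', 'ADDI32', ('reg', 'reg'), 2),
    (2, 'reg', 'MULI32', ('reg', 'reg'), 3),
    (3, 'reg', 'CONST', (), 1),
]


class Node:
    """ A tiny tree node; each node gets a labels dict during the label phase. """
    def __init__(self, name, *children):
        self.name = name
        self.children = children
        self.labels = {}  # nonterminal -> (cost, rule number)


def label(node):
    """ Bottom-up phase: record the cheapest rule per nonterminal at every node. """
    for child in node.children:
        label(child)
    for nr, nt, op, operands, cost in RULES:
        if node.name != op or len(operands) != len(node.children):
            continue
        try:
            total = cost + sum(c.labels[o][0] for c, o in zip(node.children, operands))
        except KeyError:
            continue  # a child cannot be rewritten to the required nonterminal
        if nt not in node.labels or total < node.labels[nt][0]:
            node.labels[nt] = (total, nr)


def select(node, goal):
    """ Top-down phase: replay the cheapest cover found during labelling. """
    _, nr = node.labels[goal]
    operands = next(rule[3] for rule in RULES if rule[0] == nr)
    for child, operand_nt in zip(node.children, operands):
        select(child, operand_nt)
    print('apply rule', nr, 'at', node.name)


# Cover ADDI32(MULI32(CONST, CONST), CONST) with goal nonterminal 'reg':
tree = Node('ADDI32', Node('MULI32', Node('CONST'), Node('CONST')), Node('CONST'))
label(tree)
select(tree, 'reg')
```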
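
As a usage note, the generator can also be exercised without a spec file by constructing the BurgSystem directly, which mirrors what main() does after parsing. A minimal sketch, assuming pyburg.py is importable as pyburg and that its own imports (burg_parser, ppci, pyyacc, baselex, tree) resolve; the terminal names CONSTI32 and ADDI32 are made up for illustration, and the exact spec syntax accepted by BurgLexer/BurgParser is defined by the generated burg_parser grammar, which is not part of this changeset:

```python
import sys
# Assumes pyburg.py is importable as 'pyburg' and its module-level imports resolve.
from pyburg import BurgSystem, BurgGenerator

# Build the rule system directly, bypassing BurgLexer/BurgParser.
system = BurgSystem()
system.add_terminal('CONSTI32')
system.add_terminal('ADDI32')
system.add_rule('reg', system.tree('CONSTI32'), 1, '')
system.add_rule('reg', system.tree('ADDI32', system.tree('reg'), system.tree('reg')), 2, '')

# Write the generated Matcher class to stdout:
BurgGenerator().generate(system, sys.stdout)
```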