Mercurial > lcfOS
diff python/pyburg.py @ 319:8d07a4254f04
Work on burg
author | Windel Bouwman |
---|---|
date | Sat, 18 Jan 2014 18:58:43 +0100 |
parents | e84047f29c78 |
children | 84d67cce67b7 |
line wrap: on
line diff
--- a/python/pyburg.py Tue Dec 31 12:38:15 2013 +0100 +++ b/python/pyburg.py Sat Jan 18 18:58:43 2014 +0100 @@ -1,85 +1,244 @@ #!/usr/bin/python -""" Bottom up rewrite generator in python """ +""" + Bottom up rewrite generator. + + This script takes as input a description of patterns and outputs a + matcher class that can match trees given the patterns. + + Patterns are specified as follows: + reg -> ADDI32(reg, reg) 2 (. add $1 $2 .) + reg -> MULI32(reg, reg) 3 (. .) + or a multiply add: + reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .) + The general specification pattern is: + [result] -> [tree] [cost] [template code] + + A tree is described using parenthesis notation. For example a node X with + three + child nodes is described as: + X(a, b, b) + Trees can be nested: + X(Y(a, a), a) + The 'a' in the example above indicates an open connection to a next tree + pattern. + + The generated matcher uses dynamic programming to find the best match of the + tree. This strategy consists of two steps: + - label: During this phase the given tree is traversed in a bottom up way. + each node is labelled with a possible matching rule and the corresponding cost. + - select: In this step, the tree is traversed again, selecting at each point + the cheapest way to get to the goal. + +""" + import sys -import re import argparse from ppci import Token -import burg_parser +import burg_parser # Automatically generated +from pyyacc import ParserException, EOF +import baselex +from tree import Tree class BurgLexer: def feed(self, txt): tok_spec = [ - ('ID', r'[A-Za-z][A-Za-z\d_]*'), - ('STRING', r"'[^']*'"), - ('BRACEDCODE', r"\{[^\}]*\}"), - ('OTHER', r'[:;\|]'), - ('SKIP', r'[ ]') + ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), + ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)), + ('number', r'\d+', lambda typ, val: (typ, int(val))), + ('STRING', r"'[^']*'", lambda typ, val: ('id', val[1:-1])), + ('template', r"\(\..*\.\)", lambda typ, val: (typ, val)), + ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)), + ('SKIP', r'[ ]', None) ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match lines = txt.split('\n') - def tokenize_line(line): - """ Generator that splits up a line into tokens """ - mo = gettok(line) - pos = 0 - while mo: - typ = mo.lastgroup - val = mo.group(typ) - if typ == 'ID': - yield Token(typ, val) - elif typ == 'STRING': - typ = 'ID' - yield Token(typ, val[1:-1]) - elif typ == 'OTHER': - typ = val - yield Token(typ, val) - elif typ == 'BRACEDCODE': - yield Token(typ, val) - elif typ == 'SKIP': - pass - else: - raise NotImplementedError(str(typ)) - pos = mo.end() - mo = gettok(line, pos) - if len(line) != pos: - raise ParseError('Lex fault at {}'.format(line)) def tokenize(): - section = 0 for line in lines: line = line.strip() if not line: continue # Skip empty lines - if line == '%%': - section += 1 + elif line == '%%': yield Token('%%', '%%') - continue - if section == 0: - if line.startswith('%tokens'): - yield Token('%tokens', '%tokens') - yield from tokenize_line(line[7:]) - else: - yield Token('HEADER', line) - elif section == 1: - yield from tokenize_line(line) - yield Token('eof', 'eof') + else: + yield from baselex.tokenize(tok_spec, line) + yield Token(EOF, EOF) self.tokens = tokenize() self.token = self.tokens.__next__() def next_token(self): t = self.token - if t.typ != 'eof': + if t.typ != EOF: self.token = self.tokens.__next__() return t +class Rule: + def __init__(self, non_term, tree, cost, template): + self.non_term = non_term + self.tree = tree + self.cost = cost + self.template = template + self.nr = 0 + + def __repr__(self): + return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost) + + +class Symbol: + def __init__(self, name): + self.name = name + + +class Term(Symbol): + pass + + +class Nonterm(Symbol): + def __init__(self, name): + super().__init__(name) + self.rules = [] + + +class BurgSystem: + def __init__(self): + self.rules = [] + self.symbols = {} + self.goal = None + + def symType(self, t): + return (s.name for s in self.symbols.values() if type(s) is t) + + terminals = property(lambda s: s.symType(Term)) + + non_terminals = property(lambda s: s.symType(Nonterm)) + + def add_rule(self, non_term, tree, cost, template): + rule = Rule(non_term, tree, cost, template) + self.non_term(rule.non_term) + self.rules.append(rule) + rule.nr = len(self.rules) + + def non_term(self, name): + if name in self.terminals: + raise BurgError('Cannot redefine terminal') + if not self.goal: + self.goal = name + self.install(name, Nonterm) + + def tree(self, name, *args): + return Tree(name, *args) + + def install(self, name, t): + assert type(name) is str + if name in self.symbols: + assert type(self.symbols[name]) is t + return self.symbols[name] + else: + self.symbols[name] = t(name) + + def add_terminal(self, terminal): + self.install(terminal, Term) + + +class BurgError(Exception): + pass + + class BurgParser(burg_parser.Parser): - """ Derive from automatically generated parser """ - def add_rule(self, *args): - print(args) + """ Derived from automatically generated parser """ + def parse(self, l): + self.system = BurgSystem() + super().parse(l) + return self.system + + +class BurgGenerator: + def print(self, *args): + """ Print helper function that prints to output file """ + print(*args, file=self.output_file) + + def generate(self, system, output_file): + """ Generate script that implements the burg spec """ + self.output_file = output_file + self.system = system + + self.print('#!/usr/bin/python') + self.print('from tree import Tree, BaseMatcher, State') + self.print() + self.print('class Matcher(BaseMatcher):') + self.print(' def __init__(self):') + self.print(' self.kid_functions = {}') + self.print(' self.nts_map = {}') + for rule in self.system.rules: + kids, dummy = self.compute_kids(rule.tree, 't') + lf = 'lambda t: [{}]'.format(', '.join(kids), rule) + self.print(' # {}'.format(rule)) + self.print(' self.kid_functions[{}] = {}'.format(rule.nr, lf)) + self.print(' self.nts_map[{}] = {}'.format(rule.nr, dummy)) + self.print('') + self.emit_state() + self.print() + self.print(' def gen(self, tree):') + self.print(' self.burm_label(tree)') + self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal)) + self.print(' raise Exception("Tree not covered")') + self.print(' self.apply_rules(tree, "{}")'.format(self.system.goal)) + + def emit_record(self, rule, state_var): + # TODO: check for rules fullfilled (by not using 999999) + self.print(' nts = self.nts({})'.format(rule.nr)) + self.print(' kids = self.kids(tree, {})'.format(rule.nr)) + self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost)) + self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr)) + + def emit_state(self): + """ Emit a function that assigns a new state to a node """ + self.print(' def burm_state(self, tree):') + self.print(' tree.state = State()') + for term in self.system.terminals: + self.emitcase(term) + self.print() + + def emitcase(self, term): + rules = [rule for rule in self.system.rules if rule.tree.name == term] + for rule in rules: + condition = self.emittest(rule.tree, 'tree') + self.print(' if {}:'.format(condition)) + self.emit_record(rule, 'state') + + def emit_closure(self): + for non_terminal in self.system.non_terminals: + self.print('def closure_{}(self, c):'.format(non_terminal)) + self.print(' pass') + self.print() + + def compute_kids(self, t, root_name): + """ Compute of a pattern the blanks that must be provided from below in the tree """ + if t.name in self.system.non_terminals: + return [root_name], [t.name] + else: + k = [] + nts = [] + for i, c in enumerate(t.children): + pfx = root_name + '.children[{}]'.format(i) + kf, dummy = self.compute_kids(c, pfx) + nts.extend(dummy) + k.extend(kf) + return k, nts + + + def emittest(self, tree, prefix): + """ Generate condition for a tree pattern """ + ct = (c for c in tree.children if c.name not in self.system.non_terminals) + child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i)) for i, c in enumerate(ct)) + child_tests = ('({})'.format(ct) for ct in child_tests) + child_tests = ' and '.join(child_tests) + child_tests = ' and ' + child_tests if child_tests else '' + tst = '{}.name == "{}"'.format(prefix, tree.name) + return tst + child_tests def main(): @@ -93,10 +252,15 @@ src = args.source.read() args.source.close() + # Parse specification into burgsystem: l = BurgLexer() p = BurgParser() l.feed(src) - p.parse(l) + burg_system = p.parse(l) + + # Generate matcher: + generator = BurgGenerator() + generator.generate(burg_system, args.output) if __name__ == '__main__': main()