diff python/pyburg.py @ 319:8d07a4254f04

Work on burg
author Windel Bouwman
date Sat, 18 Jan 2014 18:58:43 +0100
parents e84047f29c78
children 84d67cce67b7
line wrap: on
line diff
--- a/python/pyburg.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/pyburg.py	Sat Jan 18 18:58:43 2014 +0100
@@ -1,85 +1,244 @@
 #!/usr/bin/python
 
-""" Bottom up rewrite generator in python """
+"""
+    Bottom up rewrite generator.
+
+    This script takes as input a description of patterns and outputs a
+    matcher class that can match trees given the patterns.
+
+    Patterns are specified as follows:
+     reg -> ADDI32(reg, reg) 2 (. add $1 $2 .)
+     reg -> MULI32(reg, reg) 3 (. .)
+    or a multiply add:
+     reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .)
+    The general specification pattern is:
+     [result] -> [tree] [cost] [template code]
+
+    A tree is described using parenthesis notation. For example a node X with
+    three
+    child nodes is described as:
+     X(a, b, b)
+    Trees can be nested:
+     X(Y(a, a), a)
+    The 'a' in the example above indicates an open connection to a next tree
+    pattern.
+
+    The generated matcher uses dynamic programming to find the best match of the
+    tree. This strategy consists of two steps:
+    - label: During this phase the given tree is traversed in a bottom up way.
+      each node is labelled with a possible matching rule and the corresponding cost.
+    - select: In this step, the tree is traversed again, selecting at each point
+      the cheapest way to get to the goal.
+
+"""
+
 import sys
-import re
 import argparse
 from ppci import Token
-import burg_parser
+import burg_parser   # Automatically generated
+from pyyacc import ParserException, EOF
+import baselex
+from tree import Tree
 
 
 class BurgLexer:
     def feed(self, txt):
         tok_spec = [
-           ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-           ('STRING', r"'[^']*'"),
-           ('BRACEDCODE', r"\{[^\}]*\}"),
-           ('OTHER', r'[:;\|]'),
-           ('SKIP', r'[ ]')
+           ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
+           ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)),
+           ('number', r'\d+', lambda typ, val: (typ, int(val))),
+           ('STRING', r"'[^']*'", lambda typ, val: ('id', val[1:-1])),
+           ('template', r"\(\..*\.\)", lambda typ, val: (typ, val)),
+           ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)),
+           ('SKIP', r'[ ]', None)
             ]
-        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-        gettok = re.compile(tok_re).match
 
         lines = txt.split('\n')
-        def tokenize_line(line):
-            """ Generator that splits up a line into tokens """
-            mo = gettok(line)
-            pos = 0
-            while mo:
-                typ = mo.lastgroup
-                val = mo.group(typ)
-                if typ == 'ID':
-                    yield Token(typ, val)
-                elif typ == 'STRING':
-                    typ = 'ID'
-                    yield Token(typ, val[1:-1])
-                elif typ == 'OTHER':
-                    typ = val
-                    yield Token(typ, val)
-                elif typ == 'BRACEDCODE':
-                    yield Token(typ, val)
-                elif typ == 'SKIP':
-                    pass
-                else:
-                    raise NotImplementedError(str(typ))
-                pos = mo.end()
-                mo = gettok(line, pos)
-            if len(line) != pos:
-                raise ParseError('Lex fault at {}'.format(line))
 
         def tokenize():
-            section = 0
             for line in lines:
                 line = line.strip()
                 if not line:
                     continue  # Skip empty lines
-                if line == '%%':
-                    section += 1
+                elif line == '%%':
                     yield Token('%%', '%%')
-                    continue
-                if section == 0:
-                    if line.startswith('%tokens'):
-                        yield Token('%tokens', '%tokens')
-                        yield from tokenize_line(line[7:])
-                    else:
-                        yield Token('HEADER', line)
-                elif section == 1:
-                    yield from tokenize_line(line)
-            yield Token('eof', 'eof')
+                else:
+                    yield from baselex.tokenize(tok_spec, line)
+            yield Token(EOF, EOF)
         self.tokens = tokenize()
         self.token = self.tokens.__next__()
 
     def next_token(self):
         t = self.token
-        if t.typ != 'eof':
+        if t.typ != EOF:
             self.token = self.tokens.__next__()
         return t
 
 
+class Rule:
+    def __init__(self, non_term, tree, cost, template):
+        self.non_term = non_term
+        self.tree = tree
+        self.cost = cost
+        self.template = template
+        self.nr = 0
+
+    def __repr__(self):
+        return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost)
+
+
+class Symbol:
+    def __init__(self, name):
+        self.name = name
+
+
+class Term(Symbol):
+    pass
+
+
+class Nonterm(Symbol):
+    def __init__(self, name):
+        super().__init__(name)
+        self.rules = []
+
+
+class BurgSystem:
+    def __init__(self):
+        self.rules = []
+        self.symbols = {}
+        self.goal = None
+
+    def symType(self, t):
+        return (s.name for s in self.symbols.values() if type(s) is t)
+
+    terminals = property(lambda s: s.symType(Term))
+
+    non_terminals = property(lambda s: s.symType(Nonterm))
+
+    def add_rule(self, non_term, tree, cost, template):
+        rule = Rule(non_term, tree, cost, template)
+        self.non_term(rule.non_term)
+        self.rules.append(rule)
+        rule.nr = len(self.rules)
+
+    def non_term(self, name):
+        if name in self.terminals:
+            raise BurgError('Cannot redefine terminal')
+        if not self.goal:
+            self.goal = name
+        self.install(name, Nonterm)
+
+    def tree(self, name, *args):
+        return Tree(name, *args)
+
+    def install(self, name, t):
+        assert type(name) is str
+        if name in self.symbols:
+            assert type(self.symbols[name]) is t
+            return self.symbols[name]
+        else:
+            self.symbols[name] = t(name)
+
+    def add_terminal(self, terminal):
+        self.install(terminal, Term)
+
+
+class BurgError(Exception):
+    pass
+
+
 class BurgParser(burg_parser.Parser):
-    """ Derive from automatically generated parser """
-    def add_rule(self, *args):
-        print(args)
+    """ Derived from automatically generated parser """
+    def parse(self, l):
+        self.system = BurgSystem()
+        super().parse(l)
+        return self.system
+
+
+class BurgGenerator:
+    def print(self, *args):
+        """ Print helper function that prints to output file """
+        print(*args, file=self.output_file)
+
+    def generate(self, system, output_file):
+        """ Generate script that implements the burg spec """
+        self.output_file = output_file
+        self.system = system
+
+        self.print('#!/usr/bin/python')
+        self.print('from tree import Tree, BaseMatcher, State')
+        self.print()
+        self.print('class Matcher(BaseMatcher):')
+        self.print('  def __init__(self):')
+        self.print('    self.kid_functions = {}')
+        self.print('    self.nts_map = {}')
+        for rule in self.system.rules:
+            kids, dummy = self.compute_kids(rule.tree, 't')
+            lf = 'lambda t: [{}]'.format(', '.join(kids), rule)
+            self.print('    # {}'.format(rule))
+            self.print('    self.kid_functions[{}] = {}'.format(rule.nr, lf))
+            self.print('    self.nts_map[{}] = {}'.format(rule.nr, dummy))
+        self.print('')
+        self.emit_state()
+        self.print()
+        self.print('  def gen(self, tree):')
+        self.print('    self.burm_label(tree)')
+        self.print('    if not tree.state.has_goal("{}"):'.format(self.system.goal))
+        self.print('        raise Exception("Tree not covered")')
+        self.print('    self.apply_rules(tree, "{}")'.format(self.system.goal))
+
+    def emit_record(self, rule, state_var):
+        # TODO: check for rules fullfilled (by not using 999999)
+        self.print('        nts = self.nts({})'.format(rule.nr))
+        self.print('        kids = self.kids(tree, {})'.format(rule.nr))
+        self.print('        c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost))
+        self.print('        tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr))
+
+    def emit_state(self):
+        """ Emit a function that assigns a new state to a node """
+        self.print('  def burm_state(self, tree):')
+        self.print('    tree.state = State()')
+        for term in self.system.terminals:
+            self.emitcase(term)
+        self.print()
+
+    def emitcase(self, term):
+        rules = [rule for rule in self.system.rules if rule.tree.name == term]
+        for rule in rules:
+            condition = self.emittest(rule.tree, 'tree')
+            self.print('    if {}:'.format(condition))
+            self.emit_record(rule, 'state')
+
+    def emit_closure(self):
+        for non_terminal in self.system.non_terminals:
+            self.print('def closure_{}(self, c):'.format(non_terminal))
+            self.print('    pass')
+            self.print()
+
+    def compute_kids(self, t, root_name):
+        """ Compute of a pattern the blanks that must be provided from below in the tree """
+        if t.name in self.system.non_terminals:
+            return [root_name], [t.name]
+        else:
+            k = []
+            nts = []
+            for i, c in enumerate(t.children):
+                pfx = root_name + '.children[{}]'.format(i)
+                kf, dummy = self.compute_kids(c, pfx)
+                nts.extend(dummy)
+                k.extend(kf)
+            return k, nts
+
+
+    def emittest(self, tree, prefix):
+        """ Generate condition for a tree pattern """
+        ct = (c for c in tree.children if c.name not in self.system.non_terminals)
+        child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i)) for i, c in enumerate(ct))
+        child_tests = ('({})'.format(ct) for ct in child_tests)
+        child_tests = ' and '.join(child_tests)
+        child_tests = ' and ' + child_tests if child_tests else ''
+        tst = '{}.name == "{}"'.format(prefix, tree.name)
+        return tst + child_tests
 
 
 def main():
@@ -93,10 +252,15 @@
     src = args.source.read()
     args.source.close()
 
+    # Parse specification into burgsystem:
     l = BurgLexer()
     p = BurgParser()
     l.feed(src)
-    p.parse(l)
+    burg_system = p.parse(l)
+
+    # Generate matcher:
+    generator = BurgGenerator()
+    generator.generate(burg_system, args.output)
 
 if __name__ == '__main__':
     main()