changeset 319:8d07a4254f04

Work on burg
author Windel Bouwman
date Sat, 18 Jan 2014 18:58:43 +0100
parents e84047f29c78
children 84d67cce67b7
files doc/utils.rst python/arm.pb python/asm.py python/baselex.py python/bootstrap.sh python/burg.x python/ppci/c3/lexer.py python/pyburg.py python/pyyacc.py python/test_burm.py python/tree.py python/yacc.py test/testpyy.py
diffstat 13 files changed, 471 insertions(+), 132 deletions(-)
--- a/doc/utils.rst	Tue Dec 31 12:38:15 2013 +0100
+++ b/doc/utils.rst	Sat Jan 18 18:58:43 2014 +0100
@@ -18,3 +18,14 @@
 Hexfile containing 3 bytes
 
 
+Yacc
+----
+
+.. automodule:: yacc
+
+
+Burg
+----
+
+.. automodule:: pyburg
+
--- a/python/arm.pb	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/arm.pb	Sat Jan 18 18:58:43 2014 +0100
@@ -1,7 +1,20 @@
 
+%terminal ADDI ADDRLP ASGNI
+%terminal CNSTI CVCI IOI INDIRC
 
+%%
+
+stmt: ASGNI(disp, reg) 1 (. .)
+stmt: reg 0 (. .)
+reg: ADDI(reg, rc) 1 (. self.emit(Add, src=[$1], dst=[$c], other=[$2]) .)
+reg: CVCI(INDIRC(disp)) 1 (.  .)
+reg: IOI 0 (. .)
+reg: disp 1 (. .)
+disp: ADDI(reg, con) 1 (. .)
+disp: ADDRLP 0 (. .)
+rc: con 0 (. .)
+rc: reg 0 (. .)
+con: CNSTI 0 (. .)
+con: IOI 0 (. .)
 
 
-reg: ADD32(reg, const) { self.emit(Add, src=[$1], dst=[$c], other=[$2]) } 1
-
-
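
Each spec line above has the shape nonterm: pattern cost (. template .). As an
illustrative sketch (using the Tree class from tree.py), the nested pattern
reg: CVCI(INDIRC(disp)) together with disp: ADDRLP covers a tree such as:

    from tree import Tree

    t = Tree('CVCI', Tree('INDIRC', Tree('ADDRLP')))
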
--- a/python/asm.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/asm.py	Sat Jan 18 18:58:43 2014 +0100
@@ -84,8 +84,10 @@
         g.add_production('asmline2', ['label'])
         g.add_production('asmline2', [])
         g.add_production('label', ['ID', ':'], self.p_label)
+        #g.add_production('label', [])
         g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
         g.add_production('instruction', ['opcode'], self.p_ins_2)
+        #g.add_production('instruction', [])
         g.add_production('opcode', ['ID'], lambda x: x.val)
         g.add_production('operands', ['operand'], self.p_operands_1)
         g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/baselex.py	Sat Jan 18 18:58:43 2014 +0100
@@ -0,0 +1,25 @@
+
+import re
+from ppci import Token
+from pyyacc import ParserException
+
+def tokenize(tok_spec, txt):
+    tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec)
+    gettok = re.compile(tok_re).match
+    func_map = {pair[0]: pair[2] for pair in tok_spec}
+
+    # Parse line:
+    line = txt
+    mo = gettok(line)
+    pos = 0
+    while mo:
+        typ = mo.lastgroup
+        val = mo.group(typ)
+        func = func_map[typ]
+        if func:
+            typ, val = func(typ, val)
+            yield Token(typ, val)
+        pos = mo.end()
+        mo = gettok(line, pos)
+    if len(line) != pos:
+        raise ParserException('Lex fault at {}'.format(line[pos:]))
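
A minimal sketch of driving baselex.tokenize (the spec entries here are
illustrative; a None callback drops the token, as with SKIP above):

    import baselex

    tok_spec = [
        ('number', r'\d+', lambda typ, val: (typ, int(val))),
        ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
        ('SKIP', r'[ ]', None),
    ]
    for token in baselex.tokenize(tok_spec, 'x 42'):
        print(token.typ, token.val)   # prints: id x, then number 42
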
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/bootstrap.sh	Sat Jan 18 18:58:43 2014 +0100
@@ -0,0 +1,10 @@
+#!/usr/bin/bash
+
+echo "Generating burg_parser.py"
+
+./yacc.py burg.x -o burg_parser.py
+
+echo "Generating arm code generator"
+
+./pyburg.py arm.pb -o arm_burm.py
+
--- a/python/burg.x	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/burg.x	Sat Jan 18 18:58:43 2014 +0100
@@ -1,15 +1,30 @@
 
-from tree import Tree
-%tokens ':' ';' '(' ')' ',' template id number '%%' header
+%tokens ':' ';' '(' ')' ',' template id number '%%' '%terminal'
 
 %%
 
-rule: id ':' tree template cost { self.add_rule($1, $3, $4, $5) };
+burgdef: directives '%%' rules;
+
+directives:
+          | directives directive;
 
-cost: number { return number.val };
+directive: termdef;
+
+termdef: '%terminal' termids;
+
+termids:
+       | termids termid;
 
-tree: id { return Tree($1) }
-    | id '(' tree ')' { return Tree($1, $3) }
-    | id '(' tree ',' tree ')' { return Tree($1, $3, $5) };
+termid: id { self.system.add_terminal($1.val) };
+
+rules:
+     | rules rule;
+
+rule: id ':' tree cost template { self.system.add_rule($1.val, $3, $4, $5.val) };
 
+cost: number { return $1.val };
 
+tree: id { return self.system.tree($1.val) }
+    | id '(' tree ')' { return self.system.tree($1.val, $3) }
+    | id '(' tree ',' tree ')' { return self.system.tree($1.val, $3, $5) };
+
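
A minimal burg input accepted by this grammar, reusing a rule that also
appears in arm.pb:

    %terminal CNSTI

    %%

    con: CNSTI 0 (. .)
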
--- a/python/ppci/c3/lexer.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/ppci/c3/lexer.py	Sat Jan 18 18:58:43 2014 +0100
@@ -1,6 +1,4 @@
-import collections
 import re
-
 from ppci import CompilerError, SourceLocation, Token
 
 """
--- a/python/pyburg.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/pyburg.py	Sat Jan 18 18:58:43 2014 +0100
@@ -1,85 +1,244 @@
 #!/usr/bin/python
 
-""" Bottom up rewrite generator in python """
+"""
+    Bottom up rewrite generator.
+
+    This script takes as input a description of patterns and outputs a
+    matcher class that can match trees given the patterns.
+
+    Patterns are specified as follows:
+     reg -> ADDI32(reg, reg) 2 (. add $1 $2 .)
+     reg -> MULI32(reg, reg) 3 (. .)
+    or a multiply add:
+     reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .)
+    The general specification pattern is:
+     [result] -> [tree] [cost] [template code]
+
+    A tree is described using parenthesis notation. For example a node X with
+    three
+    child nodes is described as:
+     X(a, b, b)
+    Trees can be nested:
+     X(Y(a, a), a)
+    The 'a' in the example above indicates an open connection to a next tree
+    pattern.
+
+    The generated matcher uses dynamic programming to find the best match of the
+    tree. This strategy consists of two steps:
+    - label: During this phase the given tree is traversed in a bottom up way.
+      each node is labelled with a possible matching rule and the corresponding cost.
+    - select: In this step, the tree is traversed again, selecting at each point
+      the cheapest way to get to the goal.
+
+"""
+
 import sys
-import re
 import argparse
 from ppci import Token
-import burg_parser
+import burg_parser   # Automatically generated
+from pyyacc import ParserException, EOF
+import baselex
+from tree import Tree
 
 
 class BurgLexer:
     def feed(self, txt):
         tok_spec = [
-           ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-           ('STRING', r"'[^']*'"),
-           ('BRACEDCODE', r"\{[^\}]*\}"),
-           ('OTHER', r'[:;\|]'),
-           ('SKIP', r'[ ]')
+           ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)),
+           ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)),
+           ('number', r'\d+', lambda typ, val: (typ, int(val))),
+           ('STRING', r"'[^']*'", lambda typ, val: ('id', val[1:-1])),
+           ('template', r"\(\..*\.\)", lambda typ, val: (typ, val)),
+           ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)),
+           ('SKIP', r'[ ]', None)
             ]
-        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-        gettok = re.compile(tok_re).match
 
         lines = txt.split('\n')
-        def tokenize_line(line):
-            """ Generator that splits up a line into tokens """
-            mo = gettok(line)
-            pos = 0
-            while mo:
-                typ = mo.lastgroup
-                val = mo.group(typ)
-                if typ == 'ID':
-                    yield Token(typ, val)
-                elif typ == 'STRING':
-                    typ = 'ID'
-                    yield Token(typ, val[1:-1])
-                elif typ == 'OTHER':
-                    typ = val
-                    yield Token(typ, val)
-                elif typ == 'BRACEDCODE':
-                    yield Token(typ, val)
-                elif typ == 'SKIP':
-                    pass
-                else:
-                    raise NotImplementedError(str(typ))
-                pos = mo.end()
-                mo = gettok(line, pos)
-            if len(line) != pos:
-                raise ParseError('Lex fault at {}'.format(line))
 
         def tokenize():
-            section = 0
             for line in lines:
                 line = line.strip()
                 if not line:
                     continue  # Skip empty lines
-                if line == '%%':
-                    section += 1
+                elif line == '%%':
                     yield Token('%%', '%%')
-                    continue
-                if section == 0:
-                    if line.startswith('%tokens'):
-                        yield Token('%tokens', '%tokens')
-                        yield from tokenize_line(line[7:])
-                    else:
-                        yield Token('HEADER', line)
-                elif section == 1:
-                    yield from tokenize_line(line)
-            yield Token('eof', 'eof')
+                else:
+                    yield from baselex.tokenize(tok_spec, line)
+            yield Token(EOF, EOF)
         self.tokens = tokenize()
         self.token = self.tokens.__next__()
 
     def next_token(self):
         t = self.token
-        if t.typ != 'eof':
+        if t.typ != EOF:
             self.token = self.tokens.__next__()
         return t
 
 
+class Rule:
+    def __init__(self, non_term, tree, cost, template):
+        self.non_term = non_term
+        self.tree = tree
+        self.cost = cost
+        self.template = template
+        self.nr = 0
+
+    def __repr__(self):
+        return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost)
+
+
+class Symbol:
+    def __init__(self, name):
+        self.name = name
+
+
+class Term(Symbol):
+    pass
+
+
+class Nonterm(Symbol):
+    def __init__(self, name):
+        super().__init__(name)
+        self.rules = []
+
+
+class BurgSystem:
+    def __init__(self):
+        self.rules = []
+        self.symbols = {}
+        self.goal = None
+
+    def symType(self, t):
+        return (s.name for s in self.symbols.values() if type(s) is t)
+
+    terminals = property(lambda s: s.symType(Term))
+
+    non_terminals = property(lambda s: s.symType(Nonterm))
+
+    def add_rule(self, non_term, tree, cost, template):
+        rule = Rule(non_term, tree, cost, template)
+        self.non_term(rule.non_term)
+        self.rules.append(rule)
+        rule.nr = len(self.rules)
+
+    def non_term(self, name):
+        if name in self.terminals:
+            raise BurgError('Cannot redefine terminal')
+        if not self.goal:
+            self.goal = name
+        self.install(name, Nonterm)
+
+    def tree(self, name, *args):
+        return Tree(name, *args)
+
+    def install(self, name, t):
+        assert type(name) is str
+        if name in self.symbols:
+            assert type(self.symbols[name]) is t
+            return self.symbols[name]
+        else:
+            self.symbols[name] = t(name)
+
+    def add_terminal(self, terminal):
+        self.install(terminal, Term)
+
+
+class BurgError(Exception):
+    pass
+
+
 class BurgParser(burg_parser.Parser):
-    """ Derive from automatically generated parser """
-    def add_rule(self, *args):
-        print(args)
+    """ Derived from automatically generated parser """
+    def parse(self, l):
+        self.system = BurgSystem()
+        super().parse(l)
+        return self.system
+
+
+class BurgGenerator:
+    def print(self, *args):
+        """ Print helper function that prints to output file """
+        print(*args, file=self.output_file)
+
+    def generate(self, system, output_file):
+        """ Generate script that implements the burg spec """
+        self.output_file = output_file
+        self.system = system
+
+        self.print('#!/usr/bin/python')
+        self.print('from tree import Tree, BaseMatcher, State')
+        self.print()
+        self.print('class Matcher(BaseMatcher):')
+        self.print('  def __init__(self):')
+        self.print('    self.kid_functions = {}')
+        self.print('    self.nts_map = {}')
+        for rule in self.system.rules:
+            kids, dummy = self.compute_kids(rule.tree, 't')
+            lf = 'lambda t: [{}]'.format(', '.join(kids))
+            self.print('    # {}'.format(rule))
+            self.print('    self.kid_functions[{}] = {}'.format(rule.nr, lf))
+            self.print('    self.nts_map[{}] = {}'.format(rule.nr, dummy))
+        self.print('')
+        self.emit_state()
+        self.print()
+        self.print('  def gen(self, tree):')
+        self.print('    self.burm_label(tree)')
+        self.print('    if not tree.state.has_goal("{}"):'.format(self.system.goal))
+        self.print('        raise Exception("Tree not covered")')
+        self.print('    self.apply_rules(tree, "{}")'.format(self.system.goal))
+
+    def emit_record(self, rule, state_var):
+        # TODO: check that rules are fulfilled (by not using 999999)
+        self.print('        nts = self.nts({})'.format(rule.nr))
+        self.print('        kids = self.kids(tree, {})'.format(rule.nr))
+        self.print('        c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost))
+        self.print('        tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr))
+
+    def emit_state(self):
+        """ Emit a function that assigns a new state to a node """
+        self.print('  def burm_state(self, tree):')
+        self.print('    tree.state = State()')
+        for term in self.system.terminals:
+            self.emitcase(term)
+        self.print()
+
+    def emitcase(self, term):
+        rules = [rule for rule in self.system.rules if rule.tree.name == term]
+        for rule in rules:
+            condition = self.emittest(rule.tree, 'tree')
+            self.print('    if {}:'.format(condition))
+            self.emit_record(rule, 'state')
+
+    def emit_closure(self):
+        for non_terminal in self.system.non_terminals:
+            self.print('def closure_{}(self, c):'.format(non_terminal))
+            self.print('    pass')
+            self.print()
+
+    def compute_kids(self, t, root_name):
+        """ Compute of a pattern the blanks that must be provided from below in the tree """
+        if t.name in self.system.non_terminals:
+            return [root_name], [t.name]
+        else:
+            k = []
+            nts = []
+            for i, c in enumerate(t.children):
+                pfx = root_name + '.children[{}]'.format(i)
+                kf, dummy = self.compute_kids(c, pfx)
+                nts.extend(dummy)
+                k.extend(kf)
+            return k, nts
+
+    def emittest(self, tree, prefix):
+        """ Generate the match condition for a tree pattern """
+        child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i))
+                       for i, c in enumerate(tree.children)
+                       if c.name not in self.system.non_terminals)
+        child_tests = ('({})'.format(ct) for ct in child_tests)
+        child_tests = ' and '.join(child_tests)
+        child_tests = ' and ' + child_tests if child_tests else ''
+        tst = '{}.name == "{}"'.format(prefix, tree.name)
+        return tst + child_tests
 
 
 def main():
@@ -93,10 +252,15 @@
     src = args.source.read()
     args.source.close()
 
+    # Parse the specification into a burg system:
     l = BurgLexer()
     p = BurgParser()
     l.feed(src)
-    p.parse(l)
+    burg_system = p.parse(l)
+
+    # Generate matcher:
+    generator = BurgGenerator()
+    generator.generate(burg_system, args.output)
 
 if __name__ == '__main__':
     main()
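
The label/select strategy from the module docstring, as a free-standing
sketch (assuming flat rules of the shape (nr, nonterm, root_name,
kid_nonterms, cost); the generated matcher instead dispatches nested
patterns through kid_functions):

    from tree import State

    def label(tree, rules):
        # label: bottom-up, children before the node itself
        for child in tree.children:
            label(child, rules)
        tree.state = State()
        for nr, nonterm, root, kid_nts, cost in rules:
            # a rule applies when the node name matches the pattern root
            if tree.name == root and len(tree.children) == len(kid_nts):
                total = sum(child.state.get_cost(goal)
                            for child, goal in zip(tree.children, kid_nts))
                tree.state.set_cost(nonterm, total + cost, nr)

The select step then walks down again from the goal, following get_rule at
every node, as BaseMatcher.apply_rules in tree.py does.
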
--- a/python/pyyacc.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/pyyacc.py	Sat Jan 18 18:58:43 2014 +0100
@@ -53,11 +53,47 @@
     for production in g.productions:
         print(production)
 
+
+def calculate_first_sets(grammar):
+    """
+        Calculate first sets for each grammar symbol
+        This is a dictionary which maps each grammar symbol
+        to a set of terminals that can be encountered first
+        when looking for the symbol.
+    """
+    first = {}
+    nullable = {}
+    for terminal in grammar.terminals | {EOF, EPS}:
+        first[terminal] = set([terminal])
+        nullable[terminal] = False
+    for nt in grammar.nonterminals:
+        first[nt] = set()
+        nullable[nt] = False
+    while True:
+        some_change = False
+        for rule in grammar.productions:
+            # Check for null-ability:
+            if all(nullable[beta] for beta in rule.symbols):
+                if not nullable[rule.name]:
+                    nullable[rule.name] = True
+                    some_change = True
+            # Update first sets:
+            for beta in rule.symbols:
+                if not nullable[beta]:
+                    if first[beta] - first[rule.name]:
+                        first[rule.name] |= first[beta]
+                        some_change = True
+                    break
+        if not some_change:
+            break
+    return first
+
+
 class Grammar:
     """ Defines a grammar of a language """
     def __init__(self, terminals):
-        self.terminals = terminals
-        self.nonterminals = []
+        self.terminals = set(terminals)
+        self.nonterminals = set()
         self.productions = []
         self._first = None  # Cached first set
         self.start_symbol = None
@@ -71,8 +107,7 @@
         self.productions.append(production)
         if name in self.terminals:
             raise ParserGenerationException("Cannot redefine terminal {0}".format(name))
-        if not name in self.nonterminals:
-            self.nonterminals.append(name)
+        self.nonterminals.add(name)
         self._first = None  # Invalidate cached version
 
     def productionsForName(self, name):
@@ -82,7 +117,7 @@
     @property
     def Symbols(self):
         """ Get all the symbols defined by this grammar """
-        return self.nonterminals + self.terminals
+        return self.nonterminals | self.terminals
 
     @property
     def first(self):
@@ -92,40 +127,9 @@
           looking for the grammar symbol
         """
         if not self._first:
-            self._first = self.calcFirstSets()
+            self._first = calculate_first_sets(self)
         return self._first
 
-    def calcFirstSets(self):
-        """
-            Calculate first sets for each grammar symbol
-            This is a dictionary which maps each grammar symbol
-            to a set of terminals that can be encountered first
-            when looking for the symbol.
-        """
-        first = {}
-        for t in self.terminals + [EOF, EPS]:
-            first[t] = set([t])
-        for nt in self.nonterminals:
-            first[nt] = set()
-        epsset = {EPS}
-        while True:
-            some_change = False
-            for p in self.productions:
-                rhs = set()
-                for beta in p.symbols:
-                    rhs = rhs | (first[beta] - epsset)
-                    if not EPS in first[beta]:
-                        break
-                else:
-                    if EPS in first[beta]:
-                        rhs.add(EPS)
-                if rhs - first[p.name]:
-                    first[p.name] |= rhs
-                    some_change = True
-            if not some_change:
-                break
-        return first
-
     def closure(self, itemset):
         """ Expand itemset by using epsilon moves """
         worklist = list(itemset)
@@ -140,7 +144,7 @@
                 f.discard(EPS)
                 f.add(itm.look_ahead)
             return f
-        # Start of algorithm: 
+        # Start of algorithm:
         while worklist:
             item = worklist.pop(0)
             if not item.IsShift:
@@ -162,7 +166,7 @@
 
     def nextItemSet(self, itemset, symbol):
         """
-            Determines the next itemset for the current set and a symbol 
+            Determines the next itemset for the current set and a symbol
             This is the goto procedure
         """
         next_set = set()
@@ -194,7 +198,7 @@
         """ Checks no symbols are undefined """
         for production in self.productions:
             for symbol in production.symbols:
-                if symbol not in self.Symbols + [EPS]:
+                if symbol not in self.Symbols:
                     raise ParserGenerationException('Symbol {0} undefined'.format(symbol))
 
     def genParser(self):
@@ -395,5 +399,5 @@
                 break
         # At exit, the stack must be 1 long
         # TODO: fix that this holds:
-        #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) 
+        #assert len(stack) == 1, 'stack {0} not totally reduced'.format(stack)
         return ret_val
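
calculate_first_sets at work on a tiny grammar (a sketch; the symbols are
illustrative):

    from pyyacc import Grammar, calculate_first_sets

    g = Grammar(['id', '(', ')'])
    g.add_production('expr', ['id'])
    g.add_production('expr', ['(', 'expr', ')'])
    first = calculate_first_sets(g)
    # first['expr'] == {'id', '('}: an expr can only start with an
    # identifier or an opening parenthesis.
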
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/test_burm.py	Sat Jan 18 18:58:43 2014 +0100
@@ -0,0 +1,17 @@
+from tree import Tree
+from arm_burm import Matcher
+
+
+def main():
+    t = Tree('ASGNI',
+             Tree('ADDRLP'),
+             Tree('ADDI',
+                  Tree('CVCI', Tree('INDIRC', Tree('ADDRLP'))),
+                  Tree('CNSTI')
+                 )
+            )
+    print(t)
+    Matcher().gen(t)
+
+if __name__ == '__main__':
+    main()
--- a/python/tree.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/tree.py	Sat Jan 18 18:58:43 2014 +0100
@@ -1,8 +1,57 @@
 
 class Tree:
+    """ Tree node with a name and possibly some child nodes """
     def __init__(self, name, *args):
         self.name = name
         self.children = args
 
     def __repr__(self):
-        return 'Tree({}, {})'.format(self.name, self.children)
+        if self.children:
+            ch = ', '.join(str(c) for c in self.children)
+            return '{}({})'.format(self.name, ch)
+        else:
+            return '{}'.format(self.name)
+
+
+class State:
+    """ Cost and rule bookkeeping for a single tree node """
+    def __init__(self):
+        self.labels = {}
+
+    def has_goal(self, goal):
+        return goal in self.labels
+
+    def get_cost(self, goal):
+        if not self.has_goal(goal):
+            return 999999
+        return self.labels[goal][0]
+
+    def get_rule(self, goal):
+        return self.labels[goal][1]
+
+    def set_cost(self, goal, cost, rule):
+        """ Store (cost, rule) for a goal, keeping only the cheapest """
+        if self.has_goal(goal):
+            if self.get_cost(goal) > cost:
+                self.labels[goal] = (cost, rule)
+        else:
+            self.labels[goal] = (cost, rule)
+
+class BaseMatcher:
+    def kids(self, tree, rule):
+        return self.kid_functions[rule](tree)
+
+    def nts(self, rule):
+        return self.nts_map[rule]
+
+    def burm_label(self, tree):
+        for c in tree.children:
+            self.burm_label(c)
+        self.burm_state(tree)
+
+    def apply_rules(self, tree, goal):
+        """ Apply the selected rules, descending into the labelled kids """
+        rule = tree.state.get_rule(goal)
+        print(rule)
+        for kid_tree, kid_goal in zip(self.kids(tree, rule), self.nts(rule)):
+            self.apply_rules(kid_tree, kid_goal)
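
State keeps, per goal non-terminal, the cheapest (cost, rule) pair seen so
far; for example:

    from tree import State

    s = State()
    s.set_cost('reg', 3, 1)   # rule 1 reaches 'reg' at cost 3
    s.set_cost('reg', 2, 4)   # rule 4 is cheaper, so it wins
    assert s.get_cost('reg') == 2 and s.get_rule('reg') == 4
    assert s.get_cost('stmt') == 999999   # unlabelled goal
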
--- a/python/yacc.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/python/yacc.py	Sat Jan 18 18:58:43 2014 +0100
@@ -1,6 +1,26 @@
 #!/usr/bin/python
 
-""" Parser generator utility """
+"""
+Parser generator utility. This script can generate a python script from a 
+grammar description.
+
+Invoke the script on a grammar specification file:
+
+.. code::
+
+    $ ./yacc.py test.x -o test_parser.py
+
+And use the generated parser by deriving a user class:
+
+
+.. code::
+
+    import test_parser
+    class MyParser(test_parser.Parser):
+        pass
+    p = MyParser()
+    p.parse()
+
+
+"""
 
 import argparse
 import re
@@ -131,6 +153,7 @@
             else:
                 headers.append(self.consume('HEADER')[1])
         self.consume('%%')
+        self.headers = headers
         self.grammar = Grammar(terminals)
         while self.Peak != 'eof':
             self.parse_rule()
@@ -140,6 +163,7 @@
         return self.consume('ID')[1]
 
     def parse_rhs(self):
+        """ Parse the right hand side of a rule definition """
         symbols = []
         while self.Peak not in [';', 'BRACEDCODE', '|']:
             symbols.append(self.parse_symbol())
@@ -151,6 +175,7 @@
         return symbols, action
 
     def parse_rule(self):
+        """ Parse a rule definition """
         p = self.parse_symbol()
         self.consume(':')
         symbols, action = self.parse_rhs()
@@ -166,47 +191,51 @@
     def __init__(self):
         pass
 
-    def generate(self, grammar, output_file):
+    def generate(self, grammar, headers, output_file):
         print_grammar(grammar)
         self.grammar = grammar
+        self.headers = headers
         self.action_table, self.goto_table = grammar.doGenerate()
         self.generate_python_script(output_file)
 
-    def generate_python_script(self, f):
+    def generate_python_script(self, output_file):
         """ Generate python script with the parser table """
-        print('#!/usr/bin/python', file=f)
+        print('#!/usr/bin/python', file=output_file)
         stamp = datetime.datetime.now().ctime()
-        print('""" Automatically generated by xacc on {} """'.format(stamp), file=f)
-        print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar', file=f)
-        print('from ppci import Token', file=f)
-        print(file=f)
-        print('class Parser(LRParser):', file=f)
-        print('    def __init__(self):', file=f)
+        print('""" Automatically generated by xacc on {} """'.format(stamp), file=output_file)
+        print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar', file=output_file)
+        print('from ppci import Token', file=output_file)
+        print(file=output_file)
+        for h in self.headers:
+            print(h, file=output_file)
+        print(file=output_file)
+        print('class Parser(LRParser):', file=output_file)
+        print('    def __init__(self):', file=output_file)
         # Generate rules:
-        print('        self.start_symbol = "{}"'.format(self.grammar.start_symbol), file=f)
-        print('        self.grammar = Grammar({})'.format(self.grammar.terminals), file=f)
+        print('        self.start_symbol = "{}"'.format(self.grammar.start_symbol), file=output_file)
+        print('        self.grammar = Grammar({})'.format(self.grammar.terminals), file=output_file)
         for rule_number, rule in enumerate(self.grammar.productions):
             rule.f_name = 'action_{}_{}'.format(rule.name, rule_number)
-            print('        self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name), file=f)
+            print('        self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name), file=output_file)
         # Fill action table:
-        print('        self.action_table = {}', file=f)
+        print('        self.action_table = {}', file=output_file)
         for state in self.action_table:
             action = self.action_table[state]
-            print('        self.action_table[{}] = {}'.format(state, action), file=f)
-        print('', file=f)
+            print('        self.action_table[{}] = {}'.format(state, action), file=output_file)
+        print('', file=output_file)
 
         # Fill goto table:
-        print('        self.goto_table = {}', file=f)
+        print('        self.goto_table = {}', file=output_file)
         for gt in self.goto_table:
             to = self.goto_table[gt]
-            print('        self.goto_table[{}] = {}'.format(gt, to), file=f)
-        print('', file=f)
+            print('        self.goto_table[{}] = {}'.format(gt, to), file=output_file)
+        print('', file=output_file)
 
         # Generate a function for each action:
         for rule in self.grammar.productions:
             M = len(rule.symbols)
             args = ', '.join('arg{}'.format(n + 1) for n in range(M))
-            print('    def {}(self, {}):'.format(rule.f_name, args), file=f)
+            print('    def {}(self, {}):'.format(rule.f_name, args), file=output_file)
             if rule.f == None:
                 semantics = 'pass'
             else:
@@ -215,8 +244,7 @@
                     semantics = 'pass'
             for n in range(M):
                 semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1))
-            print('        {}'.format(semantics), file=f)
-            print('', file=f)
+            print('        {}'.format(semantics), file=output_file)
 
 
 def main():
@@ -238,7 +266,7 @@
     # Sequence source through the generator parts:
     lexer.feed(src)
     grammar = parser.parse_grammar()
-    generator.generate(grammar, args.output)
+    generator.generate(grammar, parser.headers, args.output)
     args.output.close()
 
 
--- a/test/testpyy.py	Tue Dec 31 12:38:15 2013 +0100
+++ b/test/testpyy.py	Sat Jan 18 18:58:43 2014 +0100
@@ -1,6 +1,6 @@
 import unittest
 from pyyacc import Grammar, Item, ParserGenerationException, ParserException
-from pyyacc import EPS, EOF
+from pyyacc import EPS, EOF, calculate_first_sets
 from ppci import Token
 
 
@@ -123,6 +123,18 @@
         tokens = genTokens(['id', 'id'])   # i.e. "inc rax"
         p.parse(tokens)
 
+    def testEpsSequence(self):
+        """ Test epsilon terminal for use in sequences """
+        g = Grammar(['a'])
+        g.add_production('aas', [])
+        g.add_production('aas', ['aas', 'a'])
+        g.start_symbol = 'aas'
+        p = g.genParser()
+        tokens = genTokens(['a', 'a', 'a'])
+        p.parse(tokens)
+        tokens = genTokens([])
+        p.parse(tokens)
+
     def test_cb(self):
         """ Test callback of one rule and order or parameters """
         self.cb_called = False
@@ -156,7 +168,7 @@
 
     def testFirstSimpleGrammar(self):
         # 1. define a simple grammar:
-        first = self.g.calcFirstSets()
+        first = calculate_first_sets(self.g)
         self.assertEqual(first['input'], {'identifier', '(', 'num'})
         self.assertEqual(first['term'], {'identifier', '(', 'num'})