changeset 318:e84047f29c78

Add burg and yacc initial attempts
author Windel Bouwman
date Tue, 31 Dec 2013 12:38:15 +0100
parents e30a77ae359b
children 8d07a4254f04
files python/arm.pb python/asm.py python/burg.x python/ppci/irutils.py python/pyburg.py python/pyyacc.py python/target/arminstructionselector.py python/tree.py python/yacc.py python/zcc.py readme.rst test/expression.x test/testasm.py test/testpyy.py
diffstat 14 files changed, 568 insertions(+), 128 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/arm.pb	Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,7 @@
+
+
+
+
+reg: ADD32(reg, const) { self.emit(Add, src=[$1], dst=[$c], other=[$2]) } 1
+
+
--- a/python/asm.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/python/asm.py	Tue Dec 31 12:38:15 2013 +0100
@@ -1,19 +1,20 @@
 #!/usr/bin/env python3
 
-import re, argparse
+import re
+import argparse
 import pyyacc
 from ppci import Token, CompilerError, SourceLocation
 from target import Target, Label
 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
 
 def tokenize(s):
-     """
+    """
        Tokenizer, generates an iterator that
        returns tokens!
 
        This GREAT example was taken from python re doc page!
-     """
-     tok_spec = [
+    """
+    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
@@ -22,13 +23,13 @@
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*")
-     ]
-     tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-     gettok = re.compile(tok_re).match
-     line = 1
-     pos = line_start = 0
-     mo = gettok(s)
-     while mo is not None:
+    ]
+    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+    gettok = re.compile(tok_re).match
+    line = 1
+    pos = line_start = 0
+    mo = gettok(s)
+    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
@@ -51,10 +52,11 @@
          yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
-     if pos != len(s):
+    if pos != len(s):
        col = pos - line_start
        loc = SourceLocation('', line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
+    yield Token('EOF', pyyacc.EOF)
 
 
 class Lexer:
@@ -62,33 +64,29 @@
         self.tokens = tokenize(src)
         self.curTok = self.tokens.__next__()
 
-    def eat(self):
+    def next_token(self):
         t = self.curTok
-        self.curTok = self.tokens.__next__()
+        if t.typ != 'EOF':
+            self.curTok = self.tokens.__next__()
         return t
 
-    @property
-    def Peak(self):
-        return self.curTok
-
 
 class Parser:
     def __init__(self):
         # Construct a parser given a grammar:
         ident = lambda x: x   # Identity helper function
-        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}'])
+        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}',
+            pyyacc.EOF])
         g.add_production('asmline', ['asmline2'])
         g.add_production('asmline', ['asmline2', 'COMMENT'])
         g.add_production('asmline2', ['label', 'instruction'])
         g.add_production('asmline2', ['instruction'])
         g.add_production('asmline2', ['label'])
         g.add_production('asmline2', [])
-        g.add_production('optcomment', [])
-        g.add_production('optcomment', ['COMMENT'])
         g.add_production('label', ['ID', ':'], self.p_label)
         g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
         g.add_production('instruction', ['opcode'], self.p_ins_2)
-        g.add_production('opcode', ['ID'], ident)
+        g.add_production('opcode', ['ID'], lambda x: x.val)
         g.add_production('operands', ['operand'], self.p_operands_1)
         g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
         g.add_production('operand', ['expression'], ident)
@@ -98,13 +96,13 @@
         g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2)
         g.add_production('expression', ['term'], ident)
         g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
-        g.add_production('addop', ['-'], ident)
-        g.add_production('addop', ['+'], ident)
-        g.add_production('mulop', ['*'], ident)
+        g.add_production('addop', ['-'], lambda x: x.val)
+        g.add_production('addop', ['+'], lambda x: x.val)
+        g.add_production('mulop', ['*'], lambda x: x.val)
         g.add_production('term', ['factor'], ident)
         g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
-        g.add_production('factor', ['ID'], lambda name: ASymbol(name))
-        g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num)))
+        g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
+        g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
         g.start_symbol = 'asmline'
         self.p = g.genParser()
 
@@ -112,10 +110,13 @@
     def p_ins_1(self, opc, ops):
         ins = AInstruction(opc, ops)
         self.emit(ins)
+
     def p_ins_2(self, opc):
         self.p_ins_1(opc, [])
+
     def p_operands_1(self, op1):
         return [op1]
+
     def p_operands_2(self, ops, comma, op2):
         assert type(ops) is list
         ops.append(op2)
@@ -131,17 +132,20 @@
 
     def p_list_op(self, brace_open, lst, brace_close):
         return AUnop('{}', lst)
+
     def p_mem_op(self, brace_open, exp, brace_close):
         return AUnop('[]', exp)
+
     def p_label(self, lname, cn):
-        lab = ALabel(lname)
+        lab = ALabel(lname.val)
         self.emit(lab)
+
     def p_binop(self, exp1, op, exp2):
         return ABinop(op, exp1, exp2)
 
-    def parse(self, tokens, emitter):
+    def parse(self, lexer, emitter):
         self.emit = emitter
-        self.p.parse(tokens)
+        self.p.parse(lexer)
 
 # Pre construct parser to save time:
 asmParser = Parser()
@@ -163,7 +167,7 @@
 
     def parse_line(self, line):
         """ Parse line into asm AST """
-        tokens = tokenize(line)
+        tokens = Lexer(line)
         self.p.parse(tokens, self.emit)
 
     def assemble(self, asmsrc):
@@ -172,16 +176,15 @@
             self.assemble_line(line)
 
     def assemble_line(self, line):
-        """ 
-            Assemble a single source line. 
-            Do not take newlines into account 
+        """
+            Assemble a single source line.
+            Do not take newlines into account
         """
         self.parse_line(line)
         self.assemble_aast()
 
     def assemble_aast(self):
         """ Assemble a parsed asm line """
-        # TODO
         if not self.target:
             raise CompilerError('Cannot assemble without target')
         while self.stack:
@@ -199,8 +202,8 @@
 if __name__ == '__main__':
     # When run as main file, try to grab command line arguments:
     parser = argparse.ArgumentParser(description="Assembler")
-    parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble')
+    parser.add_argument('sourcefile', type=argparse.FileType('r'),
+        help='the source file to assemble')
     args = parser.parse_args()
     a = Assembler()
     obj = a.assemble(args.sourcefile.read())
-
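
The assembler now drives the parser through a small lexer object rather than a bare token generator: tokenize() yields a trailing EOF token, and Lexer.next_token() keeps handing that EOF token back once the input is exhausted, which is the protocol the reworked pyyacc LRParser expects. A minimal usage sketch (illustrative only, not part of this changeset):

    from asm import Lexer

    lex = Lexer('mov rax, rbx')
    while True:
        tok = lex.next_token()
        print(tok.typ, tok.val)   # ID mov, ID rax, ',' ',', ID rbx, EOF EOF
        if tok.typ == 'EOF':
            break
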
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/burg.x	Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,15 @@
+
+from tree import Tree
+%tokens ':' ';' '(' ')' ',' template id number '%%' header
+
+%%
+
+rule: id ':' tree template cost { self.add_rule($1, $3, $4, $5) };
+
+cost: number { return number.val };
+
+tree: id { return Tree($1) }
+    | id '(' tree ')' { return Tree($1, $3) }
+    | id '(' tree ',' tree ')' { return Tree($1, $3, $5) };
+
+
--- a/python/ppci/irutils.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/python/ppci/irutils.py	Tue Dec 31 12:38:15 2013 +0100
@@ -119,7 +119,7 @@
             return self.next_token()
         else:
             raise IrParseException('Expected "{}" got "{}"'.format(typ, self.Peak))
-        
+
     def parse_module(self):
         """ Entry for recursive descent parser """
         self.Consume('module')
@@ -132,7 +132,7 @@
             else:
                 raise IrParseException('Expected function got {}'.format(self.Peak))
         return module
-                
+
     def parse_function(self):
         self.Consume('function')
         self.parse_type()
@@ -259,7 +259,7 @@
         assert block.LastInstruction.IsTerminator
         for i in block.Instructions[:-1]:
             assert not isinstance(i, ir.LastStatement)
-        
+
     def verify_block(self, block):
         for instruction in block.Instructions:
             self.verify_instruction(instruction)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/pyburg.py	Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+
+""" Bottom up rewrite generator in python """
+import sys
+import re
+import argparse
+from ppci import Token
+import burg_parser
+
+
+class BurgLexer:
+    def feed(self, txt):
+        tok_spec = [
+           ('ID', r'[A-Za-z][A-Za-z\d_]*'),
+           ('STRING', r"'[^']*'"),
+           ('BRACEDCODE', r"\{[^\}]*\}"),
+           ('OTHER', r'[:;\|]'),
+           ('SKIP', r'[ ]')
+            ]
+        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+        gettok = re.compile(tok_re).match
+
+        lines = txt.split('\n')
+        def tokenize_line(line):
+            """ Generator that splits up a line into tokens """
+            mo = gettok(line)
+            pos = 0
+            while mo:
+                typ = mo.lastgroup
+                val = mo.group(typ)
+                if typ == 'ID':
+                    yield Token(typ, val)
+                elif typ == 'STRING':
+                    typ = 'ID'
+                    yield Token(typ, val[1:-1])
+                elif typ == 'OTHER':
+                    typ = val
+                    yield Token(typ, val)
+                elif typ == 'BRACEDCODE':
+                    yield Token(typ, val)
+                elif typ == 'SKIP':
+                    pass
+                else:
+                    raise NotImplementedError(str(typ))
+                pos = mo.end()
+                mo = gettok(line, pos)
+            if len(line) != pos:
+                raise ParseError('Lex fault at {}'.format(line))
+
+        def tokenize():
+            section = 0
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    continue  # Skip empty lines
+                if line == '%%':
+                    section += 1
+                    yield Token('%%', '%%')
+                    continue
+                if section == 0:
+                    if line.startswith('%tokens'):
+                        yield Token('%tokens', '%tokens')
+                        yield from tokenize_line(line[7:])
+                    else:
+                        yield Token('HEADER', line)
+                elif section == 1:
+                    yield from tokenize_line(line)
+            yield Token('eof', 'eof')
+        self.tokens = tokenize()
+        self.token = self.tokens.__next__()
+
+    def next_token(self):
+        t = self.token
+        if t.typ != 'eof':
+            self.token = self.tokens.__next__()
+        return t
+
+
+class BurgParser(burg_parser.Parser):
+    """ Derive from automatically generated parser """
+    def add_rule(self, *args):
+        print(args)
+
+
+def main():
+    # Parse arguments:
+    parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler')
+    parser.add_argument('source', type=argparse.FileType('r'), \
+      help='the parser specification')
+    parser.add_argument('-o', '--output', type=argparse.FileType('w'), \
+        default=sys.stdout)
+    args = parser.parse_args()
+    src = args.source.read()
+    args.source.close()
+
+    l = BurgLexer()
+    p = BurgParser()
+    l.feed(src)
+    p.parse(l)
+
+if __name__ == '__main__':
+    main()
--- a/python/pyyacc.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/python/pyyacc.py	Tue Dec 31 12:38:15 2013 +0100
@@ -2,14 +2,11 @@
   Parser generator script
 """
 
-import hashlib
 from ppci import Token
 
 EPS = 'EPS'
 EOF = 'EOF'
-SHIFT = 1
-REDUCE = 2
-ACCEPT = 3
+
 
 class ParserGenerationException(Exception):
     """ Raised when something goes wrong during parser generation """
@@ -21,6 +18,41 @@
     pass
 
 
+class Action:
+    def __repr__(self):
+        return 'Action'
+
+    def __eq__(self, other):
+        return str(self) == str(other)
+
+
+class Shift(Action):
+    def __init__(self, to_state):
+        self.to_state = to_state
+
+    def __repr__(self):
+        return 'Shift({})'.format(self.to_state)
+
+
+class Reduce(Action):
+    def __init__(self, rule):
+        self.rule = rule
+
+    def __repr__(self):
+        return 'Reduce({})'.format(self.rule)
+
+
+class Accept(Reduce):
+    def __repr__(self):
+        return 'Accept({})'.format(self.rule)
+
+
+def print_grammar(g):
+    """ Pretty print a grammar """
+    print(g)
+    for production in g.productions:
+        print(production)
+
 class Grammar:
     """ Defines a grammar of a language """
     def __init__(self, terminals):
@@ -28,6 +60,10 @@
         self.nonterminals = []
         self.productions = []
         self._first = None  # Cached first set
+        self.start_symbol = None
+
+    def __repr__(self):
+        return 'Grammar with {} rules'.format(len(self.productions))
 
     def add_production(self, name, symbols, f=None):
         """ Add a production rule to the grammar """
@@ -50,10 +86,10 @@
 
     @property
     def first(self):
-        """ 
+        """
           The first set is a mapping from a grammar symbol to a set of
           set of all terminal symbols that can be the first terminal when
-          looking for the grammar symbol 
+          looking for the grammar symbol
         """
         if not self._first:
             self._first = self.calcFirstSets()
@@ -125,7 +161,7 @@
         return self.closure(iis)
 
     def nextItemSet(self, itemset, symbol):
-        """ 
+        """
             Determines the next itemset for the current set and a symbol 
             This is the goto procedure
         """
@@ -134,7 +170,7 @@
             if item.can_shift_over(symbol):
                 next_set.add(item.shifted())
         return self.closure(next_set)
-    
+
     def genCanonicalSet(self, iis):
         states = []
         worklist = []
@@ -153,7 +189,7 @@
                 addSt(nis)
                 transitions[(states.index(itemset), symbol)] = states.index(nis)
         return states, transitions
-    
+
     def checkSymbols(self):
         """ Checks no symbols are undefined """
         for production in self.productions:
@@ -161,20 +197,16 @@
                 if symbol not in self.Symbols + [EPS]:
                     raise ParserGenerationException('Symbol {0} undefined'.format(symbol))
 
-    def getSignature(self):
-        m = hashlib.md5()
-        m.update((str(self.productions) + str(self.start_symbol)).encode('ascii'))
-        signature = m.hexdigest()
-        
     def genParser(self):
         """ Generates a parser from the grammar (using a caching algorithm) """
-        signature = self.getSignature() 
         action_table, goto_table = self.doGenerate()
         p = LRParser(action_table, goto_table, self.start_symbol)
         p.grammar = self
         return p
 
     def doGenerate(self):
+        if not self.start_symbol:
+            self.start_symbol = self.productions[0].name
         self.checkSymbols()
         action_table = {}
         goto_table = {}
@@ -183,32 +215,24 @@
         # First generate all item sets by using the nextItemset function:
         states, transitions = self.genCanonicalSet(iis)
 
-        def action_str(act):
-            a, p = act
-            if a is SHIFT:
-                return 'Shift {0}'.format(0)
-            elif a is REDUCE:
-                return 'Reduce {0}'.format(p)
-            return 'Other'
-        
         def setAction(state, t, action):
+            assert isinstance(action, Action)
             key = (state, t)
             assert type(state) is int
             assert type(t) is str
             if key in action_table:
                 action2 = action_table[key]
                 if action != action2:
-                    if (action2[0] == REDUCE) and (action[0] == SHIFT):
+                    if (type(action2) is Reduce) and (type(action) is Shift):
                         # Automatically resolve and do the shift action!
                         # Simple, but almost always what you want!!
                         action_table[key] = action
+                    elif isinstance(action2, Shift) and isinstance(action, Reduce):
+                        pass
                     else:
-                        if (action2[0] == SHIFT) and (action[0] == REDUCE):
-                            pass
-                        else:
-                            a1 = action_str(action)
-                            a2 = action_str(action2)
-                            raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))
+                        a1 = str(action)
+                        a2 = str(action2)
+                        raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))
             else:
                 action_table[key] = action
 
@@ -219,14 +243,15 @@
                 if item.IsShift and item.Next in self.terminals:
                     # Rule 1, a shift item:
                     nextstate = transitions[(states.index(state), item.Next)]
-                    setAction(states.index(state), item.Next, (SHIFT, nextstate))
+                    setAction(states.index(state), item.Next, Shift(nextstate))
                 if item.IsReduce:
                     if item.production.name == self.start_symbol and item.look_ahead == EOF:
                         # Rule 3: accept:
-                        setAction(states.index(state), item.look_ahead, (ACCEPT, self.productions.index(item.production)))
+                        act = Accept(self.productions.index(item.production))
                     else:
                         # Rule 2, reduce item:
-                        setAction(states.index(state), item.look_ahead, (REDUCE, self.productions.index(item.production)))
+                        act = Reduce(self.productions.index(item.production))
+                    setAction(states.index(state), item.look_ahead, act)
             for nt in self.nonterminals:
                 key = (states.index(state), nt)
                 if key in transitions:
@@ -242,12 +267,13 @@
         self.f = f
 
     def __repr__(self):
-        return '{0} -> {1}'.format(self.name, self.symbols)
+        action = ' ' + str(self.f) if self.f else ''
+        return '{0} -> {1}'.format(self.name, self.symbols) + action
 
 
 class Item:
-    """ 
-        Represents a partially parsed item 
+    """
+        Represents a partially parsed item
         It has a production it is looking for, a position
         in this production called the 'dot' and a look ahead
         symbol that must follow this item.
@@ -311,68 +337,63 @@
 
 
 class LRParser:
-    """ LR parser """
+    """ LR parser automata """
     def __init__(self, action_table, goto_table, start_symbol):
         self.action_table = action_table
         self.goto_table = goto_table
         self.start_symbol = start_symbol
 
-    def parse(self, toks):
+    def parse(self, lexer):
         """ Parse an iterable with tokens """
-        assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks))
+        assert hasattr(lexer, 'next_token'), '{0} is not a lexer'.format(type(lexer))
         stack = [0]
         r_data_stack = []
-        try:
-            look_ahead = toks.__next__()
-        except StopIteration:
-            look_ahead = Token(EOF, EOF)
+        look_ahead = lexer.next_token()
         assert type(look_ahead) is Token
         # TODO: exit on this condition:
         while stack != [0, self.start_symbol, 2222]:
-            #print(stack)
             state = stack[-1]   # top of stack
             key = (state, look_ahead.typ)
             if not key in self.action_table:
                 raise ParserException('Error parsing at character {0}'.format(look_ahead))
-            action, param = self.action_table[key]
-            if action == REDUCE:
+            action = self.action_table[key]
+            if type(action) is Reduce:
                 f_args = []
-                param = self.grammar.productions[param]
-                for s in param.symbols:
+                prod = self.grammar.productions[action.rule]
+                for s in prod.symbols:
                     stack.pop()
                     stack.pop()
                     f_args.append(r_data_stack.pop())
                 f_args.reverse()
                 r_data = None
-                if param.f:
-                    r_data = param.f(*f_args)
+                if prod.f:
+                    r_data = prod.f(*f_args)
                 state = stack[-1]
-                stack.append(param.name)
-                stack.append(self.goto_table[(state, param.name)])
+                stack.append(prod.name)
+                stack.append(self.goto_table[(state, prod.name)])
                 r_data_stack.append(r_data)
-            elif action == SHIFT:
+            elif type(action) is Shift:
                 stack.append(look_ahead.typ)
-                stack.append(param)
-                r_data_stack.append(look_ahead.val)
-                try:
-                    look_ahead = toks.__next__()
-                except StopIteration:
-                    look_ahead = Token(EOF, EOF)
+                stack.append(action.to_state)
+                r_data_stack.append(look_ahead)
+                look_ahead = lexer.next_token()
                 assert type(look_ahead) is Token
-            elif action == ACCEPT:
+            elif type(action) is Accept:
                 # Pop last rule data off the stack:
                 f_args = []
-                param = self.grammar.productions[param]
+                param = self.grammar.productions[action.rule]
                 for s in param.symbols:
                     stack.pop()
                     stack.pop()
                     f_args.append(r_data_stack.pop())
                 f_args.reverse()
                 if param.f:
-                    param.f(*f_args)
+                    ret_val = param.f(*f_args)
+                else:
+                    ret_val = None
                 # Break out!
                 break
         # At exit, the stack must be 1 long
         # TODO: fix that this holds:
         #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) 
-
+        return ret_val
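
Two behavioural changes in pyyacc are worth spelling out: shift actions now push the whole Token onto the data stack, so production callbacks receive Token objects (use .val for the text), and parse() now returns whatever the accepting production's callback produced. A hedged sketch with a throwaway list-backed lexer (the helper class is an assumption for illustration, not part of this changeset):

    import pyyacc
    from ppci import Token

    class ListLexer:
        """ Serves tokens from a list; returns EOF forever once exhausted. """
        def __init__(self, toks):
            self.toks = iter(toks)

        def next_token(self):
            return next(self.toks, Token(pyyacc.EOF, pyyacc.EOF))

    g = pyyacc.Grammar(['number', '+', pyyacc.EOF])
    g.add_production('expr', ['expr', '+', 'number'],
                     lambda e, op, n: e + int(n.val))  # op and n are Tokens now
    g.add_production('expr', ['number'], lambda n: int(n.val))
    g.start_symbol = 'expr'
    p = g.genParser()

    toks = [Token('number', '1'), Token('+', '+'), Token('number', '2')]
    print(p.parse(ListLexer(toks)))  # expected to print 3
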
--- a/python/target/arminstructionselector.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/python/target/arminstructionselector.py	Tue Dec 31 12:38:15 2013 +0100
@@ -12,9 +12,7 @@
 class ArmInstructionSelector(InstructionSelector):
     """ Instruction selector for the arm architecture """
     def munchExpr(self, e):
-        if isinstance(e, ir.Alloc):
-            return 0
-        elif isinstance(e, ir.Binop) and e.operation == '+' and \
+        if isinstance(e, ir.Binop) and e.operation == '+' and \
             isinstance(e.b, ir.Const) and e.b.value < 8:
             a = self.munchExpr(e.a)
             d = self.newTmp()
@@ -142,4 +140,4 @@
             raise NotImplementedError('Stmt --> {}'.format(s))
 
     def move(self, dst, src):
-        self.emit(Mov2, src=[src], dst=[dst])
+        self.emit(Mov2, src=[src], dst=[dst], ismove=True)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/tree.py	Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,8 @@
+
+class Tree:
+    def __init__(self, name, *args):
+        self.name = name
+        self.children = args
+
+    def __repr__(self):
+        return 'Tree({}, {})'.format(self.name, self.children)
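
Tree is the value the burg front end is expected to build: the tree rules in burg.x construct Tree(...) nodes, and a pattern such as the one in arm.pb above describes a tree of this shape. A tiny illustration (the node names are placeholders):

    from tree import Tree

    # A pattern like ADD32(reg, const) corresponds to a two-child tree:
    t = Tree('ADD32', Tree('reg'), Tree('const'))
    print(t.name)           # ADD32
    print(len(t.children))  # 2
    print(t)                # Tree(ADD32, (Tree(reg, ()), Tree(const, ())))
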
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/yacc.py	Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,246 @@
+#!/usr/bin/python
+
+""" Parser generator utility """
+
+import argparse
+import re
+import sys
+import datetime
+from pyyacc import Grammar, print_grammar
+
+
+class XaccLexer:
+    def __init__(self):
+        pass
+
+    def feed(self, txt):
+        # Create a regular expression for the lexing part:
+        tok_spec = [
+           ('ID', r'[A-Za-z][A-Za-z\d_]*'),
+           ('STRING', r"'[^']*'"),
+           ('BRACEDCODE', r"\{[^\}]*\}"),
+           ('OTHER', r'[:;\|]'),
+           ('SKIP', r'[ ]')
+            ]
+        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+        gettok = re.compile(tok_re).match
+
+        lines = txt.split('\n')
+
+        def tokenize_line(line):
+            """ Generator that splits up a line into tokens """
+            mo = gettok(line)
+            pos = 0
+            while mo:
+                typ = mo.lastgroup
+                val = mo.group(typ)
+                if typ == 'ID':
+                    yield (typ, val)
+                elif typ == 'STRING':
+                    typ = 'ID'
+                    yield (typ, val[1:-1])
+                elif typ == 'OTHER':
+                    typ = val
+                    yield (typ, val)
+                elif typ == 'BRACEDCODE':
+                    yield (typ, val)
+                elif typ == 'SKIP':
+                    pass
+                else:
+                    raise NotImplementedError(str(typ))
+                pos = mo.end()
+                mo = gettok(line, pos)
+            if len(line) != pos:
+                raise ParseError('Lex fault at {}'.format(line))
+
+        def tokenize():
+            section = 0
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    continue  # Skip empty lines
+                if line == '%%':
+                    section += 1
+                    yield('%%', '%%')
+                    continue
+                if section == 0:
+                    if line.startswith('%tokens'):
+                        yield('%tokens', '%tokens')
+                        yield from tokenize_line(line[7:])
+                    else:
+                        yield ('HEADER', line)
+                elif section == 1:
+                    yield from tokenize_line(line)
+            yield ('eof', 'eof')
+        self.tokens = tokenize()
+        self.token = self.tokens.__next__()
+
+    def next_token(self):
+        t = self.token
+        if t[0] != 'eof':
+            self.token = self.tokens.__next__()
+        #print(t)
+        return t
+
+
+class ParseError(Exception):
+    pass
+
+
+class XaccParser:
+    """ Implements a recursive descent parser to parse grammar rules.
+        We could have made an generated parser, but that would yield a chicken
+        egg issue.
+    """
+    def __init__(self, lexer):
+        self.lexer = lexer
+
+    @property
+    def Peak(self):
+        """ Sneak peak to the next token in line """
+        return self.lexer.token[0]
+
+    def next_token(self):
+        """ Take the next token """
+        return self.lexer.next_token()
+
+    def consume(self, typ):
+        """ Eat next token of type typ or raise an exception """
+        if self.Peak == typ:
+            return self.next_token()
+        else:
+            raise ParseError('Expected {}, but got {}'.format(typ, self.Peak))
+
+    def has_consumed(self, typ):
+        """ Consume typ if possible and return true if so """
+        if self.Peak == typ:
+            self.consume(typ)
+            return True
+        return False
+
+    def parse_grammar(self):
+        """ Entry parse function into recursive descent parser """
+        # parse header
+        headers = []
+        terminals = []
+        while self.Peak in ['HEADER', '%tokens']:
+            if self.Peak == '%tokens':
+                self.consume('%tokens')
+                while self.Peak == 'ID':
+                    terminals.append(self.consume('ID')[1])
+            else:
+                headers.append(self.consume('HEADER')[1])
+        self.consume('%%')
+        self.grammar = Grammar(terminals)
+        while self.Peak != 'eof':
+            self.parse_rule()
+        return self.grammar
+
+    def parse_symbol(self):
+        return self.consume('ID')[1]
+
+    def parse_rhs(self):
+        symbols = []
+        while self.Peak not in [';', 'BRACEDCODE', '|']:
+            symbols.append(self.parse_symbol())
+        if self.Peak == 'BRACEDCODE':
+            action = self.consume('BRACEDCODE')[1]
+            action = action[1:-1].strip()
+        else:
+            action = None
+        return symbols, action
+
+    def parse_rule(self):
+        p = self.parse_symbol()
+        self.consume(':')
+        symbols, action = self.parse_rhs()
+        self.grammar.add_production(p, symbols, action)
+        while self.has_consumed('|'):
+            symbols, action = self.parse_rhs()
+            self.grammar.add_production(p, symbols, action)
+        self.consume(';')
+
+
+class XaccGenerator:
+    """ Generator that writes generated parser to file """
+    def __init__(self):
+        pass
+
+    def generate(self, grammar, output_file):
+        print_grammar(grammar)
+        self.grammar = grammar
+        self.action_table, self.goto_table = grammar.doGenerate()
+        self.generate_python_script(output_file)
+
+    def generate_python_script(self, f):
+        """ Generate python script with the parser table """
+        print('#!/usr/bin/python', file=f)
+        stamp = datetime.datetime.now().ctime()
+        print('""" Automatically generated by xacc on {} """'.format(stamp), file=f)
+        print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar', file=f)
+        print('from ppci import Token', file=f)
+        print(file=f)
+        print('class Parser(LRParser):', file=f)
+        print('    def __init__(self):', file=f)
+        # Generate rules:
+        print('        self.start_symbol = "{}"'.format(self.grammar.start_symbol), file=f)
+        print('        self.grammar = Grammar({})'.format(self.grammar.terminals), file=f)
+        for rule_number, rule in enumerate(self.grammar.productions):
+            rule.f_name = 'action_{}_{}'.format(rule.name, rule_number)
+            print('        self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name), file=f)
+        # Fill action table:
+        print('        self.action_table = {}', file=f)
+        for state in self.action_table:
+            action = self.action_table[state]
+            print('        self.action_table[{}] = {}'.format(state, action), file=f)
+        print('', file=f)
+
+        # Fill goto table:
+        print('        self.goto_table = {}', file=f)
+        for gt in self.goto_table:
+            to = self.goto_table[gt]
+            print('        self.goto_table[{}] = {}'.format(gt, to), file=f)
+        print('', file=f)
+
+        # Generate a function for each action:
+        for rule in self.grammar.productions:
+            M = len(rule.symbols)
+            args = ', '.join('arg{}'.format(n + 1) for n in range(M))
+            print('    def {}(self, {}):'.format(rule.f_name, args), file=f)
+            if rule.f == None:
+                semantics = 'pass'
+            else:
+                semantics = str(rule.f)
+                if semantics.strip() == '':
+                    semantics = 'pass'
+            for n in range(M):
+                semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1))
+            print('        {}'.format(semantics), file=f)
+            print('', file=f)
+
+
+def main():
+    # Parse arguments:
+    parser = argparse.ArgumentParser(description='xacc compiler compiler')
+    parser.add_argument('source', type=argparse.FileType('r'), \
+      help='the parser specification')
+    parser.add_argument('-o', '--output', type=argparse.FileType('w'), \
+        default=sys.stdout)
+    args = parser.parse_args()
+    src = args.source.read()
+    args.source.close()
+
+    # Construction of generator parts:
+    lexer = XaccLexer()
+    parser = XaccParser(lexer)
+    generator = XaccGenerator()
+
+    # Sequence source through the generator parts:
+    lexer.feed(src)
+    grammar = parser.parse_grammar()
+    generator.generate(grammar, args.output)
+    args.output.close()
+
+
+if __name__ == '__main__':
+    main()
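
yacc.py is intended to be run over a .x specification (such as burg.x or test/expression.x) and to write out an importable parser module; pyburg.py above already assumes a burg_parser module produced this way. The braced semantic actions are copied into the generated methods with $n placeholders rewritten to argument names. A small sketch of that substitution step (hand-written, mirroring generate_python_script):

    # How a braced action from burg.x becomes a method body:
    semantics = 'return Tree($1, $3)'     # action of: tree: id '(' tree ')'
    M = 4                                 # number of symbols on the right-hand side
    for n in range(M):
        semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1))
    print(semantics)                      # return Tree(arg1, arg3)
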
--- a/python/zcc.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/python/zcc.py	Tue Dec 31 12:38:15 2013 +0100
@@ -118,7 +118,6 @@
 
 # Parse arguments:
 parser = argparse.ArgumentParser(description='lcfos Compiler')
-# Input:
 parser.add_argument('source', type=argparse.FileType('r'), \
   help='the source file to build', nargs="+")
 parser.add_argument('-i', '--imp', type=argparse.FileType('r'), \
--- a/readme.rst	Sun Dec 22 15:50:59 2013 +0100
+++ b/readme.rst	Tue Dec 31 12:38:15 2013 +0100
@@ -30,6 +30,13 @@
     python ide.py
 
 
+Source code
+-----------
+
+Sources are located here:
+https://bitbucket.org/windel/lcfos
+
+
 Docs
 ----
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/expression.x	Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,16 @@
+
+%tokens '+' number '(' ')' '*'
+%%
+
+input: expression {return $1};
+
+expression: term { return $1 }
+          | expression '+' term {};
+
+term: factor { return $1 }
+    | term '*' factor { return $1 * $3 };
+
+factor: '(' expression ')'
+      | number {return $1.val};
+
+
--- a/test/testasm.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/test/testasm.py	Tue Dec 31 12:38:15 2013 +0100
@@ -13,20 +13,20 @@
 
     def testLex0(self):
         """ Check if the lexer is OK """
-        asmline, toks = 'mov rax, rbx ', ['ID', 'ID', ',', 'ID']
+        asmline, toks = 'mov rax, rbx ', ['ID', 'ID', ',', 'ID', 'EOF']
         self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
 
     def testLex1(self):
         """ Test if lexer correctly maps some tokens """
-        asmline, toks = 'lab1: mov rax, rbx ', ['ID', ':', 'ID', 'ID', ',', 'ID']
-        self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
-
-    def testLex1(self):
-        """ Test if lexer correctly maps some tokens """
-        asmline, toks = 'mov 3.13 0xC 13', ['ID', 'REAL', 'NUMBER', 'NUMBER']
+        asmline, toks = 'lab1: mov rax, rbx ', ['ID', ':', 'ID', 'ID', ',', 'ID', 'EOF']
         self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
 
     def testLex2(self):
+        """ Test if lexer correctly maps some tokens """
+        asmline, toks = 'mov 3.13 0xC 13', ['ID', 'REAL', 'NUMBER', 'NUMBER', 'EOF']
+        self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
+
+    def testLex3(self):
         """ Test if lexer fails on a token that is invalid """
         asmline = '0z4: mov rax, rbx $ '
         with self.assertRaises(CompilerError):
@@ -81,7 +81,7 @@
     def testParse6(self):
         # A line can be empty
         self.a.parse_line('')
-    
+
 
 class AssemblerOtherTestCase(unittest.TestCase):
     def testWithoutTarget(self):
--- a/test/testpyy.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/test/testpyy.py	Tue Dec 31 12:38:15 2013 +0100
@@ -1,10 +1,25 @@
-import unittest, pprint
-from pyyacc import Grammar, Item, ParserGenerationException, ParserException, EPS, EOF
+import unittest
+from pyyacc import Grammar, Item, ParserGenerationException, ParserException
+from pyyacc import EPS, EOF
 from ppci import Token
 
-def genTokens(lst):
-    for t in lst:
-        yield Token(t, t)
+
+class genTokens:
+    def __init__(self, lst):
+        def tokGen():
+            for t in lst:
+                yield Token(t, t)
+            while True:
+                yield Token(EOF, EOF)
+        self.tokens = tokGen()
+        self.token = self.tokens.__next__()
+
+    def next_token(self):
+        t = self.token
+        if t.typ != EOF:
+            self.token = self.tokens.__next__()
+        return t
+
 
 class testLR(unittest.TestCase):
     """ Test basic LR(1) parser generator constructs """
@@ -25,6 +40,7 @@
         p = g.genParser()
         # 4. feed input:
         p.parse(tokens)
+
     def testReduceReduceConflict(self):
         """ Check if a reduce-reduce conflict is detected """
         # Define a grammar with an obvious reduce-reduce conflict:
@@ -37,6 +53,7 @@
         g.start_symbol = 'goal'
         with self.assertRaises(ParserGenerationException):
             p = g.genParser()
+
     def testShiftReduceConflict(self):
         """ Must be handled automatically by doing shift """
         g = Grammar([EOF, 'if', 'then', 'else', 'ass'])
@@ -60,6 +77,7 @@
         g.start_symbol = 'goal'
         with self.assertRaises(ParserGenerationException):
             g.genParser()
+
     def testRedefineTerminal(self):
         """ Test correct behavior when a terminal is redefined """
         g = Grammar([EOF, 'b', 'c'])
@@ -69,6 +87,7 @@
         g.add_production('a', ['c'])
         g.start_symbol = 'goal'
         g.genParser()
+
     def testEmpty(self):
         """ Test empty token stream """
         g = Grammar([','])
@@ -78,7 +97,7 @@
         tokens = genTokens([])
         with self.assertRaises(ParserException):
             p.parse(tokens)
-        
+
     def testEps(self):
         """ Test epsilon terminal """
         g = Grammar(['a', 'b'])
@@ -109,9 +128,9 @@
         self.cb_called = False
         def cb(a, c, b):
             self.cb_called = True
-            self.assertEqual(a, 'a')
-            self.assertEqual(b, 'b')
-            self.assertEqual(c, 'c')
+            self.assertEqual(a.val, 'a')
+            self.assertEqual(b.val, 'b')
+            self.assertEqual(c.val, 'c')
         g = Grammar(['a', 'b', 'c'])
         g.add_production('goal', ['a', 'c', 'b'], cb)
         g.start_symbol = 'goal'
@@ -147,7 +166,8 @@
         # Must result in 12 sets:
         self.assertEqual(len(s), 24)
 
-class testPG(unittest.TestCase):
+
+class testParserGenerator(unittest.TestCase):
     """ Tests several parts of the parser generator """
     def setUp(self):
         g = Grammar(['(', ')'])
@@ -216,5 +236,3 @@
 
 if __name__ == '__main__':
     unittest.main()
-
-