changeset 194:b01429a5d695

Fixed test
author Windel Bouwman
date Wed, 29 May 2013 22:36:37 +0200
parents f091e7d70996
children 37ac6c016e0f
files python/c3/builder.py python/c3/lexer.py python/c3/semantics.py python/libasm.py python/ppci/common.py python/ppci/errors.py python/pyyacc.py python/testasm.py python/testc3.py python/testpyy.py
diffstat 10 files changed, 146 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/python/c3/builder.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/c3/builder.py	Wed May 29 22:36:37 2013 +0200
@@ -22,6 +22,8 @@
             return
       if not self.tc.checkPackage(pkg):
             return
+
+      # Only return ircode when everything is OK
       ircode = self.cg.gencode(pkg)
       return ircode
 
--- a/python/c3/lexer.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/c3/lexer.py	Wed May 29 22:36:37 2013 +0200
@@ -67,5 +67,6 @@
        col = pos - line_start
        pos = line
        raise CompilerError('Unexpected character {0}'.format(s[pos]), pos)
-     yield Token('END', '', line)
+     loc = SourceLocation(line, 0, 0)
+     yield Token('END', '', loc)
 
--- a/python/c3/semantics.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/c3/semantics.py	Wed May 29 22:36:37 2013 +0200
@@ -1,5 +1,6 @@
 from . import astnodes
 from .scope import Scope, topScope
+from ppci import CompilerError
 
 class Semantics:
    """ This class constructs the AST from parser input """
@@ -12,7 +13,7 @@
    def addSymbol(self, s):
       if self.curScope.hasSymbol(s.name):
          msg = 'Redefinition of {0}'.format(s.name)
-         self.diag.error(msg, s.loc)
+         raise CompilerError(msg, s.loc)
       else:
          self.curScope.addSymbol(s)
    def handlePackage(self, name, loc):
--- a/python/libasm.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/libasm.py	Wed May 29 22:36:37 2013 +0200
@@ -79,14 +79,19 @@
       return self.curTok
 
 class Assembler:
+    def handle_ins(self, id0):
+        self.ins = id0
+    def p_label(self, lname, cn):
+        self.label = lname
+
     def __init__(self):
         # Construct a parser given a grammar:
         g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', pyyacc.EPS])
 
         g.add_production('asmline', ['label', 'instruction', 'operands'])
-        g.add_production('label', ['ID', ':'])
-        g.add_production('label', [pyyacc.EPS])   # label is optional
-        g.add_production('instruction', ['ID'])
+        g.add_production('asmline', ['instruction', 'operands'])
+        g.add_production('label', ['ID', ':'], self.p_label)
+        g.add_production('instruction', ['ID'], self.handle_ins)
         g.add_production('operands', ['operand'])
         g.add_production('operands', ['operands', ',', 'operand'])
         g.add_production('operand', ['expression'])
@@ -102,22 +107,28 @@
         g.start_symbol = 'asmline'
 
         self.p = g.genParser()
+    def parse_line(self, line):
+        """ Parse line into asm AST """
+        tokens = tokenize(line)
+        self.p.parse(tokens)
+        aast = 1 # TODO
+        return aast
 
     def assemble(self, asmsrc):
-      lxr = Lexer(asmsrc)
-      prsr = Parser(lxr)
-      instructions = prsr.parse()
-      return instructions
+        lxr = Lexer(asmsrc)
+        prsr = Parser(lxr)
+        instructions = prsr.parse()
+        return instructions
 
     def assembleLine(self, line):
         """ 
             Assemble a single source line. 
             Do not take newlines into account 
         """
-        tokens = tokenize(line)
-        self.p.parse(tokens)
+        aast = self.parseLine(line)
+        self.assemble_aast(aast)
 
-    def assembleAst(self, at):
+    def assemble_aast(self, at):
         """ Assemble a parsed asm line """
         pass
 
--- a/python/ppci/common.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/ppci/common.py	Wed May 29 22:36:37 2013 +0200
@@ -1,7 +1,16 @@
 from collections import namedtuple
 
 # Token is used in the lexical analyzer:
-Token = namedtuple('Token', 'typ val loc')
+class Token:
+    def __init__(self, typ, val, loc=None):
+        self.typ = typ
+        self.val = val
+        if loc is None:
+            loc = SourceLocation(0, 0, 0)
+        assert type(loc) is SourceLocation
+        self.loc = loc
+    def __repr__(self):
+        return 'Token({0}, {1})'.format(self.typ, self.val)
 
 class SourceLocation:
    def __init__(self, row, col, ln):
--- a/python/ppci/errors.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/ppci/errors.py	Wed May 29 22:36:37 2013 +0200
@@ -3,10 +3,13 @@
    Diagnostic utils
 """
 
+from . import SourceLocation
+
 class CompilerError(Exception):
     def __init__(self, msg, loc):
         self.msg = msg
         self.loc = loc
+        assert type(loc) is SourceLocation, '{0} must be SourceLocation'.format(type(loc))
     def __repr__(self):
         return 'Compilererror {0} at row {1}'.format(self.msg, self.loc.row)
 
--- a/python/pyyacc.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/pyyacc.py	Wed May 29 22:36:37 2013 +0200
@@ -28,14 +28,15 @@
         self.productions = []
         self._first = None  # Cached first set
 
-    def add_production(self, name, symbols):
+    def add_production(self, name, symbols, f=None):
         """ Add a production rule to the grammar """
-        production = Production(name, symbols)
+        production = Production(name, symbols, f)
         self.productions.append(production)
         if name in self.terminals:
             raise ParserGenerationException("Cannot redefine terminal {0}".format(name))
         if not name in self.nonterminals:
             self.nonterminals.append(name)
+        self._first = None  # Invalidate cached version
 
     def productionsForName(self, name):
         """ Retrieve all productions for a non terminal """
@@ -156,7 +157,7 @@
         """ Checks no symbols are undefined """
         for production in self.productions:
             for symbol in production.symbols:
-                if symbol not in self.Symbols:
+                if symbol not in self.Symbols + [EPS]:
                     raise ParserGenerationException('Symbol {0} undefined'.format(symbol))
 
 
@@ -169,13 +170,31 @@
 
         # First generate all item sets by using the nextItemset function:
         states, transitions = self.genCanonicalSet(iis)
+
+        def action_str(act):
+            a, p = act
+            if a is SHIFT:
+                return 'Shift {0}'.format(0)
+            elif a is REDUCE:
+                return 'Reduce {0}'.format(p)
+            return 'Other'
         
         def setAction(state, t, action):
             key = (state, t)
             if key in action_table:
                 action2 = action_table[key]
                 if action != action2:
-                    raise ParserGenerationException('LR construction conflict')
+                    if (action2[0] == REDUCE) and (action[0] == SHIFT):
+                        # Automatically resolve and do the shift action!
+                        # Simple, but almost always what you want!!
+                        action_table[key] = action
+                    else:
+                        if (action2[0] == SHIFT) and (action[0] == REDUCE):
+                            pass
+                        else:
+                            a1 = action_str(action)
+                            a2 = action_str(action2)
+                            raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))
             else:
                 action_table[key] = action
 
@@ -204,9 +223,10 @@
 
 class Production:
     """ Production rule for a grammar """
-    def __init__(self, name, symbols):
+    def __init__(self, name, symbols, f=None):
         self.name = name
         self.symbols = symbols
+        self.f = f
 
     def __repr__(self):
         return '{0} -> {1}'.format(self.name, self.symbols)
@@ -287,18 +307,26 @@
         """ Parse an iterable with tokens """
         assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks))
         stack = [0]
-        look_ahead = toks.__next__()
+        try:
+            look_ahead = toks.__next__()
+        except StopIteration:
+            look_ahead = Token(EOF, EOF)
         assert type(look_ahead) is Token
         while True:
             state = stack[-1]   # top of stack
             key = (state, look_ahead.typ)
             if not key in self.action_table:
-                raise Exception('Error parsing at character {0}'.format(look_ahead))
+                raise ParserException('Error parsing at character {0}'.format(look_ahead))
             action, param = self.action_table[key]
             if action == REDUCE:
+                #print('reduce', param)
+                f_args = []
                 for s in param.symbols:
                     stack.pop()
                     stack.pop()
+                    f_args.append(0)
+                if param.f:
+                    param.f(*f_args)
                 state = stack[-1]
                 stack.append(param.name)
                 stack.append(self.goto_table[(state, param.name)])
@@ -308,34 +336,8 @@
                 try:
                     look_ahead = toks.__next__()
                 except StopIteration:
-                    look_ahead = Token(EOF, EOF, 0)
+                    look_ahead = Token(EOF, EOF)
                 assert type(look_ahead) is Token
             elif action == ACCEPT:
                 break
 
-
-def testSimpleGrammar():
-    # 1. define a simple grammar:
-    g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
-    g.add_production('input', ['expression'])
-    g.add_production('expression', ['term'])
-    g.add_production('expression', ['expression', '+', 'term'])
-    g.add_production('term', ['factor'])
-    g.add_production('term', ['term', '*', 'factor'])
-    g.add_production('factor', ['(', 'expression', ')'])
-    g.add_production('factor', ['identifier'])
-    g.start_symbol = 'input'
-    # 2. define input:
-    tokens = ['identifier', '+', 'identifier', '+', 'identifier']
-    # 3. build parser:
-    p = g.genParser()
-    # 4. feed input:
-    def genTokens(lst):
-        for t in lst:
-            yield Token(t, t, 0)
-    p.parse(genTokens(tokens))
-
-
-if __name__ == '__main__':
-    testSimpleGrammar()
-
--- a/python/testasm.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/testasm.py	Wed May 29 22:36:37 2013 +0200
@@ -35,12 +35,18 @@
     def testParse(self):
         asmline = 'lab1: mov rax, rbx'
         a = libasm.Assembler()
-        a.assembleLine(asmline)
+        a.parse_line(asmline)
 
     def testParse2(self):
         asmline = 'a: mov rax, [rbx + 2]'
         a = libasm.Assembler()
-        a.assembleLine(asmline)
+        a.parse_line(asmline)
+
+    def testParse3(self):
+        # A label must be optional:
+        asmline = 'mov rax, 1'
+        a = libasm.Assembler()
+        a.parse_line(asmline)
 
 if __name__ == '__main__':
     unittest.main()
--- a/python/testc3.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/testc3.py	Wed May 29 22:36:37 2013 +0200
@@ -136,20 +136,23 @@
          }
       """
       self.diag.clear()
-      ir = self.builder.build(snippet)
+      ircode = self.builder.build(snippet)
       self.assertEqual(len(self.diag.diags), 3)
       self.assertEqual(self.diag.diags[0].loc.row, 8)
       self.assertEqual(self.diag.diags[1].loc.row, 9)
       self.assertEqual(self.diag.diags[2].loc.row, 10)
-      self.assertFalse(ir)
+      self.assertFalse(ircode)
    def testEmpty(self):
       snippet = """
       package A
       """
-      self.builder.build(snippet)
+      ircode = self.builder.build(snippet)
+      self.assertFalse(ircode)
    def testEmpty2(self):
       snippet = ""
-      self.builder.build(snippet)
+      self.diag.clear()
+      ircode = self.builder.build(snippet)
+      self.assertFalse(ircode)
    def testRedefine(self):
       snippet = """
       package test;
@@ -158,7 +161,8 @@
       var int a;
       """
       self.diag.clear()
-      self.builder.build(snippet)
+      ircode = self.builder.build(snippet)
+      self.assertFalse(ircode)
       self.assertEqual(len(self.diag.diags), 1)
       self.assertEqual(self.diag.diags[0].loc.row, 5)
    def testWhile(self):
--- a/python/testpyy.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/testpyy.py	Wed May 29 22:36:37 2013 +0200
@@ -1,18 +1,16 @@
 import unittest, pprint
-from pyyacc import Grammar, Item, EOF, ParserGenerationException
+from pyyacc import Grammar, Item, ParserGenerationException, ParserException, EPS, EOF
 from ppci import Token
 
 def genTokens(lst):
     for t in lst:
-        yield Token(t, t, 0)
+        yield Token(t, t)
 
 class testLR(unittest.TestCase):
-    def setUp(self):
-        pass
-
+    """ Test basic LR(1) parser generator constructs """
     def testSimpleGrammar(self):
         # 1. define a simple grammar:
-        g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
+        g = Grammar(['identifier', '(', ')', '+', '*'])
         g.add_production('input', ['expression'])
         g.add_production('expression', ['term'])
         g.add_production('expression', ['expression', '+', 'term'])
@@ -30,7 +28,7 @@
     def testReduceReduceConflict(self):
         """ Check if a reduce-reduce conflict is detected """
         # Define a grammar with an obvious reduce-reduce conflict:
-        g = Grammar([EOF, 'id'])
+        g = Grammar(['id'])
         g.add_production('goal', ['a'])
         g.add_production('a', ['b'])
         g.add_production('a', ['c'])
@@ -40,16 +38,22 @@
         with self.assertRaises(ParserGenerationException):
             p = g.genParser()
     def testShiftReduceConflict(self):
-        g = Grammar([EOF, 'if', 'then', 'else'])
-        g.add_production('if_stmt', ['if', 'then'])
-        g.add_production('if_stmt', ['if', 'then', 'else'])
-        g.add_production('stmt', ['if_stmt', 'else'])
+        """ Must be handled automatically by doing shift """
+        g = Grammar([EOF, 'if', 'then', 'else', 'ass'])
+        # Ambiguous grammar:
+        g.add_production('if_stmt', ['if', 'then', 'stmt'])
+        g.add_production('if_stmt', ['if', 'then', 'stmt', 'else', 'stmt'])
+        g.add_production('stmt', ['if_stmt'])
+        g.add_production('stmt', ['ass'])
         g.start_symbol = 'stmt'
-        with self.assertRaises(ParserGenerationException):
-            g.genParser()
+        p = g.genParser()
+        # Ambiguous program:
+        tokens = genTokens(['if', 'then','if', 'then', 'ass', 'else', 'ass' ])
+        p.parse(tokens)
+
     def testUndefinedTerminal(self):
         """ Test correct behavior when a terminal is undefined """
-        g = Grammar([EOF, 'b'])
+        g = Grammar(['b'])
         g.add_production('goal', ['a'])
         g.add_production('a', ['b'])
         g.add_production('a', ['c'])
@@ -65,6 +69,41 @@
         g.add_production('a', ['c'])
         g.start_symbol = 'goal'
         g.genParser()
+    def testEmpty(self):
+        """ Test empty token stream """
+        g = Grammar([','])
+        g.add_production('input', [','])
+        g.start_symbol = 'input'
+        p = g.genParser()
+        tokens = genTokens([])
+        with self.assertRaises(ParserException):
+            p.parse(tokens)
+        
+    def testEps(self):
+        """ Test epsilon terminal """
+        g = Grammar(['a', 'b'])
+        g.add_production('input', ['optional_a', 'b'])
+        g.add_production('optional_a', ['a'])
+        g.add_production('optional_a', [])
+        g.start_symbol = 'input'
+        p = g.genParser()
+        tokens = genTokens(['b'])
+        p.parse(tokens)
+
+    def testEps2(self):
+        g = Grammar(['id', ':'])
+        g.add_production('input', ['opt_lab', 'ins', 'op1'])
+        g.add_production('input', ['ins', 'op1'])
+        g.add_production('opt_lab', ['id', ':'])
+        g.add_production('ins', ['id'])
+        g.add_production('op1', ['id'])
+        g.start_symbol = 'input'
+        p = g.genParser()
+        tokens = genTokens(['id', ':', 'id', 'id'])   # i.e. "lab_0: inc rax" 
+        p.parse(tokens)
+        tokens = genTokens(['id', 'id'])   # i.e. "inc rax"
+        p.parse(tokens)
+
 
 class testExpressionGrammar(unittest.TestCase):
     def setUp(self):