changeset 195:37ac6c016e0f

Expanded asm subsystem
author Windel Bouwman
date Fri, 31 May 2013 21:06:44 +0200
parents b01429a5d695
children ec2b423cdbea
files python/libasm.py python/pyyacc.py python/testasm.py python/testpyy.py
diffstat 4 files changed, 184 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/python/libasm.py	Wed May 29 22:36:37 2013 +0200
+++ b/python/libasm.py	Fri May 31 21:06:44 2013 +0200
@@ -78,35 +78,118 @@
    def Peak(self):
       return self.curTok
 
-class Assembler:
-    def handle_ins(self, id0):
-        self.ins = id0
-    def p_label(self, lname, cn):
-        self.label = lname
+class ANode:
+    def __eq__(self, other):
+        return self.__repr__() == other.__repr__()
+
+class ALabel(ANode):
+    def __init__(self, name):
+        self.name = name
+    def __repr__(self):
+        return '{0}:'.format(self.name)
+
+class AInstruction(ANode):
+    def __init__(self, opcode, operands):
+        self.opcode = opcode
+        self.operands = operands
+    def __repr__(self):
+        ops = ', '.join(map(str, self.operands))
+        return '{0} {1}'.format(self.opcode, ops)
+
+class AExpression(ANode):
+    def __add__(self, other):
+        return ABinop('+', self, other)
+    def __mul__(self, other):
+        return ABinop('*', self, other)
 
+class ABinop(AExpression):
+    def __init__(self, op, arg1, arg2):
+        self.op = op
+        self.arg1 = arg1
+        self.arg2 = arg2
+    def __repr__(self):
+        return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2)
+
+class AUnop(AExpression):
+    def __init__(self, op, arg):
+        self.op = op
+        self.arg = arg
+    def __repr__(self):
+        return '{0} {1}'.format(self.op, self.arg)
+
+class ASymbol(AExpression):
+    def __init__(self, name):
+        self.name = name
+    def __repr__(self):
+        return self.name
+
+class ANumber(AExpression):
+    def __init__(self, n):
+        self.n = n
+    def __repr__(self):
+        return '{0}'.format(self.n)
+
+class Assembler:
     def __init__(self):
+        self.output = []
         # Construct a parser given a grammar:
-        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', pyyacc.EPS])
-
-        g.add_production('asmline', ['label', 'instruction', 'operands'])
-        g.add_production('asmline', ['instruction', 'operands'])
+        ident = lambda x: x   # Identity helper function
+        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS])
+        g.add_production('asmline', ['label', 'instruction'])
+        g.add_production('asmline', ['instruction'])
+        g.add_production('asmline', ['label'])
         g.add_production('label', ['ID', ':'], self.p_label)
-        g.add_production('instruction', ['ID'], self.handle_ins)
-        g.add_production('operands', ['operand'])
-        g.add_production('operands', ['operands', ',', 'operand'])
-        g.add_production('operand', ['expression'])
-        g.add_production('operand', ['[', 'expression', ']'])
-        g.add_production('expression', ['term'])
-        g.add_production('expression', ['expression', 'addop', 'term'])
-        g.add_production('addop', ['-'])
-        g.add_production('addop', ['+'])
-        g.add_production('term', ['factor'])
-        g.add_production('factor', ['ID'])
-        g.add_production('factor', ['NUMBER'])
-        # TODO: expand grammar
+        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
+        g.add_production('instruction', ['opcode'], self.p_ins_2)
+        g.add_production('opcode', ['ID'], ident)
+        g.add_production('operands', ['operand'], self.p_operands_1)
+        g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
+        g.add_production('operand', ['expression'], ident)
+        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
+        g.add_production('expression', ['term'], ident)
+        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
+        g.add_production('addop', ['-'], ident)
+        g.add_production('addop', ['+'], ident)
+        g.add_production('mulop', ['*'], ident)
+        g.add_production('term', ['factor'], ident)
+        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
+        g.add_production('factor', ['ID'], self.p_symbol)
+        g.add_production('factor', ['NUMBER'], self.p_number)
         g.start_symbol = 'asmline'
+        self.p = g.genParser()
 
-        self.p = g.genParser()
+    # Parser handlers:
+    def p_ins_1(self, opc, ops):
+        ins = AInstruction(opc, ops)
+        self.emit(ins)
+    def p_ins_2(self, opc):
+        self.p_ins_1(opc, [])
+    def p_operands_1(self, op1):
+        return [op1]
+    def p_operands_2(self, ops, comma, op2):
+        assert type(ops) is list
+        ops.append(op2)
+        return ops
+    def p_mem_op(self, brace_open, exp, brace_close):
+        return AUnop('[]', exp)
+    def handle_ins(self, id0, operands):
+        ins = AInstruction(id0)
+        self.emit(ins)
+    def p_label(self, lname, cn):
+        lab = ALabel(lname)
+        self.emit(lab)
+    def p_binop(self, exp1, op, exp2):
+        return ABinop(op, exp1, exp2)
+    def p_symbol(self, name):
+        return ASymbol(name)
+    def p_number(self, n):
+        n = int(n)
+        return ANumber(n)
+
+    # Top level:
+    def emit(self, a):
+        self.output.append(a)
+
     def parse_line(self, line):
         """ Parse line into asm AST """
         tokens = tokenize(line)
--- a/python/pyyacc.py	Wed May 29 22:36:37 2013 +0200
+++ b/python/pyyacc.py	Fri May 31 21:06:44 2013 +0200
@@ -209,7 +209,7 @@
                 if item.IsReduce:
                     if item.production.name == self.start_symbol and item.look_ahead == EOF:
                         # Rule 3: accept:
-                        setAction(states.index(state), item.look_ahead, (ACCEPT, None))
+                        setAction(states.index(state), item.look_ahead, (ACCEPT, item.production))
                     else:
                         # Rule 2, reduce item:
                         setAction(states.index(state), item.look_ahead, (REDUCE, item.production))
@@ -218,7 +218,7 @@
                 if key in transitions:
                     goto_table[key] = transitions[key]
 
-        return LRParser(action_table, goto_table)
+        return LRParser(action_table, goto_table, self.start_symbol)
 
 
 class Production:
@@ -299,45 +299,65 @@
 
 class LRParser:
     """ LR parser """
-    def __init__(self, action_table, goto_table):
+    def __init__(self, action_table, goto_table, start_symbol):
         self.action_table = action_table
         self.goto_table = goto_table
+        self.start_symbol = start_symbol
 
     def parse(self, toks):
         """ Parse an iterable with tokens """
         assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks))
         stack = [0]
+        r_data_stack = []
         try:
             look_ahead = toks.__next__()
         except StopIteration:
             look_ahead = Token(EOF, EOF)
         assert type(look_ahead) is Token
-        while True:
+        # TODO: exit on this condition:
+        while stack != [0, self.start_symbol, 2222]:
+            #print(stack)
             state = stack[-1]   # top of stack
             key = (state, look_ahead.typ)
             if not key in self.action_table:
                 raise ParserException('Error parsing at character {0}'.format(look_ahead))
             action, param = self.action_table[key]
             if action == REDUCE:
-                #print('reduce', param)
                 f_args = []
                 for s in param.symbols:
                     stack.pop()
                     stack.pop()
-                    f_args.append(0)
+                    f_args.append(r_data_stack.pop())
+                f_args.reverse()
+                r_data = None
                 if param.f:
-                    param.f(*f_args)
+                    r_data = param.f(*f_args)
                 state = stack[-1]
                 stack.append(param.name)
                 stack.append(self.goto_table[(state, param.name)])
+                r_data_stack.append(r_data)
             elif action == SHIFT:
                 stack.append(look_ahead.typ)
                 stack.append(param)
+                r_data_stack.append(look_ahead.val)
                 try:
                     look_ahead = toks.__next__()
                 except StopIteration:
                     look_ahead = Token(EOF, EOF)
                 assert type(look_ahead) is Token
             elif action == ACCEPT:
+                # Pop last rule data off the stack:
+                f_args = []
+                for s in param.symbols:
+                    stack.pop()
+                    stack.pop()
+                    f_args.append(r_data_stack.pop())
+                f_args.reverse()
+                if param.f:
+                    param.f(*f_args)
+                # Break out!
                 break
+        # At exit, the stack must have length 1
+        # TODO: make sure that this holds:
+        #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) 
 
--- a/python/testasm.py	Wed May 29 22:36:37 2013 +0200
+++ b/python/testasm.py	Fri May 31 21:06:44 2013 +0200
@@ -3,6 +3,7 @@
 import unittest
 import libasm
 import ppci
+from libasm import AInstruction, ABinop, AUnop, ASymbol, ALabel, ANumber
 
 class AssemblerTestCase(unittest.TestCase):
     """ 
@@ -41,12 +42,45 @@
         asmline = 'a: mov rax, [rbx + 2]'
         a = libasm.Assembler()
         a.parse_line(asmline)
+        output = []
+        output.append(ALabel('a'))
+        output.append(AInstruction('mov', [ASymbol('rax'), AUnop('[]', ASymbol('rbx') + ANumber(2))]))
+        self.assertSequenceEqual(output, a.output)
 
     def testParse3(self):
         # A label must be optional:
         asmline = 'mov rax, 1'
         a = libasm.Assembler()
         a.parse_line(asmline)
+        output = []
+        output.append(AInstruction('mov', [ASymbol('rax'), ANumber(1)]))
+        self.assertSequenceEqual(output, a.output)
+
+    def testParse4(self):
+        # Test 3 operands:
+        asmline = 'add rax, [4*rbx + 22], rcx'
+        a = libasm.Assembler()
+        a.parse_line(asmline)
+        output = []
+        ops = []
+        ops.append(ASymbol('rax'))
+        ops.append(AUnop('[]', ANumber(4) * ASymbol('rbx') + ANumber(22)))
+        ops.append(ASymbol('rcx'))
+        output.append(AInstruction('add', ops))
+        self.assertSequenceEqual(output, a.output)
+
+    def testParse5(self):
+        # An instruction must be optional:
+        asmline = 'lab1:'
+        a = libasm.Assembler()
+        a.parse_line(asmline)
+        output = []
+        output.append(ALabel('lab1'))
+        self.assertSequenceEqual(output, a.output)
+    
+    def testX86(self):
+        # TODO
+        pass
 
 if __name__ == '__main__':
     unittest.main()
--- a/python/testpyy.py	Wed May 29 22:36:37 2013 +0200
+++ b/python/testpyy.py	Fri May 31 21:06:44 2013 +0200
@@ -48,7 +48,7 @@
         g.start_symbol = 'stmt'
         p = g.genParser()
         # Ambiguous program:
-        tokens = genTokens(['if', 'then','if', 'then', 'ass', 'else', 'ass' ])
+        tokens = genTokens(['if', 'then','if', 'then', 'ass', 'else', 'ass'])
         p.parse(tokens)
 
     def testUndefinedTerminal(self):
@@ -104,6 +104,22 @@
         tokens = genTokens(['id', 'id'])   # i.e. "inc rax"
         p.parse(tokens)
 
+    def test_cb(self):
+        """ Test callback of one rule and order of parameters """
+        self.cb_called = False
+        def cb(a, c, b):
+            self.cb_called = True
+            self.assertEqual(a, 'a')
+            self.assertEqual(b, 'b')
+            self.assertEqual(c, 'c')
+        g = Grammar(['a', 'b', 'c'])
+        g.add_production('goal', ['a', 'c', 'b'], cb)
+        g.start_symbol = 'goal'
+        p = g.genParser()
+        tokens = genTokens(['a', 'c', 'b'])
+        p.parse(tokens)
+        self.assertTrue(self.cb_called)
+
 
 class testExpressionGrammar(unittest.TestCase):
     def setUp(self):