Mercurial > lcfOS

--- a/python/asm.py	Sat Jun 01 11:56:16 2013 +0200
+++ b/python/asm.py	Sat Jun 01 13:11:22 2013 +0200
@@ -1,16 +1,8 @@
-import re
+import re, sys, argparse
 import pyyacc
 from ppci import Token, CompilerError, SourceLocation
-import sys, argparse


-# Different instruction sets:
-class InstructionSet:
-   pass
-
-class X86(InstructionSet):
-   pass
-
 # Generic assembler:
 keywords = ['global', 'db']

@@ -28,7 +20,8 @@
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
-       ('STRING', r"'.*?'")
+       ('STRING', r"'.*?'"),
+       ('COMMENT', r";.*")
      ]
      tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
      gettok = re.compile(tok_re).match
@@ -138,11 +131,15 @@
         self.output = []
         # Construct a parser given a grammar:
         ident = lambda x: x   # Identity helper function
-        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS])
-        g.add_production('asmline', ['label', 'instruction'])
-        g.add_production('asmline', ['instruction'])
-        g.add_production('asmline', ['label'])
-        g.add_production('asmline', [])
+        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT'])
+        g.add_production('asmline', ['asmline2'])
+        g.add_production('asmline', ['asmline2', 'COMMENT'])
+        g.add_production('asmline2', ['label', 'instruction'])
+        g.add_production('asmline2', ['instruction'])
+        g.add_production('asmline2', ['label'])
+        g.add_production('asmline2', [])
+        g.add_production('optcomment', [])
+        g.add_production('optcomment', ['COMMENT'])
         g.add_production('label', ['ID', ':'], self.p_label)
         g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
         g.add_production('instruction', ['opcode'], self.p_ins_2)
@@ -218,7 +215,7 @@
         self.parse_line(line)
         self.assemble_aast()

-    def assemble_aast(self, at):
+    def assemble_aast(self):
         """ Assemble a parsed asm line """
         pass
--- a/python/testasm.py	Sat Jun 01 11:56:16 2013 +0200
+++ b/python/testasm.py	Sat Jun 01 13:11:22 2013 +0200
@@ -1,46 +1,46 @@
 #!/usr/bin/python

 import unittest
-import libasm
 import ppci
-from libasm import AInstruction, ABinop, AUnop, ASymbol, ALabel, ANumber
+from asm import AInstruction, ABinop, AUnop, ASymbol, ALabel, ANumber, tokenize, Assembler

-class AssemblerTestCase(unittest.TestCase):
-    """
-        Tests the assembler parts
-    """
-    def setUp(self):
-        pass
+class AssemblerLexingCase(unittest.TestCase):
+    """ Tests the assembler's lexer """

     def testLex0(self):
         """ Check if the lexer is OK """
         asmline, toks = 'mov rax, rbx ', ['ID', 'ID', ',', 'ID']
-        self.assertSequenceEqual([tok.typ for tok in libasm.tokenize(asmline)], toks)
+        self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)

     def testLex1(self):
         """ Test if lexer correctly maps some tokens """
         asmline, toks = 'lab1: mov rax, rbx ', ['ID', ':', 'ID', 'ID', ',', 'ID']
-        self.assertSequenceEqual([tok.typ for tok in libasm.tokenize(asmline)], toks)
+        self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)

     def testLex1(self):
         """ Test if lexer correctly maps some tokens """
         asmline, toks = 'mov 3.13 0xC 13', ['ID', 'REAL', 'NUMBER', 'NUMBER']
-        self.assertSequenceEqual([tok.typ for tok in libasm.tokenize(asmline)], toks)
+        self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)

     def testLex2(self):
         """ Test if lexer fails on a token that is invalid """
         asmline = '0z4: mov rax, rbx $ '
         with self.assertRaises(ppci.CompilerError):
-            list(libasm.tokenize(asmline))
+            list(tokenize(asmline))
+
+class AssemblerParsingTestCase(unittest.TestCase):
+    """
+        Tests the assembler's line parser (Assembler.parse_line)
+    """

     def testParse(self):
         asmline = 'lab1: mov rax, rbx'
-        a = libasm.Assembler()
+        a = Assembler()
         a.parse_line(asmline)

     def testParse2(self):
         asmline = 'a: mov rax, [rbx + 2]'
-        a = libasm.Assembler()
+        a = Assembler()
         a.parse_line(asmline)
         output = []
         output.append(ALabel('a'))
@@ -50,7 +50,7 @@
     def testParse3(self):
         # A label must be optional:
         asmline = 'mov rax, 1'
-        a = libasm.Assembler()
+        a = Assembler()
         a.parse_line(asmline)
         output = []
         output.append(AInstruction('mov', [ASymbol('rax'), ANumber(1)]))
@@ -59,7 +59,7 @@
     def testParse4(self):
         # Test 3 operands:
         asmline = 'add rax, [4*rbx + 22], rcx'
-        a = libasm.Assembler()
+        a = Assembler()
         a.parse_line(asmline)
         output = []
         ops = []
@@ -72,7 +72,7 @@
     def testParse5(self):
         # An instruction must be optional:
         asmline = 'lab1:'
-        a = libasm.Assembler()
+        a = Assembler()
         a.parse_line(asmline)
         output = []
         output.append(ALabel('lab1'))
@@ -80,17 +80,18 @@

     def testParse6(self):
         # A line can be empty
-        a = libasm.Assembler()
+        a = Assembler()
         a.parse_line('')

+class AssemblerOtherTestCase(unittest.TestCase):
     def testX86(self):
-        testsrc = """
+        testsrc = """ ; tst
         begin:
-        mov rax, rbx
-        xor rcx, rbx
-        inc rcx
+        mov rax, rbx ; 0x48, 0x89, 0xd8
+        xor rcx, rbx ; 0x48, 0x31, 0xd9
+        inc rcx ; 0x48 0xff 0xc1
         """
-        a = libasm.Assembler()
+        a = Assembler()
         a.assemble(testsrc)
         # Compare with nasm output:
         nasmbytes = [0x48, 0x89, 0xd8, 0x48, 0x31, 0xd9, 0x48, 0xff, 0xc1]