diff python/libasm.py @ 191:6b2bec5653f1

Added assembler testset
author Windel Bouwman
date Sun, 26 May 2013 15:28:07 +0200
parents 5e1dd04cb61c
children f091e7d70996
--- a/python/libasm.py	Sat May 25 15:15:42 2013 +0200
+++ b/python/libasm.py	Sun May 26 15:28:07 2013 +0200
@@ -1,4 +1,6 @@
-import collections, re
+import re
+import pyyacc
+from ppci import Token, CompilerError, SourceLocation
 
 # Different instruction sets:
 class InstructionSet:
@@ -8,19 +10,6 @@
    pass
 
 # Generic assembler:
-
-class SourceLocation:
-   def __init__(self, x):
-      self.pos = x
-
-class SourceRange:
-   def __init__(self, p1, p2):
-      self.p1 = p1
-      self.p2 = p2
-
-# Token is used in the lexical analyzer:
-Token = collections.namedtuple('Token', 'typ val row col')
-
 keywords = ['global', 'db']
 
 def tokenize(s):
@@ -35,13 +24,11 @@
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-       ('NEWLINE', r'\n'),
        ('SKIP', r'[ \t]'),
-       ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
+       ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
        ('STRING', r"'.*?'")
      ]
      tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-     print(tok_re)
      gettok = re.compile(tok_re).match
      line = 1
      pos = line_start = 0
@@ -69,13 +56,15 @@
            val = float(val)
          elif typ == 'STRING':
            val = val[1:-1]
-         yield Token(typ, val, line, mo.start()-line_start)
+         col = mo.start() - line_start
+         loc = SourceLocation(line, col, 0)   # TODO retrieve length?
+         yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
      if pos != len(s):
        col = pos - line_start
-       raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col)
-     yield Token('END', '', line, 0)
+       loc = SourceLocation(line, col, 0)
+       raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
 
 class Lexer:
    def __init__(self, src):
@@ -89,28 +78,40 @@
    def Peak(self):
       return self.curTok
 
-class Parser:
-   def __init__(self, lxr):
-      self.lxr = lxr
-   def parse(self):
-      t = self.lxr.eat()
+class Assembler:
+    def __init__(self):
+        # Construct a parser given a grammar:
+        g = pyyacc.Grammar(['ID', ',', '[', ']', ':'])
 
-      while True:
-         ins = self.parseLine()
-         print(ins)
-         t = self.lxr.eat()
-   def parseLine(self):
-      self.parseLabel()
-      if self.lxr.Peak == ';':
-         self.eatComments()
-   def parseLabel(self):
-      i = self.lxr.eat()
+        g.add_production('asmline', ['label', 'instruction', 'operands'])
+        g.add_production('label', ['ID', ':'])
+        g.add_production('label', ['EPS'])
+        g.add_production('instruction', ['ID'])
+        g.add_production('operands', ['operand'])
+        g.add_production('operands', ['operands', ',', 'operand'])
+        g.add_production('operand', ['expression'])
+        g.add_production('expression', ['ID'])
+        # TODO: expand grammar
+        g.start_symbol = 'asmline'
 
-class Assembler:
-   def assemble(self, asmsrc):
-      print('assembling', asmsrc)
+        self.p = g.genParser()
+
+    def assemble(self, asmsrc):
       lxr = Lexer(asmsrc)
       prsr = Parser(lxr)
       instructions = prsr.parse()
       return instructions
 
+    def assembleLine(self, line):
+        """
+            Assemble a single source line.
+            Newlines are not taken into account.
+        """
+        tokens = tokenize(line)
+        self.p.parse(tokens)
+
+    def assembleAst(self, at):
+        """ Assemble a parsed asm line """
+        pass
+
+
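
For context, a rough usage sketch of the API introduced above. The module name (libasm), the sample source line and the register names are assumptions for illustration and not part of this changeset. Note also that tokenize() tags ':' and ',' as 'LEESTEKEN' tokens, while the grammar declares them as the literal ',' / ':' terminals, so a mapping step is presumably still needed before p.parse() accepts a real token stream.

    # Hypothetical sketch; the module path and the example line are assumed.
    from libasm import Assembler, tokenize

    # Walk the token stream for one source line.  Each Token carries its type,
    # its value and a SourceLocation; the location's length field is still 0
    # (see the TODO in the diff above).
    for tok in tokenize('begin: mov r0, r1'):
        print(tok)

    # Build the grammar-driven parser.  assembleLine() would then feed it a
    # single line, e.g. a.assembleLine('mov r0, r1'), once the token types
    # produced by tokenize() line up with the grammar's terminals.
    a = Assembler()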