diff python/asm.py @ 197:4a1ca1271241

Rename libasm
author Windel Bouwman
date Sat, 01 Jun 2013 11:56:16 +0200
parents python/libasm.py@ec2b423cdbea
children 33d50727a23c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/asm.py	Sat Jun 01 11:56:16 2013 +0200
@@ -0,0 +1,237 @@
+import re
+import pyyacc
+from ppci import Token, CompilerError, SourceLocation
+import sys, argparse
+
+
+# Different instruction sets:
+class InstructionSet:
+   """ Common base class for instruction sets, empty for now """
+
+class X86(InstructionSet):
+   """ The x86 instruction set, empty for now """
+
+# Generic assembler:
+# Identifiers that tokenize() reports with the word itself as token type
+# instead of the generic 'ID' type:
+keywords = ['global', 'db']
+
+def tokenize(s):
+     """
+       Tokenizer, generates an iterator that returns the tokens
+       found in the source text s.
+
+       Every token is a Token(typ, val, loc):
+        - identifiers have type 'ID', unless the word is listed in
+          keywords, then the word itself is the type.
+        - punctuation marks use their own text as type.
+        - decimal and hexadecimal (0x..) numbers both have type 'NUMBER'
+          and an int value.
+        - 'REAL' tokens carry a float value.
+        - 'STRING' tokens carry the text between the single quotes.
+       Spaces and tabs are skipped. Newlines are skipped as well, they
+       only advance the line number used in the token locations.
+
+       Raises CompilerError at the first character that does not
+       start any token.
+
+       Based on the tokenizer example of the python re doc page.
+     """
+     tok_spec = [
+       ('REAL', r'\d+\.\d+'),
+       ('HEXNUMBER', r'0x[\da-fA-F]+'),
+       ('NUMBER', r'\d+'),
+       ('ID', r'[A-Za-z][A-Za-z\d_]*'),
+       ('NEWLINE', r'\n'),
+       ('SKIP', r'[ \t]'),
+       # LEESTEKEN is dutch for punctuation mark:
+       ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
+       ('STRING', r"'.*?'")
+     ]
+     # One big regex with a named group per token type. The order matters:
+     # the first alternative that matches wins (REAL and HEXNUMBER before NUMBER).
+     tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+     gettok = re.compile(tok_re).match
+     line = 1
+     pos = line_start = 0
+     mo = gettok(s)
+     while mo is not None:
+       typ = mo.lastgroup
+       val = mo.group(typ)
+       if typ == 'NEWLINE':
+         # The next line starts right behind the newline character:
+         line_start = mo.end()
+         line += 1
+       elif typ != 'SKIP':
+         if typ == 'ID':
+           if val in keywords:
+             typ = val
+         elif typ == 'LEESTEKEN':
+           typ = val
+         elif typ == 'NUMBER':
+           val = int(val)
+         elif typ == 'HEXNUMBER':
+           # Strip the 0x prefix, hex numbers are plain NUMBER tokens:
+           val = int(val[2:], 16)
+           typ = 'NUMBER'
+         elif typ == 'REAL':
+           val = float(val)
+         elif typ == 'STRING':
+           # Drop the surrounding quotes:
+           val = val[1:-1]
+         col = mo.start() - line_start
+         loc = SourceLocation(line, col, 0)   # TODO retrieve length?
+         yield Token(typ, val, loc)
+       pos = mo.end()
+       mo = gettok(s, pos)
+     # Matching stopped before the end of the text, so s[pos] fits no token:
+     if pos != len(s):
+       col = pos - line_start
+       loc = SourceLocation(line, col, 0)
+       raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
+
+class Lexer:
+   """
+     Wraps the token generator of tokenize() and keeps one token of
+     lookahead in curTok. curTok is None when the input is used up.
+   """
+   def __init__(self, src):
+      """ Start tokenizing src and load the first token, if there is one """
+      self.tokens = tokenize(src)
+      # Use a default, empty input must not raise StopIteration here:
+      self.curTok = next(self.tokens, None)
+   def eat(self):
+      """
+        Return the current token and advance to the next one.
+        Raises StopIteration when all tokens have been consumed.
+      """
+      t = self.curTok
+      if t is None:
+         raise StopIteration
+      # Advance with a default, so that the last token is still returned:
+      self.curTok = next(self.tokens, None)
+      return t
+   @property
+   def Peak(self):
+      """ The current token without consuming it (name kept for existing callers) """
+      return self.curTok
+
+class ANode:
+    """ Base class of all assembly AST nodes """
+    def __eq__(self, other):
+        """ Two nodes are equal when their textual representations are equal """
+        mine, theirs = repr(self), repr(other)
+        return mine == theirs
+
+class ALabel(ANode):
+    """ A label definition, shown as the label name followed by a colon """
+    def __init__(self, name):
+        self.name = name
+    def __repr__(self):
+        return '{}:'.format(self.name)
+
+class AInstruction(ANode):
+    """ An instruction: an opcode together with its operands """
+    def __init__(self, opcode, operands):
+        self.opcode, self.operands = opcode, operands
+    def __repr__(self):
+        # Operands are shown comma separated behind the opcode:
+        operand_text = ', '.join(str(operand) for operand in self.operands)
+        return '{} {}'.format(self.opcode, operand_text)
+
+class AExpression(ANode):
+    """ Base class of expression nodes, + and * on them create ABinop nodes """
+    def _binop(self, op, other):
+        """ Combine this expression and other into an ABinop with operator op """
+        assert isinstance(other, AExpression)
+        return ABinop(op, self, other)
+    def __add__(self, other):
+        return self._binop('+', other)
+    def __mul__(self, other):
+        return self._binop('*', other)
+
+class ABinop(AExpression):
+    """ Binary operation, shown in prefix form: operator, first and second argument """
+    def __init__(self, op, arg1, arg2):
+        self.op, self.arg1, self.arg2 = op, arg1, arg2
+    def __repr__(self):
+        return '{} {} {}'.format(self.op, self.arg1, self.arg2)
+
+class AUnop(AExpression):
+    """ Unary operation, shown as the operator followed by its argument """
+    def __init__(self, op, arg):
+        self.op, self.arg = op, arg
+    def __repr__(self):
+        return '{} {}'.format(self.op, self.arg)
+
+class ASymbol(AExpression):
+    """ A name used inside an expression, shown as the bare name """
+    def __init__(self, name):
+        self.name = name
+    def __repr__(self):
+        text = self.name
+        return text
+
+class ANumber(AExpression):
+    """ A numeric constant inside an expression """
+    def __init__(self, n):
+        self.n = n
+    def __repr__(self):
+        return format(self.n)
+
+class Assembler:
+    """
+        Parses assembly source line by line and collects the parsed
+        ALabel and AInstruction nodes in self.output.
+    """
+    def __init__(self):
+        """ Create the empty output list and generate the line parser """
+        self.output = []
+        # Construct a parser given a grammar:
+        ident = lambda x: x   # Identity helper function
+        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS])
+        # A line is an optional label followed by an optional instruction:
+        g.add_production('asmline', ['label', 'instruction'])
+        g.add_production('asmline', ['instruction'])
+        g.add_production('asmline', ['label'])
+        g.add_production('asmline', [])
+        g.add_production('label', ['ID', ':'], self.p_label)
+        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
+        g.add_production('instruction', ['opcode'], self.p_ins_2)
+        g.add_production('opcode', ['ID'], ident)
+        # Operands are a comma separated list, collected into a python list:
+        g.add_production('operands', ['operand'], self.p_operands_1)
+        g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
+        g.add_production('operand', ['expression'], ident)
+        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
+        # '*' is reduced inside term, so it binds tighter than '+' and '-':
+        g.add_production('expression', ['term'], ident)
+        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
+        g.add_production('addop', ['-'], ident)
+        g.add_production('addop', ['+'], ident)
+        g.add_production('mulop', ['*'], ident)
+        g.add_production('term', ['factor'], ident)
+        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
+        g.add_production('factor', ['ID'], self.p_symbol)
+        g.add_production('factor', ['NUMBER'], self.p_number)
+        g.start_symbol = 'asmline'
+        self.p = g.genParser()
+
+    # Parser handlers:
+    def p_ins_1(self, opc, ops):
+        """ Instruction with operands: emit an AInstruction """
+        ins = AInstruction(opc, ops)
+        self.emit(ins)
+    def p_ins_2(self, opc):
+        """ Instruction without operands: emit it with an empty operand list """
+        self.p_ins_1(opc, [])
+    def p_operands_1(self, op1):
+        """ First operand: start a new operand list """
+        return [op1]
+    def p_operands_2(self, ops, comma, op2):
+        """ Next operand: append it to the list built so far and return that list """
+        assert isinstance(ops, list)
+        ops.append(op2)
+        return ops
+    def p_mem_op(self, brace_open, exp, brace_close):
+        """ Bracketed operand: wrap the expression in an AUnop with operator '[]' """
+        return AUnop('[]', exp)
+    def handle_ins(self, id0, operands):
+        """ Emit an AInstruction for opcode id0 and the given operands """
+        # AInstruction requires the operands as second argument:
+        ins = AInstruction(id0, operands)
+        self.emit(ins)
+    def p_label(self, lname, cn):
+        """ Label definition: emit an ALabel, the colon token cn is not used """
+        lab = ALabel(lname)
+        self.emit(lab)
+    def p_binop(self, exp1, op, exp2):
+        """ Binary expression: create the ABinop node """
+        return ABinop(op, exp1, exp2)
+    def p_symbol(self, name):
+        """ Identifier inside an expression: create an ASymbol """
+        return ASymbol(name)
+    def p_number(self, n):
+        """ Number inside an expression: create an ANumber holding an int """
+        n = int(n)
+        return ANumber(n)
+
+    # Top level interface:
+    def emit(self, a):
+        """ Emit a parsed instruction """
+        self.output.append(a)
+        # Determine the bit pattern from a lookup table:
+        # TODO
+
+
+    def parse_line(self, line):
+        """ Parse line into asm AST, the handlers above emit the nodes """
+        tokens = tokenize(line)
+        self.p.parse(tokens)
+
+    def assemble(self, asmsrc):
+        """ Assemble this source snippet, one line at a time """
+        for line in asmsrc.split('\n'):
+            self.assemble_line(line)
+        self.back_patch()
+
+    def assemble_line(self, line):
+        """ 
+            Assemble a single source line. 
+            Do not take newlines into account 
+        """
+        self.parse_line(line)
+        self.assemble_aast()
+
+    def assemble_aast(self, at=None):
+        """
+            Assemble a parsed asm line.
+            Not implemented yet, this does nothing. The argument is optional,
+            because assemble_line() calls this method without arguments.
+        """
+        pass
+
+    def back_patch(self):
+        """ Fix references to earlier labels (not implemented yet) """
+        pass
+
+
+if __name__ == '__main__':
+    # When run as main file, try to grab command line arguments:
+    parser = argparse.ArgumentParser(description="Assembler")
+    parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble')
+    args = parser.parse_args()
+    a = Assembler()
+    # argparse has opened the file already, close it again once it is read:
+    with args.sourcefile as sourcefile:
+        asmsrc = sourcefile.read()
+    obj = a.assemble(asmsrc)
+