Mercurial > lcfOS

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/asm.py	Sat Jun 01 11:56:16 2013 +0200
@@ -0,0 +1,237 @@
+import re
+import pyyacc
+from ppci import Token, CompilerError, SourceLocation
+import sys, argparse
+
+
+# Different instruction sets:
+class InstructionSet:
+   """ Base class for the different instruction sets """
+
+class X86(InstructionSet):
+   """ The x86 instruction set (no members yet) """
+
+# Generic assembler:
+keywords = ['global', 'db']   # tokenize() gives an ID spelled like one of these the word itself as token type
+
+def tokenize(s):
+     """
+       Lazily split the string s into Token objects.
+
+       The scanning loop follows the tokenizer example from the python
+       re documentation. Blanks and tabs are dropped; iteration raises
+       a CompilerError at the first character that no pattern matches.
+
+       Conversions applied before a token is yielded:
+         - an ID listed in keywords, or a LEESTEKEN match, uses its text as type
+         - NUMBER and HEXNUMBER give an int of type NUMBER, REAL gives a float
+         - STRING loses its surrounding quotes
+     """
+     patterns = (
+       ('REAL', r'\d+\.\d+'),
+       ('HEXNUMBER', r'0x[\da-fA-F]+'),
+       ('NUMBER', r'\d+'),
+       ('ID', r'[A-Za-z][A-Za-z\d_]*'),
+       ('SKIP', r'[ \t]'),
+       ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
+       ('STRING', r"'.*?'"),
+     )
+     scanner = re.compile('|'.join('(?P<%s>%s)' % p for p in patterns))
+     line, line_start, pos = 1, 0, 0
+     while True:
+       mo = scanner.match(s, pos)
+       if mo is None:
+         break
+       kind = typ = mo.lastgroup
+       val = mo.group(kind)
+       # No pattern above yields a NEWLINE or COMMENTS group at the moment
+       if kind == 'NEWLINE':
+         line_start, line = pos, line + 1
+       elif kind not in ('COMMENTS', 'SKIP'):
+         if kind == 'LEESTEKEN' or (kind == 'ID' and val in keywords):
+           typ = val
+         elif kind == 'HEXNUMBER':
+           typ, val = 'NUMBER', int(val[2:], 16)
+         elif kind == 'NUMBER':
+           val = int(val)
+         elif kind == 'REAL':
+           val = float(val)
+         elif kind == 'STRING':
+           val = val[1:-1]
+         # Column is counted from the start of the current line
+         loc = SourceLocation(line, mo.start() - line_start, 0)   # TODO retrieve length?
+         yield Token(typ, val, loc)
+       pos = mo.end()
+     if pos == len(s):
+       return
+     # Scanning stopped early: report the character it stopped at
+     loc = SourceLocation(line, pos - line_start, 0)
+     raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
+
+class Lexer:
+   """ One-token lookahead over tokenize(src); curTok is None once the input is used up """
+   def __init__(self, src):
+      self.tokens = tokenize(src)
+      self.curTok = next(self.tokens, None)   # None rather than StopIteration on empty input
+   def eat(self):   # returns the current token and advances; the final token is returned as well
+      t, self.curTok = self.curTok, next(self.tokens, None)
+      return t
+   @property
+   def Peak(self):
+      return self.curTok
+
+class ANode:
+    def __eq__(self, other):   # two nodes are equal when their repr() texts are equal
+        return repr(self) == repr(other)
+
+class ALabel(ANode):   # label definition; repr() is the name followed by ':'
+    def __init__(self, name):
+        self.name = name
+    def __repr__(self):
+        return '{0}:'.format(self.name)
+
+class AInstruction(ANode):
+    """ An opcode with its operand list; repr() is 'opcode op1, op2, ...' """
+    def __init__(self, opcode, operands):
+        self.opcode, self.operands = opcode, operands
+    def __repr__(self):
+        operand_text = ', '.join(str(operand) for operand in self.operands)
+        return '{0} {1}'.format(self.opcode, operand_text)
+
+class AExpression(ANode):
+    """ Base of the expression nodes; + and * of two expressions build an ABinop """
+    def _binop(self, op, other):
+        assert isinstance(other, AExpression)
+        return ABinop(op, self, other)
+    def __add__(self, other): return self._binop('+', other)
+    def __mul__(self, other): return self._binop('*', other)
+
+class ABinop(AExpression):
+    """ Binary operation node; repr() is prefix notation: op arg1 arg2 """
+    def __init__(self, op, arg1, arg2):
+        self.op, self.arg1, self.arg2 = op, arg1, arg2
+    def __repr__(self):
+        # operator first, then both operands, separated by single spaces
+        return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2)
+
+class AUnop(AExpression):
+    """ Unary operation node; repr() is the operator followed by its argument """
+    def __init__(self, op, arg):
+        self.op, self.arg = op, arg
+    def __repr__(self):
+        return '{0} {1}'.format(self.op, self.arg)
+
+class ASymbol(AExpression):   # symbol reference; repr() is the bare name
+    def __init__(self, name):
+        self.name = name
+    def __repr__(self):
+        return self.name   # name has to be a str, since __repr__ must return one
+
+class ANumber(AExpression):   # numeric literal; repr() is the formatted value n
+    def __init__(self, n):
+        self.n = n
+    def __repr__(self):
+        return '{0}'.format(self.n)
+
+class Assembler:
+    """ Parses assembly source one line at a time; emit() collects the parsed nodes in self.output """
+    def __init__(self):
+        self.output = []
+        # Construct a parser given a grammar:
+        ident = lambda x: x   # Identity helper function
+        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS])
+        g.add_production('asmline', ['label', 'instruction'])
+        g.add_production('asmline', ['instruction'])
+        g.add_production('asmline', ['label'])
+        g.add_production('asmline', [])
+        g.add_production('label', ['ID', ':'], self.p_label)
+        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
+        g.add_production('instruction', ['opcode'], self.p_ins_2)
+        g.add_production('opcode', ['ID'], ident)
+        g.add_production('operands', ['operand'], self.p_operands_1)
+        g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
+        g.add_production('operand', ['expression'], ident)
+        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
+        g.add_production('expression', ['term'], ident)
+        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
+        g.add_production('addop', ['-'], ident)
+        g.add_production('addop', ['+'], ident)
+        g.add_production('mulop', ['*'], ident)
+        g.add_production('term', ['factor'], ident)
+        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
+        g.add_production('factor', ['ID'], self.p_symbol)
+        g.add_production('factor', ['NUMBER'], self.p_number)
+        g.start_symbol = 'asmline'
+        self.p = g.genParser()
+
+    # Parser handlers:
+    # p_ins_*, p_label and handle_ins emit a node; the other handlers return what they build
+    def p_ins_1(self, opc, ops):
+        ins = AInstruction(opc, ops)
+        self.emit(ins)
+    def p_ins_2(self, opc):
+        self.p_ins_1(opc, [])
+    def p_operands_1(self, op1):
+        return [op1]
+    def p_operands_2(self, ops, comma, op2):
+        assert type(ops) is list
+        ops.append(op2)
+        return ops
+    def p_mem_op(self, brace_open, exp, brace_close):
+        return AUnop('[]', exp)
+    def handle_ins(self, id0, operands):
+        # AInstruction needs the operand list as well as the opcode
+        ins = AInstruction(id0, operands)
+        self.emit(ins)
+    def p_label(self, lname, cn):
+        lab = ALabel(lname)
+        self.emit(lab)
+    def p_binop(self, exp1, op, exp2):
+        return ABinop(op, exp1, exp2)
+    def p_symbol(self, name):
+        return ASymbol(name)
+    def p_number(self, n):
+        return ANumber(int(n))
+
+    # Top level interface:
+    def emit(self, a):
+        """ Emit a parsed instruction """
+        self.output.append(a)
+        # Determine the bit pattern from a lookup table:
+        # TODO
+
+    def parse_line(self, line):
+        """ Parse line into asm AST """
+        tokens = tokenize(line)
+        self.p.parse(tokens)
+
+    def assemble(self, asmsrc):
+        """ Assemble this source snippet """
+        for line in asmsrc.split('\n'):
+            self.assemble_line(line)
+        self.back_patch()
+
+    def assemble_line(self, line):
+        """ Assemble a single source line (newlines are not taken into account) """
+        first_new = len(self.output)
+        self.parse_line(line)
+        # NOTE(review): assumes `at` is meant to be the nodes parsed from this line - confirm
+        self.assemble_aast(self.output[first_new:])
+
+    def assemble_aast(self, at):
+        """ Assemble a parsed asm line """
+        pass
+
+    def back_patch(self):
+        """ Fix references to earlier labels """
+        pass
+
+
+if __name__ == '__main__':   # run as a script: assemble the file named on the command line
+    parser = argparse.ArgumentParser(description="Assembler")
+    parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble')
+    args = parser.parse_args()
+    with args.sourcefile as src:   # make sure the source file gets closed
+        a = Assembler()
+        obj = a.assemble(src.read())
+
--- a/python/libasm.py	Sat Jun 01 11:55:49 2013 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,237 +0,0 @@
-import re
-import pyyacc
-from ppci import Token, CompilerError, SourceLocation
-import sys, argparse
-
-
-# Different instruction sets:
-class InstructionSet:
-   pass
-
-class X86(InstructionSet):
-   pass
-
-# Generic assembler:
-keywords = ['global', 'db']
-
-def tokenize(s):
-     """
-       Tokenizer, generates an iterator that
-       returns tokens!
-
-       This GREAT example was taken from python re doc page!
-     """
-     tok_spec = [
-       ('REAL', r'\d+\.\d+'),
-       ('HEXNUMBER', r'0x[\da-fA-F]+'),
-       ('NUMBER', r'\d+'),
-       ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-       ('SKIP', r'[ \t]'),
-       ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
-       ('STRING', r"'.*?'")
-     ]
-     tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-     gettok = re.compile(tok_re).match
-     line = 1
-     pos = line_start = 0
-     mo = gettok(s)
-     while mo is not None:
-       typ = mo.lastgroup
-       val = mo.group(typ)
-       if typ == 'NEWLINE':
-         line_start = pos
-         line += 1
-       elif typ == 'COMMENTS':
-         pass
-       elif typ != 'SKIP':
-         if typ == 'ID':
-           if val in keywords:
-             typ = val
-         elif typ == 'LEESTEKEN':
-           typ = val
-         elif typ == 'NUMBER':
-           val = int(val)
-         elif typ == 'HEXNUMBER':
-           val = int(val[2:], 16)
-           typ = 'NUMBER'
-         elif typ == 'REAL':
-           val = float(val)
-         elif typ == 'STRING':
-           val = val[1:-1]
-         col = mo.start() - line_start
-         loc = SourceLocation(line, col, 0)   # TODO retrieve length?
-         yield Token(typ, val, loc)
-       pos = mo.end()
-       mo = gettok(s, pos)
-     if pos != len(s):
-       col = pos - line_start
-       loc = SourceLocation(line, col, 0)
-       raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
-
-class Lexer:
-   def __init__(self, src):
-      self.tokens = tokenize(src)
-      self.curTok = self.tokens.__next__()
-   def eat(self):
-      t = self.curTok
-      self.curTok = self.tokens.__next__()
-      return t
-   @property
-   def Peak(self):
-      return self.curTok
-
-class ANode:
-    def __eq__(self, other):
-        return self.__repr__() == other.__repr__()
-
-class ALabel(ANode):
-    def __init__(self, name):
-        self.name = name
-    def __repr__(self):
-        return '{0}:'.format(self.name)
-
-class AInstruction(ANode):
-    def __init__(self, opcode, operands):
-        self.opcode = opcode
-        self.operands = operands
-    def __repr__(self):
-        ops = ', '.join(map(str, self.operands))
-        return '{0} {1}'.format(self.opcode, ops)
-
-class AExpression(ANode):
-    def __add__(self, other):
-        assert isinstance(other, AExpression)
-        return ABinop('+', self, other)
-    def __mul__(self, other):
-        assert isinstance(other, AExpression)
-        return ABinop('*', self, other)
-
-class ABinop(AExpression):
-    def __init__(self, op, arg1, arg2):
-        self.op = op
-        self.arg1 = arg1
-        self.arg2 = arg2
-    def __repr__(self):
-        return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2)
-
-class AUnop(AExpression):
-    def __init__(self, op, arg):
-        self.op = op
-        self.arg = arg
-    def __repr__(self):
-        return '{0} {1}'.format(self.op, self.arg)
-
-class ASymbol(AExpression):
-    def __init__(self, name):
-        self.name = name
-    def __repr__(self):
-        return self.name
-
-class ANumber(AExpression):
-    def __init__(self, n):
-        self.n = n
-    def __repr__(self):
-        return '{0}'.format(self.n)
-
-class Assembler:
-    def __init__(self):
-        self.output = []
-        # Construct a parser given a grammar:
-        ident = lambda x: x   # Identity helper function
-        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS])
-        g.add_production('asmline', ['label', 'instruction'])
-        g.add_production('asmline', ['instruction'])
-        g.add_production('asmline', ['label'])
-        g.add_production('asmline', [])
-        g.add_production('label', ['ID', ':'], self.p_label)
-        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
-        g.add_production('instruction', ['opcode'], self.p_ins_2)
-        g.add_production('opcode', ['ID'], ident)
-        g.add_production('operands', ['operand'], self.p_operands_1)
-        g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
-        g.add_production('operand', ['expression'], ident)
-        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
-        g.add_production('expression', ['term'], ident)
-        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
-        g.add_production('addop', ['-'], ident)
-        g.add_production('addop', ['+'], ident)
-        g.add_production('mulop', ['*'], ident)
-        g.add_production('term', ['factor'], ident)
-        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
-        g.add_production('factor', ['ID'], self.p_symbol)
-        g.add_production('factor', ['NUMBER'], self.p_number)
-        g.start_symbol = 'asmline'
-        self.p = g.genParser()
-
-    # Parser handlers:
-    def p_ins_1(self, opc, ops):
-        ins = AInstruction(opc, ops)
-        self.emit(ins)
-    def p_ins_2(self, opc):
-        self.p_ins_1(opc, [])
-    def p_operands_1(self, op1):
-        return [op1]
-    def p_operands_2(self, ops, comma, op2):
-        assert type(ops) is list
-        ops.append(op2)
-        return ops
-    def p_mem_op(self, brace_open, exp, brace_close):
-        return AUnop('[]', exp)
-    def handle_ins(self, id0, operands):
-        ins = AInstruction(id0)
-        self.emit(ins)
-    def p_label(self, lname, cn):
-        lab = ALabel(lname)
-        self.emit(lab)
-    def p_binop(self, exp1, op, exp2):
-        return ABinop(op, exp1, exp2)
-    def p_symbol(self, name):
-        return ASymbol(name)
-    def p_number(self, n):
-        n = int(n)
-        return ANumber(n)
-
-    # Top level interface:
-    def emit(self, a):
-        """ Emit a parsed instruction """
-        self.output.append(a)
-        # Determine the bit pattern from a lookup table:
-        # TODO
-
-
-    def parse_line(self, line):
-        """ Parse line into asm AST """
-        tokens = tokenize(line)
-        self.p.parse(tokens)
-
-    def assemble(self, asmsrc):
-        """ Assemble this source snippet """
-        for line in asmsrc.split('\n'):
-            self.assemble_line(line)
-        self.back_patch()
-
-    def assemble_line(self, line):
-        """
-            Assemble a single source line.
-            Do not take newlines into account
-        """
-        self.parse_line(line)
-        self.assemble_aast()
-
-    def assemble_aast(self, at):
-        """ Assemble a parsed asm line """
-        pass
-
-    def back_patch(self):
-        """ Fix references to earlier labels """
-        pass
-
-
-if __name__ == '__main__':
-    # When run as main file, try to grab command line arguments:
-    parser = argparse.ArgumentParser(description="Assembler")
-    parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble')
-    args = parser.parse_args()
-    a = Assembler()
-    obj = a.assemble(args.sourcefile.read())
-