Mercurial > lcfOS
diff python/asm.py @ 197:4a1ca1271241
Rename libasm
author | Windel Bouwman |
---|---|
date | Sat, 01 Jun 2013 11:56:16 +0200 |
parents | python/libasm.py@ec2b423cdbea |
children | 33d50727a23c |
line wrap: on
line diff
"""Generic assembler front end.

Contains a regex based tokenizer, the AST node classes that are produced
while parsing, and an ``Assembler`` that parses source text one line at a
time with a parser generated by pyyacc.
"""

import re
import pyyacc
from ppci import Token, CompilerError, SourceLocation
import sys
import argparse


# Different instruction sets:
class InstructionSet:
    """Base class for instruction set descriptions (empty placeholder)."""
    pass


class X86(InstructionSet):
    """Placeholder for the x86 instruction set."""
    pass


# Generic assembler:
# Identifiers listed here get their own token type instead of 'ID'.
keywords = ['global', 'db']


def tokenize(s):
    """
    Tokenizer, generates an iterator that returns tokens.

    The structure follows the tokenizer example from the documentation of
    the python ``re`` module.

    Token types are 'ID', 'NUMBER', 'REAL', 'STRING', a keyword from
    ``keywords``, or the punctuation text itself. Decimal and hexadecimal
    numbers are both delivered as 'NUMBER' with an ``int`` value, reals as
    ``float`` and strings without their surrounding quotes. Spaces and tabs
    are skipped.

    Raises CompilerError when a character cannot be matched by any rule.
    """
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
        ('STRING', r"'.*?'")
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        # NOTE: tok_spec has no 'NEWLINE' or 'COMMENTS' entry, so the first
        # two branches are never taken at the moment; a newline in the input
        # ends up in the 'Unexpected character' error below.
        if typ == 'NEWLINE':
            line_start = pos
            line += 1
        elif typ == 'COMMENTS':
            pass
        elif typ != 'SKIP':
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                # Punctuation: the token type is the punctuation itself.
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Strip the '0x' prefix and report as a plain NUMBER.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                # Drop the surrounding quotes.
                val = val[1:-1]
            col = mo.start() - line_start
            loc = SourceLocation(line, col, 0)  # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # Matching stopped before the end of the input.
        col = pos - line_start
        loc = SourceLocation(line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)


class Lexer:
    """One token look-ahead wrapper around ``tokenize``.

    ``curTok`` holds the look-ahead token and is ``None`` once all tokens
    have been consumed (or when the source contains no tokens at all).
    """

    def __init__(self, src):
        self.tokens = tokenize(src)
        # Use a default so that an empty source does not raise here.
        self.curTok = next(self.tokens, None)

    def eat(self):
        """Return the current token and advance to the next one.

        Raises StopIteration when there is no token left. The final token
        is returned normally; only the call after it raises.
        """
        t = self.curTok
        if t is None:
            raise StopIteration
        self.curTok = next(self.tokens, None)
        return t

    @property
    def Peak(self):
        """The look-ahead token, without consuming it."""
        return self.curTok


class ANode:
    """Base class of all assembler AST nodes.

    Nodes compare equal when their ``repr`` texts are equal. Note that this
    also holds for nodes of different classes that render identically.
    """

    def __eq__(self, other):
        return self.__repr__() == other.__repr__()


class ALabel(ANode):
    """A label definition, rendered as ``name:``."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return '{0}:'.format(self.name)


class AInstruction(ANode):
    """An instruction: an opcode plus a list of operands."""

    def __init__(self, opcode, operands):
        self.opcode = opcode
        self.operands = operands

    def __repr__(self):
        ops = ', '.join(map(str, self.operands))
        return '{0} {1}'.format(self.opcode, ops)


class AExpression(ANode):
    """Base class of expression nodes; ``+`` and ``*`` build ABinop nodes."""

    def __add__(self, other):
        assert isinstance(other, AExpression)
        return ABinop('+', self, other)

    def __mul__(self, other):
        assert isinstance(other, AExpression)
        return ABinop('*', self, other)


class ABinop(AExpression):
    """Binary operation, rendered in prefix form: ``op arg1 arg2``."""

    def __init__(self, op, arg1, arg2):
        self.op = op
        self.arg1 = arg1
        self.arg2 = arg2

    def __repr__(self):
        return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2)


class AUnop(AExpression):
    """Unary operation, rendered as ``op arg``."""

    def __init__(self, op, arg):
        self.op = op
        self.arg = arg

    def __repr__(self):
        return '{0} {1}'.format(self.op, self.arg)


class ASymbol(AExpression):
    """A symbolic name used inside an expression."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name


class ANumber(AExpression):
    """A numeric constant."""

    def __init__(self, n):
        self.n = n

    def __repr__(self):
        return '{0}'.format(self.n)


class Assembler:
    """Parses assembly source into AST nodes collected in ``self.output``."""

    def __init__(self):
        self.output = []
        # Construct a parser given a grammar:
        ident = lambda x: x   # Identity helper function
        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS])
        g.add_production('asmline', ['label', 'instruction'])
        g.add_production('asmline', ['instruction'])
        g.add_production('asmline', ['label'])
        g.add_production('asmline', [])
        g.add_production('label', ['ID', ':'], self.p_label)
        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
        g.add_production('instruction', ['opcode'], self.p_ins_2)
        g.add_production('opcode', ['ID'], ident)
        g.add_production('operands', ['operand'], self.p_operands_1)
        g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
        g.add_production('operand', ['expression'], ident)
        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
        g.add_production('expression', ['term'], ident)
        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
        g.add_production('addop', ['-'], ident)
        g.add_production('addop', ['+'], ident)
        g.add_production('mulop', ['*'], ident)
        g.add_production('term', ['factor'], ident)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], self.p_symbol)
        g.add_production('factor', ['NUMBER'], self.p_number)
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    # Parser handlers:
    # NOTE(review): presumably pyyacc calls a handler with one argument per
    # symbol of the production it is attached to -- confirm against pyyacc.
    def p_ins_1(self, opc, ops):
        """Handle an instruction with operands: emit an AInstruction."""
        ins = AInstruction(opc, ops)
        self.emit(ins)

    def p_ins_2(self, opc):
        """Handle an instruction without operands."""
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """Start an operand list with its first operand."""
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """Append another operand to an existing operand list."""
        assert type(ops) is list
        ops.append(op2)
        return ops

    def p_mem_op(self, brace_open, exp, brace_close):
        """Wrap a bracketed expression in a '[]' unary node."""
        return AUnop('[]', exp)

    def handle_ins(self, id0, operands):
        """Emit an instruction built from an opcode and its operands."""
        # AInstruction needs both the opcode and the operand list.
        ins = AInstruction(id0, operands)
        self.emit(ins)

    def p_label(self, lname, cn):
        """Handle a label definition: emit an ALabel."""
        lab = ALabel(lname)
        self.emit(lab)

    def p_binop(self, exp1, op, exp2):
        """Build a binary operation node."""
        return ABinop(op, exp1, exp2)

    def p_symbol(self, name):
        """Build a symbol node."""
        return ASymbol(name)

    def p_number(self, n):
        """Build a number node; the value is converted with int()."""
        n = int(n)
        return ANumber(n)

    # Top level interface:
    def emit(self, a):
        """ Emit a parsed instruction """
        self.output.append(a)
        # Determine the bit pattern from a lookup table:
        # TODO

    def parse_line(self, line):
        """ Parse line into asm AST """
        tokens = tokenize(line)
        self.p.parse(tokens)

    def assemble(self, asmsrc):
        """ Assemble this source snippet """
        for line in asmsrc.split('\n'):
            self.assemble_line(line)
        self.back_patch()

    def assemble_line(self, line):
        """
            Assemble a single source line.
            Do not take newlines into account
        """
        self.parse_line(line)
        self.assemble_aast()

    def assemble_aast(self, at=None):
        """ Assemble a parsed asm line

            'at' is optional because assemble_line calls this method
            without an argument; it is not used yet.
        """
        pass

    def back_patch(self):
        """ Fix references to earlier labels """
        pass


if __name__ == '__main__':
    # When run as main file, try to grab command line arguments:
    parser = argparse.ArgumentParser(description="Assembler")
    parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble')
    args = parser.parse_args()
    a = Assembler()
    # Make sure the source file is closed again after reading it.
    with args.sourcefile as source_file:
        source_text = source_file.read()
    obj = a.assemble(source_text)