Mercurial > lcfOS
view python/libasm.py @ 196:ec2b423cdbea
Merge asm and asmlib files
author | Windel Bouwman |
---|---|
date | Sat, 01 Jun 2013 11:55:49 +0200 |
parents | 37ac6c016e0f |
children |
line wrap: on
line source
import re
import sys
import argparse

import pyyacc
from ppci import Token, CompilerError, SourceLocation


# Different instruction sets:
class InstructionSet:
    """ Base class for the instruction sets known to the assembler """
    pass


class X86(InstructionSet):
    """ Placeholder for the x86 instruction set """
    pass


# Generic assembler:
keywords = ['global', 'db']

# Token specification: (group name, regex) pairs, tried in this order.
# 'LEESTEKEN' is Dutch for punctuation mark; such tokens get their own text
# as token type.
_TOK_SPEC = [
    ('REAL', r'\d+\.\d+'),
    ('HEXNUMBER', r'0x[\da-fA-F]+'),
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z][A-Za-z\d_]*'),
    ('SKIP', r'[ \t]'),
    ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
    ('STRING', r"'.*?'"),
]

# Compiled once at import time instead of on every tokenize() call.
_GETTOK = re.compile(
    '|'.join('(?P<%s>%s)' % pair for pair in _TOK_SPEC)).match


def tokenize(s):
    """
       Tokenizer, a generator that yields the tokens found in s.

       The structure follows the tokenizer example from the python re
       documentation. The token specification has no newline or comment
       rule, so every token is reported on line 1 and a newline in s is
       rejected like any other unknown character.

       Yields Token(typ, val, loc) where:
        - keywords and punctuation use their own text as typ
        - NUMBER and HEXNUMBER both yield typ 'NUMBER' with an int value
        - REAL yields a float value
        - STRING yields the text without the surrounding quotes

       Raises CompilerError when a character matches no token rule.
    """
    line = 1
    line_start = 0
    pos = 0
    mo = _GETTOK(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ != 'SKIP':
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Strip the '0x' prefix and report as an ordinary number
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            col = mo.start() - line_start
            loc = SourceLocation(line, col, 0)   # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = _GETTOK(s, pos)
    if pos != len(s):
        # Matching stopped before the end of the input
        col = pos - line_start
        loc = SourceLocation(line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)


class Lexer:
    """
       One-token-lookahead wrapper around tokenize.

       curTok holds the next token to be consumed, or None when the
       input is exhausted.
    """
    def __init__(self, src):
        self.tokens = tokenize(src)
        # The None default keeps empty input from raising StopIteration
        self.curTok = next(self.tokens, None)

    def eat(self):
        """
           Consume and return the current token.

           Raises StopIteration when no tokens are left.
        """
        t = self.curTok
        if t is None:
            raise StopIteration
        # Advance with a default, so the last token is still returned
        # instead of being lost to StopIteration.
        self.curTok = next(self.tokens, None)
        return t

    @property
    def Peak(self):
        """ The current token without consuming it (None at the end) """
        return self.curTok


class ANode:
    """ Base class of the assembly AST; nodes compare by their repr """
    def __eq__(self, other):
        return repr(self) == repr(other)


class ALabel(ANode):
    """ A label definition, printed as 'name:' """
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return '{0}:'.format(self.name)


class AInstruction(ANode):
    """ An instruction: an opcode plus a list of operands """
    def __init__(self, opcode, operands):
        self.opcode = opcode
        self.operands = operands

    def __repr__(self):
        ops = ', '.join(map(str, self.operands))
        return '{0} {1}'.format(self.opcode, ops)


class AExpression(ANode):
    """ Base class for expressions; + and * build ABinop nodes """
    def __add__(self, other):
        assert isinstance(other, AExpression)
        return ABinop('+', self, other)

    def __mul__(self, other):
        assert isinstance(other, AExpression)
        return ABinop('*', self, other)


class ABinop(AExpression):
    """ Binary operation, printed in prefix form: 'op arg1 arg2' """
    def __init__(self, op, arg1, arg2):
        self.op = op
        self.arg1 = arg1
        self.arg2 = arg2

    def __repr__(self):
        return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2)


class AUnop(AExpression):
    """ Unary operation, printed as 'op arg' """
    def __init__(self, op, arg):
        self.op = op
        self.arg = arg

    def __repr__(self):
        return '{0} {1}'.format(self.op, self.arg)


class ASymbol(AExpression):
    """ A reference to a name """
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name


class ANumber(AExpression):
    """ A numeric constant """
    def __init__(self, n):
        self.n = n

    def __repr__(self):
        return '{0}'.format(self.n)


class Assembler:
    """
       Line based assembler front end.

       Every source line is parsed with a pyyacc generated parser; the
       resulting ALabel and AInstruction nodes are collected in output.
    """
    def __init__(self):
        self.output = []

        # Construct a parser given a grammar:
        def ident(x):
            """ Identity helper function """
            return x

        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-',
                            '*', pyyacc.EPS])
        g.add_production('asmline', ['label', 'instruction'])
        g.add_production('asmline', ['instruction'])
        g.add_production('asmline', ['label'])
        g.add_production('asmline', [])
        g.add_production('label', ['ID', ':'], self.p_label)
        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
        g.add_production('instruction', ['opcode'], self.p_ins_2)
        g.add_production('opcode', ['ID'], ident)
        g.add_production('operands', ['operand'], self.p_operands_1)
        g.add_production('operands', ['operands', ',', 'operand'],
                         self.p_operands_2)
        g.add_production('operand', ['expression'], ident)
        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
        g.add_production('expression', ['term'], ident)
        g.add_production('expression', ['expression', 'addop', 'term'],
                         self.p_binop)
        g.add_production('addop', ['-'], ident)
        g.add_production('addop', ['+'], ident)
        g.add_production('mulop', ['*'], ident)
        g.add_production('term', ['factor'], ident)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], self.p_symbol)
        g.add_production('factor', ['NUMBER'], self.p_number)
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        """ Instruction with operands: emit it """
        ins = AInstruction(opc, ops)
        self.emit(ins)

    def p_ins_2(self, opc):
        """ Instruction without operands: emit it with an empty list """
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """ First operand: start the operand list """
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """ Following operand: extend the operand list built so far """
        assert isinstance(ops, list)
        ops.append(op2)
        return ops

    def p_mem_op(self, brace_open, exp, brace_close):
        """ Bracketed operand, represented as the unary '[]' operation """
        return AUnop('[]', exp)

    def handle_ins(self, id0, operands):
        """ Emit an instruction with the given opcode and operands """
        # AInstruction requires the operand list as second argument
        ins = AInstruction(id0, operands)
        self.emit(ins)

    def p_label(self, lname, cn):
        """ Label definition: emit it (cn is the ':' token) """
        lab = ALabel(lname)
        self.emit(lab)

    def p_binop(self, exp1, op, exp2):
        return ABinop(op, exp1, exp2)

    def p_symbol(self, name):
        return ASymbol(name)

    def p_number(self, n):
        n = int(n)
        return ANumber(n)

    # Top level interface:
    def emit(self, a):
        """ Emit a parsed instruction """
        self.output.append(a)

    # Determine the bit pattern from a lookup table:
    # TODO

    def parse_line(self, line):
        """ Parse line into asm AST """
        tokens = tokenize(line)
        self.p.parse(tokens)

    def assemble(self, asmsrc):
        """ Assemble this source snippet """
        for line in asmsrc.split('\n'):
            self.assemble_line(line)
        self.back_patch()

    def assemble_line(self, line):
        """
            Assemble a single source line.
            Do not take newlines into account
        """
        # Remember where the output of this line starts, so that only the
        # nodes emitted for this line are handed to assemble_aast.
        first_new = len(self.output)
        self.parse_line(line)
        self.assemble_aast(self.output[first_new:])

    def assemble_aast(self, at):
        """
            Assemble a parsed asm line.
            at is the list of nodes emitted for that line. Not implemented.
        """
        pass

    def back_patch(self):
        """ Fix references to earlier labels. Not implemented. """
        pass


if __name__ == '__main__':
    # When run as main file, try to grab command line arguments:
    parser = argparse.ArgumentParser(description="Assembler")
    parser.add_argument('sourcefile', type=argparse.FileType('r'),
                        help='the source file to assemble')
    args = parser.parse_args()
    a = Assembler()
    # Close the source file as soon as it has been read
    with args.sourcefile as f:
        src = f.read()
    a.assemble(src)