# HG changeset patch # User Windel Bouwman # Date 1370080576 -7200 # Node ID 4a1ca1271241444217556fddc7e035c1330906fe # Parent ec2b423cdbea95a919d2839dbf99203eb88544f5 Rename libasm diff -r ec2b423cdbea -r 4a1ca1271241 python/asm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/asm.py Sat Jun 01 11:56:16 2013 +0200 @@ -0,0 +1,237 @@ +import re +import pyyacc +from ppci import Token, CompilerError, SourceLocation +import sys, argparse + + +# Different instruction sets: +class InstructionSet: + pass + +class X86(InstructionSet): + pass + +# Generic assembler: +keywords = ['global', 'db'] + +def tokenize(s): + """ + Tokenizer, generates an iterator that + returns tokens! + + This GREAT example was taken from python re doc page! + """ + tok_spec = [ + ('REAL', r'\d+\.\d+'), + ('HEXNUMBER', r'0x[\da-fA-F]+'), + ('NUMBER', r'\d+'), + ('ID', r'[A-Za-z][A-Za-z\d_]*'), + ('SKIP', r'[ \t]'), + ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'), + ('STRING', r"'.*?'") + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + while mo is not None: + typ = mo.lastgroup + val = mo.group(typ) + if typ == 'NEWLINE': + line_start = pos + line += 1 + elif typ == 'COMMENTS': + pass + elif typ != 'SKIP': + if typ == 'ID': + if val in keywords: + typ = val + elif typ == 'LEESTEKEN': + typ = val + elif typ == 'NUMBER': + val = int(val) + elif typ == 'HEXNUMBER': + val = int(val[2:], 16) + typ = 'NUMBER' + elif typ == 'REAL': + val = float(val) + elif typ == 'STRING': + val = val[1:-1] + col = mo.start() - line_start + loc = SourceLocation(line, col, 0) # TODO retrieve length? + yield Token(typ, val, loc) + pos = mo.end() + mo = gettok(s, pos) + if pos != len(s): + col = pos - line_start + loc = SourceLocation(line, col, 0) + raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) + +class Lexer: + def __init__(self, src): + self.tokens = tokenize(src) + self.curTok = self.tokens.__next__() + def eat(self): + t = self.curTok + self.curTok = self.tokens.__next__() + return t + @property + def Peak(self): + return self.curTok + +class ANode: + def __eq__(self, other): + return self.__repr__() == other.__repr__() + +class ALabel(ANode): + def __init__(self, name): + self.name = name + def __repr__(self): + return '{0}:'.format(self.name) + +class AInstruction(ANode): + def __init__(self, opcode, operands): + self.opcode = opcode + self.operands = operands + def __repr__(self): + ops = ', '.join(map(str, self.operands)) + return '{0} {1}'.format(self.opcode, ops) + +class AExpression(ANode): + def __add__(self, other): + assert isinstance(other, AExpression) + return ABinop('+', self, other) + def __mul__(self, other): + assert isinstance(other, AExpression) + return ABinop('*', self, other) + +class ABinop(AExpression): + def __init__(self, op, arg1, arg2): + self.op = op + self.arg1 = arg1 + self.arg2 = arg2 + def __repr__(self): + return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2) + +class AUnop(AExpression): + def __init__(self, op, arg): + self.op = op + self.arg = arg + def __repr__(self): + return '{0} {1}'.format(self.op, self.arg) + +class ASymbol(AExpression): + def __init__(self, name): + self.name = name + def __repr__(self): + return self.name + +class ANumber(AExpression): + def __init__(self, n): + self.n = n + def __repr__(self): + return '{0}'.format(self.n) + +class Assembler: + def __init__(self): + self.output = [] + # Construct a parser given a grammar: + ident = lambda x: x # Identity helper function + g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS]) + g.add_production('asmline', ['label', 'instruction']) + g.add_production('asmline', ['instruction']) + g.add_production('asmline', ['label']) + g.add_production('asmline', []) + g.add_production('label', ['ID', ':'], self.p_label) + g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) + g.add_production('instruction', ['opcode'], self.p_ins_2) + g.add_production('opcode', ['ID'], ident) + g.add_production('operands', ['operand'], self.p_operands_1) + g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) + g.add_production('operand', ['expression'], ident) + g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) + g.add_production('expression', ['term'], ident) + g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) + g.add_production('addop', ['-'], ident) + g.add_production('addop', ['+'], ident) + g.add_production('mulop', ['*'], ident) + g.add_production('term', ['factor'], ident) + g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) + g.add_production('factor', ['ID'], self.p_symbol) + g.add_production('factor', ['NUMBER'], self.p_number) + g.start_symbol = 'asmline' + self.p = g.genParser() + + # Parser handlers: + def p_ins_1(self, opc, ops): + ins = AInstruction(opc, ops) + self.emit(ins) + def p_ins_2(self, opc): + self.p_ins_1(opc, []) + def p_operands_1(self, op1): + return [op1] + def p_operands_2(self, ops, comma, op2): + assert type(ops) is list + ops.append(op2) + return ops + def p_mem_op(self, brace_open, exp, brace_close): + return AUnop('[]', exp) + def handle_ins(self, id0, operands): + ins = AInstruction(id0) + self.emit(ins) + def p_label(self, lname, cn): + lab = ALabel(lname) + self.emit(lab) + def p_binop(self, exp1, op, exp2): + return ABinop(op, exp1, exp2) + def p_symbol(self, name): + return ASymbol(name) + def p_number(self, n): + n = int(n) + return ANumber(n) + + # Top level interface: + def emit(self, a): + """ Emit a parsed instruction """ + self.output.append(a) + # Determine the bit pattern from a lookup table: + # TODO + + + def parse_line(self, line): + """ Parse line into asm AST """ + tokens = tokenize(line) + self.p.parse(tokens) + + def assemble(self, asmsrc): + """ Assemble this source snippet """ + for line in asmsrc.split('\n'): + self.assemble_line(line) + self.back_patch() + + def assemble_line(self, line): + """ + Assemble a single source line. + Do not take newlines into account + """ + self.parse_line(line) + self.assemble_aast() + + def assemble_aast(self, at): + """ Assemble a parsed asm line """ + pass + + def back_patch(self): + """ Fix references to earlier labels """ + pass + + +if __name__ == '__main__': + # When run as main file, try to grab command line arguments: + parser = argparse.ArgumentParser(description="Assembler") + parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble') + args = parser.parse_args() + a = Assembler() + obj = a.assemble(args.sourcefile.read()) + diff -r ec2b423cdbea -r 4a1ca1271241 python/libasm.py --- a/python/libasm.py Sat Jun 01 11:55:49 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,237 +0,0 @@ -import re -import pyyacc -from ppci import Token, CompilerError, SourceLocation -import sys, argparse - - -# Different instruction sets: -class InstructionSet: - pass - -class X86(InstructionSet): - pass - -# Generic assembler: -keywords = ['global', 'db'] - -def tokenize(s): - """ - Tokenizer, generates an iterator that - returns tokens! - - This GREAT example was taken from python re doc page! - """ - tok_spec = [ - ('REAL', r'\d+\.\d+'), - ('HEXNUMBER', r'0x[\da-fA-F]+'), - ('NUMBER', r'\d+'), - ('ID', r'[A-Za-z][A-Za-z\d_]*'), - ('SKIP', r'[ \t]'), - ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'), - ('STRING', r"'.*?'") - ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - line = 1 - pos = line_start = 0 - mo = gettok(s) - while mo is not None: - typ = mo.lastgroup - val = mo.group(typ) - if typ == 'NEWLINE': - line_start = pos - line += 1 - elif typ == 'COMMENTS': - pass - elif typ != 'SKIP': - if typ == 'ID': - if val in keywords: - typ = val - elif typ == 'LEESTEKEN': - typ = val - elif typ == 'NUMBER': - val = int(val) - elif typ == 'HEXNUMBER': - val = int(val[2:], 16) - typ = 'NUMBER' - elif typ == 'REAL': - val = float(val) - elif typ == 'STRING': - val = val[1:-1] - col = mo.start() - line_start - loc = SourceLocation(line, col, 0) # TODO retrieve length? - yield Token(typ, val, loc) - pos = mo.end() - mo = gettok(s, pos) - if pos != len(s): - col = pos - line_start - loc = SourceLocation(line, col, 0) - raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) - -class Lexer: - def __init__(self, src): - self.tokens = tokenize(src) - self.curTok = self.tokens.__next__() - def eat(self): - t = self.curTok - self.curTok = self.tokens.__next__() - return t - @property - def Peak(self): - return self.curTok - -class ANode: - def __eq__(self, other): - return self.__repr__() == other.__repr__() - -class ALabel(ANode): - def __init__(self, name): - self.name = name - def __repr__(self): - return '{0}:'.format(self.name) - -class AInstruction(ANode): - def __init__(self, opcode, operands): - self.opcode = opcode - self.operands = operands - def __repr__(self): - ops = ', '.join(map(str, self.operands)) - return '{0} {1}'.format(self.opcode, ops) - -class AExpression(ANode): - def __add__(self, other): - assert isinstance(other, AExpression) - return ABinop('+', self, other) - def __mul__(self, other): - assert isinstance(other, AExpression) - return ABinop('*', self, other) - -class ABinop(AExpression): - def __init__(self, op, arg1, arg2): - self.op = op - self.arg1 = arg1 - self.arg2 = arg2 - def __repr__(self): - return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2) - -class AUnop(AExpression): - def __init__(self, op, arg): - self.op = op - self.arg = arg - def __repr__(self): - return '{0} {1}'.format(self.op, self.arg) - -class ASymbol(AExpression): - def __init__(self, name): - self.name = name - def __repr__(self): - return self.name - -class ANumber(AExpression): - def __init__(self, n): - self.n = n - def __repr__(self): - return '{0}'.format(self.n) - -class Assembler: - def __init__(self): - self.output = [] - # Construct a parser given a grammar: - ident = lambda x: x # Identity helper function - g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS]) - g.add_production('asmline', ['label', 'instruction']) - g.add_production('asmline', ['instruction']) - g.add_production('asmline', ['label']) - g.add_production('asmline', []) - g.add_production('label', ['ID', ':'], self.p_label) - g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) - g.add_production('instruction', ['opcode'], self.p_ins_2) - g.add_production('opcode', ['ID'], ident) - g.add_production('operands', ['operand'], self.p_operands_1) - g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) - g.add_production('operand', ['expression'], ident) - g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) - g.add_production('expression', ['term'], ident) - g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) - g.add_production('addop', ['-'], ident) - g.add_production('addop', ['+'], ident) - g.add_production('mulop', ['*'], ident) - g.add_production('term', ['factor'], ident) - g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) - g.add_production('factor', ['ID'], self.p_symbol) - g.add_production('factor', ['NUMBER'], self.p_number) - g.start_symbol = 'asmline' - self.p = g.genParser() - - # Parser handlers: - def p_ins_1(self, opc, ops): - ins = AInstruction(opc, ops) - self.emit(ins) - def p_ins_2(self, opc): - self.p_ins_1(opc, []) - def p_operands_1(self, op1): - return [op1] - def p_operands_2(self, ops, comma, op2): - assert type(ops) is list - ops.append(op2) - return ops - def p_mem_op(self, brace_open, exp, brace_close): - return AUnop('[]', exp) - def handle_ins(self, id0, operands): - ins = AInstruction(id0) - self.emit(ins) - def p_label(self, lname, cn): - lab = ALabel(lname) - self.emit(lab) - def p_binop(self, exp1, op, exp2): - return ABinop(op, exp1, exp2) - def p_symbol(self, name): - return ASymbol(name) - def p_number(self, n): - n = int(n) - return ANumber(n) - - # Top level interface: - def emit(self, a): - """ Emit a parsed instruction """ - self.output.append(a) - # Determine the bit pattern from a lookup table: - # TODO - - - def parse_line(self, line): - """ Parse line into asm AST """ - tokens = tokenize(line) - self.p.parse(tokens) - - def assemble(self, asmsrc): - """ Assemble this source snippet """ - for line in asmsrc.split('\n'): - self.assemble_line(line) - self.back_patch() - - def assemble_line(self, line): - """ - Assemble a single source line. - Do not take newlines into account - """ - self.parse_line(line) - self.assemble_aast() - - def assemble_aast(self, at): - """ Assemble a parsed asm line """ - pass - - def back_patch(self): - """ Fix references to earlier labels """ - pass - - -if __name__ == '__main__': - # When run as main file, try to grab command line arguments: - parser = argparse.ArgumentParser(description="Assembler") - parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble') - args = parser.parse_args() - a = Assembler() - obj = a.assemble(args.sourcefile.read()) -