Mercurial > lcfOS
diff python/asm.py @ 334:6f4753202b9a
Added more recipes
author | Windel Bouwman |
---|---|
date | Thu, 13 Feb 2014 22:02:08 +0100 |
parents | 8d07a4254f04 |
children | 6df89163e114 |
line wrap: on
line diff
--- a/python/asm.py Sun Feb 09 15:27:57 2014 +0100 +++ b/python/asm.py Thu Feb 13 22:02:08 2014 +0100 @@ -1,205 +1,7 @@ #!/usr/bin/env python3 -import re import argparse -import pyyacc -from ppci import Token, CompilerError, SourceLocation -from target import Target, Label -from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber - -def tokenize(s): - """ - Tokenizer, generates an iterator that - returns tokens! - - This GREAT example was taken from python re doc page! - """ - tok_spec = [ - ('REAL', r'\d+\.\d+'), - ('HEXNUMBER', r'0x[\da-fA-F]+'), - ('NUMBER', r'\d+'), - ('ID', r'[A-Za-z][A-Za-z\d_]*'), - ('SKIP', r'[ \t]'), - ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), - ('STRING', r"'.*?'"), - ('COMMENT', r";.*") - ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - line = 1 - pos = line_start = 0 - mo = gettok(s) - while mo is not None: - typ = mo.lastgroup - val = mo.group(typ) - if typ == 'NEWLINE': - line_start = pos - line += 1 - elif typ != 'SKIP': - if typ == 'LEESTEKEN': - typ = val - elif typ == 'NUMBER': - val = int(val) - elif typ == 'HEXNUMBER': - val = int(val[2:], 16) - typ = 'NUMBER' - elif typ == 'REAL': - val = float(val) - elif typ == 'STRING': - val = val[1:-1] - col = mo.start() - line_start - loc = SourceLocation('', line, col, 0) # TODO retrieve length? - yield Token(typ, val, loc) - pos = mo.end() - mo = gettok(s, pos) - if pos != len(s): - col = pos - line_start - loc = SourceLocation('', line, col, 0) - raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) - yield Token('EOF', pyyacc.EOF) - - -class Lexer: - def __init__(self, src): - self.tokens = tokenize(src) - self.curTok = self.tokens.__next__() - - def next_token(self): - t = self.curTok - if t.typ != 'EOF': - self.curTok = self.tokens.__next__() - return t - - -class Parser: - def __init__(self): - # Construct a parser given a grammar: - ident = lambda x: x # Identity helper function - g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}', - pyyacc.EOF]) - g.add_production('asmline', ['asmline2']) - g.add_production('asmline', ['asmline2', 'COMMENT']) - g.add_production('asmline2', ['label', 'instruction']) - g.add_production('asmline2', ['instruction']) - g.add_production('asmline2', ['label']) - g.add_production('asmline2', []) - g.add_production('label', ['ID', ':'], self.p_label) - #g.add_production('label', []) - g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) - g.add_production('instruction', ['opcode'], self.p_ins_2) - #g.add_production('instruction', []) - g.add_production('opcode', ['ID'], lambda x: x.val) - g.add_production('operands', ['operand'], self.p_operands_1) - g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) - g.add_production('operand', ['expression'], ident) - g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) - g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op) - g.add_production('listitems', ['expression'], self.p_listitems_1) - g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) - g.add_production('expression', ['term'], ident) - g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) - g.add_production('addop', ['-'], lambda x: x.val) - g.add_production('addop', ['+'], lambda x: x.val) - g.add_production('mulop', ['*'], lambda x: x.val) - g.add_production('term', ['factor'], ident) - g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) - g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) - g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) - g.start_symbol = 'asmline' - self.p = g.genParser() - - # Parser handlers: - def p_ins_1(self, opc, ops): - ins = AInstruction(opc, ops) - self.emit(ins) - - def p_ins_2(self, opc): - self.p_ins_1(opc, []) - - def p_operands_1(self, op1): - return [op1] - - def p_operands_2(self, ops, comma, op2): - assert type(ops) is list - ops.append(op2) - return ops - - def p_listitems_1(self, li1): - return [li1] - - def p_listitems_2(self, lis, comma, li2): - assert type(lis) is list - lis.append(li2) - return lis - - def p_list_op(self, brace_open, lst, brace_close): - return AUnop('{}', lst) - - def p_mem_op(self, brace_open, exp, brace_close): - return AUnop('[]', exp) - - def p_label(self, lname, cn): - lab = ALabel(lname.val) - self.emit(lab) - - def p_binop(self, exp1, op, exp2): - return ABinop(op, exp1, exp2) - - def parse(self, lexer, emitter): - self.emit = emitter - self.p.parse(lexer) - -# Pre construct parser to save time: -asmParser = Parser() - -class Assembler: - def __init__(self, target=None, stream=None): - self.target = target - self.stream = stream - self.restart() - self.p = asmParser - - # Top level interface: - def restart(self): - self.stack = [] - - def emit(self, a): - """ Emit a parsed instruction """ - self.stack.append(a) - - def parse_line(self, line): - """ Parse line into asm AST """ - tokens = Lexer(line) - self.p.parse(tokens, self.emit) - - def assemble(self, asmsrc): - """ Assemble this source snippet """ - for line in asmsrc.split('\n'): - self.assemble_line(line) - - def assemble_line(self, line): - """ - Assemble a single source line. - Do not take newlines into account - """ - self.parse_line(line) - self.assemble_aast() - - def assemble_aast(self): - """ Assemble a parsed asm line """ - if not self.target: - raise CompilerError('Cannot assemble without target') - while self.stack: - vi = self.stack.pop(0) - if type(vi) is AInstruction: - mi = self.target.mapInstruction(vi) - elif type(vi) is ALabel: - mi = Label(vi.name) - else: - raise NotImplementedError('{}'.format(vi)) - if self.stream: - self.stream.emit(mi) - +from ppci.assembler import Assembler if __name__ == '__main__': # When run as main file, try to grab command line arguments: