Mercurial > lcfOS
diff python/ppci/assembler.py @ 334:6f4753202b9a
Added more recipes
author | Windel Bouwman |
---|---|
date | Thu, 13 Feb 2014 22:02:08 +0100 |
parents | |
children | d1ecc493384e |
line wrap: on
line diff
# b/python/ppci/assembler.py
"""Line-oriented assembler front-end.

Contains a regex based tokenizer, a pyyacc based parser for a single
assembly line, and the ``Assembler`` driver that maps parsed lines onto
target instructions and labels.
"""

import re
import pyyacc
from . import Token, CompilerError, SourceLocation
from target import Target, Label
from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber

# Token specification, tried in order (first alternative that matches wins).
# REAL and HEXNUMBER precede NUMBER so that '1.5' and '0x10' are not split.
_TOKEN_SPEC = [
    ('REAL', r'\d+\.\d+'),
    ('HEXNUMBER', r'0x[\da-fA-F]+'),
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z][A-Za-z\d_]*'),
    ('SKIP', r'[ \t]'),
    ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
    ('STRING', r"'.*?'"),
    ('COMMENT', r";.*")
]

# Compiled once at import time instead of on every tokenize() call.
_TOKEN_RE = re.compile(
    '|'.join('(?P<%s>%s)' % pair for pair in _TOKEN_SPEC))


def tokenize(s):
    """Generate the tokens found in the string ``s``.

    The approach follows the tokenizer example of the Python ``re``
    documentation. Whitespace (space and tab) is skipped. Punctuation
    tokens use their own text as token type, hexadecimal numbers are
    reported as ``NUMBER`` tokens and the quotes are removed from strings.
    A final ``EOF`` token is always generated.

    Raises:
        CompilerError: when a character is found that starts no token.
    """
    # The token specification has no newline token, so the input is treated
    # as a single line: the line number stays 1 and columns count from 0.
    line = 1
    line_start = 0
    pos = 0
    mo = _TOKEN_RE.match(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ != 'SKIP':
            if typ == 'LEESTEKEN':
                # Punctuation: the token type is the punctuation itself.
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            col = mo.start() - line_start
            loc = SourceLocation('', line, col, 0)  # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = _TOKEN_RE.match(s, pos)
    if pos != len(s):
        # Matching stopped before the end of the input.
        col = pos - line_start
        loc = SourceLocation('', line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)


class Lexer:
    """Token source with one token of lookahead, built on ``tokenize``."""

    def __init__(self, src):
        self.tokens = tokenize(src)
        self.curTok = next(self.tokens)

    def next_token(self):
        """Return the current token and advance to the next one.

        Once the ``EOF`` token is reached it is returned on every call.
        """
        t = self.curTok
        if t.typ != 'EOF':
            self.curTok = next(self.tokens)
        return t


class Parser:
    """Parser for a single assembly line.

    Parsed labels and instructions are not returned, but handed to the
    emitter callable that is passed to ``parse``.
    """

    def __init__(self):
        # Construct a parser given a grammar:
        def ident(x):
            """Identity helper function."""
            return x

        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                            pyyacc.EPS, 'COMMENT', '{', '}', pyyacc.EOF])
        g.add_production('asmline', ['asmline2'])
        g.add_production('asmline', ['asmline2', 'COMMENT'])
        g.add_production('asmline2', ['label', 'instruction'])
        g.add_production('asmline2', ['instruction'])
        g.add_production('asmline2', ['label'])
        g.add_production('asmline2', [])
        g.add_production('label', ['ID', ':'], self.p_label)
        g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
        g.add_production('instruction', ['opcode'], self.p_ins_2)
        g.add_production('opcode', ['ID'], lambda x: x.val)
        g.add_production('operands', ['operand'], self.p_operands_1)
        g.add_production('operands', ['operands', ',', 'operand'],
                         self.p_operands_2)
        g.add_production('operand', ['expression'], ident)
        g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
        g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op)
        g.add_production('listitems', ['expression'], self.p_listitems_1)
        g.add_production('listitems', ['listitems', ',', 'expression'],
                         self.p_listitems_2)
        g.add_production('expression', ['term'], ident)
        g.add_production('expression', ['expression', 'addop', 'term'],
                         self.p_binop)
        g.add_production('addop', ['-'], lambda x: x.val)
        g.add_production('addop', ['+'], lambda x: x.val)
        g.add_production('mulop', ['*'], lambda x: x.val)
        g.add_production('term', ['factor'], ident)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
        g.add_production('factor', ['NUMBER'],
                         lambda num: ANumber(int(num.val)))
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        """Emit an instruction with opcode ``opc`` and operand list ``ops``."""
        ins = AInstruction(opc, ops)
        self.emit(ins)

    def p_ins_2(self, opc):
        """Emit an instruction without operands."""
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """Start an operand list with its first operand."""
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """Append operand ``op2`` to the operand list ``ops``."""
        assert isinstance(ops, list)
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """Start a ``{...}`` item list with its first item."""
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """Append item ``li2`` to the item list ``lis``."""
        assert isinstance(lis, list)
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """Wrap the items of a ``{...}`` operand into an ``AUnop`` node."""
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """Wrap the expression of a ``[...]`` operand into an ``AUnop`` node."""
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """Emit a label named after the ``ID`` token ``lname``."""
        lab = ALabel(lname.val)
        self.emit(lab)

    def p_binop(self, exp1, op, exp2):
        """Combine two expressions into an ``ABinop`` node."""
        return ABinop(op, exp1, exp2)

    def parse(self, lexer, emitter):
        """Parse the tokens of ``lexer``.

        ``emitter`` is called with every label and instruction found.
        """
        self.emit = emitter
        self.p.parse(lexer)


# Pre construct parser to save time:
asmParser = Parser()


class Assembler:
    """Assembles source text line by line for a target.

    Parsed labels and instructions are collected in ``self.stack`` and are
    converted and sent to ``stream`` (when one is given) by
    ``assemble_aast``.
    """

    def __init__(self, target=None, stream=None):
        self.target = target
        self.stream = stream
        self.restart()
        self.p = asmParser

    # Top level interface:
    def restart(self):
        """Drop all parsed items that were not assembled yet."""
        self.stack = []

    def emit(self, a):
        """ Emit a parsed instruction """
        self.stack.append(a)

    def parse_line(self, line):
        """ Parse line into asm AST """
        tokens = Lexer(line)
        self.p.parse(tokens, self.emit)

    def assemble(self, asmsrc):
        """Assemble this source snippet.

        ``asmsrc`` is either a string or an object with ``read`` and
        ``close`` methods; in the latter case the object is read completely
        and then closed, also when reading fails.
        """
        if not isinstance(asmsrc, str):
            src_file = asmsrc
            try:
                asmsrc = src_file.read()
            finally:
                src_file.close()
        for line in asmsrc.split('\n'):
            # Tolerate '\r\n' line endings: the tokenizer knows no '\r'.
            self.assemble_line(line.rstrip('\r'))

    def assemble_line(self, line):
        """
            Assemble a single source line.
            Do not take newlines into account
        """
        self.parse_line(line)
        self.assemble_aast()

    def assemble_aast(self):
        """Assemble a parsed asm line.

        Takes the parsed items from the stack in the order in which they
        were emitted. Instructions are mapped by the target, labels become
        ``Label`` objects. The results go to the stream, if there is one.

        Raises:
            CompilerError: when no target is set.
            NotImplementedError: for an item that is neither an instruction
                nor a label.
        """
        if not self.target:
            raise CompilerError('Cannot assemble without target')
        while self.stack:
            vi = self.stack.pop(0)
            if isinstance(vi, AInstruction):
                mi = self.target.mapInstruction(vi)
            elif isinstance(vi, ALabel):
                mi = Label(vi.name)
            else:
                raise NotImplementedError('{}'.format(vi))
            if self.stream:
                self.stream.emit(mi)