Mercurial > lcfOS
view python/ppci/assembler.py @ 347:742588fb8cd6 devel
Merge into devel branch
author | Windel Bouwman |
---|---|
date | Fri, 07 Mar 2014 17:10:21 +0100 |
parents | 3bb7dcfe5529 |
children | 19eacf4f7270 |
line wrap: on
line source
import re
import pyyacc
from . import Token, CompilerError, SourceLocation
from .target import Target, Label


def bit_type(value):
    """Return the name of the narrowest immediate class that holds *value*.

    The result is one of 'val3', 'val5', 'val8', 'val12', 'val16' or
    'val32'; 'valN' is chosen for the smallest listed N with
    ``value < 2**N``.

    *value* must satisfy ``0 <= value < 2**32``; this precondition is
    only checked with ``assert``, so callers handling user input should
    validate the range themselves (``tokenize`` does).
    """
    assert value < (2**32)
    assert value >= 0
    for width in (3, 5, 8, 12, 16):
        if value < (2**width):
            return 'val{}'.format(width)
    return 'val32'


# Token patterns, tried in this order (the first alternative that matches
# at the current position wins, so REAL and HEXNUMBER precede NUMBER).
_TOKEN_SPEC = [
    ('REAL', r'\d+\.\d+'),
    ('HEXNUMBER', r'0x[\da-fA-F]+'),
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z][A-Za-z\d_]*'),
    ('SKIP', r'[ \t]'),
    ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
    ('STRING', r"'.*?'"),
    ('COMMENT', r";.*"),
]

# Compiled once at import time instead of on every tokenize() call.
_GET_TOKEN = re.compile(
    '|'.join('(?P<%s>%s)' % pair for pair in _TOKEN_SPEC)).match


def tokenize(s, kws):
    """Generate the tokens of one line of assembly source.

    The structure follows the tokenizer example from the Python ``re``
    documentation.

    s   -- the text to tokenize.  There is no newline pattern, so a
           newline character in *s* is reported as an unexpected
           character; every location therefore carries line number 1.
    kws -- container of lower-case keywords; an identifier whose
           lower-cased spelling is in *kws* gets that spelling as its
           token type instead of 'ID'.

    Yields ``Token(typ, val, loc)`` objects:
      * punctuation: the type is the matched text itself;
      * decimal and hexadecimal numbers: the value is an int and the type
        is the size class computed by ``bit_type``;
      * REAL: the value is a float;
      * STRING: the value has the surrounding quotes removed;
      * COMMENT: the value is the comment text starting at ';';
      * blanks and tabs are skipped.
    The final token is ``Token('EOF', pyyacc.EOF)``.

    Raises CompilerError when a character cannot be matched or when a
    number does not fit in 32 bits.
    """
    line = 1
    line_start = 0
    pos = 0
    mo = _GET_TOKEN(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ != 'SKIP':
            if typ == 'LEESTEKEN':
                # Punctuation tokens are identified by their own text.
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            elif typ == 'ID':
                lowered = val.lower()
                if lowered in kws:
                    typ = lowered
            col = mo.start() - line_start
            loc = SourceLocation('', line, col, 0)   # TODO retrieve length?
            if typ == 'NUMBER':
                # Report oversized literals as a located compiler error
                # rather than tripping the assert inside bit_type (which
                # would vanish entirely under ``python -O``).
                if val >= 2**32:
                    raise CompilerError(
                        'Number {0} does not fit in 32 bits'.format(val),
                        loc)
                typ = bit_type(val)
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = _GET_TOKEN(s, pos)
    if pos != len(s):
        col = pos - line_start
        loc = SourceLocation('', line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)


class Lexer:
    """Token source with one token of lookahead, consumed by the parser."""

    def __init__(self, src, kws):
        """Start tokenizing *src* using keyword container *kws*.

        The first token is fetched immediately, so a CompilerError raised
        for the first token surfaces here.
        """
        self.tokens = tokenize(src, kws)
        self.curTok = next(self.tokens)

    def next_token(self):
        """Return the current token and advance to the next one.

        Once the 'EOF' token is reached it is returned on every call and
        the underlying generator is not advanced any further.
        """
        tok = self.curTok
        if tok.typ != 'EOF':
            self.curTok = next(self.tokens)
        return tok


class Parser:
    """Parser for a single assembly line, built from a pyyacc grammar.

    NOTE(review): the handlers below use AInstruction, AUnop, ABinop,
    ASymbol and ANumber, none of which is imported in the part of this
    module visible here -- confirm where they are supposed to come from.
    """

    def add_rule(self, prod, rhs, f):
        """Add a target-specific production whose action is *f*.

        *f* is called with ONE argument: the tuple of right-hand-side
        values.  For the 'instruction' nonterminal the result of *f* is
        passed to ``self.emit`` (and the action itself returns None); for
        any other nonterminal the result of *f* is returned as the
        semantic value.

        Creating the wrapper inside this method gives every rule its own
        binding of *f*, which a closure written directly inside the loop
        in ``__init__`` would not have.
        """
        if prod == 'instruction':
            def f_wrap(*args):
                self.emit(f(args))
        else:
            def f_wrap(*args):
                return f(args)
        self.g.add_production(prod, rhs, f_wrap)

    def __init__(self, kws, instruction_rules, emit):
        """Build the grammar and generate the parser.

        kws               -- keywords, added to the terminal symbols.
        instruction_rules -- iterable of ``(prod, rhs, f)`` triples that
                             are registered through ``add_rule``.
        emit              -- callable that receives labels and the results
                             of instruction actions.
        """
        # Construct a parser given a grammar:
        terminals = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                     pyyacc.EPS, 'COMMENT', '{', '}',
                     pyyacc.EOF, 'val32', 'val16', 'val12', 'val8', 'val5',
                     'val3']
        terminals.extend(kws)
        self.kws = kws
        g = pyyacc.Grammar(terminals)
        self.g = g

        # Global structure of assembly line:
        g.add_production('asmline', ['asmline2'])
        g.add_production('asmline', ['asmline2', 'COMMENT'])
        g.add_production('asmline2', ['label', 'instruction'])
        g.add_production('asmline2', ['instruction'])
        g.add_production('asmline2', ['label'])
        g.add_production('asmline2', [])
        g.add_production('label', ['ID', ':'], self.p_label)
        #g.add_production('label', [])

        # Add instruction rules for the target in question:
        for prod, rhs, f in instruction_rules:
            self.add_rule(prod, rhs, f)

        #g.add_production('instruction', [])
        g.add_production('expression', ['term'], lambda x: x)
        g.add_production('expression', ['expression', 'addop', 'term'],
                         self.p_binop)
        g.add_production('addop', ['-'], lambda x: x.val)
        g.add_production('addop', ['+'], lambda x: x.val)
        g.add_production('mulop', ['*'], lambda x: x.val)
        g.add_production('term', ['factor'], lambda x: x)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
        g.add_production('factor', ['NUMBER'],
                         lambda num: ANumber(int(num.val)))
        g.start_symbol = 'asmline'
        # The rule wrappers look up self.emit only when an action runs, so
        # assigning it after the rules were added is fine.
        self.emit = emit
        self.p = g.generate_parser()
        # print('length of table:', len(self.p.action_table))

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        """Emit an instruction with opcode *opc* and operand list *ops*."""
        ins = AInstruction(opc, ops)
        self.emit(ins)

    def p_ins_2(self, opc):
        """Emit an instruction that has no operands."""
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """Start an operand list with its first operand."""
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """Append *op2* to the operand list *ops* (in place) and return it."""
        assert isinstance(ops, list)
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """Start a list-item sequence with its first item."""
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """Append *li2* to the item list *lis* (in place) and return it."""
        assert isinstance(lis, list)
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """Wrap a braced list in a '{}' unary node."""
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """Wrap a bracketed expression in a '[]' unary node."""
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """Emit a Label named after the identifier token *lname*."""
        lab = Label(lname.val)
        self.emit(lab)

    def p_binop(self, exp1, op, exp2):
        """Build a binary-operation node for ``exp1 op exp2``."""
        return ABinop(op, exp1, exp2)

    def parse(self, lexer):
        """Parse the tokens delivered by *lexer*."""
        self.p.parse(lexer)


class Assembler:
    """Assembles source text for a target into an output stream."""

    def __init__(self, target):
        """Create an assembler for *target*, which must be a Target."""
        assert isinstance(target, Target)
        self.target = target
        # Output stream; only set while assemble() is running.
        self.stream = None
        self.parser = Parser(target.asm_keywords, target.assembler_rules,
                             self.emit)

    def emit(self, *args):
        """Forward the parser's output to the current output stream."""
        self.stream.emit(*args)

    # Top level interface:
    def parse_line(self, line):
        """Parse one line of text into assembly instructions."""
        tokens = Lexer(line, self.target.asm_keywords)
        self.parser.parse(tokens)

    def assemble(self, asmsrc, stream):
        """Assemble the source snippet *asmsrc* into *stream*.

        asmsrc -- either a string, or an object with a ``read`` method;
                  in the latter case its whole content is read and the
                  object is closed, also when reading fails.
        stream -- object whose ``emit`` method receives the parser output.

        The source is split on '\\n' and every line is parsed separately.
        ``self.stream`` is reset to None afterwards, also when parsing
        raises, so a failed run does not keep a reference to *stream*.
        """
        if hasattr(asmsrc, 'read'):
            source_file = asmsrc
            try:
                asmsrc = source_file.read()
            finally:
                source_file.close()
        # TODO: use generic newline??
        # TODO: the bothersome newline ...
        self.stream = stream
        try:
            for line in asmsrc.split('\n'):
                self.parse_line(line)
        finally:
            self.stream = None