Mercurial > lcfOS
diff python/ppci/assembler.py @ 341:4d204f6f7d4e devel
Rewrite of assembler parts
author | Windel Bouwman |
---|---|
date | Fri, 28 Feb 2014 18:07:14 +0100 |
parents | c7cc54c0dfdf |
children | 86b02c98a717 |
line wrap: on
line diff
```diff
--- a/python/ppci/assembler.py Sun Feb 23 16:24:01 2014 +0100
+++ b/python/ppci/assembler.py Fri Feb 28 18:07:14 2014 +0100
@@ -6,7 +6,16 @@
 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
 
 
-def tokenize(s):
+def bit_type(value):
+    assert value < (2**31)
+    assert value >= 0
+    t = 'val32'
+    for n in [8, 5, 3]:
+        if value < (2**n):
+            t = 'val{}'.format(n)
+    return t
+
+def tokenize(s, kws):
     """
        Tokenizer, generates an iterator that
        returns tokens!
@@ -46,8 +55,13 @@
                 val = float(val)
             elif typ == 'STRING':
                 val = val[1:-1]
+            elif typ == 'ID':
+                if val.lower() in kws: # ['r3', 'sp', 'add', 'yield', 'r4', 'r0', 'r1', 'sub', 'r5', 'r6', 'r2']:
+                    typ = val.lower()
             col = mo.start() - line_start
             loc = SourceLocation('', line, col, 0) # TODO retrieve length?
+            if typ == 'NUMBER':
+                typ = bit_type(val)
             yield Token(typ, val, loc)
             pos = mo.end()
             mo = gettok(s, pos)
@@ -59,8 +73,8 @@
 
 
 class Lexer:
-    def __init__(self, src):
-        self.tokens = tokenize(src)
+    def __init__(self, src, kws):
+        self.tokens = tokenize(src, kws)
         self.curTok = self.tokens.__next__()
 
     def next_token(self):
@@ -71,11 +85,26 @@
 
 
 class Parser:
-    def __init__(self, tokens, instruction_rules):
+    def add_rule(self, prod, rhs, f):
+        """ Helper function to add a rule, why this is required? """
+        if prod == 'instruction':
+            def f_wrap(*args):
+                i = f(args)
+                self.emit(i)
+        else:
+            def f_wrap(*rhs):
+                return f(rhs)
+        self.g.add_production(prod, rhs, f_wrap)
+
+    def __init__(self, kws, instruction_rules, emit):
         # Construct a parser given a grammar:
-        ident = lambda x: x # Identity helper function
-        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}',
-                            pyyacc.EOF])
+        tokens2 = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
+                   pyyacc.EPS, 'COMMENT', '{', '}',
+                   pyyacc.EOF, 'val32', 'val8', 'val5', 'val3']
+        tokens2.extend(kws)
+        self.kws = kws
+        g = pyyacc.Grammar(tokens2)
+        self.g = g
         # Global structure of assembly line:
         g.add_production('asmline', ['asmline2'])
         g.add_production('asmline', ['asmline2', 'COMMENT'])
@@ -88,27 +117,22 @@
 
         # Add instruction rules for the target in question:
         for prod, rhs, f in instruction_rules:
-            if prod is 'instruction':
-                def f_wrap(*rhs):
-                    i = f(rhs)
-                    self.emit(i)
-            else:
-                def f_wrap(*rhs):
-                    return f(rhs)
-            g.add_production(prod, rhs, f_wrap)
+            self.add_rule(prod, rhs, f)
 
         #g.add_production('instruction', [])
-        g.add_production('expression', ['term'], ident)
+        g.add_production('expression', ['term'], lambda x: x)
         g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
         g.add_production('addop', ['-'], lambda x: x.val)
         g.add_production('addop', ['+'], lambda x: x.val)
         g.add_production('mulop', ['*'], lambda x: x.val)
-        g.add_production('term', ['factor'], ident)
+        g.add_production('term', ['factor'], lambda x: x)
         g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
         g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
         g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
         g.start_symbol = 'asmline'
-        self.p = g.genParser()
+        self.emit = emit
+        self.p = g.generate_parser()
+        print('length of table:', len(self.p.action_table))
 
     # Parser handlers:
     def p_ins_1(self, opc, ops):
@@ -141,14 +165,13 @@
         return AUnop('[]', exp)
 
     def p_label(self, lname, cn):
-        lab = ALabel(lname.val)
+        lab = Label(lname.val)
         self.emit(lab)
 
     def p_binop(self, exp1, op, exp2):
         return ABinop(op, exp1, exp2)
 
-    def parse(self, lexer, emitter):
-        self.emit = emitter
+    def parse(self, lexer):
         self.p.parse(lexer)
 
 
@@ -157,12 +180,12 @@
         self.target = target
         assert isinstance(target, Target)
         self.stream = stream
-        self.parser = Parser(None, target.assembler_rules, self.stream.emit)
+        self.parser = Parser(target.asm_keywords, target.assembler_rules, self.stream.emit)
 
     # Top level interface:
     def parse_line(self, line):
         """ Parse line into assembly instructions """
-        tokens = Lexer(line)
+        tokens = Lexer(line, self.target.asm_keywords)
         self.parser.parse(tokens)
 
     def assemble(self, asmsrc):
```