Mercurial > lcfOS
diff python/asm.py @ 318:e84047f29c78
Add burg and yacc initial attempts
author | Windel Bouwman |
---|---|
date | Tue, 31 Dec 2013 12:38:15 +0100 |
parents | 084cccaa5deb |
children | 8d07a4254f04 |
line wrap: on
line diff
--- a/python/asm.py Sun Dec 22 15:50:59 2013 +0100 +++ b/python/asm.py Tue Dec 31 12:38:15 2013 +0100 @@ -1,19 +1,20 @@ #!/usr/bin/env python3 -import re, argparse +import re +import argparse import pyyacc from ppci import Token, CompilerError, SourceLocation from target import Target, Label from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber def tokenize(s): - """ + """ Tokenizer, generates an iterator that returns tokens! This GREAT example was taken from python re doc page! - """ - tok_spec = [ + """ + tok_spec = [ ('REAL', r'\d+\.\d+'), ('HEXNUMBER', r'0x[\da-fA-F]+'), ('NUMBER', r'\d+'), @@ -22,13 +23,13 @@ ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), ('STRING', r"'.*?'"), ('COMMENT', r";.*") - ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - line = 1 - pos = line_start = 0 - mo = gettok(s) - while mo is not None: + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + while mo is not None: typ = mo.lastgroup val = mo.group(typ) if typ == 'NEWLINE': @@ -51,10 +52,11 @@ yield Token(typ, val, loc) pos = mo.end() mo = gettok(s, pos) - if pos != len(s): + if pos != len(s): col = pos - line_start loc = SourceLocation('', line, col, 0) raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) + yield Token('EOF', pyyacc.EOF) class Lexer: @@ -62,33 +64,29 @@ self.tokens = tokenize(src) self.curTok = self.tokens.__next__() - def eat(self): + def next_token(self): t = self.curTok - self.curTok = self.tokens.__next__() + if t.typ != 'EOF': + self.curTok = self.tokens.__next__() return t - @property - def Peak(self): - return self.curTok - class Parser: def __init__(self): # Construct a parser given a grammar: ident = lambda x: x # Identity helper function - g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}']) + g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}', + pyyacc.EOF]) g.add_production('asmline', ['asmline2']) g.add_production('asmline', ['asmline2', 'COMMENT']) g.add_production('asmline2', ['label', 'instruction']) g.add_production('asmline2', ['instruction']) g.add_production('asmline2', ['label']) g.add_production('asmline2', []) - g.add_production('optcomment', []) - g.add_production('optcomment', ['COMMENT']) g.add_production('label', ['ID', ':'], self.p_label) g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) g.add_production('instruction', ['opcode'], self.p_ins_2) - g.add_production('opcode', ['ID'], ident) + g.add_production('opcode', ['ID'], lambda x: x.val) g.add_production('operands', ['operand'], self.p_operands_1) g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) g.add_production('operand', ['expression'], ident) @@ -98,13 +96,13 @@ g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) g.add_production('expression', ['term'], ident) g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) - g.add_production('addop', ['-'], ident) - g.add_production('addop', ['+'], ident) - g.add_production('mulop', ['*'], ident) + g.add_production('addop', ['-'], lambda x: x.val) + g.add_production('addop', ['+'], lambda x: x.val) + g.add_production('mulop', ['*'], lambda x: x.val) g.add_production('term', ['factor'], ident) g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) - g.add_production('factor', ['ID'], lambda name: ASymbol(name)) - g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num))) + g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) + g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) g.start_symbol = 'asmline' self.p = g.genParser() @@ -112,10 +110,13 @@ def p_ins_1(self, opc, ops): ins = AInstruction(opc, ops) self.emit(ins) + def p_ins_2(self, opc): self.p_ins_1(opc, []) + def p_operands_1(self, op1): return [op1] + def p_operands_2(self, ops, comma, op2): assert type(ops) is list ops.append(op2) @@ -131,17 +132,20 @@ def p_list_op(self, brace_open, lst, brace_close): return AUnop('{}', lst) + def p_mem_op(self, brace_open, exp, brace_close): return AUnop('[]', exp) + def p_label(self, lname, cn): - lab = ALabel(lname) + lab = ALabel(lname.val) self.emit(lab) + def p_binop(self, exp1, op, exp2): return ABinop(op, exp1, exp2) - def parse(self, tokens, emitter): + def parse(self, lexer, emitter): self.emit = emitter - self.p.parse(tokens) + self.p.parse(lexer) # Pre construct parser to save time: asmParser = Parser() @@ -163,7 +167,7 @@ def parse_line(self, line): """ Parse line into asm AST """ - tokens = tokenize(line) + tokens = Lexer(line) self.p.parse(tokens, self.emit) def assemble(self, asmsrc): @@ -172,16 +176,15 @@ self.assemble_line(line) def assemble_line(self, line): - """ - Assemble a single source line. - Do not take newlines into account + """ + Assemble a single source line. + Do not take newlines into account """ self.parse_line(line) self.assemble_aast() def assemble_aast(self): """ Assemble a parsed asm line """ - # TODO if not self.target: raise CompilerError('Cannot assemble without target') while self.stack: @@ -199,8 +202,8 @@ if __name__ == '__main__': # When run as main file, try to grab command line arguments: parser = argparse.ArgumentParser(description="Assembler") - parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble') + parser.add_argument('sourcefile', type=argparse.FileType('r'), + help='the source file to assemble') args = parser.parse_args() a = Assembler() obj = a.assemble(args.sourcefile.read()) -