Mercurial > lcfOS
view python/libasm.py @ 194:b01429a5d695
Fixed test
author | Windel Bouwman |
---|---|
date | Wed, 29 May 2013 22:36:37 +0200 |
parents | f091e7d70996 |
children | 37ac6c016e0f |
line wrap: on
line source
import re
import pyyacc
from ppci import Token, CompilerError, SourceLocation


# Different instruction sets:
class InstructionSet:
    """Common base class for instruction set descriptions."""
    pass


class X86(InstructionSet):
    """Placeholder for the x86 instruction set."""
    pass


# Generic assembler:

# Identifiers that are turned into their own token type by tokenize().
keywords = ['global', 'db']


def tokenize(s):
    """Generate the tokens found in the assembly source text ``s``.

    The approach follows the tokenizer example from the Python ``re``
    module documentation: all token patterns are joined into a single
    regular expression of named groups, and the name of the group that
    matched determines the token type.

    Token values are converted as follows:

    * ``ID``: if the text is listed in ``keywords`` the token type becomes
      the keyword itself.
    * ``LEESTEKEN`` (punctuation): the token type becomes the matched text.
    * ``NUMBER``: value is converted with ``int``.
    * ``HEXNUMBER``: value is parsed base 16 and the type becomes ``NUMBER``.
    * ``REAL``: value is converted with ``float``.
    * ``STRING``: the surrounding single quotes are stripped.

    Spaces and tabs (``SKIP``) are consumed without producing a token.

    Yields:
        ``Token(typ, val, loc)`` objects, where ``loc`` is a
        ``SourceLocation(line, column, 0)``.

    Raises:
        CompilerError: when scanning stops before the end of ``s`` because
            no pattern matches at the current position.
    """
    # Order matters: alternatives are tried left to right, so REAL and
    # HEXNUMBER must precede NUMBER.
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
        ('STRING', r"'.*?'"),
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        # NOTE(review): tok_spec defines no 'NEWLINE' or 'COMMENTS' group,
        # so the first two branches cannot be taken at the moment; a
        # newline in the input ends scanning and is reported as an
        # unexpected character below.
        if typ == 'NEWLINE':
            line_start = pos
            line += 1
        elif typ == 'COMMENTS':
            pass
        elif typ != 'SKIP':
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Drop the '0x' prefix before the base 16 conversion.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            col = mo.start() - line_start
            loc = SourceLocation(line, col, 0)  # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # The regex stopped matching before the end of the input.
        col = pos - line_start
        loc = SourceLocation(line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)


class Lexer:
    """Token stream over a source text with one token of lookahead."""

    def __init__(self, src):
        """Start tokenizing ``src`` and load the first token.

        ``StopIteration`` propagates when ``src`` produces no tokens.
        """
        self.tokens = tokenize(src)
        self.curTok = next(self.tokens)

    def eat(self):
        """Return the current token and advance to the next one.

        The next token is fetched before returning, so ``StopIteration``
        propagates when no further token follows the current one.
        """
        t = self.curTok
        self.curTok = next(self.tokens)
        return t

    @property
    def Peak(self):
        """The current token, without consuming it."""
        return self.curTok


class Assembler:
    """Assembler front end that parses assembly lines with a pyyacc parser."""

    def handle_ins(self, id0):
        """Semantic action of the 'instruction' production: store it."""
        self.ins = id0

    def p_label(self, lname, cn):
        """Semantic action of the 'label' production: store the label name.

        ``cn`` is the second symbol of the production (the ':') and is
        not used.
        """
        self.label = lname

    def __init__(self):
        """Build the grammar for one assembly line and generate the parser."""
        # Construct a parser given a grammar:
        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-',
                            pyyacc.EPS])
        g.add_production('asmline', ['label', 'instruction', 'operands'])
        g.add_production('asmline', ['instruction', 'operands'])
        g.add_production('label', ['ID', ':'], self.p_label)
        g.add_production('instruction', ['ID'], self.handle_ins)
        g.add_production('operands', ['operand'])
        g.add_production('operands', ['operands', ',', 'operand'])
        g.add_production('operand', ['expression'])
        g.add_production('operand', ['[', 'expression', ']'])
        g.add_production('expression', ['term'])
        g.add_production('expression', ['expression', 'addop', 'term'])
        g.add_production('addop', ['-'])
        g.add_production('addop', ['+'])
        g.add_production('term', ['factor'])
        g.add_production('factor', ['ID'])
        g.add_production('factor', ['NUMBER'])
        # TODO: expand grammar
        g.start_symbol = 'asmline'

        self.p = g.genParser()

    def parse_line(self, line):
        """ Parse line into asm AST """
        tokens = tokenize(line)
        self.p.parse(tokens)
        # The AST is not built yet; a placeholder value is returned.
        aast = 1  # TODO
        return aast

    def assemble(self, asmsrc):
        """Assemble the source text ``asmsrc`` and return the instructions."""
        lxr = Lexer(asmsrc)
        # NOTE(review): no 'Parser' is defined or imported in the visible
        # part of this module - confirm where it is supposed to come from.
        prsr = Parser(lxr)
        instructions = prsr.parse()
        return instructions

    def assembleLine(self, line):
        """
            Assemble a single source line.
            Do not take newlines into account
        """
        # The parsing method is named parse_line; the previous call to
        # self.parseLine raised AttributeError.
        aast = self.parse_line(line)
        self.assemble_aast(aast)

    def assemble_aast(self, at):
        """ Assemble a parsed asm line """
        pass