Mercurial > lcfOS
diff python/libasm.py @ 191:6b2bec5653f1
Added assembler testset
author | Windel Bouwman |
---|---|
date | Sun, 26 May 2013 15:28:07 +0200 |
parents | 5e1dd04cb61c |
children | f091e7d70996 |
line wrap: on
line diff
--- a/python/libasm.py Sat May 25 15:15:42 2013 +0200
+++ b/python/libasm.py Sun May 26 15:28:07 2013 +0200
@@ -1,4 +1,6 @@
-import collections, re
+import re
+import pyyacc
+from ppci import Token, CompilerError, SourceLocation
 
 # Different instruction sets:
 class InstructionSet:
@@ -8,19 +10,6 @@
     pass
 
 # Generic assembler:
-
-class SourceLocation:
-    def __init__(self, x):
-        self.pos = x
-
-class SourceRange:
-    def __init__(self, p1, p2):
-        self.p1 = p1
-        self.p2 = p2
-
-# Token is used in the lexical analyzer:
-Token = collections.namedtuple('Token', 'typ val row col')
-
 keywords = ['global', 'db']
 
 def tokenize(s):
@@ -35,13 +24,11 @@
         ('HEXNUMBER', r'0x[\da-fA-F]+'),
         ('NUMBER', r'\d+'),
         ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-        ('NEWLINE', r'\n'),
         ('SKIP', r'[ \t]'),
-        ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
+        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
         ('STRING', r"'.*?'")
     ]
     tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-    print(tok_re)
     gettok = re.compile(tok_re).match
     line = 1
     pos = line_start = 0
@@ -69,13 +56,15 @@
                 val = float(val)
             elif typ == 'STRING':
                 val = val[1:-1]
-            yield Token(typ, val, line, mo.start()-line_start)
+            col = mo.start() - line_start
+            loc = SourceLocation(line, col, 0) # TODO retrieve length?
+            yield Token(typ, val, loc)
         pos = mo.end()
         mo = gettok(s, pos)
     if pos != len(s):
         col = pos - line_start
-        raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col)
-    yield Token('END', '', line, 0)
+        loc = SourceLocation(line, col, 0)
+        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
 
 class Lexer:
     def __init__(self, src):
@@ -89,28 +78,40 @@
     def Peak(self):
         return self.curTok
 
-class Parser:
-    def __init__(self, lxr):
-        self.lxr = lxr
-    def parse(self):
-        t = self.lxr.eat()
+class Assembler:
+    def __init__(self):
+        # Construct a parser given a grammar:
+        g = pyyacc.Grammar(['ID', ',', '[', ']', ':'])
 
-        while True:
-            ins = self.parseLine()
-            print(ins)
-            t = self.lxr.eat()
-    def parseLine(self):
-        self.parseLabel()
-        if self.lxr.Peak == ';':
-            self.eatComments()
-    def parseLabel(self):
-        i = self.lxr.eat()
+        g.add_production('asmline', ['label', 'instruction', 'operands'])
+        g.add_production('label', ['ID', ':'])
+        g.add_production('label', ['EPS'])
+        g.add_production('instruction', ['ID'])
+        g.add_production('operands', ['operand'])
+        g.add_production('operands', ['operands', ',', 'operand'])
+        g.add_production('operand', ['expression'])
+        g.add_production('expression', ['ID'])
+        # TODO: expand grammar
+        g.start_symbol = 'asmline'
 
-class Assembler:
-    def assemble(self, asmsrc):
-        print('assembling', asmsrc)
+        self.p = g.genParser()
+
+    def assemble(self, asmsrc):
         lxr = Lexer(asmsrc)
         prsr = Parser(lxr)
         instructions = prsr.parse()
         return instructions
 
+    def assembleLine(self, line):
+        """
+            Assemble a single source line.
+            Do not take newlines into account
+        """
+        tokens = tokenize(line)
+        self.p.parse(tokens)
+
+    def assembleAst(self, at):
+        """ Assemble a parsed asm line """
+        pass
+
+