Mercurial > lcfOS
view python/libasm.py @ 169:ee0d30533dae
Added more tests and improved the diagnostic update
author | Windel Bouwman |
---|---|
date | Sat, 23 Mar 2013 18:34:41 +0100 |
parents | 5e1dd04cb61c |
children | 6b2bec5653f1 |
line wrap: on
line source
"""Small assembler front end: tokenizer, lexer, and a parser skeleton."""

import collections
import re


# Different instruction sets:

class InstructionSet:
    """Base class for instruction set descriptions."""


class X86(InstructionSet):
    """Marker class for the x86 instruction set."""


# Generic assembler:

class CompilerException(Exception):
    """Raised when the source text contains a character that cannot be lexed.

    Attributes:
        msg: human readable description of the problem.
        row: 1-based line number where the problem was found.
        col: 0-based column offset within that line.
    """

    def __init__(self, msg, row=0, col=0):
        super().__init__(msg)
        self.msg = msg
        self.row = row
        self.col = col


class SourceLocation:
    """A single position in the source, stored as ``pos``."""

    def __init__(self, x):
        self.pos = x


class SourceRange:
    """A span in the source delimited by the two endpoints ``p1`` and ``p2``."""

    def __init__(self, p1, p2):
        self.p1 = p1
        self.p2 = p2


# Token is used in the lexical analyzer:
Token = collections.namedtuple('Token', 'typ val row col')

keywords = ['global', 'db']

# Order matters: the alternatives are tried left to right, so REAL and
# HEXNUMBER must come before NUMBER.
_TOK_SPEC = [
    ('REAL', r'\d+\.\d+'),
    ('HEXNUMBER', r'0x[\da-fA-F]+'),
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z][A-Za-z\d_]*'),
    ('NEWLINE', r'\n'),
    ('SKIP', r'[ \t]'),
    # LEESTEKEN is Dutch for "punctuation mark".
    ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
    ('STRING', r"'.*?'"),
]

# Compiled once at import time instead of on every tokenize() call.
_gettok = re.compile(
    '|'.join('(?P<%s>%s)' % pair for pair in _TOK_SPEC)).match


def tokenize(s):
    """Generate the tokens found in the source string ``s``.

    Based on the tokenizer example in the Python ``re`` documentation.

    Each token is a ``Token(typ, val, row, col)`` where ``row`` is the
    1-based line number and ``col`` the 0-based offset within that line.
    Conversions applied to the raw match:

    * identifiers listed in ``keywords`` use the keyword itself as ``typ``;
    * punctuation uses the matched text as ``typ``;
    * decimal and hexadecimal numbers become ``NUMBER`` tokens with an
      ``int`` value, reals become ``REAL`` tokens with a ``float`` value;
    * strings lose their surrounding quotes.

    Newlines and blanks produce no tokens.  The final token is always
    ``Token('END', '', <last line>, 0)``.

    Raises:
        CompilerException: when a character matches none of the patterns.
    """
    line = 1
    pos = line_start = 0
    mo = _gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # The next line starts *after* the newline character; using the
            # start of the match would shift every later column by one.
            line_start = mo.end()
            line += 1
        elif typ not in ('SKIP', 'COMMENTS'):
            # 'COMMENTS' has no pattern in _TOK_SPEC at the moment; it is kept
            # here so that adding one later silently drops such matches.
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            yield Token(typ, val, line, mo.start() - line_start)
        pos = mo.end()
        mo = _gettok(s, pos)
    if pos != len(s):
        col = pos - line_start
        raise CompilerException(
            'Unexpected character {0}'.format(s[pos]), line, col)
    yield Token('END', '', line, 0)


class Lexer:
    """One-token-lookahead wrapper around :func:`tokenize`."""

    def __init__(self, src):
        self.tokens = tokenize(src)
        self.curTok = next(self.tokens)

    def eat(self):
        """Return the current token and advance to the next one.

        Once the ``END`` token is reached it stays current, so repeated calls
        keep returning ``END`` instead of leaking ``StopIteration`` from the
        exhausted generator.
        """
        t = self.curTok
        if t.typ != 'END':
            self.curTok = next(self.tokens)
        return t

    @property
    def Peak(self):
        """The current (not yet consumed) token."""
        return self.curTok


class Parser:
    """Parser skeleton that walks the token stream of a :class:`Lexer`.

    NOTE: the grammar is not implemented yet; ``parseLine`` only consumes a
    single leading token plus an optional trailing comment and returns None.
    """

    def __init__(self, lxr):
        self.lxr = lxr

    def parse(self):
        """Consume all tokens up to ``END``.

        Returns:
            A list with every non-None result of ``parseLine``.
        """
        instructions = []
        while self.lxr.Peak.typ != 'END':
            ins = self.parseLine()
            if ins is not None:
                instructions.append(ins)
        return instructions

    def parseLine(self):
        """Consume a label token and, if one follows, a ``;`` comment."""
        self.parseLabel()
        # Compare the token *type*: a Token tuple never equals a plain string.
        if self.lxr.Peak.typ == ';':
            self.eatComments()

    def parseLabel(self):
        """Consume one token."""
        self.lxr.eat()

    def eatComments(self):
        """Discard the remaining tokens on the row of the current token.

        NOTE: the comment text is still run through the tokenizer, so a
        comment containing a character the tokenizer rejects raises there.
        """
        row = self.lxr.Peak.row
        while self.lxr.Peak.typ != 'END' and self.lxr.Peak.row == row:
            self.lxr.eat()


class Assembler:
    """Front end that ties the lexer and parser together."""

    def assemble(self, asmsrc):
        """Lex and parse ``asmsrc`` and return the result of the parser."""
        print('assembling', asmsrc)
        lxr = Lexer(asmsrc)
        prsr = Parser(lxr)
        instructions = prsr.parse()
        return instructions