lcfOS: comparison python/libasm.py @ 191:6b2bec5653f1
Added assembler testset
author | Windel Bouwman |
---|---|
date | Sun, 26 May 2013 15:28:07 +0200 |
parents | 5e1dd04cb61c |
children | f091e7d70996 |
190:65dda7e7e8bd | 191:6b2bec5653f1 |
---|---|
1 import collections, re | 1 import re |
2 import pyyacc | |
3 from ppci import Token, CompilerError, SourceLocation | |
2 | 4 |
3 # Different instruction sets: | 5 # Different instruction sets: |
4 class InstructionSet: | 6 class InstructionSet: |
5 pass | 7 pass |
6 | 8 |
7 class X86(InstructionSet): | 9 class X86(InstructionSet): |
8 pass | 10 pass |
9 | 11 |
10 # Generic assembler: | 12 # Generic assembler: |
11 | |
12 class SourceLocation: | |
13 def __init__(self, x): | |
14 self.pos = x | |
15 | |
16 class SourceRange: | |
17 def __init__(self, p1, p2): | |
18 self.p1 = p1 | |
19 self.p2 = p2 | |
20 | |
21 # Token is used in the lexical analyzer: | |
22 Token = collections.namedtuple('Token', 'typ val row col') | |
23 | |
24 keywords = ['global', 'db'] | 13 keywords = ['global', 'db'] |
25 | 14 |
26 def tokenize(s): | 15 def tokenize(s): |
27 """ | 16 """ |
28 Tokenizer, generates an iterator that | 17 Tokenizer, generates an iterator that |
33 tok_spec = [ | 22 tok_spec = [ |
34 ('REAL', r'\d+\.\d+'), | 23 ('REAL', r'\d+\.\d+'), |
35 ('HEXNUMBER', r'0x[\da-fA-F]+'), | 24 ('HEXNUMBER', r'0x[\da-fA-F]+'), |
36 ('NUMBER', r'\d+'), | 25 ('NUMBER', r'\d+'), |
37 ('ID', r'[A-Za-z][A-Za-z\d_]*'), | 26 ('ID', r'[A-Za-z][A-Za-z\d_]*'), |
38 ('NEWLINE', r'\n'), | |
39 ('SKIP', r'[ \t]'), | 27 ('SKIP', r'[ \t]'), |
40 ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'), | 28 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'), |
41 ('STRING', r"'.*?'") | 29 ('STRING', r"'.*?'") |
42 ] | 30 ] |
43 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) | 31 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) |
44 print(tok_re) | |
45 gettok = re.compile(tok_re).match | 32 gettok = re.compile(tok_re).match |
46 line = 1 | 33 line = 1 |
47 pos = line_start = 0 | 34 pos = line_start = 0 |
48 mo = gettok(s) | 35 mo = gettok(s) |
49 while mo is not None: | 36 while mo is not None: |
67 typ = 'NUMBER' | 54 typ = 'NUMBER' |
68 elif typ == 'REAL': | 55 elif typ == 'REAL': |
69 val = float(val) | 56 val = float(val) |
70 elif typ == 'STRING': | 57 elif typ == 'STRING': |
71 val = val[1:-1] | 58 val = val[1:-1] |
72 yield Token(typ, val, line, mo.start()-line_start) | 59 col = mo.start() - line_start |
60 loc = SourceLocation(line, col, 0) # TODO retrieve length? | |
61 yield Token(typ, val, loc) | |
73 pos = mo.end() | 62 pos = mo.end() |
74 mo = gettok(s, pos) | 63 mo = gettok(s, pos) |
75 if pos != len(s): | 64 if pos != len(s): |
76 col = pos - line_start | 65 col = pos - line_start |
77 raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col) | 66 loc = SourceLocation(line, col, 0) |
78 yield Token('END', '', line, 0) | 67 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) |
79 | 68 |
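The tokenizer above is the classic single-regex scanner: every token type becomes a named group, the groups are joined into one alternation, and the name of the group that matched is the token type (`LEESTEKEN` is Dutch for punctuation mark). The changeset also switches `Token` to carry a `SourceLocation` instead of bare row/col fields. A minimal runnable sketch of the same technique, with stand-in namedtuples since the real `Token` and `SourceLocation` come from ppci:

```python
import collections
import re

# Stand-ins for ppci's types, shaped after how the diff constructs them:
# SourceLocation(row, col, length) and Token(typ, val, loc). Assumed here.
SourceLocation = collections.namedtuple('SourceLocation', 'row col length')
Token = collections.namedtuple('Token', 'typ val loc')

def sketch_tokenize(s):
    # Order matters: HEXNUMBER must precede NUMBER, or the leading '0'
    # of '0xFF' would be claimed by NUMBER first.
    tok_spec = [
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    pos = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup          # name of the group that matched
        if typ != 'SKIP':
            yield Token(typ, mo.group(typ), SourceLocation(1, mo.start(), 0))
        pos = mo.end()
        mo = gettok(s, pos)

for tok in sketch_tokenize('db 0xFF'):
    print(tok.typ, tok.val)         # ID db, then HEXNUMBER 0xFF
```

The elided middle of `tokenize` is where the matched group is dispatched on; the sketch compresses that to the `lastgroup` lookup.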
80 class Lexer: | 69 class Lexer: |
81 def __init__(self, src): | 70 def __init__(self, src): |
82 self.tokens = tokenize(src) | 71 self.tokens = tokenize(src) |
83 self.curTok = self.tokens.__next__() | 72 self.curTok = self.tokens.__next__() |
87 return t | 76 return t |
88 @property | 77 @property |
89 def Peak(self): | 78 def Peak(self): |
90 return self.curTok | 79 return self.curTok |
91 | 80 |
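The comparison view skips the body of `eat` (it is unchanged between the two revisions), but together with the `Peak` property the class is a one-token-lookahead wrapper over the generator. A sketch of the elided method, assuming that shape:

```python
class Lexer:
    def __init__(self, src):
        self.tokens = tokenize(src)            # generator defined above
        self.curTok = self.tokens.__next__()   # prime the lookahead

    def eat(self):
        # Presumed body of the elided lines: hand out the current token
        # and advance the lookahead to the next one.
        t = self.curTok
        self.curTok = self.tokens.__next__()
        return t

    @property
    def Peak(self):
        # Inspect the current token without consuming it.
        return self.curTok
```

Note that `__next__` raises StopIteration once the token stream runs out; the old revision ended the stream with a trailing 'END' token, and no counterpart is visible in the new column.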
92 class Parser: | 81 class Assembler: |
93 def __init__(self, lxr): | 82 def __init__(self): |
94 self.lxr = lxr | 83 # Construct a parser given a grammar: |
95 def parse(self): | 84 g = pyyacc.Grammar(['ID', ',', '[', ']', ':']) |
96 t = self.lxr.eat() | |
97 | 85 |
98 while True: | 86 g.add_production('asmline', ['label', 'instruction', 'operands']) |
99 ins = self.parseLine() | 87 g.add_production('label', ['ID', ':']) |
100 print(ins) | 88 g.add_production('label', ['EPS']) |
101 t = self.lxr.eat() | 89 g.add_production('instruction', ['ID']) |
102 def parseLine(self): | 90 g.add_production('operands', ['operand']) |
103 self.parseLabel() | 91 g.add_production('operands', ['operands', ',', 'operand']) |
104 if self.lxr.Peak == ';': | 92 g.add_production('operand', ['expression']) |
105 self.eatComments() | 93 g.add_production('expression', ['ID']) |
106 def parseLabel(self): | 94 # TODO: expand grammar |
107 i = self.lxr.eat() | 95 g.start_symbol = 'asmline' |
108 | 96 |
109 class Assembler: | 97 self.p = g.genParser() |
110 def assemble(self, asmsrc): | 98 |
111 print('assembling', asmsrc) | 99 def assemble(self, asmsrc): |
112 lxr = Lexer(asmsrc) | 100 lxr = Lexer(asmsrc) |
113 prsr = Parser(lxr) | 101 prsr = Parser(lxr) |
114 instructions = prsr.parse() | 102 instructions = prsr.parse() |
115 return instructions | 103 return instructions |
116 | 104 |
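Construction-time parser building is the other substantive change: the ad-hoc `Parser` class on the left is replaced by a grammar fed to the project's own pyyacc parser generator. A condensed sketch of that pattern, using only the calls visible above (`Grammar`, `add_production`, `start_symbol`, `genParser`); `EPS` is the empty production, which is what makes the label optional:

```python
import pyyacc   # the project's parser generator, as imported above

g = pyyacc.Grammar(['ID', ',', ':'])            # terminal symbols
g.add_production('asmline', ['label', 'instruction'])
g.add_production('label', ['ID', ':'])          # "start:" style label
g.add_production('label', ['EPS'])              # ...or no label at all
g.add_production('instruction', ['ID'])         # e.g. "nop"
g.start_symbol = 'asmline'
p = g.genParser()                               # built once, reused per line
```

Building the parser once in the constructor keeps `assembleLine` cheap: per line it only has to tokenize and call `p.parse`.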
105 def assembleLine(self, line): | |
106 """ | |
107 Assemble a single source line. | |
108 Do not take newlines into account | |
109 """ | |
110 tokens = tokenize(line) | |
111 self.p.parse(tokens) | |
112 | |
113 def assembleAst(self, at): | |
114 """ Assemble a parsed asm line """ | |
115 pass | |
116 | |
117 |
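One loose end in the right-hand column: `assemble` still instantiates the `Parser` class that this changeset deletes, so calling it would raise a NameError; `assembleLine` is the path that actually works. A hypothetical line-based variant, not part of this changeset, that routes through it:

```python
def assemble(self, asmsrc):
    # Hypothetical rewrite: the old Parser class is gone in this
    # revision, so feed each source line to assembleLine instead.
    for line in asmsrc.split('\n'):
        self.assembleLine(line)
```

`assembleAst` is still a stub; encoding a parsed line into machine code would happen there.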