Mercurial > lcfOS
comparison python/asm.py @ 318:e84047f29c78
Add burg and yacc initial attempts
author: Windel Bouwman
date: Tue, 31 Dec 2013 12:38:15 +0100
parents: 084cccaa5deb
children: 8d07a4254f04
comparison
equal
deleted
inserted
replaced
317:e30a77ae359b | 318:e84047f29c78 |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 | 2 |
3 import re, argparse | 3 import re |
4 import argparse | |
4 import pyyacc | 5 import pyyacc |
5 from ppci import Token, CompilerError, SourceLocation | 6 from ppci import Token, CompilerError, SourceLocation |
6 from target import Target, Label | 7 from target import Target, Label |
7 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber | 8 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber |
8 | 9 |
9 def tokenize(s): | 10 def tokenize(s): |
10 """ | 11 """ |
11 Tokenizer, generates an iterator that | 12 Tokenizer, generates an iterator that |
12 returns tokens! | 13 returns tokens! |
13 | 14 |
14 This GREAT example was taken from python re doc page! | 15 This GREAT example was taken from python re doc page! |
15 """ | 16 """ |
16 tok_spec = [ | 17 tok_spec = [ |
17 ('REAL', r'\d+\.\d+'), | 18 ('REAL', r'\d+\.\d+'), |
18 ('HEXNUMBER', r'0x[\da-fA-F]+'), | 19 ('HEXNUMBER', r'0x[\da-fA-F]+'), |
19 ('NUMBER', r'\d+'), | 20 ('NUMBER', r'\d+'), |
20 ('ID', r'[A-Za-z][A-Za-z\d_]*'), | 21 ('ID', r'[A-Za-z][A-Za-z\d_]*'), |
21 ('SKIP', r'[ \t]'), | 22 ('SKIP', r'[ \t]'), |
22 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), | 23 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), |
23 ('STRING', r"'.*?'"), | 24 ('STRING', r"'.*?'"), |
24 ('COMMENT', r";.*") | 25 ('COMMENT', r";.*") |
25 ] | 26 ] |
26 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) | 27 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) |
27 gettok = re.compile(tok_re).match | 28 gettok = re.compile(tok_re).match |
28 line = 1 | 29 line = 1 |
29 pos = line_start = 0 | 30 pos = line_start = 0 |
30 mo = gettok(s) | 31 mo = gettok(s) |
31 while mo is not None: | 32 while mo is not None: |
32 typ = mo.lastgroup | 33 typ = mo.lastgroup |
33 val = mo.group(typ) | 34 val = mo.group(typ) |
34 if typ == 'NEWLINE': | 35 if typ == 'NEWLINE': |
35 line_start = pos | 36 line_start = pos |
36 line += 1 | 37 line += 1 |
49 col = mo.start() - line_start | 50 col = mo.start() - line_start |
50 loc = SourceLocation('', line, col, 0) # TODO retrieve length? | 51 loc = SourceLocation('', line, col, 0) # TODO retrieve length? |
51 yield Token(typ, val, loc) | 52 yield Token(typ, val, loc) |
52 pos = mo.end() | 53 pos = mo.end() |
53 mo = gettok(s, pos) | 54 mo = gettok(s, pos) |
54 if pos != len(s): | 55 if pos != len(s): |
55 col = pos - line_start | 56 col = pos - line_start |
56 loc = SourceLocation('', line, col, 0) | 57 loc = SourceLocation('', line, col, 0) |
57 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) | 58 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) |
59 yield Token('EOF', pyyacc.EOF) | |
58 | 60 |
59 | 61 |
60 class Lexer: | 62 class Lexer: |
61 def __init__(self, src): | 63 def __init__(self, src): |
62 self.tokens = tokenize(src) | 64 self.tokens = tokenize(src) |
63 self.curTok = self.tokens.__next__() | 65 self.curTok = self.tokens.__next__() |
64 | 66 |
65 def eat(self): | 67 def next_token(self): |
66 t = self.curTok | 68 t = self.curTok |
67 self.curTok = self.tokens.__next__() | 69 if t.typ != 'EOF': |
70 self.curTok = self.tokens.__next__() | |
68 return t | 71 return t |
69 | |
70 @property | |
71 def Peak(self): | |
72 return self.curTok | |
73 | 72 |
74 | 73 |
75 class Parser: | 74 class Parser: |
76 def __init__(self): | 75 def __init__(self): |
77 # Construct a parser given a grammar: | 76 # Construct a parser given a grammar: |
78 ident = lambda x: x # Identity helper function | 77 ident = lambda x: x # Identity helper function |
79 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}']) | 78 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}', |
79 pyyacc.EOF]) | |
80 g.add_production('asmline', ['asmline2']) | 80 g.add_production('asmline', ['asmline2']) |
81 g.add_production('asmline', ['asmline2', 'COMMENT']) | 81 g.add_production('asmline', ['asmline2', 'COMMENT']) |
82 g.add_production('asmline2', ['label', 'instruction']) | 82 g.add_production('asmline2', ['label', 'instruction']) |
83 g.add_production('asmline2', ['instruction']) | 83 g.add_production('asmline2', ['instruction']) |
84 g.add_production('asmline2', ['label']) | 84 g.add_production('asmline2', ['label']) |
85 g.add_production('asmline2', []) | 85 g.add_production('asmline2', []) |
86 g.add_production('optcomment', []) | |
87 g.add_production('optcomment', ['COMMENT']) | |
88 g.add_production('label', ['ID', ':'], self.p_label) | 86 g.add_production('label', ['ID', ':'], self.p_label) |
89 g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) | 87 g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) |
90 g.add_production('instruction', ['opcode'], self.p_ins_2) | 88 g.add_production('instruction', ['opcode'], self.p_ins_2) |
91 g.add_production('opcode', ['ID'], ident) | 89 g.add_production('opcode', ['ID'], lambda x: x.val) |
92 g.add_production('operands', ['operand'], self.p_operands_1) | 90 g.add_production('operands', ['operand'], self.p_operands_1) |
93 g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) | 91 g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) |
94 g.add_production('operand', ['expression'], ident) | 92 g.add_production('operand', ['expression'], ident) |
95 g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) | 93 g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) |
96 g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op) | 94 g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op) |
97 g.add_production('listitems', ['expression'], self.p_listitems_1) | 95 g.add_production('listitems', ['expression'], self.p_listitems_1) |
98 g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) | 96 g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) |
99 g.add_production('expression', ['term'], ident) | 97 g.add_production('expression', ['term'], ident) |
100 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) | 98 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) |
101 g.add_production('addop', ['-'], ident) | 99 g.add_production('addop', ['-'], lambda x: x.val) |
102 g.add_production('addop', ['+'], ident) | 100 g.add_production('addop', ['+'], lambda x: x.val) |
103 g.add_production('mulop', ['*'], ident) | 101 g.add_production('mulop', ['*'], lambda x: x.val) |
104 g.add_production('term', ['factor'], ident) | 102 g.add_production('term', ['factor'], ident) |
105 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) | 103 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) |
106 g.add_production('factor', ['ID'], lambda name: ASymbol(name)) | 104 g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) |
107 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num))) | 105 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) |
108 g.start_symbol = 'asmline' | 106 g.start_symbol = 'asmline' |
109 self.p = g.genParser() | 107 self.p = g.genParser() |
110 | 108 |
111 # Parser handlers: | 109 # Parser handlers: |
112 def p_ins_1(self, opc, ops): | 110 def p_ins_1(self, opc, ops): |
113 ins = AInstruction(opc, ops) | 111 ins = AInstruction(opc, ops) |
114 self.emit(ins) | 112 self.emit(ins) |
113 | |
115 def p_ins_2(self, opc): | 114 def p_ins_2(self, opc): |
116 self.p_ins_1(opc, []) | 115 self.p_ins_1(opc, []) |
116 | |
117 def p_operands_1(self, op1): | 117 def p_operands_1(self, op1): |
118 return [op1] | 118 return [op1] |
119 | |
119 def p_operands_2(self, ops, comma, op2): | 120 def p_operands_2(self, ops, comma, op2): |
120 assert type(ops) is list | 121 assert type(ops) is list |
121 ops.append(op2) | 122 ops.append(op2) |
122 return ops | 123 return ops |
123 | 124 |
129 lis.append(li2) | 130 lis.append(li2) |
130 return lis | 131 return lis |
131 | 132 |
132 def p_list_op(self, brace_open, lst, brace_close): | 133 def p_list_op(self, brace_open, lst, brace_close): |
133 return AUnop('{}', lst) | 134 return AUnop('{}', lst) |
135 | |
134 def p_mem_op(self, brace_open, exp, brace_close): | 136 def p_mem_op(self, brace_open, exp, brace_close): |
135 return AUnop('[]', exp) | 137 return AUnop('[]', exp) |
138 | |
136 def p_label(self, lname, cn): | 139 def p_label(self, lname, cn): |
137 lab = ALabel(lname) | 140 lab = ALabel(lname.val) |
138 self.emit(lab) | 141 self.emit(lab) |
142 | |
139 def p_binop(self, exp1, op, exp2): | 143 def p_binop(self, exp1, op, exp2): |
140 return ABinop(op, exp1, exp2) | 144 return ABinop(op, exp1, exp2) |
141 | 145 |
142 def parse(self, tokens, emitter): | 146 def parse(self, lexer, emitter): |
143 self.emit = emitter | 147 self.emit = emitter |
144 self.p.parse(tokens) | 148 self.p.parse(lexer) |
145 | 149 |
146 # Pre construct parser to save time: | 150 # Pre construct parser to save time: |
147 asmParser = Parser() | 151 asmParser = Parser() |
148 | 152 |
149 class Assembler: | 153 class Assembler: |
161 """ Emit a parsed instruction """ | 165 """ Emit a parsed instruction """ |
162 self.stack.append(a) | 166 self.stack.append(a) |
163 | 167 |
164 def parse_line(self, line): | 168 def parse_line(self, line): |
165 """ Parse line into asm AST """ | 169 """ Parse line into asm AST """ |
166 tokens = tokenize(line) | 170 tokens = Lexer(line) |
167 self.p.parse(tokens, self.emit) | 171 self.p.parse(tokens, self.emit) |
168 | 172 |
169 def assemble(self, asmsrc): | 173 def assemble(self, asmsrc): |
170 """ Assemble this source snippet """ | 174 """ Assemble this source snippet """ |
171 for line in asmsrc.split('\n'): | 175 for line in asmsrc.split('\n'): |
172 self.assemble_line(line) | 176 self.assemble_line(line) |
173 | 177 |
174 def assemble_line(self, line): | 178 def assemble_line(self, line): |
175 """ | 179 """ |
176 Assemble a single source line. | 180 Assemble a single source line. |
177 Do not take newlines into account | 181 Do not take newlines into account |
178 """ | 182 """ |
179 self.parse_line(line) | 183 self.parse_line(line) |
180 self.assemble_aast() | 184 self.assemble_aast() |
181 | 185 |
182 def assemble_aast(self): | 186 def assemble_aast(self): |
183 """ Assemble a parsed asm line """ | 187 """ Assemble a parsed asm line """ |
184 # TODO | |
185 if not self.target: | 188 if not self.target: |
186 raise CompilerError('Cannot assemble without target') | 189 raise CompilerError('Cannot assemble without target') |
187 while self.stack: | 190 while self.stack: |
188 vi = self.stack.pop(0) | 191 vi = self.stack.pop(0) |
189 if type(vi) is AInstruction: | 192 if type(vi) is AInstruction: |
197 | 200 |
198 | 201 |
199 if __name__ == '__main__': | 202 if __name__ == '__main__': |
200 # When run as main file, try to grab command line arguments: | 203 # When run as main file, try to grab command line arguments: |
201 parser = argparse.ArgumentParser(description="Assembler") | 204 parser = argparse.ArgumentParser(description="Assembler") |
202 parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble') | 205 parser.add_argument('sourcefile', type=argparse.FileType('r'), |
206 help='the source file to assemble') | |
203 args = parser.parse_args() | 207 args = parser.parse_args() |
204 a = Assembler() | 208 a = Assembler() |
205 obj = a.assemble(args.sourcefile.read()) | 209 obj = a.assemble(args.sourcefile.read()) |
206 |