diff python/asm.py @ 318:e84047f29c78

Add burg and yacc initial attempts
author Windel Bouwman
date Tue, 31 Dec 2013 12:38:15 +0100
parents 084cccaa5deb
children 8d07a4254f04
line wrap: on
line diff
--- a/python/asm.py	Sun Dec 22 15:50:59 2013 +0100
+++ b/python/asm.py	Tue Dec 31 12:38:15 2013 +0100
@@ -1,19 +1,20 @@
 #!/usr/bin/env python3
 
-import re, argparse
+import re
+import argparse
 import pyyacc
 from ppci import Token, CompilerError, SourceLocation
 from target import Target, Label
 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
 
 def tokenize(s):
-     """
+    """
        Tokenizer, generates an iterator that
        returns tokens!
 
        This GREAT example was taken from python re doc page!
-     """
-     tok_spec = [
+    """
+    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
@@ -22,13 +23,13 @@
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*")
-     ]
-     tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-     gettok = re.compile(tok_re).match
-     line = 1
-     pos = line_start = 0
-     mo = gettok(s)
-     while mo is not None:
+    ]
+    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+    gettok = re.compile(tok_re).match
+    line = 1
+    pos = line_start = 0
+    mo = gettok(s)
+    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
@@ -51,10 +52,11 @@
          yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
-     if pos != len(s):
+    if pos != len(s):
        col = pos - line_start
        loc = SourceLocation('', line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
+    yield Token('EOF', pyyacc.EOF)
 
 
 class Lexer:
@@ -62,33 +64,29 @@
         self.tokens = tokenize(src)
         self.curTok = self.tokens.__next__()
 
-    def eat(self):
+    def next_token(self):
         t = self.curTok
-        self.curTok = self.tokens.__next__()
+        if t.typ != 'EOF':
+            self.curTok = self.tokens.__next__()
         return t
 
-    @property
-    def Peak(self):
-        return self.curTok
-
 
 class Parser:
     def __init__(self):
         # Construct a parser given a grammar:
         ident = lambda x: x   # Identity helper function
-        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}'])
+        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}',
+            pyyacc.EOF])
         g.add_production('asmline', ['asmline2'])
         g.add_production('asmline', ['asmline2', 'COMMENT'])
         g.add_production('asmline2', ['label', 'instruction'])
         g.add_production('asmline2', ['instruction'])
         g.add_production('asmline2', ['label'])
         g.add_production('asmline2', [])
-        g.add_production('optcomment', [])
-        g.add_production('optcomment', ['COMMENT'])
         g.add_production('label', ['ID', ':'], self.p_label)
         g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
         g.add_production('instruction', ['opcode'], self.p_ins_2)
-        g.add_production('opcode', ['ID'], ident)
+        g.add_production('opcode', ['ID'], lambda x: x.val)
         g.add_production('operands', ['operand'], self.p_operands_1)
         g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
         g.add_production('operand', ['expression'], ident)
@@ -98,13 +96,13 @@
         g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2)
         g.add_production('expression', ['term'], ident)
         g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
-        g.add_production('addop', ['-'], ident)
-        g.add_production('addop', ['+'], ident)
-        g.add_production('mulop', ['*'], ident)
+        g.add_production('addop', ['-'], lambda x: x.val)
+        g.add_production('addop', ['+'], lambda x: x.val)
+        g.add_production('mulop', ['*'], lambda x: x.val)
         g.add_production('term', ['factor'], ident)
         g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
-        g.add_production('factor', ['ID'], lambda name: ASymbol(name))
-        g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num)))
+        g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
+        g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
         g.start_symbol = 'asmline'
         self.p = g.genParser()
 
@@ -112,10 +110,13 @@
     def p_ins_1(self, opc, ops):
         ins = AInstruction(opc, ops)
         self.emit(ins)
+
     def p_ins_2(self, opc):
         self.p_ins_1(opc, [])
+
     def p_operands_1(self, op1):
         return [op1]
+
     def p_operands_2(self, ops, comma, op2):
         assert type(ops) is list
         ops.append(op2)
@@ -131,17 +132,20 @@
 
     def p_list_op(self, brace_open, lst, brace_close):
         return AUnop('{}', lst)
+
     def p_mem_op(self, brace_open, exp, brace_close):
         return AUnop('[]', exp)
+
     def p_label(self, lname, cn):
-        lab = ALabel(lname)
+        lab = ALabel(lname.val)
         self.emit(lab)
+
     def p_binop(self, exp1, op, exp2):
         return ABinop(op, exp1, exp2)
 
-    def parse(self, tokens, emitter):
+    def parse(self, lexer, emitter):
         self.emit = emitter
-        self.p.parse(tokens)
+        self.p.parse(lexer)
 
 # Pre construct parser to save time:
 asmParser = Parser()
@@ -163,7 +167,7 @@
 
     def parse_line(self, line):
         """ Parse line into asm AST """
-        tokens = tokenize(line)
+        tokens = Lexer(line)
         self.p.parse(tokens, self.emit)
 
     def assemble(self, asmsrc):
@@ -172,16 +176,15 @@
             self.assemble_line(line)
 
     def assemble_line(self, line):
-        """ 
-            Assemble a single source line. 
-            Do not take newlines into account 
+        """
+            Assemble a single source line.
+            Do not take newlines into account
         """
         self.parse_line(line)
         self.assemble_aast()
 
     def assemble_aast(self):
         """ Assemble a parsed asm line """
-        # TODO
         if not self.target:
             raise CompilerError('Cannot assemble without target')
         while self.stack:
@@ -199,8 +202,8 @@
 if __name__ == '__main__':
     # When run as main file, try to grab command line arguments:
     parser = argparse.ArgumentParser(description="Assembler")
-    parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble')
+    parser.add_argument('sourcefile', type=argparse.FileType('r'),
+        help='the source file to assemble')
     args = parser.parse_args()
     a = Assembler()
     obj = a.assemble(args.sourcefile.read())
-