diff python/ppci/assembler.py @ 341:4d204f6f7d4e devel

Rewrite of assembler parts
author Windel Bouwman
date Fri, 28 Feb 2014 18:07:14 +0100
parents c7cc54c0dfdf
children 86b02c98a717
line wrap: on
line diff
--- a/python/ppci/assembler.py	Sun Feb 23 16:24:01 2014 +0100
+++ b/python/ppci/assembler.py	Fri Feb 28 18:07:14 2014 +0100
@@ -6,7 +6,16 @@
 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
 
 
-def tokenize(s):
+def bit_type(value):  # choose the token type name for an integer literal from its magnitude
+    assert value < (2**31)  # together with the next assert: only 0 <= value < 2**31 is accepted
+    assert value >= 0
+    t = 'val32'  # result when value does not fit in 8 bits
+    for n in [8, 5, 3]:  # widths descend, so the last match (the narrowest fit) wins
+        if value < (2**n):
+            t = 'val{}'.format(n)
+    return t
+
+def tokenize(s, kws):
     """
        Tokenizer, generates an iterator that
        returns tokens!
@@ -46,8 +55,13 @@
            val = float(val)
          elif typ == 'STRING':
            val = val[1:-1]
+         elif typ == 'ID':
+            if val.lower() in kws: # ['r3', 'sp', 'add', 'yield', 'r4', 'r0', 'r1', 'sub', 'r5', 'r6', 'r2']:
+                typ = val.lower()
          col = mo.start() - line_start
          loc = SourceLocation('', line, col, 0)   # TODO retrieve length?
+         if typ == 'NUMBER':
+            typ = bit_type(val)
          yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
@@ -59,8 +73,8 @@
 
 
 class Lexer:
-    def __init__(self, src):
-        self.tokens = tokenize(src)
+    def __init__(self, src, kws):
+        self.tokens = tokenize(src, kws)
         self.curTok = self.tokens.__next__()
 
     def next_token(self):
@@ -71,11 +85,26 @@
 
 
 class Parser:
-    def __init__(self, tokens, instruction_rules):
+    def add_rule(self, prod, rhs, f):
+        """ Register one grammar rule; being a method (not an inline loop body) lets each f_wrap bind its own f. """
+        if prod == 'instruction':
+            def f_wrap(*args):  # instruction rules: f gets the matched items as one tuple; result is emitted, not returned
+                i = f(args)
+                self.emit(i)
+        else:
+            def f_wrap(*rhs):  # other rules: return f's result; this rhs shadows add_rule's rhs only inside f_wrap
+                return f(rhs)
+        self.g.add_production(prod, rhs, f_wrap)  # rhs here is the symbol list passed to add_rule
+
+    def __init__(self, kws, instruction_rules, emit):
         # Construct a parser given a grammar:
-        ident = lambda x: x   # Identity helper function
-        g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}',
-            pyyacc.EOF])
+        tokens2 = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
+                   pyyacc.EPS, 'COMMENT', '{', '}',
+                   pyyacc.EOF, 'val32', 'val8', 'val5', 'val3']
+        tokens2.extend(kws)
+        self.kws = kws
+        g = pyyacc.Grammar(tokens2)
+        self.g = g
         # Global structure of assembly line:
         g.add_production('asmline', ['asmline2'])
         g.add_production('asmline', ['asmline2', 'COMMENT'])
@@ -88,27 +117,22 @@
 
         # Add instruction rules for the target in question:
         for prod, rhs, f in instruction_rules:
-            if prod is 'instruction':
-                def f_wrap(*rhs):
-                    i = f(rhs)
-                    self.emit(i)
-            else:
-                def f_wrap(*rhs):
-                    return f(rhs)
-            g.add_production(prod, rhs, f_wrap)
+            self.add_rule(prod, rhs, f)
 
         #g.add_production('instruction', [])
-        g.add_production('expression', ['term'], ident)
+        g.add_production('expression', ['term'], lambda x: x)
         g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
         g.add_production('addop', ['-'], lambda x: x.val)
         g.add_production('addop', ['+'], lambda x: x.val)
         g.add_production('mulop', ['*'], lambda x: x.val)
-        g.add_production('term', ['factor'], ident)
+        g.add_production('term', ['factor'], lambda x: x)
         g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
         g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
         g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
         g.start_symbol = 'asmline'
-        self.p = g.genParser()
+        self.emit = emit
+        self.p = g.generate_parser()
+        print('length of table:', len(self.p.action_table))
 
     # Parser handlers:
     def p_ins_1(self, opc, ops):
@@ -141,14 +165,13 @@
         return AUnop('[]', exp)
 
     def p_label(self, lname, cn):  # cn is not used in the body
-        lab = ALabel(lname.val)
+        lab = Label(lname.val)  # NOTE(review): Label is not among the names imported from .asmnodes above - confirm it is imported elsewhere
         self.emit(lab)  # labels are emitted directly instead of being returned
 
     def p_binop(self, exp1, op, exp2):  # handler shared by the 'expression addop term' and 'term mulop factor' productions
         return ABinop(op, exp1, exp2)  # note the argument order: operator first, then the two operands
 
-    def parse(self, lexer, emitter):
-        self.emit = emitter
+    def parse(self, lexer):  # self.emit is now set once in __init__, so only the lexer is passed per call
         self.p.parse(lexer)  # delegate to the parser object generated from the grammar
 
 
@@ -157,12 +180,12 @@
         self.target = target
         assert isinstance(target, Target)
         self.stream = stream
-        self.parser = Parser(None, target.assembler_rules, self.stream.emit)
+        self.parser = Parser(target.asm_keywords, target.assembler_rules, self.stream.emit)
 
     # Top level interface:
     def parse_line(self, line):
         """ Parse line into assembly instructions """
-        tokens = Lexer(line)
+        tokens = Lexer(line, self.target.asm_keywords)  # same keyword list the Parser was constructed with
         self.parser.parse(tokens)  # results reach the stream through the emit callback given to Parser
 
     def assemble(self, asmsrc):