comparison python/ppci/assembler.py @ 341:4d204f6f7d4e devel

Rewrite of assembler parts
author Windel Bouwman
date Fri, 28 Feb 2014 18:07:14 +0100
parents c7cc54c0dfdf
children 86b02c98a717
old column: 340:c7cc54c0dfdf    new column: 341:4d204f6f7d4e
4 from . import Token, CompilerError, SourceLocation 4 from . import Token, CompilerError, SourceLocation
5 from target import Target, Label 5 from target import Target, Label
6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber 6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
7 7
8 8
9 def tokenize(s): 9 def bit_type(value):
10 assert value < (2**31)
11 assert value >= 0
12 t = 'val32'
13 for n in [8, 5, 3]:
14 if value < (2**n):
15 t = 'val{}'.format(n)
16 return t
17
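The new bit_type helper classifies a constant into the narrowest of the value classes the grammar below knows about (val3, val5, val8, val32); the loop runs from wide to narrow, so the last matching width wins. A minimal sketch of the expected behaviour, assuming the module is importable as ppci.assembler (inferred from the file location):

    # Sketch only; the import path is an assumption.
    from ppci.assembler import bit_type

    assert bit_type(3) == 'val3'        # 0..7 fits in 3 bits
    assert bit_type(17) == 'val5'       # 0..31 fits in 5 bits
    assert bit_type(200) == 'val8'      # 0..255 fits in 8 bits
    assert bit_type(70000) == 'val32'   # anything else below 2**31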
18 def tokenize(s, kws):
10 """ 19 """
11 Tokenizer, generates an iterator that 20 Tokenizer, generates an iterator that
12 returns tokens! 21 returns tokens!
13 22
14 This GREAT example was taken from python re doc page! 23 This GREAT example was taken from python re doc page!
44 typ = 'NUMBER' 53 typ = 'NUMBER'
45 elif typ == 'REAL': 54 elif typ == 'REAL':
46 val = float(val) 55 val = float(val)
47 elif typ == 'STRING': 56 elif typ == 'STRING':
48 val = val[1:-1] 57 val = val[1:-1]
58 elif typ == 'ID':
59 if val.lower() in kws: # ['r3', 'sp', 'add', 'yield', 'r4', 'r0', 'r1', 'sub', 'r5', 'r6', 'r2']:
60 typ = val.lower()
49 col = mo.start() - line_start 61 col = mo.start() - line_start
50 loc = SourceLocation('', line, col, 0) # TODO retrieve length? 62 loc = SourceLocation('', line, col, 0) # TODO retrieve length?
63 if typ == 'NUMBER':
64 typ = bit_type(val)
51 yield Token(typ, val, loc) 65 yield Token(typ, val, loc)
52 pos = mo.end() 66 pos = mo.end()
53 mo = gettok(s, pos) 67 mo = gettok(s, pos)
54 if pos != len(s): 68 if pos != len(s):
55 col = pos - line_start 69 col = pos - line_start
57 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) 71 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
58 yield Token('EOF', pyyacc.EOF) 72 yield Token('EOF', pyyacc.EOF)
59 73
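With the new kws argument, target keywords (mnemonics, register names) become their own token types, and every NUMBER token is narrowed to a val3/val5/val8/val32 type via bit_type, so the grammar can constrain immediate widths. A rough sketch of what the generator now yields; the keyword list here is a made-up example for one hypothetical target:

    # Illustration only; exact token types depend on the elided regex part above.
    from ppci.assembler import tokenize

    for tok in tokenize('add r0, 7', kws=['add', 'r0']):
        print(tok.typ, tok.val)
    # roughly: add, r0, ',', val3 (value 7), then the EOF token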
60 74
61 class Lexer: 75 class Lexer:
62 def __init__(self, src): 76 def __init__(self, src, kws):
63 self.tokens = tokenize(src) 77 self.tokens = tokenize(src, kws)
64 self.curTok = self.tokens.__next__() 78 self.curTok = self.tokens.__next__()
65 79
66 def next_token(self): 80 def next_token(self):
67 t = self.curTok 81 t = self.curTok
68 if t.typ != 'EOF': 82 if t.typ != 'EOF':
69 self.curTok = self.tokens.__next__() 83 self.curTok = self.tokens.__next__()
70 return t 84 return t
71 85
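The Lexer keeps a single token of lookahead and, once it reaches EOF, keeps handing back that EOF token instead of advancing, so the parser can safely ask for more input at the end of a line. A small driver sketch, reusing the assumed keyword list from above:

    # Sketch; assumes ppci.assembler is importable as in this revision.
    from ppci.assembler import Lexer

    lex = Lexer('add r0, 7', ['add', 'r0'])
    tok = lex.next_token()
    while tok.typ != 'EOF':
        print(tok.typ)
        tok = lex.next_token()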
72 86
73 class Parser: 87 class Parser:
74 def __init__(self, tokens, instruction_rules): 88 def add_rule(self, prod, rhs, f):
89 """ Helper function to add a rule, why this is required? """
90 if prod == 'instruction':
91 def f_wrap(*args):
92 i = f(args)
93 self.emit(i)
94 else:
95 def f_wrap(*rhs):
96 return f(rhs)
97 self.g.add_production(prod, rhs, f_wrap)
98
99 def __init__(self, kws, instruction_rules, emit):
75 # Construct a parser given a grammar: 100 # Construct a parser given a grammar:
76 ident = lambda x: x # Identity helper function 101 tokens2 = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
77 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}', 102 pyyacc.EPS, 'COMMENT', '{', '}',
78 pyyacc.EOF]) 103 pyyacc.EOF, 'val32', 'val8', 'val5', 'val3']
104 tokens2.extend(kws)
105 self.kws = kws
106 g = pyyacc.Grammar(tokens2)
107 self.g = g
79 # Global structure of assembly line: 108 # Global structure of assembly line:
80 g.add_production('asmline', ['asmline2']) 109 g.add_production('asmline', ['asmline2'])
81 g.add_production('asmline', ['asmline2', 'COMMENT']) 110 g.add_production('asmline', ['asmline2', 'COMMENT'])
82 g.add_production('asmline2', ['label', 'instruction']) 111 g.add_production('asmline2', ['label', 'instruction'])
83 g.add_production('asmline2', ['instruction']) 112 g.add_production('asmline2', ['instruction'])
86 g.add_production('label', ['ID', ':'], self.p_label) 115 g.add_production('label', ['ID', ':'], self.p_label)
87 #g.add_production('label', []) 116 #g.add_production('label', [])
88 117
89 # Add instruction rules for the target in question: 118 # Add instruction rules for the target in question:
90 for prod, rhs, f in instruction_rules: 119 for prod, rhs, f in instruction_rules:
91 if prod is 'instruction': 120 self.add_rule(prod, rhs, f)
92 def f_wrap(*rhs):
93 i = f(rhs)
94 self.emit(i)
95 else:
96 def f_wrap(*rhs):
97 return f(rhs)
98 g.add_production(prod, rhs, f_wrap)
99 121
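Moving the wrapper construction out of the constructor loop into add_rule is more than cosmetic: Python closures bind names late, so the in-loop f_wrap definitions of the old code all captured the same loop variable f and would have invoked whichever callback the loop bound last. Defining the wrapper inside a separate method call gives every production its own binding, which is presumably what the docstring's "why this is required?" hints at. A self-contained illustration of the pitfall:

    # Demonstrates the late-binding behaviour the add_rule helper avoids.
    callbacks = []
    for name in ['add', 'sub', 'mul']:
        def wrapper():
            return name              # looked up when called, not when defined
        callbacks.append(wrapper)
    print([cb() for cb in callbacks])        # ['mul', 'mul', 'mul']

    def make_wrapper(name):
        def wrapper():
            return name              # one binding per enclosing call
        return wrapper
    callbacks = [make_wrapper(n) for n in ['add', 'sub', 'mul']]
    print([cb() for cb in callbacks])        # ['add', 'sub', 'mul']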
100 #g.add_production('instruction', []) 122 #g.add_production('instruction', [])
101 g.add_production('expression', ['term'], ident) 123 g.add_production('expression', ['term'], lambda x: x)
102 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) 124 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
103 g.add_production('addop', ['-'], lambda x: x.val) 125 g.add_production('addop', ['-'], lambda x: x.val)
104 g.add_production('addop', ['+'], lambda x: x.val) 126 g.add_production('addop', ['+'], lambda x: x.val)
105 g.add_production('mulop', ['*'], lambda x: x.val) 127 g.add_production('mulop', ['*'], lambda x: x.val)
106 g.add_production('term', ['factor'], ident) 128 g.add_production('term', ['factor'], lambda x: x)
107 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) 129 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
108 g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) 130 g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
109 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) 131 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
110 g.start_symbol = 'asmline' 132 g.start_symbol = 'asmline'
111 self.p = g.genParser() 133 self.emit = emit
134 self.p = g.generate_parser()
135 print('length of table:', len(self.p.action_table))
112 136
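Each entry of instruction_rules is a (production, right-hand-side, callback) triple that gets merged into this grammar next to the generic expression rules, with right-hand sides built from the keyword tokens and the val* width classes. The rules themselves live in the target description; a purely hypothetical example of their shape:

    # Hypothetical rules; the real ones come from target.assembler_rules.
    class AddInstruction:
        def __init__(self, rd, rn, imm):
            self.rd, self.rn, self.imm = rd, rn, imm

    assembler_rules = [
        ('reg', ['r0'], lambda rhs: 0),      # map register keywords to numbers
        ('reg', ['r1'], lambda rhs: 1),
        ('instruction', ['add', 'reg', ',', 'reg', ',', 'val8'],
         lambda rhs: AddInstruction(rhs[1], rhs[3], rhs[5].val)),
    ]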
113 # Parser handlers: 137 # Parser handlers:
114 def p_ins_1(self, opc, ops): 138 def p_ins_1(self, opc, ops):
115 ins = AInstruction(opc, ops) 139 ins = AInstruction(opc, ops)
116 self.emit(ins) 140 self.emit(ins)
139 163
140 def p_mem_op(self, brace_open, exp, brace_close): 164 def p_mem_op(self, brace_open, exp, brace_close):
141 return AUnop('[]', exp) 165 return AUnop('[]', exp)
142 166
143 def p_label(self, lname, cn): 167 def p_label(self, lname, cn):
144 lab = ALabel(lname.val) 168 lab = Label(lname.val)
145 self.emit(lab) 169 self.emit(lab)
146 170
147 def p_binop(self, exp1, op, exp2): 171 def p_binop(self, exp1, op, exp2):
148 return ABinop(op, exp1, exp2) 172 return ABinop(op, exp1, exp2)
149 173
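The remaining handlers build the small expression AST from asmnodes: p_binop wraps additive and multiplicative terms in ABinop, p_mem_op wraps a bracketed expression in a '[]' AUnop, and p_label now emits a target Label directly instead of an ALabel node. For an operand such as [r1 + 4] the result would be roughly the nesting below (a sketch; it assumes the register still reaches the expression grammar as a plain identifier):

    # Illustrative AST shape for '[r1 + 4]'; import path inferred from this package.
    from ppci.asmnodes import ASymbol, ANumber, ABinop, AUnop

    operand = AUnop('[]', ABinop('+', ASymbol('r1'), ANumber(4)))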
150 def parse(self, lexer, emitter): 174 def parse(self, lexer):
151 self.emit = emitter
152 self.p.parse(lexer) 175 self.p.parse(lexer)
153 176
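Since emit is now handed to the constructor, a single Parser can be reused for every line without re-wiring the output callback on each parse call. A short end-to-end sketch with one invented rule, assuming the module imports resolve as in this revision:

    # Driving the Parser directly; the 'nop' rule is made up for the sketch.
    from ppci.assembler import Parser, Lexer
    from ppci.asmnodes import AInstruction

    rules = [('instruction', ['nop'], lambda rhs: AInstruction('nop', []))]
    out = []
    p = Parser(kws=['nop'], instruction_rules=rules, emit=out.append)
    p.parse(Lexer('nop', ['nop']))
    # out now holds the emitted AInstruction for the line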
154 177
155 class Assembler: 178 class Assembler:
156 def __init__(self, target, stream): 179 def __init__(self, target, stream):
157 self.target = target 180 self.target = target
158 assert isinstance(target, Target) 181 assert isinstance(target, Target)
159 self.stream = stream 182 self.stream = stream
160 self.parser = Parser(None, target.assembler_rules, self.stream.emit) 183 self.parser = Parser(target.asm_keywords, target.assembler_rules, self.stream.emit)
161 184
162 # Top level interface: 185 # Top level interface:
163 def parse_line(self, line): 186 def parse_line(self, line):
164 """ Parse line into assembly instructions """ 187 """ Parse line into assembly instructions """
165 tokens = Lexer(line) 188 tokens = Lexer(line, self.target.asm_keywords)
166 self.parser.parse(tokens) 189 self.parser.parse(tokens)
167 190
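At the top level the Assembler only needs a Target (supplying asm_keywords and assembler_rules) and an output object with an emit method; what that stream does with the emitted items is decided elsewhere. A minimal stand-in for the stream side only:

    # Stand-in output stream; a real run needs an actual Target instance from the
    # target descriptions elsewhere in ppci, which is not shown in this file.
    class ListStream:
        def __init__(self):
            self.items = []

        def emit(self, item):
            self.items.append(item)

    # With a real target this would look like:
    #   a = Assembler(some_target, ListStream())
    #   a.parse_line('add r0, r1, 3')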
168 def assemble(self, asmsrc): 191 def assemble(self, asmsrc):
169 """ Assemble this source snippet """ 192 """ Assemble this source snippet """
170 if hasattr(asmsrc, 'read'): 193 if hasattr(asmsrc, 'read'):