Mercurial > lcfOS
annotate python/ppci/assembler.py @ 341:4d204f6f7d4e devel
Rewrite of assembler parts
author | Windel Bouwman |
---|---|
date | Fri, 28 Feb 2014 18:07:14 +0100 |
parents | c7cc54c0dfdf |
children | 86b02c98a717 |
rev | line source |
---|---|
334 | 1 |
2 import re | |
3 import pyyacc | |
4 from . import Token, CompilerError, SourceLocation | |
5 from target import Target, Label | |
6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber | |
7 | |
340 | 8 |
def bit_type(value):
    """ Name the narrowest immediate class that can hold value.

    Returns 'val3', 'val5' or 'val8' when the value fits in 3, 5 or 8
    bits respectively, and 'val32' otherwise.  The value must satisfy
    0 <= value < 2**31.
    """
    assert value < (2**31)
    assert value >= 0
    # Walk the widths from narrow to wide, the first one that fits wins.
    for width in (3, 5, 8):
        if value < (1 << width):
            return 'val{}'.format(width)
    return 'val32'
17 | |
def tokenize(s, kws):
    """
    Tokenizer, a generator that splits the text s into tokens.

    A Token is yielded for every lexeme except blanks, followed by one
    final EOF token.  Punctuation gets the punctuation text itself as
    token type, identifiers whose lowercase form is in kws get that
    lowercase keyword as type, and decimal or hexadecimal integers get
    the type reported by bit_type for their value.

    Raises CompilerError at the first character no pattern matches.

    Modelled after the tokenizer example on the python re doc page.
    """
    patterns = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*")
    ]
    scan = re.compile(
        '|'.join('(?P<%s>%s)' % entry for entry in patterns)).match
    line = 1
    pos = line_start = 0
    while True:
        found = scan(s, pos)
        if found is None:
            break
        kind = found.lastgroup
        text = found.group(kind)
        if kind == 'NEWLINE':
            # Note: the pattern table above has no NEWLINE entry.
            line_start = pos
            line += 1
        elif kind != 'SKIP':
            # TODO retrieve length?
            loc = SourceLocation('', line, found.start() - line_start, 0)
            value = text
            if kind == 'LEESTEKEN':
                # Punctuation is its own token type.
                kind = text
            elif kind == 'NUMBER' or kind == 'HEXNUMBER':
                if kind == 'NUMBER':
                    value = int(text)
                else:
                    # Drop the leading '0x' before converting.
                    value = int(text[2:], 16)
                # Integers are typed by the width they need.
                kind = bit_type(value)
            elif kind == 'REAL':
                value = float(text)
            elif kind == 'STRING':
                # Strip the surrounding quotes.
                value = text[1:-1]
            elif kind == 'ID':
                lowered = text.lower()
                if lowered in kws:
                    kind = lowered
            yield Token(kind, value, loc)
        pos = found.end()
    if pos != len(s):
        # Scanning stopped before the end: nothing matches at pos.
        loc = SourceLocation('', line, pos - line_start, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
73 | |
74 | |
class Lexer:
    """ Token source with one token of lookahead on top of tokenize. """
    def __init__(self, src, kws):
        self.tokens = tokenize(src, kws)
        # Prime the lookahead with the first token.
        self.curTok = next(self.tokens)

    def next_token(self):
        """ Return the current token and move on to the next one.

        Once the EOF token is reached it is returned on every call.
        """
        current = self.curTok
        if current.typ == 'EOF':
            return current
        self.curTok = next(self.tokens)
        return current
85 | |
86 | |
class Parser:
    """ Parser for a single line of assembly.

    The grammar consists of a fixed skeleton (optional label, optional
    instruction, optional comment, plus expressions) extended with the
    instruction rules supplied by the target.  Recognised labels and
    instructions are handed to the emit callable.
    """
    def add_rule(self, prod, rhs, f):
        """ Register a target supplied production with the grammar.

        The action f receives the matched right hand side values as one
        tuple.  For the 'instruction' nonterminal the result of f is
        passed to emit, for any other nonterminal it is returned to the
        parser.
        """
        if prod == 'instruction':
            def f_wrap(*args):
                self.emit(f(args))
        else:
            def f_wrap(*args):
                return f(args)
        self.g.add_production(prod, rhs, f_wrap)

    def __init__(self, kws, instruction_rules, emit):
        self.emit = emit
        self.kws = kws

        # Terminal symbols: the fixed set followed by target keywords.
        terminals = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                     pyyacc.EPS, 'COMMENT', '{', '}',
                     pyyacc.EOF, 'val32', 'val8', 'val5', 'val3']
        terminals.extend(kws)
        self.g = g = pyyacc.Grammar(terminals)

        # Global structure of assembly line:
        line_rules = [
            ('asmline', ['asmline2']),
            ('asmline', ['asmline2', 'COMMENT']),
            ('asmline2', ['label', 'instruction']),
            ('asmline2', ['instruction']),
            ('asmline2', ['label']),
            ('asmline2', []),
            ('label', ['ID', ':'], self.p_label),
        ]
        for rule in line_rules:
            g.add_production(*rule)

        # Add instruction rules for the target in question:
        for prod, rhs, f in instruction_rules:
            self.add_rule(prod, rhs, f)

        # Expressions, registered in the same order as before.
        def same(x):
            return x

        def token_text(tok):
            return tok.val

        expression_rules = [
            ('expression', ['term'], same),
            ('expression', ['expression', 'addop', 'term'], self.p_binop),
            ('addop', ['-'], token_text),
            ('addop', ['+'], token_text),
            ('mulop', ['*'], token_text),
            ('term', ['factor'], same),
            ('term', ['term', 'mulop', 'factor'], self.p_binop),
            ('factor', ['ID'], lambda name: ASymbol(name.val)),
            ('factor', ['NUMBER'], lambda num: ANumber(int(num.val))),
        ]
        for rule in expression_rules:
            g.add_production(*rule)

        g.start_symbol = 'asmline'
        self.p = g.generate_parser()
        # NOTE(review): this writes to stdout on every construction,
        # consider moving it to a logger.
        print('length of table:', len(self.p.action_table))

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        """ Emit an instruction with the given opcode and operands. """
        self.emit(AInstruction(opc, ops))

    def p_ins_2(self, opc):
        """ Emit an instruction that has no operands. """
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """ Start an operand list with its first operand. """
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """ Append an operand to the list, the list is changed in place. """
        assert type(ops) is list
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """ Start an item list with its first item. """
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """ Append an item to the list, the list is changed in place. """
        assert type(lis) is list
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """ Wrap a braced list into a '{}' unary node. """
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """ Wrap a bracketed expression into a '[]' unary node. """
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """ Emit a Label named after the identifier token. """
        self.emit(Label(lname.val))

    def p_binop(self, exp1, op, exp2):
        """ Combine two expressions into a binary operation node. """
        return ABinop(op, exp1, exp2)

    def parse(self, lexer):
        """ Run the generated parser over the tokens from lexer. """
        self.p.parse(lexer)
176 | |
177 | |
class Assembler:
    """ Assembles source text for one target, line by line.

    Each line is tokenized with the target's assembly keywords and run
    through a Parser built from the target's assembler rules.  The
    parser reports its results through the emit method of stream.
    """
    def __init__(self, target, stream):
        self.target = target
        assert isinstance(target, Target)
        self.stream = stream
        self.parser = Parser(target.asm_keywords, target.assembler_rules, self.stream.emit)

    # Top level interface:
    def parse_line(self, line):
        """ Parse line into assembly instructions """
        tokens = Lexer(line, self.target.asm_keywords)
        self.parser.parse(tokens)

    def assemble(self, asmsrc):
        """ Assemble this source snippet

        asmsrc is either a string or an object with a read method.  In
        the latter case the object is read completely and then closed,
        also when reading fails.

        Lines may end in LF, CRLF or CR.  Every line, including empty
        ones, is passed to parse_line.
        """
        if hasattr(asmsrc, 'read'):
            try:
                text = asmsrc.read()
            finally:
                # Close the source even if read() raised.
                asmsrc.close()
        else:
            text = asmsrc
        # The tokenizer has no pattern for '\r', so normalise CRLF and
        # CR line endings to '\n' before splitting into lines.
        text = text.replace('\r\n', '\n').replace('\r', '\n')
        for line in text.split('\n'):
            self.parse_line(line)

    def assemble_line(self, line):
        """ Assemble a single assembly line. """
        self.parse_line(line)
334 | 204 self.parse_line(line) |