Mercurial > lcfOS
annotate python/ppci/assembler.py @ 336:d1ecc493384e
Added spiffy armtoken class for bit fiddling. Added cool test that checks for build repeatability.
author | Windel Bouwman |
---|---|
date | Wed, 19 Feb 2014 22:32:15 +0100 |
parents | 6f4753202b9a |
children | b00219172a42 |
rev | line source |
---|---|
334 | 1 |
2 import re | |
3 import pyyacc | |
4 from . import Token, CompilerError, SourceLocation | |
5 from target import Target, Label | |
6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber | |
7 | |
def tokenize(s):
    """
    Break the assembly text `s` into tokens, one at a time.

    Every lexeme except blanks is yielded as a Token, followed by one
    closing EOF token.  Punctuation is reported with its own text as the
    token type, hexadecimal literals become NUMBER tokens holding an int,
    REAL values are converted to float and STRING values lose their
    surrounding quotes.

    A CompilerError is raised at the first character that none of the
    patterns accepts.

    The approach follows the tokenizer example of the python re docs.
    """
    # Earlier alternatives win over later ones, so the order is relevant.
    patterns = (
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*"),
    )
    scanner = re.compile(
        '|'.join('(?P<{}>{})'.format(name, rx) for name, rx in patterns))
    line = 1
    line_start = 0
    pos = 0
    while True:
        mo = scanner.match(s, pos)
        if mo is None:
            break
        kind = mo.lastgroup
        text = mo.group(kind)
        if kind == 'NEWLINE':
            # No NEWLINE pattern is listed above, so this bookkeeping only
            # comes into play once such a pattern is added.
            line_start = pos
            line += 1
        elif kind != 'SKIP':
            typ, val = kind, text
            if kind == 'LEESTEKEN':
                # Punctuation: the token type is the punctuation itself.
                typ = text
            elif kind == 'HEXNUMBER':
                # Drop the '0x' prefix and report it as a plain number.
                typ, val = 'NUMBER', int(text[2:], 16)
            elif kind == 'NUMBER':
                val = int(text)
            elif kind == 'REAL':
                val = float(text)
            elif kind == 'STRING':
                # Strip the enclosing quotes.
                val = text[1:-1]
            # TODO retrieve length?
            loc = SourceLocation('', line, mo.start() - line_start, 0)
            yield Token(typ, val, loc)
        pos = mo.end()
    if pos != len(s):
        # Scanning stopped before the end: s[pos] matches no pattern.
        loc = SourceLocation('', line, pos - line_start, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
58 | |
59 | |
class Lexer:
    """
    Token source with one token of lookahead on top of tokenize().

    `curTok` always holds the token that the next call of next_token()
    is going to return.
    """
    def __init__(self, src):
        """ Start tokenizing `src` and fetch its first token. """
        self.tokens = tokenize(src)
        self.curTok = next(self.tokens)

    def next_token(self):
        """
        Return the current token and move on to the one after it.

        Once the EOF token is reached, that token is returned on every
        call and the token stream is not advanced any further.
        """
        t = self.curTok
        if t.typ != 'EOF':
            self.curTok = next(self.tokens)
        return t
70 | |
71 | |
class Parser:
    """
    Parser for a single line of assembly text.

    The grammar is converted into a parser once, in the constructor.
    During parse() every recognized label and instruction is passed to
    the emitter callback that was given to parse().
    """
    def __init__(self):
        """ Describe the assembly line grammar and generate its parser. """
        def same(x):
            # Hand the single child value up unchanged.
            return x

        def text_of(x):
            # Reduce a token to the value it carries.
            return x.val

        terminals = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                     pyyacc.EPS, 'COMMENT', '{', '}', pyyacc.EOF]
        g = pyyacc.Grammar(terminals)
        # (nonterminal, right hand side[, reduce action]), registered in
        # exactly this order.
        rules = [
            ('asmline', ['asmline2']),
            ('asmline', ['asmline2', 'COMMENT']),
            ('asmline2', ['label', 'instruction']),
            ('asmline2', ['instruction']),
            ('asmline2', ['label']),
            ('asmline2', []),
            ('label', ['ID', ':'], self.p_label),
            ('instruction', ['opcode', 'operands'], self.p_ins_1),
            ('instruction', ['opcode'], self.p_ins_2),
            ('opcode', ['ID'], text_of),
            ('operands', ['operand'], self.p_operands_1),
            ('operands', ['operands', ',', 'operand'], self.p_operands_2),
            ('operand', ['expression'], same),
            ('operand', ['[', 'expression', ']'], self.p_mem_op),
            ('operand', ['{', 'listitems', '}'], self.p_list_op),
            ('listitems', ['expression'], self.p_listitems_1),
            ('listitems', ['listitems', ',', 'expression'],
             self.p_listitems_2),
            ('expression', ['term'], same),
            ('expression', ['expression', 'addop', 'term'], self.p_binop),
            ('addop', ['-'], text_of),
            ('addop', ['+'], text_of),
            ('mulop', ['*'], text_of),
            ('term', ['factor'], same),
            ('term', ['term', 'mulop', 'factor'], self.p_binop),
            ('factor', ['ID'], lambda name: ASymbol(name.val)),
            ('factor', ['NUMBER'], lambda num: ANumber(int(num.val))),
        ]
        for rule in rules:
            g.add_production(*rule)
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    # Reduce actions of the grammar:
    def p_ins_1(self, opc, ops):
        """ Instruction with operands: emit it as AInstruction. """
        self.emit(AInstruction(opc, ops))

    def p_ins_2(self, opc):
        """ Instruction without operands: emit it with an empty list. """
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """ First operand: start the operand list. """
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """ Further operand: extend the existing operand list in place. """
        assert type(ops) is list
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """ First list item: start the item list. """
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """ Further list item: extend the existing item list in place. """
        assert type(lis) is list
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """ Items between '{' and '}': wrap them in a '{}' AUnop. """
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """ Expression between '[' and ']': wrap it in a '[]' AUnop. """
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """ Label definition: emit it as ALabel. """
        self.emit(ALabel(lname.val))

    def p_binop(self, exp1, op, exp2):
        """ Combine two sub expressions into an ABinop node. """
        return ABinop(op, exp1, exp2)

    def parse(self, lexer, emitter):
        """
        Parse the tokens delivered by `lexer`.

        `emitter` is stored as self.emit and is called with each label
        and instruction that is recognized.
        """
        self.emit = emitter
        self.p.parse(lexer)
149 | |
# Construct the parser once, when the module is loaded, to save time.
# Every Assembler instance uses this same parser object.
asmParser = Parser()
152 | |
class Assembler:
    """
    Line oriented assembler.

    Each source line is parsed into assembly AST nodes with the shared
    module level parser.  The nodes are collected on a stack and then
    converted: instructions through target.mapInstruction() and labels
    into Label objects.  The results are passed to stream.emit().
    """
    def __init__(self, target, stream):
        """
        Create an assembler.

        target -- object providing mapInstruction(); when it is falsy,
                  assembling raises a CompilerError.
        stream -- object providing emit(); when it is falsy the
                  converted items are not passed on.
        """
        self.target = target
        self.stream = stream
        self.restart()
        self.p = asmParser

    # Top level interface:
    def restart(self):
        """ Forget all parsed items that were not assembled yet. """
        self.stack = []

    def emit(self, a):
        """ Emit a parsed instruction """
        self.stack.append(a)

    def parse_line(self, line):
        """ Parse line into asm AST """
        tokens = Lexer(line)
        self.p.parse(tokens, self.emit)

    def assemble(self, asmsrc):
        """
        Assemble this source snippet.

        asmsrc is either a string or an object with read() and close().
        Such an object is read completely and is always closed, also
        when reading it fails.  The text is then assembled line by line.
        """
        if not isinstance(asmsrc, str):
            try:
                text = asmsrc.read()
            finally:
                # Close the source even when read() raised.
                asmsrc.close()
            asmsrc = text
        for line in asmsrc.split('\n'):
            self.assemble_line(line)

    def assemble_line(self, line):
        """
        Assemble a single source line.
        Do not take newlines into account
        """
        self.parse_line(line)
        self.assemble_aast()

    def assemble_aast(self):
        """
        Assemble a parsed asm line.

        Takes the parsed items from the stack, oldest first, converts
        them and passes the result to the stream, if there is one.

        Raises CompilerError when no target is set and
        NotImplementedError for an item that is neither an AInstruction
        nor an ALabel.
        """
        if not self.target:
            raise CompilerError('Cannot assemble without target')
        while self.stack:
            vi = self.stack.pop(0)
            if type(vi) is AInstruction:
                mi = self.target.mapInstruction(vi)
            elif type(vi) is ALabel:
                mi = Label(vi.name)
            else:
                raise NotImplementedError('{}'.format(vi))
            if self.stream:
                self.stream.emit(mi)
185 Do not take newlines into account | |
186 """ | |
187 self.parse_line(line) | |
188 self.assemble_aast() | |
189 | |
190 def assemble_aast(self): | |
191 """ Assemble a parsed asm line """ | |
192 if not self.target: | |
193 raise CompilerError('Cannot assemble without target') | |
194 while self.stack: | |
195 vi = self.stack.pop(0) | |
196 if type(vi) is AInstruction: | |
197 mi = self.target.mapInstruction(vi) | |
198 elif type(vi) is ALabel: | |
199 mi = Label(vi.name) | |
200 else: | |
201 raise NotImplementedError('{}'.format(vi)) | |
202 if self.stream: | |
203 self.stream.emit(mi) |