Mercurial > lcfOS
comparison python/asm.py @ 334:6f4753202b9a
Added more recipes
author | Windel Bouwman |
---|---|
date | Thu, 13 Feb 2014 22:02:08 +0100 |
parents | 8d07a4254f04 |
children | 6df89163e114 |
comparison
equal
deleted
inserted
replaced
333:dcae6574c974 | 334:6f4753202b9a |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 | 2 |
3 import re | |
4 import argparse | 3 import argparse |
5 import pyyacc | 4 from ppci.assembler import Assembler |
6 from ppci import Token, CompilerError, SourceLocation | |
7 from target import Target, Label | |
8 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber | |
9 | |
# Token patterns, tried left to right: an earlier entry wins when several
# could match at the same position (e.g. REAL before NUMBER, HEXNUMBER
# before NUMBER).
_TOKEN_SPEC = [
    ('REAL', r'\d+\.\d+'),
    ('HEXNUMBER', r'0x[\da-fA-F]+'),
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z][A-Za-z\d_]*'),
    ('SKIP', r'[ \t]'),
    ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
    ('STRING', r"'.*?'"),
    ('COMMENT', r";.*"),
]

# Built once at import time instead of on every tokenize() call.
_TOKEN_MATCH = re.compile(
    '|'.join('(?P<%s>%s)' % pair for pair in _TOKEN_SPEC)).match


def tokenize(s):
    """
    Generate the tokens found in a single line of assembly text.

    The scanner is based on the tokenizer example from the Python ``re``
    documentation.  Each yielded item is a ``Token(typ, val, loc)``:

    * REAL: ``val`` is converted to ``float``.
    * NUMBER: ``val`` is converted to ``int``.
    * HEXNUMBER: ``val`` is converted to ``int`` (base 16) and the token is
      reported with type ``'NUMBER'``.
    * STRING: ``val`` is the text without the surrounding single quotes.
    * LEESTEKEN (punctuation): the token type is the matched text itself,
      for example ``','`` or ``'['``.
    * ID and COMMENT: ``val`` is the matched text unchanged.
    * SKIP (space or tab) is consumed without producing a token.

    After all input is consumed a final ``Token('EOF', pyyacc.EOF)`` is
    yielded; it is created without a location argument.

    No pattern matches a newline character, so the input must be one line.
    Every location therefore reports line 1, and the column is the offset
    of the token from the start of ``s``.

    Raises CompilerError when a character (a newline included) cannot start
    any token.  Because this is a generator, the error surfaces only when
    iteration reaches the offending position.
    """
    pos = 0
    mo = _TOKEN_MATCH(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ != 'SKIP':
            if typ == 'LEESTEKEN':
                # Punctuation is identified by its own text.
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Drop the '0x' prefix; downstream only knows 'NUMBER'.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                # Strip the enclosing quotes.
                val = val[1:-1]
            loc = SourceLocation('', 1, mo.start(), 0)   # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = _TOKEN_MATCH(s, pos)
    if pos != len(s):
        # Scanning stopped before the end: nothing matches at s[pos].
        loc = SourceLocation('', 1, pos, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
60 | |
61 | |
class Lexer:
    """Token source with one token of lookahead, fed by tokenize()."""

    def __init__(self, src):
        """Start tokenizing ``src`` and load the first token into curTok."""
        stream = tokenize(src)
        self.tokens = stream
        self.curTok = next(stream)

    def next_token(self):
        """
        Return the current token and move on to the following one.

        Once the current token has type 'EOF' the lexer stops advancing and
        keeps returning that same token.
        """
        current = self.curTok
        if current.typ == 'EOF':
            return current
        self.curTok = next(self.tokens)
        return current
72 | |
73 | |
class Parser:
    """
    Parser for one line of assembly text.

    The grammar is registered with pyyacc in __init__.  Label and
    instruction rules do not return a value; they hand the finished node to
    the emitter callback supplied to parse().
    """

    def __init__(self):
        """Build the grammar and generate the parser, stored in self.p."""
        def passthrough(value):
            # Semantic action that forwards its single argument unchanged.
            return value

        def token_text(tok):
            # Semantic action that reduces a token to its value.
            return tok.val

        def make_symbol(name):
            return ASymbol(name.val)

        def make_number(num):
            return ANumber(int(num.val))

        terminals = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                     pyyacc.EPS, 'COMMENT', '{', '}', pyyacc.EOF]
        grammar = pyyacc.Grammar(terminals)

        # (nonterminal, right-hand side[, semantic action]), registered in
        # exactly this order.
        rules = (
            ('asmline', ['asmline2']),
            ('asmline', ['asmline2', 'COMMENT']),
            ('asmline2', ['label', 'instruction']),
            ('asmline2', ['instruction']),
            ('asmline2', ['label']),
            ('asmline2', []),
            ('label', ['ID', ':'], self.p_label),
            ('instruction', ['opcode', 'operands'], self.p_ins_1),
            ('instruction', ['opcode'], self.p_ins_2),
            ('opcode', ['ID'], token_text),
            ('operands', ['operand'], self.p_operands_1),
            ('operands', ['operands', ',', 'operand'], self.p_operands_2),
            ('operand', ['expression'], passthrough),
            ('operand', ['[', 'expression', ']'], self.p_mem_op),
            ('operand', ['{', 'listitems', '}'], self.p_list_op),
            ('listitems', ['expression'], self.p_listitems_1),
            ('listitems', ['listitems', ',', 'expression'],
             self.p_listitems_2),
            ('expression', ['term'], passthrough),
            ('expression', ['expression', 'addop', 'term'], self.p_binop),
            ('addop', ['-'], token_text),
            ('addop', ['+'], token_text),
            ('mulop', ['*'], token_text),
            ('term', ['factor'], passthrough),
            ('term', ['term', 'mulop', 'factor'], self.p_binop),
            ('factor', ['ID'], make_symbol),
            ('factor', ['NUMBER'], make_number),
        )
        for rule in rules:
            grammar.add_production(*rule)

        grammar.start_symbol = 'asmline'
        self.p = grammar.genParser()

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        """Emit an AInstruction built from an opcode and its operands."""
        self.emit(AInstruction(opc, ops))

    def p_ins_2(self, opc):
        """Emit an instruction that has no operands."""
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """Start an operand list with its first operand."""
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """Append one more operand to the list (in place) and return it."""
        assert type(ops) is list
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """Start a brace-list with its first item."""
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """Append one more item to the brace-list (in place) and return it."""
        assert type(lis) is list
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """Wrap the items of a '{ ... }' operand in an AUnop tagged '{}'."""
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """Wrap the expression of a '[ ... ]' operand in an AUnop tagged '[]'."""
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """Emit an ALabel named after the identifier before the colon."""
        self.emit(ALabel(lname.val))

    def p_binop(self, exp1, op, exp2):
        """Combine two sub-expressions with a binary operator."""
        return ABinop(op, exp1, exp2)

    def parse(self, lexer, emitter):
        """Parse the tokens from ``lexer``, passing results to ``emitter``."""
        self.emit = emitter
        self.p.parse(lexer)
151 | |
# Construct the parser once, at import time, to save time: Assembler
# instances reuse this shared object instead of each building their own.
asmParser = Parser()
154 | |
class Assembler:
    """
    Line-oriented assembler front end.

    Each line is parsed into AST nodes that are queued on ``self.stack``.
    The queue is then drained: every node is converted through the target
    and, when a stream is set, passed to the stream's emit().
    """

    def __init__(self, target=None, stream=None):
        """Remember target and stream and attach the shared asmParser."""
        self.target = target
        self.stream = stream
        self.restart()
        self.p = asmParser

    # Top level interface:
    def restart(self):
        """Discard all queued nodes."""
        self.stack = []

    def emit(self, a):
        """ Queue a node produced by the parser """
        self.stack.append(a)

    def parse_line(self, line):
        """ Parse one line; resulting nodes are queued via emit() """
        self.p.parse(Lexer(line), self.emit)

    def assemble(self, asmsrc):
        """ Assemble a source snippet, one '\\n'-separated line at a time """
        for source_line in asmsrc.split('\n'):
            self.assemble_line(source_line)

    def assemble_line(self, line):
        """
        Parse a single source line and assemble what it produced.
        The line is not expected to contain newlines.
        """
        self.parse_line(line)
        self.assemble_aast()

    def assemble_aast(self):
        """
        Drain the queue of parsed nodes in the order they were queued.

        AInstruction nodes go through target.mapInstruction(); ALabel nodes
        become Label objects.  The result is passed to stream.emit() when a
        stream is set.

        Raises CompilerError when no target is set, and NotImplementedError
        for any other node type.
        """
        if not self.target:
            raise CompilerError('Cannot assemble without target')
        while self.stack:
            node = self.stack.pop(0)
            node_type = type(node)
            if node_type is AInstruction:
                converted = self.target.mapInstruction(node)
            elif node_type is ALabel:
                converted = Label(node.name)
            else:
                raise NotImplementedError('{}'.format(node))
            if self.stream:
                self.stream.emit(converted)
202 | |
203 | 5 |
204 if __name__ == '__main__': | 6 if __name__ == '__main__': |
205 # When run as main file, try to grab command line arguments: | 7 # When run as main file, try to grab command line arguments: |
206 parser = argparse.ArgumentParser(description="Assembler") | 8 parser = argparse.ArgumentParser(description="Assembler") |
207 parser.add_argument('sourcefile', type=argparse.FileType('r'), | 9 parser.add_argument('sourcefile', type=argparse.FileType('r'), |