Mercurial > lcfOS
comparison ide/compiler/assembler.py @ 1:92df07bc2081
Initial import of compiler
author | windel |
---|---|
date | Sun, 18 Sep 2011 19:00:29 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:1a4faf9ef1ea | 1:92df07bc2081 |
---|---|
1 """ | |
2 Assembler code generation functions | |
3 """ | |
4 | |
5 from .errors import Error | |
6 | |
# Table 3.1 of the intel manual:
# Each table maps a register name to the 3-bit number that goes into the
# ModR/M, SIB or opcode register field.
# use REX.W on the table below:
regs64 = {
    'rax': 0, 'rcx': 1, 'rdx': 2, 'rbx': 3,
    'rsp': 4, 'rbp': 5, 'rsi': 6, 'rdi': 7,
    # r8..r15 reuse the numbers 0..7; the extra bit comes from rexbit.
    'r8': 0, 'r9': 1, 'r10': 2, 'r11': 3,
    'r12': 4, 'r13': 5, 'r14': 6, 'r15': 7,
}
regs32 = {
    'eax': 0, 'ecx': 1, 'edx': 2, 'ebx': 3,
    'esp': 4, 'ebp': 5, 'esi': 6, 'edi': 7,
}
regs8 = {
    'al': 0, 'cl': 1, 'dl': 2, 'bl': 3,
    'ah': 4, 'ch': 5, 'dh': 6, 'bh': 7,
}

# Calculation of the rexb bit:
# 1 for the registers that need a REX extension bit (r8..r15), else 0.
rexbit = {
    'rax': 0, 'rcx': 0, 'rdx': 0, 'rbx': 0,
    'rsp': 0, 'rbp': 0, 'rsi': 0, 'rdi': 0,
    'r8': 1, 'r9': 1, 'r10': 1, 'r11': 1,
    'r12': 1, 'r13': 1, 'r14': 1, 'r15': 1,
}
17 | |
18 # Helper functions: | |
def imm64(x):
    """Encode *x* as eight little-endian bytes.

    Negative values are represented in 64-bit two's complement and bits
    above the lowest 64 are discarded.  Returns a list of eight ints in
    the range 0..255, least significant byte first.
    """
    low_bits = x & ((1 << 64) - 1)
    return [(low_bits >> shift) & 0xFF for shift in range(0, 64, 8)]
25 | |
def imm32(x):
    """Encode *x* as four little-endian bytes.

    Negative values are represented in 32-bit two's complement and bits
    above the lowest 32 are discarded.  Returns a list of four ints in
    the range 0..255, least significant byte first.
    """
    low_bits = x & ((1 << 32) - 1)
    return [(low_bits >> shift) & 0xFF for shift in range(0, 32, 8)]
32 | |
def imm8(x):
    """Encode *x* as a single byte.

    Negative values are represented in 8-bit two's complement and bits
    above the lowest 8 are discarded.  Returns a one-element list.
    """
    return [x & 0xFF]
38 | |
def modrm(mod=0, rm=0, reg=0):
    """Pack the three ModR/M fields into one byte.

    Layout, from the most significant bit down: mod (2 bits), reg
    (3 bits), rm (3 bits).  Only the upper bound of each field is
    asserted.
    """
    for field, limit in ((mod, 3), (rm, 7), (reg, 7)):
        assert field <= limit
    return rm | (reg << 3) | (mod << 6)
45 | |
def rex(w=0, r=0, x=0, b=0):
    """Build a REX prefix byte: 0x40 combined with the W, R, X and B bits.

    W lands in bit 3, R in bit 2, X in bit 1 and B in bit 0.  Each flag
    is asserted to be at most 1.
    """
    for flag in (w, r, x, b):
        assert flag <= 1
    return 0x40 | b | (x << 1) | (r << 2) | (w << 3)
53 | |
def sib(ss=0, index=0, base=0):
    """Pack the three SIB fields into one byte.

    Layout, from the most significant bit down: ss (2 bits), index
    (3 bits), base (3 bits).  Only the upper bound of each field is
    asserted.
    """
    for field, limit in ((ss, 3), (index, 7), (base, 7)):
        assert field <= limit
    return base | (index << 3) | (ss << 6)
59 | |
# Condition name -> 4-bit condition code that is OR-ed into the opcode
# of the conditional jump instructions.
tttn = {
    'L': 0xc,
    'G': 0xf,
    'NE': 0x5,
    'GE': 0xd,
    'LE': 0xe,
    'E': 0x4,
}
61 | |
62 # Actual instructions: | |
def nearjump(distance, condition=None):
    """Encode a jump with a 32-bit relative displacement.

    distance  -- signed byte distance; a negative distance has the
                 length of this instruction subtracted from it
                 (5 bytes unconditional, 6 bytes conditional).
    condition -- a key of ``tttn`` for a conditional jump, or a falsy
                 value for an unconditional one.

    Distances larger in magnitude than 1 << 30 are reported via Error.
    Returns the instruction as a list of byte values.
    """
    reach = 1 << 30
    if abs(distance) > reach:
        Error('near jump cannot jump over more than {0} bytes'.format(reach))
    if not condition:
        # jmp rel32
        if distance < 0:
            distance -= 5  # Skip own instruction
        return [0xE9] + imm32(distance)
    # Jcc rel32: two-byte opcode 0x0F, 0x80 | condition code
    if distance < 0:
        distance -= 6  # Skip own instruction
    second_byte = 0x80 | tttn[condition]
    return [0x0F, second_byte] + imm32(distance)
77 | |
def shortjump(distance, condition=None):
    """Encode a jump with an 8-bit relative displacement.

    distance  -- signed byte distance; a negative distance has the two
                 bytes of this instruction subtracted from it.
    condition -- a key of ``tttn`` for a conditional jump, or a falsy
                 value for an unconditional one.

    Distances larger in magnitude than 118 are reported via Error.
    Returns the instruction as a list of byte values.
    """
    reach = 118
    if abs(distance) > reach:
        Error('short jump cannot jump over more than {0} bytes'.format(reach))
    if distance < 0:
        distance -= 2  # Skip own instruction
    # 0x70 | cc is Jcc rel8, 0xeb is jmp rel8
    first_byte = (0x70 | tttn[condition]) if condition else 0xeb
    return [first_byte] + imm8(distance)
90 | |
91 # Helper that determines jump type: | |
def reljump(distance):
    """Encode an unconditional relative jump, choosing the encoding by size.

    Distances with magnitude below 110 use shortjump(), everything else
    uses nearjump().
    """
    encode = shortjump if abs(distance) < 110 else nearjump
    return encode(distance)
97 | |
def push(reg):
    """Encode ``push reg`` for a 64-bit register (opcode 0x50 + register).

    Registers flagged in ``rexbit`` get the prefix byte 0x41 in front.
    Any other operand is reported via Error.
    """
    if reg in regs64:
        needs_prefix = rexbit[reg] == 1
        opcode = 0x50 + regs64[reg]
        return [0x41, opcode] if needs_prefix else [opcode]
    Error('push for {0} not implemented'.format(reg))
106 | |
def pop(reg):
    """Encode ``pop reg`` for a 64-bit register (opcode 0x58 + register).

    Registers flagged in ``rexbit`` get a REX prefix with the B bit set.
    Any other operand is reported via Error.
    """
    if reg in regs64:
        prefix = [rex(b=1)] if rexbit[reg] == 1 else []
        return prefix + [0x58 + regs64[reg]]
    Error('pop for {0} not implemented'.format(reg))
118 | |
def INT(number):
    """Encode ``int number``: opcode 0xCD followed by *number* as one byte."""
    return [0xcd] + imm8(number)
122 | |
def syscall():
    """Return the two bytes of the ``syscall`` instruction (0x0F 0x05)."""
    escape_byte, opcode = 0x0F, 0x05
    return [escape_byte, opcode]
125 | |
def call(distance):
    """Encode a call instruction.

    distance -- either an int, emitted as ``call rel32`` (0xE8 plus a
                32-bit displacement), or the name of a 64-bit register,
                emitted as ``call r/m64`` (0xFF /2).

    Anything else is reported via Error.
    """
    if type(distance) is int:
        return [0xe8] + imm32(distance)
    if type(distance) is str and distance in regs64:
        target = distance
        # 0xFF /2 == call r/m64
        body = [0xFF, modrm(mod=3, reg=2, rm=regs64[target])]
        if rexbit[target] == 1:
            return [rex(b=rexbit[target])] + body
        return body
    Error('Cannot call to {0}'.format(distance))
140 | |
def ret():
    """Return the single byte of the ``ret`` instruction (0xC3)."""
    opcode = 0xc3
    return [opcode]
143 | |
def increg64(reg):
    """Encode an increment of the 64-bit register *reg*.

    Emits REX.W (plus the B bit when ``rexbit`` flags the register),
    opcode 0xFF and a register-direct ModR/M byte whose reg field is 0.
    """
    assert reg in regs64
    prefix = rex(w=1, b=rexbit[reg])
    operand = modrm(mod=3, rm=regs64[reg])
    return [prefix, 0xff, operand]
150 | |
def prepost8(r8, rm8):
    """Build the bytes surrounding the opcode for an 8-bit register/memory pair.

    r8  -- name of an 8-bit register (key of ``regs8``); it is placed in
           the reg field of the ModR/M byte.
    rm8 -- memory operand as a list: ``[base]`` or ``[base, offset]``
           with *base* a 64-bit register name and *offset* an int.

    Returns ``(pre, post)``: the list of bytes to emit before the opcode
    (an optional REX prefix) and the list to emit after it (ModR/M byte
    plus displacement).  Unsupported operands are reported via Error.

    NOTE(review): when a REX prefix is emitted (base is r8..r15) the
    register numbers 4..7 presumably select spl/bpl/sil/dil instead of
    ah/ch/dh/bh, so those combinations look mis-encoded -- confirm
    against the Intel manual.
    """
    assert(r8 in regs8)
    pre = []
    if type(rm8) is list:
        # TODO: merge mem access with prepost for 64 bits
        if len(rm8) == 1:
            base, = rm8
            if type(base) is str and base in regs64:
                # With mod=0 the rm values of these registers select
                # other addressing forms (SIB byte / disp32).
                assert(base not in ['rbp', 'rsp', 'r12', 'r13'])
                mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
                if rexbit[base] == 1:
                    pre.append(rex(b=1))
                post = [mod_rm]
            else:
                Error('One arg of type {0} not implemented'.format(base))
        elif len(rm8) == 2:
            base, offset = rm8
            assert(type(offset) is int)
            assert(base in regs64)

            if base == 'rsp' or base == 'r12':
                Error('Cannot use rsp or r12 as base yet')
            if rexbit[base] == 1:
                pre.append(rex(b=1))
            if -128 <= offset <= 127:
                # mod=1: one signed displacement byte follows
                mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8])
                post = [mod_rm] + imm8(offset)
            elif -(1 << 31) <= offset < (1 << 31):
                # mod=2: four displacement bytes follow.  Previously such
                # offsets were silently truncated to a single byte.
                mod_rm = modrm(mod=2, rm=regs64[base], reg=regs8[r8])
                post = [mod_rm] + imm32(offset)
            else:
                Error('Offset {0} does not fit in 32 bits'.format(offset))
        else:
            Error('not supporting prepost8 with list len {0}'.format(len(rm8)))
    else:
        Error('Not supporting move with reg8 {0}'.format(r8))
    return pre, post
182 | |
def _displacement(offset):
    """Return (mod, displacement bytes) for a signed memory offset.

    Offsets in -128..127 use mod=1 with one byte, other offsets that fit
    in 32 bits use mod=2 with four bytes.  Larger offsets are reported
    via Error.
    """
    if -128 <= offset <= 127:
        return 1, imm8(offset)
    if -(1 << 31) <= offset < (1 << 31):
        return 2, imm32(offset)
    Error('Displacement {0} does not fit in 32 bits'.format(offset))


def prepost(r64, rm64):
    """Build the bytes surrounding the opcode for a 64-bit register/operand pair.

    r64  -- name of a 64-bit register; it is placed in the reg field of
            the ModR/M byte.
    rm64 -- the other operand, one of:
            * a 64-bit register name (register-direct, mod=3),
            * ``[address]`` with an int: absolute 32-bit address,
            * ``[base, offset]``: base register plus displacement, or
              ``['RIP', offset]`` for RIP-relative addressing,
            * ``[base, index, disp]``: base + index + displacement.

    Returns ``(pre, post)``: ``pre`` is a one-element list holding the
    REX prefix and ``post`` holds the ModR/M byte, an optional SIB byte
    and the displacement.  Unsupported operands are reported via Error.

    Displacements outside -128..127 are emitted as 32-bit displacements;
    the earlier code always emitted a single byte and thereby truncated
    larger offsets.
    """
    assert(r64 in regs64)
    if type(rm64) is list:
        if len(rm64) == 3:
            base, index, disp = rm64
            assert(base in regs64)
            assert(index in regs64)
            assert(type(disp) is int)
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert(index != 'rsp')

            rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            mod, disp_bytes = _displacement(disp)
            # rm=4 indicates a SIB byte: [base][index] + displacement
            mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + disp_bytes
        elif len(rm64) == 2:
            base, offset = rm64
            assert(type(offset) is int)
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
                return [rexprefix], [mod_rm] + imm32(offset)
            else:
                assert(base in regs64)

                rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
                mod, disp_bytes = _displacement(offset)
                if base == 'rsp' or base == 'r12':
                    # extended function that uses SIB byte
                    # rm=4 indicates a SIB byte follows
                    mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
                    # index=4 indicates that index is not used
                    si_b = sib(ss=0, index=4, base=regs64[base])
                    return [rexprefix], [mod_rm, si_b] + disp_bytes
                else:
                    mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs64[r64])
                    return [rexprefix], [mod_rm] + disp_bytes
        elif len(rm64) == 1:
            offset = rm64[0]
            if type(offset) is int:
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
                si_b = sib(ss=0, index=4, base=5)  # 0x25
                return [rexprefix], [mod_rm, si_b] + imm32(offset)
            else:
                Error('Memory reference of type {0} not implemented'.format(offset))
        else:
            Error('Memory reference not implemented')
    elif rm64 in regs64:
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
        return [rexprefix], [mod_rm]
    else:
        # Previously this fell off the end and returned None, which made
        # callers fail while unpacking the result.
        Error('Operand {0} not supported'.format(rm64))
239 | |
def leareg64(rega, m):
    """Encode ``lea rega, m`` (opcode 0x8D) with operand bytes from prepost()."""
    before, after = prepost(rega, m)
    return before + [0x8d] + after  # lea r64, m
244 | |
def mov(rega, regb):
    """Encode ``mov rega, regb``, choosing the form from the operand types.

    * regb is an int: load a 64-bit immediate into register *rega*
      (REX.W, opcode 0xB8 + register, eight immediate bytes).
    * regb is a 64-bit register name: opcode 0x89 (mov r/m64, r64).
    * regb is an 8-bit register name: opcode 0x88 (mov r/m8, r8).
    * otherwise, rega is a 64-bit register name: opcode 0x8B
      (mov r64, r/m64).

    Other combinations are reported via Error.  Returns the instruction
    as a list of byte values.
    """
    if type(regb) is int:
        before = [rex(w=1, b=rexbit[rega])]
        op = 0xb8 + regs64[rega]
        after = imm64(regb)
    elif type(regb) is str:
        if regb in regs64:
            op = 0x89  # mov r/m64, r64
            before, after = prepost(regb, rega)
        elif regb in regs8:
            op = 0x88  # mov r/m8, r8
            before, after = prepost8(regb, rega)
        else:
            Error('Unknown register {0}'.format(regb))
    elif type(rega) is str:
        if rega in regs64:
            op = 0x8b  # mov r64, r/m64
            before, after = prepost(rega, regb)
        else:
            Error('Unknown register {0}'.format(rega))
    else:
        Error('Move of this kind {0}, {1} not implemented'.format(rega, regb))
    return before + [op] + after
268 | |
def xorreg64(rega, regb):
    """Encode ``xor rega, regb`` for two 64-bit registers.

    Uses opcode 0x31 (XOR r/m64, r64) with *rega* in the rm field and
    *regb* in the reg field; 0x33 (XOR r64, r/m64) would be the
    alternative form.
    """
    prefix = rex(w=1, r=rexbit[regb], b=rexbit[rega])
    operands = modrm(3, rm=regs64[rega], reg=regs64[regb])
    return [prefix, 0x31, operands]
275 | |
276 # integer arithmetic: | |
def addreg64(rega, regb):
    """Encode ``add rega, regb``.

    rega -- destination; with a register *regb* it is handed to
            prepost() as the r/m operand, with an immediate it must be
            a 64-bit register name.
    regb -- a 64-bit register name (opcode 0x01, ADD r/m64, r64) or an
            int immediate (0x83 /0 with one byte, 0x81 /0 with four).

    Immediates that do not fit in 32 bits and operands of other kinds
    are reported via Error.  Returns the instruction as a list of byte
    values.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The lower bounds are required: without them any negative value
        # took the one-byte form and was silently truncated.  The upper
        # limit of 100 is kept so already-valid output stays unchanged.
        if -128 <= regb < 100:
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x83  # add r/m, imm8
            mod_rm = modrm(3, rm=regs64[rega], reg=0)
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x81  # add r/m64, imm32
            mod_rm = modrm(3, rm=regs64[rega], reg=0)
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            Error('Constant value too large!')
    else:
        Error('unknown second operand {0}!'.format(regb))
297 | |
def subreg64(rega, regb):
    """Encode ``sub rega, regb``.

    rega -- destination; with a register *regb* it is handed to
            prepost() as the r/m operand, with an immediate it must be
            a 64-bit register name.
    regb -- a 64-bit register name (opcode 0x29, SUB r/m64, r64) or an
            int immediate (0x83 /5 with one byte, 0x81 /5 with four).

    Immediates that do not fit in 32 bits and operands of other kinds
    are reported via Error.  Returns the instruction as a list of byte
    values.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The lower bounds are required: without them any negative value
        # took the one-byte form and was silently truncated.  The upper
        # limit of 100 is kept so already-valid output stays unchanged.
        if -128 <= regb < 100:
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x83  # sub r/m, imm8
            mod_rm = modrm(3, rm=regs64[rega], reg=5)
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x81  # sub r/m64, imm32
            mod_rm = modrm(3, rm=regs64[rega], reg=5)
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            Error('Constant value too large!')
    else:
        Error('unknown second operand {0}!'.format(regb))
319 | |
def idivreg64(reg):
    """Encode ``idiv reg`` for a 64-bit register (REX.W, opcode 0xF7, /7)."""
    prefix = rex(w=1, b=rexbit[reg])
    operand = modrm(3, rm=regs64[reg], reg=7)
    return [prefix, 0xf7, operand]  # IDIV r/m64
325 | |
def imulreg64_rax(reg):
    """Encode the one-operand ``imul reg`` for a 64-bit register.

    Emits REX.W, opcode 0xF7 and a register-direct ModR/M byte whose reg
    field is 5.
    """
    prefix = rex(w=1, b=rexbit[reg])
    operand = modrm(3, rm=regs64[reg], reg=5)
    return [prefix, 0xf7, operand]  # IMUL r/m64
331 | |
def imulreg64(rega, regb):
    """Encode the two-operand ``imul rega, regb``.

    Uses the two-byte opcode 0x0F 0xAF (IMUL r64, r/m64) with operand
    bytes from prepost(rega, regb).
    """
    before, after = prepost(rega, regb)
    return before + [0x0f, 0xaf] + after
337 | |
def cmpreg64(rega, regb):
    """Encode ``cmp rega, regb``.

    rega -- first operand; with a register *regb* it is handed to
            prepost() as the r/m operand, with an immediate it must be
            a 64-bit register name.
    regb -- a 64-bit register name (opcode 0x39, CMP r/m64, r64) or an
            int immediate (0x83 /7 with one byte, 0x81 /7 with four).

    Immediates that do not fit in 32 bits and operands of other kinds
    are reported via Error.  Returns the instruction as a list of byte
    values.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=7)
        if -128 <= regb <= 127:
            opcode = 0x83  # CMP r/m64, imm8
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            # Previously every immediate was squeezed into one byte, so
            # values outside -128..127 compared against a wrong constant.
            opcode = 0x81  # CMP r/m64, imm32
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            Error('Constant value too large!')
    else:
        Error('not implemented cmp64')
351 | |
352 # Mapping that maps string names to the right functions: | |
# Mnemonic -> (encoder function, number of operands it takes)
opcodes = {
    'mov': (mov, 2),
    'lea': (leareg64, 2),
    'int': (INT, 1),
    'syscall': (syscall, 0),
}
354 |