Mercurial > lcfOS
comparison python/ppci/target/x86/instructions.py @ 398:c0d9837acde8
x86 target refactor
author | Windel Bouwman |
---|---|
date | Thu, 29 May 2014 12:13:37 +0200 |
parents | python/ppci/target/x86/x86_2.py@86b02c98a717 |
children |
comparison
equal
deleted
inserted
replaced
397:5d03c10fe19d | 398:c0d9837acde8 |
---|---|
1 """ | |
2 X86 target descriptions and encodings. | |
3 """ | |
4 | |
5 from ..basetarget import Register, Instruction | |
6 from .registers import regs64, X86Register | |
7 | |
8 from ..token import Token, u32, u8, bit_range | |
9 | |
10 | |
# NOTE(review): several encoders in this module invoke modrm(...) as a
# callable, which this dict is not -- confirm which definition is intended.
modrm = dict(rax=0, rbx=1)
12 | |
13 # Table 3.1 of the intel manual: | |
14 # use REX.W on the table below: | |
15 | |
16 | |
17 # Helper functions: | |
def imm64(x):
    """Encode an integer as eight little-endian byte values.

    Negative inputs are wrapped by adding 2**64 and the result is masked to
    64 bits, so the returned list always holds eight values in 0..255,
    least significant byte first.
    """
    value = x + (1 << 64) if x < 0 else x
    value = value & 0xFFFFFFFFFFFFFFFF
    return [(value >> shift) & 0xFF for shift in range(0, 64, 8)]
24 | |
def imm32(x):
    """Encode an integer as four little-endian byte values.

    Negative inputs are wrapped by adding 2**32 and the result is masked to
    32 bits, so the returned list always holds four values in 0..255,
    least significant byte first.
    """
    value = x + (1 << 32) if x < 0 else x
    value = value & 0xFFFFFFFF
    return [(value >> shift) & 0xFF for shift in range(0, 32, 8)]
31 | |
32 | |
def imm8(x):
    """Encode an integer as a one-element list holding a single byte value.

    Negative inputs are wrapped by adding 256; the result is masked to the
    low eight bits, so out-of-range inputs are truncated rather than
    rejected.
    """
    value = x + (1 << 8) if x < 0 else x
    return [value & 0xFF]
38 | |
39 | |
class ModRmToken(Token):
    """ ModRM byte built from its mod, rm and reg fields """

    # Field accessors supplied by bit_range from ..token; presumably each
    # maps a half-open bit interval of the token value -- verify in ..token.
    mod = bit_range(6, 8)
    rm = bit_range(0, 3)
    reg = bit_range(3, 6)

    def __init__(self, mod=0, rm=0, reg=0):
        """Create an 8 bit token and store the three fields.

        Only the upper bound of each field is asserted (mod <= 3,
        rm <= 7, reg <= 7).
        """
        super().__init__(8)
        assert mod <= 3
        assert rm <= 7
        assert reg <= 7
        self.mod = mod
        self.rm = rm
        self.reg = reg

    def encode(self):
        """Return the token's bit_value passed through u8."""
        return u8(self.bit_value)
57 | |
58 | |
class RexToken(Token):
    """ REX prefix byte built from its w, r, x and b flags """

    # Single-bit field accessors supplied by bit_range from ..token.
    w = bit_range(3, 4)
    r = bit_range(2, 3)
    x = bit_range(1, 2)
    b = bit_range(0, 1)

    def __init__(self, w=0, r=0, x=0, b=0):
        """Create an 8 bit token, store the four flags and mark bit 6.

        Each flag is asserted to be at most 1. After the flags are stored,
        set_bit(6, 1) is called; presumably this sets bit 6 so the byte
        has the 0x40 REX base pattern -- verify set_bit in ..token.
        """
        super().__init__(8)
        assert w <= 1
        assert r <= 1
        assert x <= 1
        assert b <= 1
        self.w = w
        self.r = r
        self.x = x
        self.b = b
        self.set_bit(6, 1)

    def encode(self):
        """Return the token's bit_value passed through u8."""
        return u8(self.bit_value)
80 | |
81 | |
def sib(ss=0, index=0, base=0):
    """Pack scale, index and base into a single SIB byte value.

    The result places ss in bits 7-6, index in bits 5-3 and base in
    bits 2-0. Only the upper bound of each field is asserted.
    """
    assert ss <= 3
    assert index <= 7
    assert base <= 7
    scale_bits = ss << 6
    index_bits = index << 3
    return scale_bits | index_bits | base
87 | |
# Condition-code nibbles, keyed by condition mnemonic. The jump helpers OR
# these into the 0x70 (short) and 0x80 (near) conditional jump opcodes.
tttn = {
    'L': 0xc,
    'G': 0xf,
    'NE': 0x5,
    'GE': 0xd,
    'LE': 0xe,
    'E': 0x4,
}
89 | |
90 # Actual instructions: | |
def nearjump(distance, condition=None):
    """ jmp imm32 / Jcc imm32

    Returns the instruction as a list of byte values. With a truthy
    condition (a key of tttn) the two-byte 0x0F, 0x80|cc opcode is used,
    otherwise the plain 0xE9 jump. A negative distance is lowered by the
    instruction's own length (6 or 5 bytes) before it is encoded.
    """
    # NOTE(review): Error is not imported in the visible part of this module.
    lim = 1 << 30
    if abs(distance) > lim:
        Error('near jump cannot jump over more than {0} bytes'.format(lim))

    if not condition:
        if distance < 0:
            distance -= 5  # Skip own instruction
        return [0xE9] + imm32(distance)

    if distance < 0:
        distance -= 6  # Skip own instruction
    opcode = 0x80 | tttn[condition]  # Jcc imm32
    return [0x0F, opcode] + imm32(distance)
105 | |
def shortjump(distance, condition=None):
    """ jmp imm8 / Jcc imm8

    Returns the two-byte instruction as a list of byte values. A negative
    distance is lowered by 2 (the instruction's own length) before it is
    encoded. With a truthy condition (a key of tttn) the opcode is
    0x70|cc, otherwise 0xEB.
    """
    # NOTE(review): Error is not imported in the visible part of this module.
    lim = 118
    if abs(distance) > lim:
        Error('short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        distance -= 2  # Skip own instruction
    # Jcc rel8 when a condition is given, jmp rel8 otherwise
    opcode = (0x70 | tttn[condition]) if condition else 0xeb
    return [opcode] + imm8(distance)
118 | |
119 # Helper that determines jump type: | |
def reljump(distance):
    """Encode an unconditional relative jump.

    Uses shortjump when the absolute distance is below 110 bytes and
    nearjump otherwise.
    """
    encoder = shortjump if abs(distance) < 110 else nearjump
    return encoder(distance)
125 | |
126 | |
class Push(Instruction):
    """ push r64 """
    def __init__(self, reg):
        assert reg in regs64, str(reg)
        self.reg = reg

    def encode(self):
        """Return the encoding as bytes: optional 0x41 prefix, then 0x50+reg.

        The 0x41 prefix byte is emitted only when the register's rexbit
        is 1.
        """
        prefix = [0x41] if self.reg.rexbit == 1 else []
        return bytes(prefix + [0x50 + self.reg.regbits])
138 | |
139 | |
class Pop(Instruction):
    """ pop r64 """
    def __init__(self, reg):
        assert reg in regs64, str(reg)
        self.reg = reg

    def encode(self):
        """Return the encoding as bytes: optional 0x41 prefix, then 0x58+reg.

        The 0x41 prefix byte is emitted only when the register's rexbit
        is 1.
        """
        prefix = [0x41] if self.reg.rexbit == 1 else []
        return bytes(prefix + [0x58 + self.reg.regbits])
151 | |
152 | |
def pop(reg):
    """List-based encoder for pop r64.

    Returns [rex, opcode] when rexbit[reg] is 1 and [opcode] otherwise,
    where opcode is 0x58 + regs64[reg].
    """
    # NOTE(review): rexbit, rex and Error are not defined in the visible part
    # of this module, and regs64 is indexed here while the Pop class only
    # does membership tests on it -- confirm this helper is still usable.
    if reg not in regs64:
        Error('pop for {0} not implemented'.format(reg))
        return None
    if rexbit[reg] == 1:
        rexprefix = rex(b=1)
        return [rexprefix, 0x58 + regs64[reg]]
    return [0x58 + regs64[reg]]
164 | |
def INT(number):
    """Encode `int number`: opcode 0xCD followed by the imm8 of number."""
    return [0xcd] + imm8(number)
168 | |
def syscall():
    """Return the two byte values 0x0F, 0x05 encoding `syscall`."""
    first, second = 0x0F, 0x05
    return [first, second]
171 | |
def call(distance):
    """Encode a call, either relative (int) or through a register (str).

    An int argument yields 0xE8 followed by its imm32. A str that is a
    member of regs64 yields 0xFF /2 with that register as r/m, preceded
    by a REX byte when rexbit[reg] is 1.
    """
    # NOTE(review): modrm is a dict at module level but is called here; rex,
    # rexbit and Error are not defined in the visible part of this module.
    if type(distance) is int:
        return [0xe8] + imm32(distance)

    if type(distance) is str and distance in regs64:
        reg = distance
        opcode = 0xFF  # 0xFF /2 == call r/m64
        mod_rm = modrm(mod=3, reg=2, rm=regs64[reg])
        if rexbit[reg] != 1:
            return [opcode, mod_rm]
        rexprefix = rex(b=rexbit[reg])
        return [rexprefix, opcode, mod_rm]

    Error('Cannot call to {0}'.format(distance))
    return None
186 | |
187 | |
class Ret(Instruction):
    """ ret (single byte opcode 0xC3) """
    def __init__(self):
        pass

    def encode(self):
        """Return the encoding as bytes.

        The other Instruction subclasses in this module (Push, Pop, Inc,
        Mov1, Xor1) return bytes from encode, so this one does too instead
        of a list.
        """
        return bytes([0xc3])
194 | |
195 | |
class Inc(Instruction):
    """ inc r64 (0xFF with reg field 0, register in the rm field) """
    def __init__(self, reg):
        assert reg in regs64, str(reg)
        self.rex = RexToken(w=1, b=reg.rexbit)
        self.opcode = 0xff
        self.mod_rm = ModRmToken(mod=3, rm=reg.regbits)

    def encode(self):
        """Return REX prefix, opcode byte and ModRM byte concatenated."""
        opcode_byte = bytes([self.opcode])
        rex_byte = self.rex.encode()
        return rex_byte + opcode_byte + self.mod_rm.encode()
206 | |
207 | |
def prepost8(r8, rm8):
    """Build the prefix and ModRM/displacement parts for an 8 bit operand.

    rm8 must be a list: [base] for a plain memory reference or
    [base, offset] for base plus an imm8 displacement. Returns the pair
    (pre, post) of byte-value lists that go before and after the opcode.
    """
    # NOTE(review): regs8, rexbit, rex and Error are not defined in the
    # visible part of this module and modrm is a dict there. `post` is only
    # bound on the success paths, so the final return relies on Error raising.
    assert r8 in regs8
    prefix = []
    if type(rm8) is not list:
        Error('Not supporting move with reg8 {0}'.format(r8))
    elif len(rm8) == 1:
        # TODO: merge mem access with prepost for 64 bits
        (base,) = rm8
        if type(base) is str and base in regs64:
            assert base not in ['rbp', 'rsp', 'r12', 'r13']
            mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
            if rexbit[base] == 1:
                prefix.append(rex(b=1))
            post = [mod_rm]
        else:
            Error('One arg of type {0} not implemented'.format(base))
    elif len(rm8) == 2:
        base, offset = rm8
        assert type(offset) is int
        assert base in regs64

        if base in ('rsp', 'r12'):
            Error('Cannot use rsp or r12 as base yet')
        if rexbit[base] == 1:
            prefix.append(rex(b=1))
        mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8])
        post = [mod_rm] + imm8(offset)
    else:
        Error('not supporting prepost8 with list len {0}'.format(len(rm8)))
    return prefix, post
239 | |
def prepost(r64, rm64):
    """Build the REX prefix and ModRM/SIB/displacement parts for r64, r/m64.

    rm64 is either a member of regs64 (register direct) or a list:
    [base, index, disp], [base, offset] (base may be 'RIP'), or [offset]
    for an absolute address. Returns the pair (pre, post) of byte-value
    lists that go before and after the opcode; falls through to None when
    rm64 is neither a list nor a known register.
    """
    # NOTE(review): rex, rexbit and Error are not defined in the visible part
    # of this module and modrm is a dict there -- confirm before relying on it.
    assert r64 in regs64

    if type(rm64) is not list:
        if rm64 in regs64:
            rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
            mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
            return [rexprefix], [mod_rm]
        return None

    operand_count = len(rm64)

    if operand_count == 3:
        base, index, disp = rm64
        assert base in regs64
        assert index in regs64
        assert type(disp) is int
        # Assert that no special cases are used:
        # TODO: swap base and index to avoid special cases
        # TODO: exploit special cases and make better code
        assert index != 'rsp'

        rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
        # mod=1 and rm=4 indicates a SIB byte: [--][--]+imm8
        mod_rm = modrm(mod=1, rm=4, reg=regs64[r64])
        si_b = sib(ss=0, index=regs64[index], base=regs64[base])
        return [rexprefix], [mod_rm, si_b] + imm8(disp)

    if operand_count == 2:
        base, offset = rm64
        assert type(offset) is int
        if base == 'RIP':
            # RIP pointer relative addressing mode!
            rexprefix = rex(w=1, r=rexbit[r64])
            mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
            return [rexprefix], [mod_rm] + imm32(offset)

        assert base in regs64
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
        if base == 'rsp' or base == 'r12':
            # extended function that uses SIB byte
            # rm=4 indicates a SIB byte follows
            mod_rm = modrm(mod=1, rm=4, reg=regs64[r64])
            # index=4 indicates that index is not used
            si_b = sib(ss=0, index=4, base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + imm8(offset)
        mod_rm = modrm(mod=1, rm=regs64[base], reg=regs64[r64])
        return [rexprefix], [mod_rm] + imm8(offset)

    if operand_count == 1:
        offset = rm64[0]
        if type(offset) is int:
            rexprefix = rex(w=1, r=rexbit[r64])
            mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=4, base=5)  # 0x25
            return [rexprefix], [mod_rm, si_b] + imm32(offset)
        Error('Memory reference of type {0} not implemented'.format(offset))
        return None

    Error('Memory reference not implemented')
    return None
296 | |
297 | |
def leareg64(rega, m):
    """Encode `lea rega, m`: prepost prefix, opcode 0x8D, prepost suffix."""
    prefix, suffix = prepost(rega, m)
    return prefix + [0x8d] + suffix  # lea r64, m
302 | |
303 | |
class Mov1(Instruction):
    """ Mov r64 to r64 """
    def __init__(self, dst, src):
        """Prepare the REX and ModRM tokens for `mov dst, src`.

        Opcode 0x89 (mov r/m64, r64) carries the destination in ModRM.rm
        and the source in ModRM.reg.
        """
        assert src in regs64, str(src)
        assert dst in regs64, str(dst)
        # REX.R extends ModRM.reg (the source) and REX.B extends ModRM.rm
        # (the destination); the two were swapped before, which encoded the
        # wrong registers whenever exactly one operand was r8-r15. This now
        # matches how Xor1 pairs its REX and ModRM fields.
        self.rex = RexToken(w=1, r=src.rexbit, b=dst.rexbit)
        self.mod_rm = ModRmToken(mod=3, rm=dst.regbits, reg=src.regbits)

    def encode(self):
        """Return REX prefix, opcode byte and ModRM byte concatenated."""
        opcode = 0x89  # mov r/m64, r64
        code = bytes([opcode])
        return self.rex.encode() + code + self.mod_rm.encode()
316 | |
317 | |
def Mov(dst, src):
    """Create a move of src into dst.

    - src is an int: list encoding of `mov r64, imm64` (REX, 0xB8+reg,
      eight immediate bytes).
    - src is an X86Register: a Mov1 instruction object.
    - src is a str: list encoding of `mov r64, r/m64` (opcode 0x8B) built
      with prepost.

    Raises Exception for an unknown destination register or an unsupported
    operand combination.
    """
    # The body previously referred to `rega`/`regb`, names left over from an
    # older signature, so every branch except the X86Register one raised
    # NameError. They are mapped to dst/src here.
    # NOTE(review): rex and rexbit are not defined in the visible part of
    # this module, and the int/str branches index regs64 by name -- confirm
    # those paths against the refactored register objects.
    if type(src) is int:
        pre = [rex(w=1, b=rexbit[dst])]
        opcode = 0xb8 + regs64[dst]
        post = imm64(src)
    elif type(src) is X86Register:
        return Mov1(dst, src)
    elif type(src) is str:
        if dst in regs64:
            opcode = 0x8b  # mov r64, r/m64
            pre, post = prepost(dst, src)
        else:
            raise Exception('Unknown register {0}'.format(dst))
    else:
        raise Exception('Move of this kind {0}, {1} not implemented'.format(dst, src))
    return pre + [opcode] + post
334 | |
335 | |
def Xor(rega, regb):
    """Create the Xor1 instruction object for `xor rega, regb`."""
    instruction = Xor1(rega, regb)
    return instruction
338 | |
339 | |
class Xor1(Instruction):
    """ xor r64, r64 using opcode 0x31 (XOR r/m64, r64) """
    def __init__(self, a, b):
        # a goes in ModRM.rm (with REX.B), b in ModRM.reg (with REX.R)
        self.rex = RexToken(w=1, r=b.rexbit, b=a.rexbit)
        self.mod_rm = ModRmToken(mod=3, rm=a.regbits, reg=b.regbits)

    def encode(self):
        """Return REX prefix, opcode byte and ModRM byte concatenated."""
        # 0x31 is XOR r/m64, r64; the alternative is 0x33 XOR r64, r/m64
        opcode_byte = bytes([0x31])
        rex_byte = self.rex.encode()
        return rex_byte + opcode_byte + self.mod_rm.encode()
350 | |
351 | |
352 # integer arithmetic: | |
def addreg64(rega, regb):
    """Encode `add rega, regb` where regb is a 64 bit register or an int.

    Register operand: ADD r/m64, r64 (opcode 0x01) built with prepost.
    Integer operand: 0x83 /0 with an imm8 for values in -128..99, 0x81 /0
    with an imm32 for other values that fit in a signed 32 bit immediate.
    Returns the instruction as a list of byte values.
    """
    # NOTE(review): rex, rexbit and Error are not defined in the visible part
    # of this module and modrm is a dict there.
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The imm8 form is sign-extended, so it only covers -128..127.
        # Without the lower bound, constants below -128 were truncated to
        # one byte; the existing upper cut-off of 100 is kept unchanged.
        if -128 <= regb < 100:
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x83  # add r/m, imm8
            mod_rm = modrm(3, rm=regs64[rega], reg=0)
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x81  # add r/m64, imm32
            mod_rm = modrm(3, rm=regs64[rega], reg=0)
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            Error('Constant value too large!')
    else:
        # The message previously had no placeholder, so the operand was lost.
        Error('unknown second operand {0}!'.format(regb))
373 | |
def subreg64(rega, regb):
    """Encode `sub rega, regb` where regb is a 64 bit register or an int.

    Register operand: SUB r/m64, r64 (opcode 0x29) built with prepost.
    Integer operand: 0x83 /5 with an imm8 for values in -128..99, 0x81 /5
    with an imm32 for other values that fit in a signed 32 bit immediate.
    Returns the instruction as a list of byte values.
    """
    # NOTE(review): rex, rexbit and Error are not defined in the visible part
    # of this module and modrm is a dict there.
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The imm8 form is sign-extended, so it only covers -128..127.
        # Without the lower bound, constants below -128 were truncated to
        # one byte; the existing upper cut-off of 100 is kept unchanged.
        if -128 <= regb < 100:
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x83  # sub r/m, imm8
            mod_rm = modrm(3, rm=regs64[rega], reg=5)
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x81  # sub r/m64, imm32
            mod_rm = modrm(3, rm=regs64[rega], reg=5)
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            Error('Constant value too large!')
    else:
        # The message previously had no placeholder, so the operand was lost.
        Error('unknown second operand {0}!'.format(regb))
395 | |
def idivreg64(reg):
    """Encode `idiv reg` (0xF7 /7) as [rex, opcode, modrm] byte values."""
    # NOTE(review): rex/rexbit are not defined in the visible part of this
    # module and modrm is a dict there.
    prefix = rex(w=1, b=rexbit[reg])
    mod_rm = modrm(3, rm=regs64[reg], reg=7)
    return [prefix, 0xf7, mod_rm]  # IDIV r/m64
401 | |
def imulreg64_rax(reg):
    """Encode one-operand `imul reg` (0xF7 /5) as [rex, opcode, modrm]."""
    # NOTE(review): rex/rexbit are not defined in the visible part of this
    # module and modrm is a dict there.
    prefix = rex(w=1, b=rexbit[reg])
    mod_rm = modrm(3, rm=regs64[reg], reg=5)
    return [prefix, 0xf7, mod_rm]  # IMUL r/m64
407 | |
def imulreg64(rega, regb):
    """Encode `imul rega, regb` using the two-byte opcode 0x0F 0xAF."""
    prefix, suffix = prepost(rega, regb)
    two_byte_opcode = [0x0f, 0xaf]  # IMUL r64, r/m64
    return prefix + two_byte_opcode + suffix
413 | |
414 | |
def cmpreg64(rega, regb):
    """Encode `cmp rega, regb` where regb is a 64 bit register or an int.

    Register operand: CMP r/m64, r64 (opcode 0x39) built with prepost.
    Integer operand: 0x83 /7 with an imm8 for values in -128..127, 0x81 /7
    with an imm32 for other values that fit in a signed 32 bit immediate.
    Returns the instruction as a list of byte values.
    """
    # NOTE(review): rex, rexbit and Error are not defined in the visible part
    # of this module and modrm is a dict there.
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # Every integer used to be emitted as an imm8, which truncated any
        # constant outside the sign-extended byte range. Larger constants
        # now take the imm32 form instead.
        if -128 <= regb <= 127:
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x83  # CMP r/m64, imm8
            mod_rm = modrm(3, rm=regs64[rega], reg=7)
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x81  # CMP r/m64, imm32
            mod_rm = modrm(3, rm=regs64[rega], reg=7)
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            Error('Constant value too large!')
    else:
        Error('not implemented cmp64')