comparison python/ppci/target/x86/instructions.py @ 398:c0d9837acde8

x86 target refactor
author Windel Bouwman
date Thu, 29 May 2014 12:13:37 +0200
parents python/ppci/target/x86/x86_2.py@86b02c98a717
children
comparison
equal deleted inserted replaced
397:5d03c10fe19d 398:c0d9837acde8
1 """
2 X86 target descriptions and encodings.
3 """
4
5 from ..basetarget import Register, Instruction
6 from .registers import regs64, X86Register
7
8 from ..token import Token, u32, u8, bit_range
9
10
# Module-level mapping from the register names 'rax' and 'rbx' to small integers.
# NOTE(review): several functions further down call modrm(mod=..., rm=..., reg=...)
# as a function, which this dict does not support -- confirm whether a helper
# function of the same name is missing from this module.
modrm = {'rax': 0, 'rbx': 1}
12
13 # Table 3.1 of the intel manual:
14 # use REX.W on the table below:
15
16
17 # Helper functions:
def imm64(x):
    """Return the low 64 bits of integer x as 8 little-endian byte values."""
    # Masking a negative Python int yields its two's complement bit pattern,
    # so no separate sign handling is required.
    word = x & 0xFFFFFFFFFFFFFFFF
    return [(word >> shift) & 0xFF for shift in range(0, 64, 8)]
24
def imm32(x):
    """Return the low 32 bits of integer x as 4 little-endian byte values."""
    # Masking a negative Python int yields its two's complement bit pattern.
    word = x & 0xFFFFFFFF
    return [(word >> shift) & 0xFF for shift in range(0, 32, 8)]
31
32
def imm8(x):
    """Return the low 8 bits of integer x as a single-element byte list."""
    # Masking a negative Python int yields its two's complement bit pattern.
    return [x & 0xFF]
38
39
class ModRmToken(Token):
    """8-bit token holding the mod, reg and rm fields of a ModRM byte."""

    # Field accessors declared through bit_range.
    rm = bit_range(0, 3)
    reg = bit_range(3, 6)
    mod = bit_range(6, 8)

    def __init__(self, mod=0, rm=0, reg=0):
        """Create the token and store the three field values."""
        super().__init__(8)
        assert mod <= 3
        assert rm <= 7
        assert reg <= 7
        self.mod = mod
        self.rm = rm
        self.reg = reg

    def encode(self):
        """Return the token's bit value passed through u8."""
        value = self.bit_value
        return u8(value)
57
58
class RexToken(Token):
    """8-bit token holding the w, r, x and b flags of a REX prefix byte."""

    # Single-bit field accessors declared through bit_range.
    b = bit_range(0, 1)
    x = bit_range(1, 2)
    r = bit_range(2, 3)
    w = bit_range(3, 4)

    def __init__(self, w=0, r=0, x=0, b=0):
        """Create the token, store the four flags and set bit 6."""
        super().__init__(8)
        assert w <= 1
        assert r <= 1
        assert x <= 1
        assert b <= 1
        self.w = w
        self.r = r
        self.x = x
        self.b = b
        # Bit 6 is always set on this token.
        self.set_bit(6, 1)

    def encode(self):
        """Return the token's bit value passed through u8."""
        value = self.bit_value
        return u8(value)
80
81
def sib(ss=0, index=0, base=0):
    """Pack ss (bits 6-7), index (bits 3-5) and base (bits 0-2) into one int."""
    assert ss <= 3
    assert index <= 7
    assert base <= 7
    scale_bits = ss << 6
    index_bits = index << 3
    return scale_bits | index_bits | base
87
# Condition name -> 4-bit condition code; nearjump and shortjump OR this value
# into their conditional-jump opcodes.
tttn = {
    'L': 0xc,
    'G': 0xf,
    'NE': 0x5,
    'GE': 0xd,
    'LE': 0xe,
    'E': 0x4,
}
89
90 # Actual instructions:
def nearjump(distance, condition=None):
    """Encode a jump with a 32-bit relative offset.

    Args:
        distance: Jump distance in bytes. For negative distances the length
            of the jump instruction itself (6 bytes conditional, 5 bytes
            unconditional) is subtracted before encoding.
        condition: Optional key of the tttn table selecting a conditional
            jump; a falsy value produces an unconditional jump.

    Returns:
        A list of byte values: [0x0F, 0x80 | cc] or [0xE9], followed by the
        4-byte little-endian offset.

    Raises:
        ValueError: If abs(distance) exceeds 1 << 30.
    """
    lim = 1 << 30
    if abs(distance) > lim:
        # The original called an undefined `Error` name without raising it.
        raise ValueError(
            'near jump cannot jump over more than {0} bytes'.format(lim))
    if condition:
        if distance < 0:
            distance -= 6  # Skip own instruction
        opcode = 0x80 | tttn[condition]  # Jcc imm32
        return [0x0F, opcode] + imm32(distance)
    if distance < 0:
        distance -= 5  # Skip own instruction
    return [0xE9] + imm32(distance)
105
def shortjump(distance, condition=None):
    """Encode a jump with an 8-bit relative offset.

    Args:
        distance: Jump distance in bytes. For negative distances the 2-byte
            length of the jump instruction itself is subtracted before
            encoding.
        condition: Optional key of the tttn table selecting a conditional
            jump; a falsy value produces an unconditional jump.

    Returns:
        A two-element list of byte values: the opcode (0x70 | cc, or 0xEB)
        followed by the offset byte.

    Raises:
        ValueError: If abs(distance) exceeds 118.
    """
    lim = 118
    if abs(distance) > lim:
        # The original called an undefined `Error` name without raising it.
        raise ValueError(
            'short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        distance -= 2  # Skip own instruction
    if condition:
        opcode = 0x70 | tttn[condition]  # Jcc rel8
    else:
        opcode = 0xeb  # jmp rel8
    return [opcode] + imm8(distance)
118
119 # Helper that determines jump type:
def reljump(distance):
    """Encode an unconditional jump, using shortjump when abs(distance) < 110."""
    encoder = shortjump if abs(distance) < 110 else nearjump
    return encoder(distance)
125
126
class Push(Instruction):
    """Push instruction for a register taken from regs64."""

    def __init__(self, reg):
        assert reg in regs64, str(reg)
        self.reg = reg

    def encode(self):
        """Return the encoded bytes: optional 0x41 prefix, then 0x50 + regbits."""
        # The 0x41 prefix byte is only emitted when the register's rexbit is 1.
        prefix = [0x41] if self.reg.rexbit == 1 else []
        return bytes(prefix + [0x50 + self.reg.regbits])
138
139
class Pop(Instruction):
    """Pop instruction for a register taken from regs64."""

    def __init__(self, reg):
        assert reg in regs64, str(reg)
        self.reg = reg

    def encode(self):
        """Return the encoded bytes: optional 0x41 prefix, then 0x58 + regbits."""
        # The 0x41 prefix byte is only emitted when the register's rexbit is 1.
        prefix = [0x41] if self.reg.rexbit == 1 else []
        return bytes(prefix + [0x58 + self.reg.regbits])
151
152
def pop(reg):
    """Encode a pop of a 64-bit register as a list of byte values.

    The original body used `rexbit`, `rex` and `Error`, none of which are
    defined or imported in the visible module, and indexed `regs64` by
    register. This version reads the `rexbit` / `regbits` attributes of the
    register instead, the same way the Pop class does.

    Args:
        reg: A register contained in regs64.

    Returns:
        [0x41, opcode] when reg.rexbit is 1, otherwise [opcode], where
        opcode is 0x58 + reg.regbits.

    Raises:
        NotImplementedError: If reg is not in regs64.
    """
    if reg not in regs64:
        raise NotImplementedError('pop for {0} not implemented'.format(reg))
    opcode = 0x58 + reg.regbits
    if reg.rexbit == 1:
        # 0x41 is the same prefix byte the Pop class emits.
        return [0x41, opcode]
    return [opcode]
164
def INT(number):
    """Return byte 0xCD followed by number encoded through imm8."""
    code = [0xcd]
    code.extend(imm8(number))
    return code
168
def syscall():
    """Return the two-byte sequence 0x0F 0x05 as a list of byte values."""
    first_byte, second_byte = 0x0F, 0x05
    return [first_byte, second_byte]
171
def call(distance):
    """Encode a call to a relative offset or through a register.

    Args:
        distance: Either an int (relative offset, encoded as 0xE8 + imm32)
            or a str naming a register found in regs64 (encoded as 0xFF /2).

    Returns:
        A list of byte values.

    Raises:
        ValueError: If distance is neither an int nor a known register name.
    """
    if type(distance) is int:
        return [0xe8] + imm32(distance)
    elif type(distance) is str and distance in regs64:
        # NOTE(review): `modrm` (as a callable), `rexbit` and `rex` are not
        # defined or imported in the visible module -- confirm this branch
        # still works after the refactor.
        reg = distance
        opcode = 0xFF  # 0xFF /2 == call r/m64
        mod_rm = modrm(mod=3, reg=2, rm=regs64[reg])
        if rexbit[reg] == 1:
            rexprefix = rex(b=rexbit[reg])
            return [rexprefix, opcode, mod_rm]
        else:
            return [opcode, mod_rm]
    else:
        # The original called an undefined `Error` name without raising it.
        raise ValueError('Cannot call to {0}'.format(distance))
186
187
class Ret(Instruction):
    """Return instruction, encoded as the single byte 0xC3."""

    def __init__(self):
        pass

    def encode(self):
        """Return the encoding as bytes.

        The original returned a list, while every other Instruction
        subclass in this module returns bytes from encode().
        """
        return bytes([0xc3])
194
195
class Inc(Instruction):
    """Increment instruction for a register taken from regs64."""

    def __init__(self, reg):
        assert reg in regs64, str(reg)
        self.rex = RexToken(w=1, b=reg.rexbit)
        self.opcode = 0xff
        self.mod_rm = ModRmToken(mod=3, rm=reg.regbits)

    def encode(self):
        """Return the REX token, the opcode byte and the ModRM token, concatenated."""
        prefix = self.rex.encode()
        opcode_byte = bytes([self.opcode])
        return prefix + opcode_byte + self.mod_rm.encode()
206
207
def prepost8(r8, rm8):
    """Build the prefix and ModRM/displacement bytes for an 8-bit memory access.

    Args:
        r8: An 8-bit register contained in regs8.
        rm8: A list describing the memory operand: [base] or [base, offset],
            where base names a 64-bit register and offset is an int.

    Returns:
        A (pre, post) tuple of byte-value lists to emit before and after
        the opcode.

    Raises:
        NotImplementedError: For any unsupported operand form. The original
            called an undefined `Error` name without raising, which let
            execution reach the final return with `post` unbound.
    """
    # NOTE(review): `regs8`, `rexbit`, `rex` and a callable `modrm` are not
    # defined or imported in the visible module -- confirm where they come from.
    assert r8 in regs8
    pre = []
    if type(rm8) is list:
        # TODO: merge mem access with prepost for 64 bits
        if len(rm8) == 1:
            base, = rm8
            if type(base) is str and base in regs64:
                assert base not in ['rbp', 'rsp', 'r12', 'r13']
                mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
                if rexbit[base] == 1:
                    pre.append(rex(b=1))
                post = [mod_rm]
            else:
                raise NotImplementedError(
                    'One arg of type {0} not implemented'.format(base))
        elif len(rm8) == 2:
            base, offset = rm8
            assert type(offset) is int
            assert base in regs64

            if base == 'rsp' or base == 'r12':
                raise NotImplementedError('Cannot use rsp or r12 as base yet')
            if rexbit[base] == 1:
                pre.append(rex(b=1))
            mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8])
            post = [mod_rm] + imm8(offset)
        else:
            raise NotImplementedError(
                'not supporting prepost8 with list len {0}'.format(len(rm8)))
    else:
        raise NotImplementedError(
            'Not supporting move with reg8 {0}'.format(r8))
    return pre, post
239
def prepost(r64, rm64):
    """Build the REX prefix and ModRM/SIB/displacement bytes for r64, r/m64.

    Args:
        r64: A 64-bit register contained in regs64 (goes into the reg field).
        rm64: Either a register contained in regs64, or a list describing a
            memory operand: [base, index, disp], [base, offset] (base may be
            'RIP'), or [offset].

    Returns:
        A (pre, post) tuple of byte-value lists to emit before and after
        the opcode.

    Raises:
        NotImplementedError: For any unsupported operand form. The original
            called an undefined `Error` name without raising, or fell off
            the end and returned None, which broke the callers' tuple
            unpacking.
    """
    # NOTE(review): `rexbit`, `rex` and a callable `modrm` are not defined or
    # imported in the visible module -- confirm where they come from.
    assert r64 in regs64
    if type(rm64) is list:
        if len(rm64) == 3:
            base, index, disp = rm64
            assert base in regs64
            assert index in regs64
            assert type(disp) is int
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert index != 'rsp'

            rexprefix = rex(
                w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            # mod=1 and rm=4 indicates a SIB byte: [--][--]+imm8
            mod_rm = modrm(mod=1, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + imm8(disp)
        elif len(rm64) == 2:
            base, offset = rm64
            assert type(offset) is int
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
                return [rexprefix], [mod_rm] + imm32(offset)

            assert base in regs64
            if base == 'rsp' or base == 'r12':
                # extended function that uses SIB byte
                rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
                # rm=4 indicates a SIB byte follows
                mod_rm = modrm(mod=1, rm=4, reg=regs64[r64])
                # index=4 indicates that index is not used
                si_b = sib(ss=0, index=4, base=regs64[base])
                return [rexprefix], [mod_rm, si_b] + imm8(offset)

            rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
            mod_rm = modrm(mod=1, rm=regs64[base], reg=regs64[r64])
            return [rexprefix], [mod_rm] + imm8(offset)
        elif len(rm64) == 1:
            offset = rm64[0]
            if type(offset) is int:
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
                si_b = sib(ss=0, index=4, base=5)  # 0x25
                return [rexprefix], [mod_rm, si_b] + imm32(offset)
            raise NotImplementedError(
                'Memory reference of type {0} not implemented'.format(offset))
        else:
            raise NotImplementedError('Memory reference not implemented')
    elif rm64 in regs64:
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
        return [rexprefix], [mod_rm]
    # Neither a list nor a known register.
    raise NotImplementedError('Operand {0} not implemented'.format(rm64))
296
297
def leareg64(rega, m):
    """Return the bytes for opcode 0x8D (lea r64, m) wrapped in prepost(rega, m)."""
    prefix, suffix = prepost(rega, m)
    return prefix + [0x8d] + suffix
302
303
class Mov1(Instruction):
    """ Mov r64 to r64 (opcode 0x89: mov r/m64, r64) """

    def __init__(self, dst, src):
        assert src in regs64, str(src)
        assert dst in regs64, str(dst)
        # dst sits in the ModRM rm field and src in the reg field. REX.B
        # extends rm and REX.R extends reg, so the prefix bits must follow the
        # same assignment. The original had r/b swapped, which mis-encodes any
        # move where exactly one operand has rexbit set. Xor1 in this module
        # already pairs them this way.
        self.rex = RexToken(w=1, r=src.rexbit, b=dst.rexbit)
        self.mod_rm = ModRmToken(mod=3, rm=dst.regbits, reg=src.regbits)

    def encode(self):
        """Return the REX token, opcode 0x89 and the ModRM token, concatenated."""
        opcode = 0x89  # mov r/m64, r64
        code = bytes([opcode])
        return self.rex.encode() + code + self.mod_rm.encode()
316
317
def Mov(dst, src):
    """Build a 64-bit move into dst from an immediate, register or memory operand.

    The original body referred to `rega` / `regb`, which are not parameters
    of this function, so every branch except the register one raised
    NameError. The immediate branch also relied on `rex` / `rexbit`, which
    are not defined or imported in the visible module; it now reads
    dst.rexbit / dst.regbits, as Mov1 does.

    Args:
        dst: Destination register.
        src: An int (immediate), an X86Register, or a str operand handed to
            prepost.

    Returns:
        A Mov1 instruction when src is an X86Register, otherwise a list of
        byte values.

    Raises:
        Exception: For an unknown destination or unsupported operand type.
    """
    if type(src) is int:
        # 0x48 is the REX prefix with W set; bit 0 (REX.B) extends the
        # register number carried in the opcode byte.
        pre = [0x48 | dst.rexbit]
        opcode = 0xb8 + dst.regbits  # mov r64, imm64
        post = imm64(src)
    elif type(src) is X86Register:
        return Mov1(dst, src)
    elif type(src) is str:
        if dst in regs64:
            opcode = 0x8b  # mov r64, r/m64
            pre, post = prepost(dst, src)
        else:
            raise Exception('Unknown register {0}'.format(dst))
    else:
        raise Exception(
            'Move of this kind {0}, {1} not implemented'.format(dst, src))
    return pre + [opcode] + post
334
335
def Xor(rega, regb):
    """Build and return the Xor1 instruction for the operands (rega, regb)."""
    instruction = Xor1(rega, regb)
    return instruction
338
339
class Xor1(Instruction):
    """Xor between two registers (opcode 0x31: XOR r/m64, r64)."""

    def __init__(self, a, b):
        # a goes into the ModRM rm field (with REX.B), b into reg (with REX.R).
        self.rex = RexToken(w=1, r=b.rexbit, b=a.rexbit)
        self.mod_rm = ModRmToken(mod=3, rm=a.regbits, reg=b.regbits)

    def encode(self):
        """Return the REX token, opcode 0x31 and the ModRM token, concatenated."""
        # Alternative is 0x33 XOR r64, r/m64
        opcode_byte = bytes([0x31])  # XOR r/m64, r64
        prefix = self.rex.encode()
        return prefix + opcode_byte + self.mod_rm.encode()
350
351
# integer arithmetic:
def addreg64(rega, regb):
    """Encode a 64-bit add of a register or an immediate into rega.

    Args:
        rega: Destination register.
        regb: A register contained in regs64, or an int immediate.

    Returns:
        A list of byte values.

    Raises:
        ValueError: If the immediate does not fit in 32 signed bits, or the
            second operand is of an unsupported kind.
    """
    # NOTE(review): `rexbit`, `rex` and a callable `modrm` are not defined or
    # imported in the visible module -- confirm where they come from.
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The lower bounds are new: the original only checked upper limits,
        # so e.g. -1000 was silently truncated to a single byte.
        if -128 <= regb < 100:
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x83  # add r/m, imm8
            mod_rm = modrm(3, rm=regs64[rega], reg=0)
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x81  # add r/m64, imm32
            mod_rm = modrm(3, rm=regs64[rega], reg=0)
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            raise ValueError('Constant value too large!')
    else:
        # The original format string had no placeholder for the operand.
        raise ValueError('unknown second operand {0}!'.format(regb))
373
def subreg64(rega, regb):
    """Encode a 64-bit subtract of a register or an immediate from rega.

    Args:
        rega: Destination register.
        regb: A register contained in regs64, or an int immediate.

    Returns:
        A list of byte values.

    Raises:
        ValueError: If the immediate does not fit in 32 signed bits, or the
            second operand is of an unsupported kind.
    """
    # NOTE(review): `rexbit`, `rex` and a callable `modrm` are not defined or
    # imported in the visible module -- confirm where they come from.
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The lower bounds are new: the original only checked upper limits,
        # so e.g. -1000 was silently truncated to a single byte.
        if -128 <= regb < 100:
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x83  # sub r/m, imm8
            mod_rm = modrm(3, rm=regs64[rega], reg=5)
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            rexprefix = rex(w=1, b=rexbit[rega])
            opcode = 0x81  # sub r/m64, imm32
            mod_rm = modrm(3, rm=regs64[rega], reg=5)
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        else:
            raise ValueError('Constant value too large!')
    else:
        # The original format string had no placeholder for the operand.
        raise ValueError('unknown second operand {0}!'.format(regb))
395
def idivreg64(reg):
    """Return [rex prefix, 0xF7, modrm] for IDIV r/m64 (ModRM reg field 7)."""
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,  # IDIV r/m64
        modrm(3, rm=regs64[reg], reg=7),
    ]
401
def imulreg64_rax(reg):
    """Return [rex prefix, 0xF7, modrm] for IMUL r/m64 (ModRM reg field 5)."""
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,  # IMUL r/m64
        modrm(3, rm=regs64[reg], reg=5),
    ]
407
def imulreg64(rega, regb):
    """Return the two-byte opcode 0x0F 0xAF (IMUL r64, r/m64) wrapped in prepost(rega, regb)."""
    prefix, suffix = prepost(rega, regb)
    opcode_bytes = [0x0f, 0xaf]  # IMUL r64, r/m64
    return prefix + opcode_bytes + suffix
413
414
def cmpreg64(rega, regb):
    """Encode a 64-bit compare of rega against a register or an immediate.

    Args:
        rega: First operand register.
        regb: A register contained in regs64, or an int immediate.

    Returns:
        A list of byte values.

    Raises:
        ValueError: If the immediate does not fit in 32 signed bits.
        NotImplementedError: If the second operand is of an unsupported kind.
    """
    # NOTE(review): `rexbit`, `rex` and a callable `modrm` are not defined or
    # imported in the visible module -- confirm where they come from.
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=7)
        if -128 <= regb <= 127:
            opcode = 0x83  # CMP r/m64, imm8
            return [rexprefix, opcode, mod_rm] + imm8(regb)
        # The original always emitted the imm8 form, silently truncating
        # larger immediates; use the imm32 form of the same /7 opcode group.
        if -(1 << 31) <= regb < (1 << 31):
            opcode = 0x81  # CMP r/m64, imm32
            return [rexprefix, opcode, mod_rm] + imm32(regb)
        raise ValueError('Constant value too large!')
    else:
        # The original called an undefined `Error` name without raising it.
        raise NotImplementedError('not implemented cmp64')