comparison python/x86_2.py @ 287:1c7c1e619be8

File movage
author Windel Bouwman
date Thu, 21 Nov 2013 11:57:27 +0100
parents python/old/assembler.py@91af0e40f868
children
comparison
equal deleted inserted replaced
286:d9df72971cbf 287:1c7c1e619be8
1 """
2 X86 target descriptions and encodings.
3
4 """
5
6 from target import Register, Instruction, Target, Imm8, Label, Imm3, LabelRef
7
8
# NOTE: the name ``modrm`` is rebound by the modrm() helper function defined
# further down in this module, so this table is only visible until then.
modrm = {'rax': 0, 'rbx': 1}

# Table 3.1 of the intel manual:
# use REX.W on the table below:
# Only the low three bits of the register number are stored here; r8..r15
# reuse the numbers 0..7 and are told apart by the ``rexbit`` table.
regs64 = {
    'rax': 0, 'rcx': 1, 'rdx': 2, 'rbx': 3,
    'rsp': 4, 'rbp': 5, 'rsi': 6, 'rdi': 7,
    'r8': 0, 'r9': 1, 'r10': 2, 'r11': 3,
    'r12': 4, 'r13': 5, 'r14': 6, 'r15': 7,
}
regs32 = {
    name: number
    for number, name in enumerate(
        ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi'])
}
regs8 = {
    name: number
    for number, name in enumerate(
        ['al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh'])
}

# Calculation of the rexb bit:
# 1 for the extended registers r8..r15 (second character is a digit),
# 0 for the eight legacy registers.
rexbit = {name: int(name[1].isdigit()) for name in regs64}
19
20 # Helper functions:
def imm64(x):
    """Return *x* as a list of 8 byte values, least significant byte first.

    Negative numbers come out in two's complement form and any bits above
    the low 64 are dropped.
    """
    low_bits = x & 0xFFFFFFFFFFFFFFFF
    return list(low_bits.to_bytes(8, 'little'))
27
def imm32(x):
    """Return *x* as a list of 4 byte values, least significant byte first.

    Negative numbers come out in two's complement form and any bits above
    the low 32 are dropped.
    """
    low_bits = x & 0xFFFFFFFF
    return list(low_bits.to_bytes(4, 'little'))
34
def imm8(x):
    """Return *x* as a one-element list holding its low 8 bits.

    Negative numbers come out in two's complement form.
    """
    return [x & 0xFF]
40
def modrm(mod=0, rm=0, reg=0):
    """Construct the ModR/M byte from its components.

    Note the positional order of the parameters: ``mod``, ``rm``, ``reg``.

    mod: 2-bit addressing mode, placed in bits 7..6.
    rm:  3-bit r/m field, placed in bits 2..0.
    reg: 3-bit reg/opcode-extension field, placed in bits 5..3.

    Raises AssertionError when a field is outside its range.
    """
    # Negative values must be rejected too: they would otherwise leak sign
    # bits into the result and yield something that is not a byte.
    assert 0 <= mod <= 3, 'mod must be in range 0..3'
    assert 0 <= rm <= 7, 'rm must be in range 0..7'
    assert 0 <= reg <= 7, 'reg must be in range 0..7'
    return (mod << 6) | (reg << 3) | rm
47
def rex(w=0, r=0, x=0, b=0):
    """Create a REX prefix byte.

    The result is ``0x40`` with the four flag bits merged in:
    w in bit 3, r in bit 2, x in bit 1 and b in bit 0.

    Raises AssertionError when a flag is not 0 or 1.
    """
    # Negative values must be rejected too: they would otherwise leak sign
    # bits into the result and yield something that is not a byte.
    assert 0 <= w <= 1, 'w must be 0 or 1'
    assert 0 <= r <= 1, 'r must be 0 or 1'
    assert 0 <= x <= 1, 'x must be 0 or 1'
    assert 0 <= b <= 1, 'b must be 0 or 1'
    return 0x40 | (w << 3) | (r << 2) | (x << 1) | b
55
def sib(ss=0, index=0, base=0):
    """Construct the SIB (scale-index-base) byte from its components.

    ss:    2-bit scale field, placed in bits 7..6.
    index: 3-bit index register field, placed in bits 5..3.
    base:  3-bit base register field, placed in bits 2..0.

    Raises AssertionError when a field is outside its range.
    """
    # Negative values must be rejected too: they would otherwise leak sign
    # bits into the result and yield something that is not a byte.
    assert 0 <= ss <= 3, 'ss must be in range 0..3'
    assert 0 <= index <= 7, 'index must be in range 0..7'
    assert 0 <= base <= 7, 'base must be in range 0..7'
    return (ss << 6) | (index << 3) | base
61
# Condition-code field values; OR-ed into the Jcc opcodes by the jump
# encoders in this module.
tttn = {
    'L': 0xc,
    'G': 0xf,
    'NE': 0x5,
    'GE': 0xd,
    'LE': 0xe,
    'E': 0x4,
}
63
64 # Actual instructions:
def nearjump(distance, condition=None):
    """Encode a near jump (``jmp``/``jcc`` with a 32-bit displacement).

    distance:  jump distance in bytes. For a negative (backward) distance
               the length of the jump instruction itself is subtracted as
               well, so that the jump skips its own encoding.
    condition: optional key of the ``tttn`` table; when given, a
               conditional jump is produced instead of a plain ``jmp``.

    Returns the instruction as a list of byte values.
    Raises ValueError when the distance exceeds the supported limit.
    """
    lim = 1 << 30
    if abs(distance) > lim:
        # The original called an undefined ``Error`` helper without raising.
        raise ValueError(
            'near jump cannot jump over more than {0} bytes'.format(lim))
    if condition:
        if distance < 0:
            distance -= 6  # Skip own instruction (0F 8x + imm32)
        opcode = 0x80 | tttn[condition]  # Jcc imm32
        return [0x0F, opcode] + imm32(distance)
    if distance < 0:
        distance -= 5  # Skip own instruction (E9 + imm32)
    return [0xE9] + imm32(distance)
79
def shortjump(distance, condition=None):
    """Encode a short jump (``jmp``/``jcc`` with an 8-bit displacement).

    distance:  jump distance in bytes. For a negative (backward) distance
               the two bytes of the jump instruction itself are subtracted
               as well, so that the jump skips its own encoding.
    condition: optional key of the ``tttn`` table; when given, a
               conditional jump is produced instead of a plain ``jmp``.

    Returns the instruction as a list of two byte values.
    Raises ValueError when the distance exceeds the supported limit.
    """
    lim = 118
    if abs(distance) > lim:
        # The original called an undefined ``Error`` helper without raising.
        raise ValueError(
            'short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        distance -= 2  # Skip own instruction
    if condition:
        opcode = 0x70 | tttn[condition]  # Jcc rel8
    else:
        opcode = 0xeb  # jmp rel8
    return [opcode] + imm8(distance)
92
93 # Helper that determines jump type:
def reljump(distance):
    """Encode an unconditional relative jump over *distance* bytes.

    Distances with an absolute value below 110 use the short form, all
    others the near form.
    """
    encoder = shortjump if abs(distance) < 110 else nearjump
    return encoder(distance)
99
def push(reg):
    """Encode ``push reg`` for a 64-bit register.

    reg: register name, must be a key of ``regs64``.

    Returns the instruction as a list of byte values; the extended
    registers (r8..r15) get a REX.B prefix in front of the opcode.
    Raises NotImplementedError for any other operand.
    """
    if reg not in regs64:
        # The original called an undefined ``Error`` helper without raising.
        raise NotImplementedError('push for {0} not implemented'.format(reg))
    opcode = 0x50 + regs64[reg]
    if rexbit[reg] == 1:
        # rex(b=1) is the 0x41 prefix; built the same way as in pop().
        return [rex(b=1), opcode]
    return [opcode]
108
def pop(reg):
    """Encode ``pop reg`` for a 64-bit register.

    reg: register name, must be a key of ``regs64``.

    Returns the instruction as a list of byte values; the extended
    registers (r8..r15) get a REX.B prefix in front of the opcode.
    Raises NotImplementedError for any other operand.
    """
    if reg not in regs64:
        # The original called an undefined ``Error`` helper without raising.
        raise NotImplementedError('pop for {0} not implemented'.format(reg))
    opcode = 0x58 + regs64[reg]
    if rexbit[reg] == 1:
        return [rex(b=1), opcode]
    return [opcode]
120
def INT(number):
    """Encode ``int number``: opcode 0xCD followed by an 8-bit immediate."""
    encoding = [0xcd]
    encoding.extend(imm8(number))
    return encoding
124
def syscall():
    """Return the two-byte encoding of the ``syscall`` instruction."""
    first_byte, second_byte = 0x0F, 0x05
    return [first_byte, second_byte]
127
def call(distance):
    """Encode a ``call`` instruction.

    distance: either an integer, encoded as ``E8`` plus a 32-bit immediate,
              or the name of a 64-bit register (key of ``regs64``), encoded
              as an indirect call through that register.

    Returns the instruction as a list of byte values.
    Raises ValueError for any other operand.
    """
    if isinstance(distance, int):
        return [0xe8] + imm32(distance)
    if isinstance(distance, str) and distance in regs64:
        reg = distance
        opcode = 0xFF  # 0xFF /2 == call r/m64
        mod_rm = modrm(mod=3, reg=2, rm=regs64[reg])
        if rexbit[reg] == 1:
            return [rex(b=1), opcode, mod_rm]
        return [opcode, mod_rm]
    # The original called an undefined ``Error`` helper without raising.
    raise ValueError('Cannot call to {0}'.format(distance))
142
def ret():
    """Return the one-byte encoding of the ``ret`` instruction."""
    opcode = 0xc3
    return [opcode]
145
def increg64(reg):
    """Encode ``inc reg`` for a 64-bit register.

    The result is a REX.W prefix (with REX.B for r8..r15), opcode 0xFF and
    a register-direct ModR/M byte whose reg field is 0.
    """
    assert reg in regs64
    return [
        rex(w=1, b=rexbit[reg]),
        0xff,
        modrm(mod=3, rm=regs64[reg]),
    ]
152
def prepost8(r8, rm8):
    """Build the prefix and ModR/M bytes for an 8-bit register/memory pair.

    r8:  name of an 8-bit register (key of ``regs8``); goes into the reg
         field of the ModR/M byte.
    rm8: memory operand given as a list, either ``[base]`` or
         ``[base, offset]`` where base is a key of ``regs64`` and offset
         is an integer displacement.

    Returns ``(pre, post)``: the bytes to emit before and after the opcode.
    Raises NotImplementedError for operand forms that are not supported and
    ValueError for operands that cannot be encoded.
    """
    # TODO: merge mem access with prepost for 64 bits
    assert r8 in regs8
    if not isinstance(rm8, list):
        # The original called an undefined ``Error`` helper here and then
        # fell through to ``return pre, post`` with ``post`` unbound.
        raise NotImplementedError(
            'Not supporting move with reg8 {0}'.format(r8))

    if len(rm8) == 1:
        base, = rm8
        if not (isinstance(base, str) and base in regs64):
            raise NotImplementedError(
                'One arg of type {0} not implemented'.format(base))
        # With mod=0 the r/m values 4 and 5 do not mean "plain base
        # register" (see prepost(): rm=4 announces a SIB byte, rm=5 is the
        # RIP-relative form), so these bases cannot be used here.
        assert base not in ('rbp', 'rsp', 'r12', 'r13')
        post = [modrm(mod=0, rm=regs64[base], reg=regs8[r8])]
    elif len(rm8) == 2:
        base, offset = rm8
        assert isinstance(offset, int)
        assert base in regs64
        if base in ('rsp', 'r12'):
            raise NotImplementedError('Cannot use rsp or r12 as base yet')
        # mod=1 carries a signed 8-bit displacement; anything larger needs
        # mod=2 with a 32-bit displacement instead of being truncated.
        if -128 <= offset <= 127:
            mod, disp = 1, imm8(offset)
        elif -(1 << 31) <= offset < (1 << 31):
            mod, disp = 2, imm32(offset)
        else:
            raise ValueError(
                'Displacement {0} does not fit in 32 bits'.format(offset))
        post = [modrm(mod=mod, rm=regs64[base], reg=regs8[r8])] + disp
    else:
        raise NotImplementedError(
            'not supporting prepost8 with list len {0}'.format(len(rm8)))

    pre = []
    if rexbit[base] == 1:
        # Once a REX prefix is present, register numbers 4..7 no longer
        # select ah/ch/dh/bh, so that combination cannot be encoded.
        if regs8[r8] >= 4:
            raise ValueError(
                'Cannot combine {0} with extended register {1}'.format(
                    r8, base))
        pre.append(rex(b=1))
    return pre, post
184
def _disp_mod(disp):
    """Return the ModR/M mod value and displacement bytes for *disp*."""
    if -128 <= disp <= 127:
        return 1, imm8(disp)  # mod=1: signed 8-bit displacement
    if -(1 << 31) <= disp < (1 << 31):
        return 2, imm32(disp)  # mod=2: 32-bit displacement
    raise ValueError('Displacement {0} does not fit in 32 bits'.format(disp))


def prepost(r64, rm64):
    """Build the REX prefix and ModR/M bytes for a 64-bit operand pair.

    r64:  name of a 64-bit register (key of ``regs64``); goes into the reg
          field of the ModR/M byte.
    rm64: either the name of a 64-bit register, or a memory operand given
          as a list in one of these forms:

          * ``[base, index, disp]`` - base register + index register + disp
          * ``['RIP', disp]``       - RIP-relative with 32-bit displacement
          * ``[base, disp]``        - base register + displacement
          * ``[address]``           - absolute integer address

    Returns ``(pre, post)``: the bytes to emit before and after the opcode.
    Raises NotImplementedError for operand forms that are not supported and
    ValueError for displacements that do not fit in 32 bits.
    """
    assert r64 in regs64
    reg = regs64[r64]

    if isinstance(rm64, list):
        if len(rm64) == 3:
            base, index, disp = rm64
            assert base in regs64
            assert index in regs64
            assert isinstance(disp, int)
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert index != 'rsp'

            rexprefix = rex(
                w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            mod, dispbytes = _disp_mod(disp)
            # rm=4 indicates that a SIB byte follows
            mod_rm = modrm(mod=mod, rm=4, reg=reg)
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + dispbytes

        if len(rm64) == 2:
            base, offset = rm64
            assert isinstance(offset, int)
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=reg)
                return [rexprefix], [mod_rm] + imm32(offset)

            assert base in regs64
            rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
            mod, dispbytes = _disp_mod(offset)
            if base in ('rsp', 'r12'):
                # extended function that uses SIB byte
                # rm=4 indicates a SIB byte follows
                mod_rm = modrm(mod=mod, rm=4, reg=reg)
                # index=4 indicates that index is not used
                si_b = sib(ss=0, index=4, base=regs64[base])
                return [rexprefix], [mod_rm, si_b] + dispbytes
            mod_rm = modrm(mod=mod, rm=regs64[base], reg=reg)
            return [rexprefix], [mod_rm] + dispbytes

        if len(rm64) == 1:
            offset = rm64[0]
            if not isinstance(offset, int):
                raise NotImplementedError(
                    'Memory reference of type {0} not implemented'.format(
                        offset))
            rexprefix = rex(w=1, r=rexbit[r64])
            mod_rm = modrm(mod=0, rm=4, reg=reg)
            si_b = sib(ss=0, index=4, base=5)  # 0x25
            return [rexprefix], [mod_rm, si_b] + imm32(offset)

        raise NotImplementedError('Memory reference not implemented')

    if isinstance(rm64, str) and rm64 in regs64:
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=reg)
        return [rexprefix], [mod_rm]

    # The original fell off the end here and returned None, which made the
    # caller fail later while unpacking the result.
    raise NotImplementedError('Operand {0} not implemented'.format(rm64))
241
def leareg64(rega, m):
    """Encode ``lea rega, m``: opcode 0x8D wrapped in the prepost() bytes."""
    prefix, suffix = prepost(rega, m)
    # lea r64, m
    return prefix + [0x8d] + suffix
246
def mov(rega, regb):
    """Encode ``mov rega, regb``.

    Supported operand combinations:

    * regb is an integer: load a 64-bit immediate into register rega.
    * regb is a 64-bit register name: store it into rega (register or
      memory operand as accepted by prepost()).
    * regb is an 8-bit register name: store it into the memory operand
      rega (as accepted by prepost8()).
    * rega is a 64-bit register name and regb is anything else (e.g. a
      memory operand list): load rega from regb.

    Returns the instruction as a list of byte values.
    Raises ValueError for an unknown register name and NotImplementedError
    for an unsupported operand combination.
    """
    # The error branches originally called an undefined ``Error`` helper
    # and then fell through to the final return with unbound locals.
    if isinstance(regb, int):
        pre = [rex(w=1, b=rexbit[rega])]
        opcode = 0xb8 + regs64[rega]
        post = imm64(regb)
    elif isinstance(regb, str):
        if regb in regs64:
            opcode = 0x89  # mov r/m64, r64
            pre, post = prepost(regb, rega)
        elif regb in regs8:
            opcode = 0x88  # mov r/m8, r8
            pre, post = prepost8(regb, rega)
        else:
            raise ValueError('Unknown register {0}'.format(regb))
    elif isinstance(rega, str):
        if rega not in regs64:
            raise ValueError('Unknown register {0}'.format(rega))
        opcode = 0x8b  # mov r64, r/m64
        pre, post = prepost(rega, regb)
    else:
        raise NotImplementedError(
            'Move of this kind {0}, {1} not implemented'.format(rega, regb))
    return pre + [opcode] + post
270
def xorreg64(rega, regb):
    """Encode ``xor rega, regb`` for two 64-bit registers.

    rega goes into the r/m field and regb into the reg field of a
    register-direct ModR/M byte.
    """
    # 0x31 is XOR r/m64, r64; the alternative is 0x33 XOR r64, r/m64.
    return [
        rex(w=1, r=rexbit[regb], b=rexbit[rega]),
        0x31,
        modrm(3, rm=regs64[rega], reg=regs64[regb]),
    ]
277
# Integer arithmetic:
def addreg64(rega, regb):
    """Encode ``add rega, regb``.

    rega: destination; a 64-bit register name when regb is an integer,
          otherwise any operand accepted by prepost() as r/m.
    regb: either a 64-bit register name or an integer immediate.

    Immediates that fit in a signed byte use the short ``83 /0 ib`` form,
    other values that fit in a signed 32-bit integer use ``81 /0 id``.

    Returns the instruction as a list of byte values.
    Raises ValueError when the immediate does not fit in 32 bits or when
    regb is neither a register name nor an integer.
    """
    if isinstance(regb, int):
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=0)
        # The original tested ``regb < 100`` only, so large negative
        # values were silently truncated to 8 bits.
        if -128 <= regb <= 127:
            return [rexprefix, 0x83, mod_rm] + imm8(regb)  # add r/m, imm8
        if -(1 << 31) <= regb < (1 << 31):
            return [rexprefix, 0x81, mod_rm] + imm32(regb)  # add r/m64, imm32
        raise ValueError('Constant value too large!')
    if isinstance(regb, str) and regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    raise ValueError('unknown second operand {0}!'.format(regb))
299
def subreg64(rega, regb):
    """Encode ``sub rega, regb``.

    rega: destination; a 64-bit register name when regb is an integer,
          otherwise any operand accepted by prepost() as r/m.
    regb: either a 64-bit register name or an integer immediate.

    Immediates that fit in a signed byte use the short ``83 /5 ib`` form,
    other values that fit in a signed 32-bit integer use ``81 /5 id``.

    Returns the instruction as a list of byte values.
    Raises ValueError when the immediate does not fit in 32 bits or when
    regb is neither a register name nor an integer.
    """
    if isinstance(regb, int):
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=5)
        # The original tested ``regb < 100`` only, so large negative
        # values were silently truncated to 8 bits.
        if -128 <= regb <= 127:
            return [rexprefix, 0x83, mod_rm] + imm8(regb)  # sub r/m, imm8
        if -(1 << 31) <= regb < (1 << 31):
            return [rexprefix, 0x81, mod_rm] + imm32(regb)  # sub r/m64, imm32
        raise ValueError('Constant value too large!')
    if isinstance(regb, str) and regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    raise ValueError('unknown second operand {0}!'.format(regb))
321
def idivreg64(reg):
    """Encode ``idiv reg`` for a 64-bit register.

    The result is a REX.W prefix (with REX.B for r8..r15), opcode 0xF7 and
    a register-direct ModR/M byte whose reg field is 7.
    """
    # IDIV r/m64
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,
        modrm(3, rm=regs64[reg], reg=7),
    ]
327
def imulreg64_rax(reg):
    """Encode the one-operand ``imul reg`` for a 64-bit register.

    The result is a REX.W prefix (with REX.B for r8..r15), opcode 0xF7 and
    a register-direct ModR/M byte whose reg field is 5.
    """
    # IMUL r/m64
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,
        modrm(3, rm=regs64[reg], reg=5),
    ]
333
def imulreg64(rega, regb):
    """Encode the two-operand ``imul rega, regb``.

    The two opcode bytes 0x0F 0xAF are placed between the prefix and
    ModR/M bytes produced by prepost(rega, regb).
    """
    prefix, suffix = prepost(rega, regb)
    # IMUL r64, r/m64
    two_byte_opcode = [0x0f, 0xaf]
    return prefix + two_byte_opcode + suffix
339
def cmpreg64(rega, regb):
    """Encode ``cmp rega, regb``.

    rega: first operand; a 64-bit register name when regb is an integer,
          otherwise any operand accepted by prepost() as r/m.
    regb: either a 64-bit register name or an integer immediate.

    Immediates that fit in a signed byte use the short ``83 /7 ib`` form,
    other values that fit in a signed 32-bit integer use ``81 /7 id``.

    Returns the instruction as a list of byte values.
    Raises ValueError when the immediate does not fit in 32 bits and
    NotImplementedError for any other kind of second operand.
    """
    if isinstance(regb, int):
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=7)
        # The original always used the imm8 form, silently truncating any
        # immediate outside the signed byte range.
        if -128 <= regb <= 127:
            return [rexprefix, 0x83, mod_rm] + imm8(regb)  # CMP r/m64, imm8
        if -(1 << 31) <= regb < (1 << 31):
            return [rexprefix, 0x81, mod_rm] + imm32(regb)  # CMP r/m64, imm32
        raise ValueError('Constant value too large!')
    if isinstance(regb, str) and regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    raise NotImplementedError('not implemented cmp64')
353
# Mapping that maps string names to the right functions.
# Each value is an (encoder function, integer) pair; the integer is
# presumably the number of operands the encoder takes - verify against the
# code that consumes this table.
opcodes = {
    'mov': (mov, 2),
    'lea': (leareg64, 2),
    'int': (INT, 1),
    'syscall': (syscall, 0),
}
356