1
|
1 """
|
|
2 Assembler code generation functions
|
|
3 """
|
|
4
|
|
5 from .errors import Error
|
|
6
|
|
# NOTE: this name is rebound by the modrm() function defined further down.
modrm = {'rax': 0, 'rbx': 1}

# Table 3.1 of the intel manual:
# use REX.W on the table below:
# Each 64 bit register maps to its 3 bit register field; r8..r15 reuse
# the numbers 0..7 and are told apart by the rex bit (see rexbit below).
regs64 = {
    name: number & 7
    for number, name in enumerate(
        ('rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
         'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'))
}
regs32 = {
    name: number
    for number, name in enumerate(
        ('eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi'))
}
regs8 = {
    name: number
    for number, name in enumerate(
        ('al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh'))
}

# Calculation of the rexb bit:
# 0 for the eight classic registers, 1 for r8..r15.
rexbit = {
    name: number >> 3
    for number, name in enumerate(
        ('rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
         'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'))
}
|
|
17
|
|
18 # Helper functions:
|
|
def imm64(x):
    """ Encode x as a list of 8 byte values, least significant byte first.

    Negative values are represented in two's complement; anything outside
    64 bits wraps around.
    """
    wrapped = x & 0xFFFFFFFFFFFFFFFF
    return [(wrapped >> shift) & 0xFF for shift in range(0, 64, 8)]
|
|
25
|
|
def imm32(x):
    """ Encode x as a list of 4 byte values, least significant byte first.

    Negative values are represented in two's complement; anything outside
    32 bits wraps around.
    """
    wrapped = x & 0xFFFFFFFF
    return [(wrapped >> shift) & 0xFF for shift in range(0, 32, 8)]
|
|
32
|
|
def imm8(x):
    """ Encode x as a one element list holding its low 8 bits.

    Negative values are represented in two's complement; anything outside
    8 bits wraps around.
    """
    return [x & 0xFF]
|
|
38
|
|
def modrm(mod=0, rm=0, reg=0):
    """ Pack mod (bits 7-6), reg (bits 5-3) and rm (bits 2-0) into one byte.

    Only the upper bound of each field is asserted.
    """
    assert mod <= 3
    assert rm <= 7
    assert reg <= 7
    byte = rm
    byte |= reg << 3
    byte |= mod << 6
    return byte
|
|
45
|
|
def rex(w=0, r=0, x=0, b=0):
    """ Build a REX prefix byte: 0x40 with w, r, x, b in bits 3, 2, 1, 0.

    Only the upper bound (1) of each flag is asserted.
    """
    assert w <= 1
    assert r <= 1
    assert x <= 1
    assert b <= 1
    flags = (w << 3) | (r << 2) | (x << 1) | b
    return 0x40 | flags
|
|
53
|
|
def sib(ss=0, index=0, base=0):
    """ Pack ss (bits 7-6), index (bits 5-3) and base (bits 2-0) into a SIB byte.

    Only the upper bound of each field is asserted.
    """
    assert ss <= 3
    assert index <= 7
    assert base <= 7
    byte = base
    byte |= index << 3
    byte |= ss << 6
    return byte
|
|
59
|
|
# Condition name -> 4 bit condition code; the jump encoders OR this value
# into the Jcc opcode byte.
tttn = {
    'L': 0xc,
    'G': 0xf,
    'NE': 0x5,
    'GE': 0xd,
    'LE': 0xe,
    'E': 0x4,
}
|
|
61
|
|
62 # Actual instructions:
|
|
def nearjump(distance, condition=None):
    """ jmp imm32, or Jcc imm32 when a condition name from tttn is given.

    A backward (negative) distance is extended by the length of the jump
    instruction itself: 6 bytes for the conditional form, 5 for the
    unconditional one. Distances beyond 1<<30 are reported through Error
    (imported from .errors; presumably it raises -- confirm).
    """
    lim = 1 << 30
    if abs(distance) > lim:
        Error('near jump cannot jump over more than {0} bytes'.format(lim))
    if condition:
        head = [0x0F, 0x80 | tttn[condition]]  # Jcc imm32
        own_size = 6
    else:
        head = [0xE9]
        own_size = 5
    if distance < 0:
        distance -= own_size  # Skip own instruction
    return head + imm32(distance)
|
|
77
|
|
def shortjump(distance, condition=None):
    """ jmp imm8, or Jcc rel8 when a condition name from tttn is given.

    A backward (negative) distance is extended by the 2 bytes of the jump
    instruction itself. Distances beyond 118 are reported through Error
    (imported from .errors; presumably it raises -- confirm).
    """
    lim = 118
    if abs(distance) > lim:
        Error('short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        distance -= 2  # Skip own instruction
    # 0x70 | cc is Jcc rel8, 0xeb is jmp rel8
    opcode = (0x70 | tttn[condition]) if condition else 0xeb
    return [opcode] + imm8(distance)
|
|
90
|
|
91 # Helper that determines jump type:
|
|
def reljump(distance):
    """ Encode an unconditional jump, choosing the short form when
    abs(distance) is below 110 and the near form otherwise.
    """
    encoder = shortjump if abs(distance) < 110 else nearjump
    return encoder(distance)
|
|
97
|
|
def push(reg):
    """ push r64: opcode 0x50 + register number, prefixed with 0x41 for
    the registers whose rexbit is 1.

    Anything that is not a 64 bit register name is reported through Error.
    """
    if reg not in regs64:
        Error('push for {0} not implemented'.format(reg))
        return None
    opcode = 0x50 + regs64[reg]
    if rexbit[reg] == 1:
        return [0x41, opcode]
    return [opcode]
|
|
106
|
|
def pop(reg):
    """ pop r64: opcode 0x58 + register number, prefixed with rex(b=1) for
    the registers whose rexbit is 1.

    Anything that is not a 64 bit register name is reported through Error.
    """
    if reg not in regs64:
        Error('pop for {0} not implemented'.format(reg))
        return None
    encoding = [0x58 + regs64[reg]]
    if rexbit[reg] == 1:
        encoding.insert(0, rex(b=1))
    return encoding
|
|
118
|
|
def INT(number):
    """ int imm8: opcode 0xcd followed by the interrupt number as one byte """
    return [0xcd] + imm8(number)
|
|
122
|
|
def syscall():
    """ syscall: the fixed two byte sequence 0x0F 0x05 """
    encoding = [0x0F, 0x05]
    return encoding
|
|
125
|
|
def call(distance):
    """ Encode a call.

    An int operand gives 0xe8 followed by the value as imm32; a 64 bit
    register name gives 0xFF /2 (call r/m64), with a rex prefix for the
    registers whose rexbit is 1. Anything else is reported through Error.
    """
    if type(distance) is int:
        return [0xe8] + imm32(distance)
    if type(distance) is str and distance in regs64:
        target = distance
        # 0xFF /2 == call r/m64
        encoding = [0xFF, modrm(mod=3, reg=2, rm=regs64[target])]
        if rexbit[target] == 1:
            encoding.insert(0, rex(b=rexbit[target]))
        return encoding
    Error('Cannot call to {0}'.format(distance))
|
|
140
|
|
def ret():
    """ ret: the single byte 0xc3 """
    encoding = [0xc3]
    return encoding
|
|
143
|
|
def increg64(reg):
    """ Increment a 64 bit register: REX.W prefix, opcode 0xff and a
    register-direct (mod=3) modrm byte with reg field 0.
    """
    assert reg in regs64
    return [
        rex(w=1, b=rexbit[reg]),
        0xff,
        modrm(mod=3, rm=regs64[reg]),
    ]
|
|
150
|
|
def prepost8(r8, rm8):
    """ Compute the bytes around the opcode for an 8 bit register / memory pair.

    r8 must be a key of regs8. rm8 must be a list describing memory:
      [base]          -> mod=0, no displacement (rbp, rsp, r12, r13 rejected)
      [base, offset]  -> mod=1 with an 8 bit displacement (rsp, r12 rejected)
    Returns (pre, post): pre holds an optional rex(b=1) prefix, post holds
    the modrm byte plus any displacement. Unsupported shapes are reported
    through Error (imported from .errors; presumably it raises -- confirm).
    """
    assert r8 in regs8
    pre = []
    if type(rm8) is not list:
        Error('Not supporting move with reg8 {0}'.format(r8))
    elif len(rm8) == 1:
        # TODO: merge mem access with prepost for 64 bits
        base = rm8[0]
        if type(base) is str and base in regs64:
            # rm=4 and rm=5 have special meanings with mod=0
            assert base not in ['rbp', 'rsp', 'r12', 'r13']
            mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
            if rexbit[base] == 1:
                pre.append(rex(b=1))
            post = [mod_rm]
        else:
            Error('One arg of type {0} not implemented'.format(base))
    elif len(rm8) == 2:
        base, offset = rm8
        assert type(offset) is int
        assert base in regs64

        if base in ('rsp', 'r12'):
            Error('Cannot use rsp or r12 as base yet')
        if rexbit[base] == 1:
            pre.append(rex(b=1))
        mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8])
        post = [mod_rm] + imm8(offset)
    else:
        Error('not supporting prepost8 with list len {0}'.format(len(rm8)))
    return pre, post
|
|
182
|
|
def _displacement(disp):
    """ Pick the shortest displacement encoding for disp.

    Returns a (mod, bytes) pair: mod=1 with one byte when disp fits in a
    sign-extended 8 bit field, otherwise mod=2 with four little endian
    bytes. Values that do not fit in 32 bits are reported through Error.
    """
    if -128 <= disp <= 127:
        return 1, imm8(disp)
    if not -(1 << 31) <= disp < (1 << 31):
        Error('Displacement {0} does not fit in 32 bits'.format(disp))
    return 2, imm32(disp)


def prepost(r64, rm64):
    """ Compute the bytes around the opcode for a 64 bit register / operand pair.

    r64 must be a key of regs64 and ends up in the modrm reg field.
    rm64 is one of:
      'reg'                -> register direct (mod=3)
      [base, index, disp]  -> SIB addressing, scale 1, with displacement
      [base, offset]       -> base + displacement (SIB form for rsp / r12)
      ['RIP', offset]      -> RIP relative with a 32 bit displacement
      [offset]             -> absolute 32 bit address

    Returns (pre, post): pre is [rex prefix], post is the modrm byte
    followed by an optional SIB byte and displacement. Unsupported operands
    are reported through Error (imported from .errors; presumably it
    raises -- confirm).

    Displacements outside -128..127 use the mod=2 / disp32 form; previously
    they were cut down to 8 bits and produced a wrong address.
    """
    assert(r64 in regs64)
    if type(rm64) is list:
        if len(rm64) == 3:
            base, index, disp = rm64
            assert(base in regs64)
            assert(index in regs64)
            assert(type(disp) is int)
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert(index != 'rsp')

            rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            mod, disp_bytes = _displacement(disp)
            # rm=4 indicates a SIB byte: [--][--]+disp
            mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + disp_bytes
        elif len(rm64) == 2:
            base, offset = rm64
            assert(type(offset) is int)
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
                return [rexprefix], [mod_rm] + imm32(offset)
            else:
                assert(base in regs64)

                rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
                mod, disp_bytes = _displacement(offset)
                if base == 'rsp' or base == 'r12':
                    # extended function that uses SIB byte
                    # rm=4 indicates a SIB byte follows
                    mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
                    # index=4 indicates that index is not used
                    si_b = sib(ss=0, index=4, base=regs64[base])
                    return [rexprefix], [mod_rm, si_b] + disp_bytes
                else:
                    mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs64[r64])
                    return [rexprefix], [mod_rm] + disp_bytes
        elif len(rm64) == 1:
            offset = rm64[0]
            if type(offset) is int:
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
                si_b = sib(ss=0, index=4, base=5)  # 0x25
                return [rexprefix], [mod_rm, si_b] + imm32(offset)
            else:
                Error('Memory reference of type {0} not implemented'.format(offset))
        else:
            Error('Memory reference not implemented')
    elif rm64 in regs64:
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
        return [rexprefix], [mod_rm]
    else:
        # Previously this case fell off the end and returned None silently.
        Error('Operand {0} not implemented'.format(rm64))
|
|
239
|
|
def leareg64(rega, m):
    """ lea r64, m: opcode 0x8d wrapped in the bytes prepost(rega, m) returns """
    prefix, suffix = prepost(rega, m)
    return prefix + [0x8d] + suffix
|
|
244
|
|
def mov(rega, regb):
    """ Encode a move from regb into rega.

    Forms, tried in this order:
      regb is an int               -> 0xb8 + reg with a 64 bit immediate
      regb is a 64 bit register    -> 0x89, mov r/m64, r64
      regb is an 8 bit register    -> 0x88, mov r/m8, r8
      rega is a 64 bit register    -> 0x8b, mov r64, r/m64
    Anything else is reported through Error (imported from .errors;
    presumably it raises -- confirm).
    """
    if type(regb) is int:
        prefix = [rex(w=1, b=rexbit[rega])]
        op = 0xb8 + regs64[rega]
        suffix = imm64(regb)
    elif type(regb) is str:
        if regb in regs64:
            prefix, suffix = prepost(regb, rega)
            op = 0x89  # mov r/m64, r64
        elif regb in regs8:
            prefix, suffix = prepost8(regb, rega)
            op = 0x88  # mov r/m8, r8
        else:
            Error('Unknown register {0}'.format(regb))
    elif type(rega) is str:
        if rega in regs64:
            prefix, suffix = prepost(rega, regb)
            op = 0x8b  # mov r64, r/m64
        else:
            Error('Unknown register {0}'.format(rega))
    else:
        Error('Move of this kind {0}, {1} not implemented'.format(rega, regb))
    return prefix + [op] + suffix
|
|
268
|
|
def xorreg64(rega, regb):
    """ XOR r/m64, r64 (opcode 0x31) with rega in the rm field and regb in
    the reg field, register direct.
    """
    # Alternative is 0x33 XOR r64, r/m64
    return [
        rex(w=1, r=rexbit[regb], b=rexbit[rega]),
        0x31,
        modrm(3, rm=regs64[rega], reg=regs64[regb]),
    ]
|
|
275
|
|
276 # integer arithmetic:
|
|
def addreg64(rega, regb):
    """ Add regb to the 64 bit register rega.

    regb is either a 64 bit register name (0x01, ADD r/m64, r64) or an int
    constant. Constants use the sign-extended imm8 form (0x83 /0) when they
    lie in -128..99 and the imm32 form (0x81 /0) when they fit in a signed
    32 bit value. Other constants and operand types are reported through
    Error (imported from .errors; presumably it raises -- confirm).

    Negative constants below -128 used to be cut down to 8 bits; they now
    take the imm32 form.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The upper bound of 100 is kept from the original so that existing
        # encodings do not change.
        if -128 <= regb < 100:
            opcode = 0x83  # add r/m, imm8
            immediate = imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            opcode = 0x81  # add r/m64, imm32
            immediate = imm32(regb)
        else:
            Error('Constant value too large!')
            return None
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=0)
        return [rexprefix, opcode, mod_rm] + immediate
    else:
        Error('unknown second operand {0}!'.format(regb))
|
|
297
|
|
def subreg64(rega, regb):
    """ Subtract regb from the 64 bit register rega.

    regb is either a 64 bit register name (0x29, SUB r/m64, r64) or an int
    constant. Constants use the sign-extended imm8 form (0x83 /5) when they
    lie in -128..99 and the imm32 form (0x81 /5) when they fit in a signed
    32 bit value. Other constants and operand types are reported through
    Error (imported from .errors; presumably it raises -- confirm).

    Negative constants below -128 used to be cut down to 8 bits; they now
    take the imm32 form.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The upper bound of 100 is kept from the original so that existing
        # encodings do not change.
        if -128 <= regb < 100:
            opcode = 0x83  # sub r/m, imm8
            immediate = imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            opcode = 0x81  # sub r/m64, imm32
            immediate = imm32(regb)
        else:
            Error('Constant value too large!')
            return None
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=5)
        return [rexprefix, opcode, mod_rm] + immediate
    else:
        Error('unknown second operand {0}!'.format(regb))
|
|
319
|
|
def idivreg64(reg):
    """ IDIV r/m64: REX.W prefix, opcode 0xf7 and a register-direct modrm
    byte with reg field 7.
    """
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,
        modrm(3, rm=regs64[reg], reg=7),
    ]
|
|
325
|
|
def imulreg64_rax(reg):
    """ IMUL r/m64 (one operand form): REX.W prefix, opcode 0xf7 and a
    register-direct modrm byte with reg field 5.
    """
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,
        modrm(3, rm=regs64[reg], reg=5),
    ]
|
|
331
|
|
def imulreg64(rega, regb):
    """ IMUL r64, r/m64: the two byte opcode 0x0f 0xaf wrapped in the bytes
    prepost(rega, regb) returns.
    """
    prefix, suffix = prepost(rega, regb)
    return prefix + [0x0f, 0xaf] + suffix
|
|
337
|
|
def cmpreg64(rega, regb):
    """ Compare the 64 bit register rega with regb.

    regb is either a 64 bit register name (0x39, CMP r/m64, r64) or an int
    constant. Constants in -128..127 use the sign-extended imm8 form
    (0x83 /7); larger ones that fit in a signed 32 bit value use the imm32
    form (0x81 /7). Other constants and operand types are reported through
    Error (imported from .errors; presumably it raises -- confirm).

    Constants outside -128..127 used to be cut down to 8 bits, so the
    comparison was made against the wrong value.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        if -128 <= regb <= 127:
            opcode = 0x83  # CMP r/m64, imm8
            immediate = imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            opcode = 0x81  # CMP r/m64, imm32
            immediate = imm32(regb)
        else:
            Error('Constant value too large!')
            return None
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=7)
        return [rexprefix, opcode, mod_rm] + immediate
    else:
        Error('not implemented cmp64')
|
|
351
|
|
352 # Mapping that maps string names to the right functions:
|
|
# Each entry pairs an encoder function with an integer; the integers match
# the number of parameters each encoder takes.
opcodes = {
    'mov': (mov, 2),
    'lea': (leareg64, 2),
    'int': (INT, 1),
    'syscall': (syscall, 0),
}
|
|
354
|