Mercurial > lcfOS
comparison python/x86_2.py @ 287:1c7c1e619be8
File movage
author | Windel Bouwman |
---|---|
date | Thu, 21 Nov 2013 11:57:27 +0100 |
parents | python/old/assembler.py@91af0e40f868 |
children |
comparison
equal
deleted
inserted
replaced
286:d9df72971cbf | 287:1c7c1e619be8 |
---|---|
1 """ | |
2 X86 target descriptions and encodings. | |
3 | |
4 """ | |
5 | |
6 from target import Register, Instruction, Target, Imm8, Label, Imm3, LabelRef | |
7 | |
8 | |
# NOTE: this name is rebound further down in this module by the ``modrm()``
# helper function, so the mapping is not reachable once the module is loaded.
modrm = {'rax': 0, 'rbx': 1}

# Register numbers, see table 3.1 of the intel manual.
# The 64 bits registers are used together with REX.W. The registers r8..r15
# share the numbers 0..7 with rax..rdi; the ``rexbit`` table below tells the
# two groups apart.
regs64 = {
    name: number % 8
    for number, name in enumerate(
        'rax rcx rdx rbx rsp rbp rsi rdi '
        'r8 r9 r10 r11 r12 r13 r14 r15'.split())
}
regs32 = {
    name: number
    for number, name in enumerate('eax ecx edx ebx esp ebp esi edi'.split())
}
regs8 = {
    name: number
    for number, name in enumerate('al cl dl bl ah ch dh bh'.split())
}

# Value of the REX extension bit per 64 bits register:
# 0 for rax..rdi, 1 for r8..r15.
rexbit = {
    name: number // 8
    for number, name in enumerate(
        'rax rcx rdx rbx rsp rbp rsi rdi '
        'r8 r9 r10 r11 r12 r13 r14 r15'.split())
}
19 | |
20 # Helper functions: | |
def imm64(x):
    """ Return x as a list of 8 byte values, least significant byte first.

    Negative values are wrapped to their 64 bits two's complement form and
    anything above 64 bits is masked away.
    """
    value = x + (1 << 64) if x < 0 else x
    value &= 0xFFFFFFFFFFFFFFFF
    return [(value >> shift) & 0xFF for shift in range(0, 64, 8)]
27 | |
def imm32(x):
    """ Return x as a list of 4 byte values, least significant byte first.

    Negative values are wrapped to their 32 bits two's complement form and
    anything above 32 bits is masked away.
    """
    value = x + (1 << 32) if x < 0 else x
    value &= 0xFFFFFFFF
    return [(value >> shift) & 0xFF for shift in range(0, 32, 8)]
34 | |
def imm8(x):
    """ Return x as a single byte in a list.

    Negative values are wrapped to their 8 bits two's complement form and
    anything above 8 bits is masked away.
    """
    value = x if x >= 0 else x + (1 << 8)
    return [value & 0xFF]
40 | |
def modrm(mod=0, rm=0, reg=0):
    """ Assemble a modrm byte.

    Layout, from high to low bits: mod (2 bits), reg (3 bits), rm (3 bits).
    Every field is checked against its upper bound only.
    """
    assert mod <= 3
    assert rm <= 7
    assert reg <= 7
    byte = rm
    byte |= reg << 3
    byte |= mod << 6
    return byte
47 | |
def rex(w=0, r=0, x=0, b=0):
    """ Assemble a REX prefix byte: 0x40 with w, r, x and b in bits 3..0 """
    prefix = 0x40
    for shift, bit in ((3, w), (2, r), (1, x), (0, b)):
        # Only the upper bound of each flag is verified.
        assert bit <= 1
        prefix |= bit << shift
    return prefix
55 | |
def sib(ss=0, index=0, base=0):
    """ Assemble a SIB byte.

    Layout, from high to low bits: ss (2 bits), index (3 bits), base (3 bits).
    Every field is checked against its upper bound only.
    """
    assert ss <= 3
    assert index <= 7
    assert base <= 7
    byte = base
    byte |= index << 3
    byte |= ss << 6
    return byte
61 | |
# Condition name -> 4 bits condition code, or-ed into the Jcc opcodes below.
tttn = {
    'L': 0xc,
    'G': 0xf,
    'NE': 0x5,
    'GE': 0xd,
    'LE': 0xe,
    'E': 0x4,
}
63 | |
64 # Actual instructions: | |
def nearjump(distance, condition=None):
    """ Encode a jump with a 32 bits relative offset (jmp / Jcc imm32).

    distance: signed jump distance in bytes. A negative (backward) distance
        is extended with the length of the jump instruction itself (6 bytes
        for the conditional form, 5 bytes for the unconditional form); a
        forward distance is emitted unchanged.
    condition: optional key of the tttn table; when given a conditional
        jump (0x0F, 0x80 | condition code) is emitted, otherwise 0xE9.

    Returns the instruction as a list of byte values.
    Raises ValueError when the distance exceeds 1 << 30 bytes.
    """
    lim = 1 << 30
    if abs(distance) > lim:
        # The original called an undefined Error() without raising it.
        raise ValueError(
            'near jump cannot jump over more than {0} bytes'.format(lim))
    if condition:
        if distance < 0:
            distance -= 6  # Skip own instruction
        opcode = 0x80 | tttn[condition]  # Jcc imm32
        return [0x0F, opcode] + imm32(distance)
    if distance < 0:
        distance -= 5  # Skip own instruction
    return [0xE9] + imm32(distance)
79 | |
def shortjump(distance, condition=None):
    """ Encode a jump with an 8 bits relative offset (jmp / Jcc rel8).

    distance: signed jump distance in bytes. A negative (backward) distance
        is extended with the 2 bytes of the jump instruction itself; a
        forward distance is emitted unchanged.
    condition: optional key of the tttn table; when given the opcode is
        0x70 | condition code, otherwise the unconditional 0xEB.

    Returns the instruction as a list of two byte values.
    Raises ValueError when the distance exceeds 118 bytes.
    """
    lim = 118
    if abs(distance) > lim:
        # The original called an undefined Error() without raising it.
        raise ValueError(
            'short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        distance -= 2  # Skip own instruction
    if condition:
        opcode = 0x70 | tttn[condition]  # Jcc rel8
    else:
        opcode = 0xeb  # jmp rel8
    return [opcode] + imm8(distance)
92 | |
93 # Helper that determines jump type: | |
def reljump(distance):
    """ Encode an unconditional relative jump.

    Distances with an absolute value below 110 use the short encoding, all
    others the near encoding.
    """
    encoder = shortjump if abs(distance) < 110 else nearjump
    return encoder(distance)
99 | |
def push(reg):
    """ Encode a push of a 64 bits register.

    reg: name of a register in regs64.

    Returns the instruction as a list of byte values: opcode 0x50 plus the
    register number, preceded by a REX prefix with the b bit set for the
    registers that have their rexbit set.
    Raises ValueError for anything that is not a 64 bits register.
    """
    if reg not in regs64:
        # The original called an undefined Error() without raising it.
        raise ValueError('push for {0} not implemented'.format(reg))
    opcode = 0x50 + regs64[reg]
    if rexbit[reg] == 1:
        # rex(b=1) is the 0x41 that used to be hard coded here; pop() builds
        # its prefix the same way.
        return [rex(b=1), opcode]
    return [opcode]
108 | |
def pop(reg):
    """ Encode a pop into a 64 bits register.

    reg: name of a register in regs64.

    Returns the instruction as a list of byte values: opcode 0x58 plus the
    register number, preceded by a REX prefix with the b bit set for the
    registers that have their rexbit set.
    Raises ValueError for anything that is not a 64 bits register.
    """
    if reg not in regs64:
        # The original called an undefined Error() without raising it.
        raise ValueError('pop for {0} not implemented'.format(reg))
    opcode = 0x58 + regs64[reg]
    if rexbit[reg] == 1:
        return [rex(b=1), opcode]
    return [opcode]
120 | |
def INT(number):
    """ Encode int imm8: opcode 0xCD followed by the number as one byte """
    return [0xcd] + imm8(number)
124 | |
def syscall():
    """ Encode the syscall instruction as its two fixed bytes """
    encoding = [0x0F, 0x05]
    return encoding
127 | |
def call(distance):
    """ Encode a call instruction.

    distance: either an int, encoded as 0xE8 followed by the value as a
        32 bits immediate, or the name of a register in regs64, encoded as
        0xFF /2 (call r/m64) with a REX prefix when the register has its
        rexbit set.

    Returns the instruction as a list of byte values.
    Raises ValueError for any other operand.
    """
    if isinstance(distance, int):
        return [0xe8] + imm32(distance)
    if isinstance(distance, str) and distance in regs64:
        reg = distance
        opcode = 0xFF  # 0xFF /2 == call r/m64
        mod_rm = modrm(mod=3, reg=2, rm=regs64[reg])
        if rexbit[reg] == 1:
            return [rex(b=rexbit[reg]), opcode, mod_rm]
        return [opcode, mod_rm]
    # The original called an undefined Error() without raising it.
    raise ValueError('Cannot call to {0}'.format(distance))
142 | |
def ret():
    """ Encode the ret instruction as its single byte """
    encoding = [0xc3]
    return encoding
145 | |
def increg64(reg):
    """ Encode an increment of a 64 bits register.

    The result is a REX.W prefix, opcode 0xFF and a register direct modrm
    byte with its reg field left at 0.
    """
    assert reg in regs64
    return [
        rex(w=1, b=rexbit[reg]),
        0xff,
        modrm(mod=3, rm=regs64[reg]),
    ]
152 | |
def prepost8(r8, rm8):
    """ Build the bytes around the opcode for an 8 bits register and a
    memory operand.

    r8: name of a register in regs8, placed in the reg field of modrm.
    rm8: memory operand as a list, either [base] or [base, offset], where
        base is the name of a register in regs64 and offset an int.
        - [base] may not use rbp, rsp, r12 or r13 (checked by assert).
        - [base, offset] may not use rsp or r12. An offset in the range
          -128..127 is emitted as one byte (mod=1), any other offset as
          four bytes (mod=2); before, such an offset was cut down to its
          lowest byte.

    Returns a tuple (pre, post): the bytes to emit before and after the
    opcode. pre holds a REX prefix when the base register has its rexbit set.
    Raises ValueError for operand forms that are not supported. The original
    called an undefined Error() without raising it, which left post unbound.
    """
    assert r8 in regs8
    pre = []
    if not isinstance(rm8, list):
        raise ValueError('Not supporting move with reg8 {0}'.format(r8))

    # TODO: merge mem access with prepost for 64 bits
    if len(rm8) == 1:
        base, = rm8
        if not (isinstance(base, str) and base in regs64):
            raise ValueError(
                'One arg of type {0} not implemented'.format(base))
        assert base not in ['rbp', 'rsp', 'r12', 'r13']
        mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
        if rexbit[base] == 1:
            pre.append(rex(b=1))
        post = [mod_rm]
    elif len(rm8) == 2:
        base, offset = rm8
        assert type(offset) is int
        assert base in regs64

        if base == 'rsp' or base == 'r12':
            raise ValueError('Cannot use rsp or r12 as base yet')
        if rexbit[base] == 1:
            pre.append(rex(b=1))
        if -128 <= offset <= 127:
            mod, displacement = 1, imm8(offset)
        else:
            # mod=2 selects a 32 bits displacement
            mod, displacement = 2, imm32(offset)
        mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs8[r8])
        post = [mod_rm] + displacement
    else:
        raise ValueError(
            'not supporting prepost8 with list len {0}'.format(len(rm8)))
    return pre, post
184 | |
def _prepost_displacement(offset):
    """ Return (mod, bytes) for a displacement: mod=1 with one byte when the
    offset is in the range -128..127, else mod=2 with four bytes """
    if -128 <= offset <= 127:
        return 1, imm8(offset)
    return 2, imm32(offset)


def prepost(r64, rm64):
    """ Build the bytes around the opcode for a 64 bits register and a
    register or memory operand.

    r64: name of a register in regs64, placed in the reg field of modrm.
    rm64: one of
        - the name of a register in regs64 (register direct, mod=3)
        - [base, index, disp]: base and index registers plus an int
          displacement; index may not be rsp (checked by assert)
        - ['RIP', offset]: rip relative, 32 bits offset
        - [base, offset]: base register plus an int displacement; rsp and
          r12 are encoded with a SIB byte
        - [offset]: absolute 32 bits address

    Displacements next to a base register are emitted as one byte when they
    are in the range -128..127 and as four bytes (mod=2) otherwise; before,
    such a displacement was cut down to its lowest byte.

    Returns a tuple (pre, post): the bytes to emit before and after the
    opcode. pre always holds one REX prefix with the w bit set.
    Raises ValueError for operands that are not supported. The original
    called an undefined Error() without raising it, or fell off the end of
    the function and returned None.
    """
    assert r64 in regs64
    if isinstance(rm64, list):
        if len(rm64) == 3:
            base, index, disp = rm64
            assert base in regs64
            assert index in regs64
            assert type(disp) is int
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert index != 'rsp'

            rexprefix = rex(
                w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            mod, displacement = _prepost_displacement(disp)
            # rm=4 indicates a SIB byte: [--][--]+displacement
            mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + displacement

        if len(rm64) == 2:
            base, offset = rm64
            assert type(offset) is int
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
                return [rexprefix], [mod_rm] + imm32(offset)

            assert base in regs64
            rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
            mod, displacement = _prepost_displacement(offset)
            if base == 'rsp' or base == 'r12':
                # extended function that uses SIB byte
                # rm=4 indicates a SIB byte follows
                mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
                # index=4 indicates that index is not used
                si_b = sib(ss=0, index=4, base=regs64[base])
                return [rexprefix], [mod_rm, si_b] + displacement
            mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs64[r64])
            return [rexprefix], [mod_rm] + displacement

        if len(rm64) == 1:
            offset = rm64[0]
            if not isinstance(offset, int):
                raise ValueError(
                    'Memory reference of type {0} not implemented'.format(
                        offset))
            rexprefix = rex(w=1, r=rexbit[r64])
            mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=4, base=5)  # 0x25
            return [rexprefix], [mod_rm, si_b] + imm32(offset)

        raise ValueError('Memory reference not implemented')

    if rm64 in regs64:
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
        return [rexprefix], [mod_rm]

    raise ValueError('Operand {0} not implemented'.format(rm64))
241 | |
def leareg64(rega, m):
    """ Encode lea r64, m: opcode 0x8D wrapped in the bytes that prepost()
    produces for the register rega and the operand m """
    prefix, suffix = prepost(rega, m)
    return prefix + [0x8d] + suffix
246 | |
def mov(rega, regb):
    """ Encode a mov instruction with destination rega and source regb.

    Supported forms, tried in this order:
        - regb is an int: 64 bits immediate into the 64 bits register rega
          (opcode 0xB8 plus register number)
        - regb is a register in regs64: mov r/m64, r64 (0x89); rega is a
          register or memory operand as accepted by prepost()
        - regb is a register in regs8: mov r/m8, r8 (0x88); rega is a
          memory operand as accepted by prepost8()
        - rega is a register in regs64: mov r64, r/m64 (0x8B); regb is an
          operand as accepted by prepost()

    Returns the instruction as a list of byte values.
    Raises ValueError for unknown registers and other operand combinations.
    The original called an undefined Error() without raising it, which left
    the locals used by the final return unbound.
    """
    if isinstance(regb, int):
        pre = [rex(w=1, b=rexbit[rega])]
        opcode = 0xb8 + regs64[rega]
        post = imm64(regb)
    elif isinstance(regb, str):
        if regb in regs64:
            opcode = 0x89  # mov r/m64, r64
            pre, post = prepost(regb, rega)
        elif regb in regs8:
            opcode = 0x88  # mov r/m8, r8
            pre, post = prepost8(regb, rega)
        else:
            raise ValueError('Unknown register {0}'.format(regb))
    elif isinstance(rega, str):
        if rega in regs64:
            opcode = 0x8b  # mov r64, r/m64
            pre, post = prepost(rega, regb)
        else:
            raise ValueError('Unknown register {0}'.format(rega))
    else:
        raise ValueError(
            'Move of this kind {0}, {1} not implemented'.format(rega, regb))
    return pre + [opcode] + post
270 | |
def xorreg64(rega, regb):
    """ Encode xor r/m64, r64 for two 64 bits registers.

    rega ends up in the rm field and regb in the reg field of modrm.
    """
    # Alternative is 0x33 XOR r64, r/m64
    return [
        rex(w=1, r=rexbit[regb], b=rexbit[rega]),
        0x31,  # XOR r/m64, r64
        modrm(3, rm=regs64[rega], reg=regs64[regb]),
    ]
277 | |
278 # integer arithmatic: | |
def addreg64(rega, regb):
    """ Encode a 64 bits add of regb to rega.

    rega: for a register regb, an operand as accepted by prepost(); for an
        int regb, the name of a register in regs64.
    regb: the name of a register in regs64 (add r/m64, r64, opcode 0x01)
        or an int constant.

    An int constant in the range -128..99 is emitted with the 8 bits
    immediate form (0x83 /0), any other constant that fits in a signed
    32 bits value with the 32 bits form (0x81 /0). The upper limit of the
    short form is kept at the original 100 so existing encodings do not
    change. Before, negative constants below -128 were emitted in the short
    form with only their lowest byte.

    Returns the instruction as a list of byte values.
    Raises ValueError when the constant does not fit in 32 bits or when regb
    is neither a register nor an int (the original called an undefined
    Error() without raising it).
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    if not isinstance(regb, int):
        raise ValueError('unknown second operand {0}!'.format(regb))

    rexprefix = rex(w=1, b=rexbit[rega])
    mod_rm = modrm(3, rm=regs64[rega], reg=0)
    if -128 <= regb < 100:
        opcode = 0x83  # add r/m, imm8
        return [rexprefix, opcode, mod_rm] + imm8(regb)
    if -(1 << 31) <= regb < (1 << 31):
        opcode = 0x81  # add r/m64, imm32
        return [rexprefix, opcode, mod_rm] + imm32(regb)
    raise ValueError('Constant value too large!')
299 | |
def subreg64(rega, regb):
    """ Encode a 64 bits subtract of regb from rega.

    rega: for a register regb, an operand as accepted by prepost(); for an
        int regb, the name of a register in regs64.
    regb: the name of a register in regs64 (sub r/m64, r64, opcode 0x29)
        or an int constant.

    An int constant in the range -128..99 is emitted with the 8 bits
    immediate form (0x83 /5), any other constant that fits in a signed
    32 bits value with the 32 bits form (0x81 /5). The upper limit of the
    short form is kept at the original 100 so existing encodings do not
    change. Before, negative constants below -128 were emitted in the short
    form with only their lowest byte.

    Returns the instruction as a list of byte values.
    Raises ValueError when the constant does not fit in 32 bits or when regb
    is neither a register nor an int (the original called an undefined
    Error() without raising it).
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    if not isinstance(regb, int):
        raise ValueError('unknown second operand {0}!'.format(regb))

    rexprefix = rex(w=1, b=rexbit[rega])
    mod_rm = modrm(3, rm=regs64[rega], reg=5)
    if -128 <= regb < 100:
        opcode = 0x83  # sub r/m, imm8
        return [rexprefix, opcode, mod_rm] + imm8(regb)
    if -(1 << 31) <= regb < (1 << 31):
        opcode = 0x81  # sub r/m64, imm32
        return [rexprefix, opcode, mod_rm] + imm32(regb)
    raise ValueError('Constant value too large!')
321 | |
def idivreg64(reg):
    """ Encode idiv r/m64 for a 64 bits register: REX.W, 0xF7 and a register
    direct modrm byte with 7 in its reg field """
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,  # IDIV r/m64
        modrm(3, rm=regs64[reg], reg=7),
    ]
327 | |
def imulreg64_rax(reg):
    """ Encode the one operand imul r/m64 for a 64 bits register: REX.W,
    0xF7 and a register direct modrm byte with 5 in its reg field """
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,  # IMUL r/m64
        modrm(3, rm=regs64[reg], reg=5),
    ]
333 | |
def imulreg64(rega, regb):
    """ Encode imul r64, r/m64: the two opcode bytes 0x0F 0xAF wrapped in
    the bytes that prepost() produces for rega and regb """
    prefix, suffix = prepost(rega, regb)
    return prefix + [0x0f, 0xaf] + suffix  # IMUL r64, r/m64
339 | |
def cmpreg64(rega, regb):
    """ Encode a 64 bits compare of rega with regb.

    rega: for a register regb, an operand as accepted by prepost(); for an
        int regb, the name of a register in regs64.
    regb: the name of a register in regs64 (cmp r/m64, r64, opcode 0x39)
        or an int constant.

    An int constant in the range -128..127 is emitted with the 8 bits
    immediate form (0x83 /7), exactly as before. Any other constant that
    fits in a signed 32 bits value now uses the 32 bits form (0x81 /7);
    before, it was emitted in the short form with only its lowest byte.

    Returns the instruction as a list of byte values.
    Raises ValueError when the constant does not fit in 32 bits or when regb
    is neither a register nor an int (the original called an undefined
    Error() without raising it).
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    if not isinstance(regb, int):
        raise ValueError('not implemented cmp64')

    rexprefix = rex(w=1, b=rexbit[rega])
    mod_rm = modrm(3, rm=regs64[rega], reg=7)
    if -128 <= regb <= 127:
        opcode = 0x83  # CMP r/m64, imm8
        return [rexprefix, opcode, mod_rm] + imm8(regb)
    if -(1 << 31) <= regb < (1 << 31):
        opcode = 0x81  # CMP r/m64, imm32
        return [rexprefix, opcode, mod_rm] + imm32(regb)
    raise ValueError('Constant value too large!')
353 | |
# Mnemonic -> (encoder function, number of arguments the encoder takes):
opcodes = {
    'mov': (mov, 2),
    'lea': (leareg64, 2),
    'int': (INT, 1),
    'syscall': (syscall, 0),
}
356 |