comparison ide/compiler/assembler.py @ 1:92df07bc2081

Initial import of compiler
author windel
date Sun, 18 Sep 2011 19:00:29 +0200
parents
children
comparison
equal deleted inserted replaced
0:1a4faf9ef1ea 1:92df07bc2081
1 """
2 Assembler code generation functions
3 """
4
5 from .errors import Error
6
# NOTE: the modrm() function defined later in this module rebinds this name,
# so this mapping is not reachable once the module has finished loading.
modrm = {'rax': 0, 'rbx': 1}

# Register numbers, see table 3.1 of the intel manual.
# The 64 bits table is meant to be used together with REX.W. Only the low
# three bits of each register number are kept here (r8..r15 map to 0..7
# again); rexbit below holds the remaining bit.
regs64 = {
    name: number & 7
    for number, name in enumerate(
        ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
         'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'])
}
regs32 = dict(zip(
    ['eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi'], range(8)))
regs8 = dict(zip(
    ['al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh'], range(8)))

# REX extension bit per 64 bits register: 0 for the classic eight registers,
# 1 for r8..r15.
rexbit = {}
rexbit.update(dict.fromkeys(
    ['rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi'], 0))
rexbit.update(dict.fromkeys(
    ['r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15'], 1))
17
18 # Helper functions:
def imm64(x):
    """ Return x as a list of 8 byte values, least significant byte first.

    Negative values and values wider than 64 bits are reduced modulo 2**64.
    """
    wrapped = x % (1 << 64)
    return [(wrapped >> shift) & 0xFF for shift in range(0, 64, 8)]
25
def imm32(x):
    """ Return x as a list of 4 byte values, least significant byte first.

    Negative values and values wider than 32 bits are reduced modulo 2**32.
    """
    wrapped = x % (1 << 32)
    return [(wrapped >> shift) & 0xFF for shift in range(0, 32, 8)]
32
def imm8(x):
    """ Return x as a one element list holding a single byte value.

    Negative values and values wider than 8 bits are reduced modulo 256.
    """
    return [x % (1 << 8)]
38
def modrm(mod=0, rm=0, reg=0):
    """ Pack mod (bits 7-6), reg (bits 5-3) and rm (bits 2-0) into one byte.

    Each field is only checked against its upper bound (3, 7 and 7).
    """
    assert mod <= 3
    assert rm <= 7
    assert reg <= 7
    byte = rm
    byte |= reg << 3
    byte |= mod << 6
    return byte
45
def rex(w=0, r=0, x=0, b=0):
    """ Build a REX prefix byte: 0x40 with w, r, x and b in bits 3 to 0.

    Each flag is only checked against its upper bound of 1.
    """
    for flag in (w, r, x, b):
        assert flag <= 1
    prefix = 0x40
    prefix |= w << 3
    prefix |= r << 2
    prefix |= x << 1
    prefix |= b
    return prefix
53
def sib(ss=0, index=0, base=0):
    """ Pack ss (bits 7-6), index (bits 5-3) and base (bits 2-0) into the
    SIB byte. Each field is only checked against its upper bound. """
    assert ss <= 3
    assert index <= 7
    assert base <= 7
    byte = base
    byte |= index << 3
    byte |= ss << 6
    return byte
59
# Condition name -> 4 bits condition code that is or-ed into the Jcc opcodes.
tttn = {
    'L': 0xc,
    'G': 0xf,
    'NE': 0x5,
    'GE': 0xd,
    'LE': 0xe,
    'E': 0x4,
}
61
62 # Actual instructions:
def nearjump(distance, condition=None):
    """ Encode jmp imm32, or Jcc imm32 when a condition name is given.

    A negative distance is lowered by the length of the jump instruction
    itself (6 bytes for Jcc, 5 bytes for jmp) before it is encoded.
    """
    lim = 1 << 30
    if abs(distance) > lim:
        Error('near jump cannot jump over more than {0} bytes'.format(lim))
    if condition:
        own_length = 6
    else:
        own_length = 5
    if distance < 0:
        # Skip own instruction
        distance -= own_length
    if condition:
        # Jcc imm32: two byte opcode 0x0F, 0x80 + condition code
        head = [0x0F, 0x80 | tttn[condition]]
    else:
        head = [0xE9]
    return head + imm32(distance)
77
def shortjump(distance, condition=None):
    """ Encode jmp rel8, or Jcc rel8 when a condition name is given.

    A negative distance is lowered by 2, the length of the jump instruction
    itself, before it is encoded.
    """
    lim = 118
    if abs(distance) > lim:
        Error('short jump cannot jump over more than {0} bytes'.format(lim))
    if distance < 0:
        # Skip own instruction
        distance -= 2
    # 0x70 + condition code is Jcc rel8, 0xeb is jmp rel8
    opcode = (0x70 | tttn[condition]) if condition else 0xeb
    return [opcode] + imm8(distance)
90
91 # Helper that determines jump type:
def reljump(distance):
    """ Encode an unconditional jump, using the short form when the
    absolute distance is below 110 and the near form otherwise. """
    encoder = shortjump if abs(distance) < 110 else nearjump
    return encoder(distance)
97
def push(reg):
    """ Encode push of a 64 bits register (opcode 0x50 + register number).

    r8..r15 get a 0x41 prefix byte. Any other operand is reported through
    Error.
    """
    if reg not in regs64:
        Error('push for {0} not implemented'.format(reg))
        return None
    if rexbit[reg] == 1:
        return [0x41, 0x50 + regs64[reg]]
    return [0x50 + regs64[reg]]
106
def pop(reg):
    """ Encode pop of a 64 bits register (opcode 0x58 + register number).

    r8..r15 get a REX prefix with the b bit set. Any other operand is
    reported through Error.
    """
    if reg not in regs64:
        Error('pop for {0} not implemented'.format(reg))
        return None
    if rexbit[reg] == 1:
        return [rex(b=1), 0x58 + regs64[reg]]
    return [0x58 + regs64[reg]]
118
def INT(number):
    """ Encode int imm8: opcode 0xcd followed by the interrupt number. """
    encoded = [0xcd]
    encoded.extend(imm8(number))
    return encoded
122
def syscall():
    """ Return the two byte syscall encoding 0x0F, 0x05. """
    encoding = (0x0F, 0x05)
    return list(encoding)
125
def call(distance):
    """ Encode a call.

    An int operand gives call imm32 (0xe8), the name of a 64 bits register
    gives call r/m64 (0xFF /2). Anything else is reported through Error.
    """
    if type(distance) is int:
        return [0xe8] + imm32(distance)
    if type(distance) is str and distance in regs64:
        target = distance
        # 0xFF /2 == call r/m64, register direct (mod=3)
        encoding = [0xFF, modrm(mod=3, reg=2, rm=regs64[target])]
        if rexbit[target] == 1:
            return [rex(b=rexbit[target])] + encoding
        return encoding
    Error('Cannot call to {0}'.format(distance))
140
def ret():
    """ Return the single byte ret encoding 0xc3. """
    opcode = 0xc3
    return [opcode]
143
def increg64(reg):
    """ Encode inc of a 64 bits register: REX.W, 0xff, modrm with mod=3
    and the reg field left at 0. """
    assert reg in regs64
    return [
        rex(w=1, b=rexbit[reg]),
        0xff,
        modrm(mod=3, rm=regs64[reg]),
    ]
150
def prepost8(r8, rm8):
    """ Build the bytes around the opcode for an 8 bits register and a
    memory operand.

    r8 is the name of an 8 bits register, rm8 a list describing the memory
    operand: [base] or [base, offset] with base a 64 bits register name.
    Returns a tuple (pre, post): the bytes that go before the opcode
    (an optional REX prefix) and after it (modrm plus displacement).

    An offset that fits a signed byte is encoded as disp8 (mod=1); other
    offsets are encoded as disp32 (mod=2) instead of being truncated.
    """
    assert(r8 in regs8)
    pre = []
    # NOTE(review): when a REX prefix is emitted (base in r8..r15) the
    # register numbers 4..7 no longer select ah/ch/dh/bh on x86-64; this
    # combination is not rejected here.
    if type(rm8) is list:
        # TODO: merge mem access with prepost for 64 bits
        if len(rm8) == 1:
            base, = rm8
            if type(base) is str and base in regs64:
                # These bases need a SIB byte or a displacement with mod=0
                assert(not base in ['rbp', 'rsp', 'r12', 'r13'])
                mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8])
                if rexbit[base] == 1:
                    pre.append(rex(b=1))
                post = [mod_rm]
            else:
                Error('One arg of type {0} not implemented'.format(base))
        elif len(rm8) == 2:
            base, offset = rm8
            assert(type(offset) is int)
            assert(base in regs64)

            if base == 'rsp' or base == 'r12':
                Error('Cannot use rsp or r12 as base yet')
            if rexbit[base] == 1:
                pre.append(rex(b=1))
            if -128 <= offset <= 127:
                # mod=1: one byte sign extended displacement
                mod, displacement = 1, imm8(offset)
            else:
                # mod=2: four byte displacement
                mod, displacement = 2, imm32(offset)
            mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs8[r8])
            post = [mod_rm] + displacement
        else:
            Error('not supporting prepost8 with list len {0}'.format(len(rm8)))
    else:
        Error('Not supporting move with reg8 {0}'.format(r8))
    return pre, post
182
def _displacement(value):
    """ Return (mod, bytes) for a register relative displacement: disp8
    with mod=1 when value fits a signed byte, else disp32 with mod=2. """
    if -128 <= value <= 127:
        return 1, imm8(value)
    return 2, imm32(value)


def prepost(r64, rm64):
    """ Build the bytes around the opcode for a 64 bits register and a
    register or memory operand.

    r64 is the name of a 64 bits register. rm64 is either the name of a
    64 bits register, or a list describing a memory operand:
      [base, index, disp]  base + index + displacement
      [base, offset]       base + displacement, base may be 'RIP'
      [address]            absolute 32 bits address
    Returns a tuple (pre, post): the REX prefix in a list, and the bytes
    following the opcode (modrm, optional SIB byte and displacement).

    Register relative displacements that do not fit a signed byte are
    encoded as disp32 (mod=2) instead of being truncated to 8 bits.
    Returns None implicitly when rm64 is neither a list nor a 64 bits
    register name.
    """
    assert(r64 in regs64)
    if type(rm64) is list:
        if len(rm64) == 3:
            base, index, disp = rm64
            assert(base in regs64)
            assert(index in regs64)
            assert(type(disp) is int)
            # Assert that no special cases are used:
            # TODO: swap base and index to avoid special cases
            # TODO: exploit special cases and make better code
            assert(index != 'rsp')

            rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base])
            mod, displacement = _displacement(disp)
            # rm=4 indicates a SIB byte: [base][index]+displacement
            mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
            si_b = sib(ss=0, index=regs64[index], base=regs64[base])
            return [rexprefix], [mod_rm, si_b] + displacement
        elif len(rm64) == 2:
            base, offset = rm64
            assert(type(offset) is int)
            if base == 'RIP':
                # RIP pointer relative addressing mode!
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=5, reg=regs64[r64])
                return [rexprefix], [mod_rm] + imm32(offset)
            else:
                assert(base in regs64)

                rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base])
                mod, displacement = _displacement(offset)
                if base == 'rsp' or base == 'r12':
                    # extended function that uses SIB byte:
                    # rm=4 indicates a SIB byte follows
                    mod_rm = modrm(mod=mod, rm=4, reg=regs64[r64])
                    # index=4 indicates that index is not used
                    si_b = sib(ss=0, index=4, base=regs64[base])
                    return [rexprefix], [mod_rm, si_b] + displacement
                else:
                    mod_rm = modrm(mod=mod, rm=regs64[base], reg=regs64[r64])
                    return [rexprefix], [mod_rm] + displacement
        elif len(rm64) == 1:
            offset = rm64[0]
            if type(offset) is int:
                rexprefix = rex(w=1, r=rexbit[r64])
                mod_rm = modrm(mod=0, rm=4, reg=regs64[r64])
                # SIB without index and with base=5: plain disp32 address
                si_b = sib(ss=0, index=4, base=5)  # 0x25
                return [rexprefix], [mod_rm, si_b] + imm32(offset)
            else:
                Error('Memory reference of type {0} not implemented'.format(offset))
        else:
            Error('Memory reference not implemented')
    elif rm64 in regs64:
        # register direct operand (mod=3)
        rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64])
        mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64])
        return [rexprefix], [mod_rm]
239
def leareg64(rega, m):
    """ Encode lea r64, m (opcode 0x8d) with rega as destination and m as
    the operand description handed to prepost. """
    prefix, tail = prepost(rega, m)
    return prefix + [0x8d] + tail
244
def mov(rega, regb):
    """ Encode a mov with rega as destination and regb as source.

    Supported combinations:
      regb is an int             mov r64, imm64 (0xb8 + register number)
      regb is a 64 bits register mov r/m64, r64 (0x89)
      regb is an 8 bits register mov r/m8, r8   (0x88)
      rega is a 64 bits register mov r64, r/m64 (0x8b)
    Unsupported combinations are reported through Error.
    """
    if type(regb) is int:
        prefix = [rex(w=1, b=rexbit[rega])]
        op = 0xb8 + regs64[rega]
        tail = imm64(regb)
    elif type(regb) is str:
        if regb in regs64:
            op = 0x89
            prefix, tail = prepost(regb, rega)
        elif regb in regs8:
            op = 0x88
            prefix, tail = prepost8(regb, rega)
        else:
            Error('Unknown register {0}'.format(regb))
    elif type(rega) is str:
        if rega in regs64:
            op = 0x8b
            prefix, tail = prepost(rega, regb)
        else:
            Error('Unknown register {0}'.format(rega))
    else:
        Error('Move of this kind {0}, {1} not implemented'.format(rega, regb))
    return prefix + [op] + tail
268
def xorreg64(rega, regb):
    """ Encode xor r/m64, r64 (opcode 0x31) with rega in the rm field and
    regb in the reg field, both register direct. """
    # Alternative is 0x33 XOR r64, r/m64
    return [
        rex(w=1, r=rexbit[regb], b=rexbit[rega]),
        0x31,
        modrm(3, rm=regs64[rega], reg=regs64[regb]),
    ]
275
276 # integer arithmetic:
def addreg64(rega, regb):
    """ Encode add rega, regb.

    regb is either the name of a 64 bits register (ADD r/m64, r64, opcode
    0x01) or an int constant. Constants are encoded as a sign extended
    imm8 (0x83 /0) or imm32 (0x81 /0); constants that do not fit a signed
    32 bits value, and other operand kinds, are reported through Error.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x01  # ADD r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The immediate is sign extended by the processor, so it needs a
        # lower bound as well. The upper limit of 100 for the short form
        # is kept so that existing encodings do not change.
        if -128 <= regb < 100:
            opcode = 0x83  # add r/m, imm8
            immediate = imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            opcode = 0x81  # add r/m64, imm32
            immediate = imm32(regb)
        else:
            Error('Constant value too large!')
            return None
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=0)
        return [rexprefix, opcode, mod_rm] + immediate
    else:
        Error('unknown second operand {0}!'.format(regb))
297
def subreg64(rega, regb):
    """ Encode sub rega, regb.

    regb is either the name of a 64 bits register (SUB r/m64, r64, opcode
    0x29) or an int constant. Constants are encoded as a sign extended
    imm8 (0x83 /5) or imm32 (0x81 /5); constants that do not fit a signed
    32 bits value, and other operand kinds, are reported through Error.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x29  # SUB r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        # The immediate is sign extended by the processor, so it needs a
        # lower bound as well. The upper limit of 100 for the short form
        # is kept so that existing encodings do not change.
        if -128 <= regb < 100:
            opcode = 0x83  # sub r/m, imm8
            immediate = imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            opcode = 0x81  # sub r/m64, imm32
            immediate = imm32(regb)
        else:
            Error('Constant value too large!')
            return None
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=5)
        return [rexprefix, opcode, mod_rm] + immediate
    else:
        Error('unknown second operand {0}!'.format(regb))
319
def idivreg64(reg):
    """ Encode idiv r/m64 (opcode 0xf7 with 7 in the modrm reg field) for
    a register direct operand. """
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,
        modrm(3, rm=regs64[reg], reg=7),
    ]
325
def imulreg64_rax(reg):
    """ Encode the one operand imul r/m64 (opcode 0xf7 with 5 in the modrm
    reg field) for a register direct operand. """
    return [
        rex(w=1, b=rexbit[reg]),
        0xf7,
        modrm(3, rm=regs64[reg], reg=5),
    ]
331
def imulreg64(rega, regb):
    """ Encode imul r64, r/m64 (two byte opcode 0x0f, 0xaf) with rega as
    destination and regb as the operand handed to prepost. """
    prefix, tail = prepost(rega, regb)
    two_byte_opcode = [0x0f, 0xaf]
    return prefix + two_byte_opcode + tail
337
def cmpreg64(rega, regb):
    """ Encode cmp rega, regb.

    regb is either the name of a 64 bits register (CMP r/m64, r64, opcode
    0x39) or an int constant. Constants that fit a signed byte use the
    imm8 form (0x83 /7), other constants that fit a signed 32 bits value
    use the imm32 form (0x81 /7) instead of being truncated to 8 bits.
    Larger constants and other operand kinds are reported through Error.
    """
    if regb in regs64:
        pre, post = prepost(regb, rega)
        opcode = 0x39  # CMP r/m64, r64
        return pre + [opcode] + post
    elif type(regb) is int:
        if -128 <= regb <= 127:
            opcode = 0x83  # CMP r/m64, imm8
            immediate = imm8(regb)
        elif -(1 << 31) <= regb < (1 << 31):
            opcode = 0x81  # CMP r/m64, imm32
            immediate = imm32(regb)
        else:
            Error('Constant value too large!')
            return None
        rexprefix = rex(w=1, b=rexbit[rega])
        mod_rm = modrm(3, rm=regs64[rega], reg=7)
        return [rexprefix, opcode, mod_rm] + immediate
    else:
        Error('not implemented cmp64')
351
352 # Mapping that maps string names to the right functions:
# Mnemonic -> (encoder function, number of operands it takes)
opcodes = {
    'mov': (mov, 2),
    'lea': (leareg64, 2),
    'int': (INT, 1),
    'syscall': (syscall, 0),
}
354