Mercurial > lcfOS
diff python/ppci/target/x86/instructions.py @ 398:c0d9837acde8
x86 target refactor
author | Windel Bouwman |
---|---|
date | Thu, 29 May 2014 12:13:37 +0200 |
parents | python/ppci/target/x86/x86_2.py@86b02c98a717 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/ppci/target/x86/instructions.py Thu May 29 12:13:37 2014 +0200 @@ -0,0 +1,426 @@ +""" + X86 target descriptions and encodings. +""" + +from ..basetarget import Register, Instruction +from .registers import regs64, X86Register + +from ..token import Token, u32, u8, bit_range + + +modrm = {'rax': 0, 'rbx': 1} + +# Table 3.1 of the intel manual: +# use REX.W on the table below: + + +# Helper functions: +def imm64(x): + """ represent 64 bits integer in little endian 8 bytes""" + if x < 0: + x = x + (1 << 64) + x = x & 0xFFFFFFFFFFFFFFFF + return [ (x >> (p*8)) & 0xFF for p in range(8) ] + +def imm32(x): + """ represent 32 bits integer in little endian 4 bytes""" + if x < 0: + x = x + (1 << 32) + x = x & 0xFFFFFFFF + return [ (x >> (p*8)) & 0xFF for p in range(4) ] + + +def imm8(x): + if x < 0: + x = x + (1 << 8) + x = x & 0xFF + return [ x ] + + +class ModRmToken(Token): + """ Construct the modrm byte from its components """ + def __init__(self, mod=0, rm=0, reg=0): + super().__init__(8) + assert(mod <= 3) + assert(rm <= 7) + assert(reg <= 7) + self.mod = mod + self.rm = rm + self.reg = reg + + mod = bit_range(6, 8) + rm = bit_range(0, 3) + reg = bit_range(3, 6) + + def encode(self): + return u8(self.bit_value) + + +class RexToken(Token): + """ Create a REX prefix byte """ + def __init__(self, w=0, r=0, x=0, b=0): + super().__init__(8) + assert(w <= 1) + assert(r <= 1) + assert(x <= 1) + assert(b <= 1) + self.w = w + self.r = r + self.x = x + self.b = b + self.set_bit(6, 1) + + w = bit_range(3, 4) + r = bit_range(2, 3) + x = bit_range(1, 2) + b = bit_range(0, 1) + + def encode(self): + return u8(self.bit_value) + + +def sib(ss=0, index=0, base=0): + assert(ss <= 3) + assert(index <= 7) + assert(base <= 7) + return (ss << 6) | (index << 3) | base + +tttn = {'L':0xc,'G':0xf,'NE':0x5,'GE':0xd,'LE':0xe, 'E':0x4} + +# Actual instructions: +def nearjump(distance, condition=None): + """ jmp imm32 """ + lim = (1<<30) + if abs(distance) > lim: + Error('near jump cannot jump over more than {0} bytes'.format(lim)) + if condition: + if distance < 0: + distance -= 6 # Skip own instruction + opcode = 0x80 | tttn[condition] # Jcc imm32 + return [0x0F, opcode] + imm32(distance) + else: + if distance < 0: + distance -= 5 # Skip own instruction + return [ 0xE9 ] + imm32(distance) + +def shortjump(distance, condition=None): + """ jmp imm8 """ + lim = 118 + if abs(distance) > lim: + Error('short jump cannot jump over more than {0} bytes'.format(lim)) + if distance < 0: + distance -= 2 # Skip own instruction + if condition: + opcode = 0x70 | tttn[condition] # Jcc rel8 + else: + opcode = 0xeb # jmp rel8 + return [opcode] + imm8(distance) + +# Helper that determines jump type: +def reljump(distance): + if abs(distance) < 110: + return shortjump(distance) + else: + return nearjump(distance) + + +class Push(Instruction): + def __init__(self, reg): + assert(reg in regs64), str(reg) + self.reg = reg + + def encode(self): + code = [] + if self.reg.rexbit == 1: + code.append(0x41) + code.append(0x50 + self.reg.regbits) + return bytes(code) + + +class Pop(Instruction): + def __init__(self, reg): + assert(reg in regs64), str(reg) + self.reg = reg + + def encode(self): + code = [] + if self.reg.rexbit == 1: + code.append(0x41) + code.append(0x58 + self.reg.regbits) + return bytes(code) + + +def pop(reg): + if reg in regs64: + if rexbit[reg] == 1: + rexprefix = rex(b=1) + opcode = 0x58 + regs64[reg] + return [rexprefix, opcode] + else: + opcode = 0x58 + regs64[reg] + return [ opcode ] + else: + Error('pop for {0} not implemented'.format(reg)) + +def INT(number): + opcode = 0xcd + return [opcode] + imm8(number) + +def syscall(): + return [0x0F, 0x05] + +def call(distance): + if type(distance) is int: + return [0xe8]+imm32(distance) + elif type(distance) is str and distance in regs64: + reg = distance + opcode = 0xFF # 0xFF /2 == call r/m64 + mod_rm = modrm(mod=3, reg=2, rm=regs64[reg]) + if rexbit[reg] == 1: + rexprefix = rex(b=rexbit[reg]) + return [rexprefix, opcode, mod_rm] + else: + return [opcode, mod_rm] + else: + Error('Cannot call to {0}'.format(distance)) + + +class Ret(Instruction): + def __init__(self): + pass + + def encode(self): + return [ 0xc3 ] + + +class Inc(Instruction): + def __init__(self, reg): + assert(reg in regs64), str(reg) + self.rex = RexToken(w=1, b=reg.rexbit) + self.opcode = 0xff + self.mod_rm = ModRmToken(mod=3, rm=reg.regbits) + + def encode(self): + code = bytes([self.opcode]) + return self.rex.encode() + code + self.mod_rm.encode() + + +def prepost8(r8, rm8): + assert(r8 in regs8) + pre = [] + if type(rm8) is list: + # TODO: merge mem access with prepost for 64 bits + if len(rm8) == 1: + base, = rm8 + if type(base) is str and base in regs64: + assert(not base in ['rbp', 'rsp', 'r12', 'r13']) + mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8]) + if rexbit[base] == 1: + pre.append(rex(b=1)) + post = [mod_rm] + else: + Error('One arg of type {0} not implemented'.format(base)) + elif len(rm8) == 2: + base, offset = rm8 + assert(type(offset) is int) + assert(base in regs64) + + if base == 'rsp' or base == 'r12': + Error('Cannot use rsp or r12 as base yet') + if rexbit[base] == 1: + pre.append( rex(b=1) ) + mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8]) + post = [mod_rm] + imm8(offset) + else: + Error('not supporting prepost8 with list len {0}'.format(len(rm8))) + else: + Error('Not supporting move with reg8 {0}'.format(r8)) + return pre, post + +def prepost(r64, rm64): + assert(r64 in regs64) + if type(rm64) is list: + if len(rm64) == 3: + base, index, disp = rm64 + assert(base in regs64) + assert(index in regs64) + assert(type(disp) is int) + # Assert that no special cases are used: + # TODO: swap base and index to avoid special cases + # TODO: exploit special cases and make better code + assert(index != 'rsp') + + rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base]) + # mod=1 and rm=4 indicates a SIB byte: [--][--]+imm8 + mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) + si_b = sib(ss=0, index=regs64[index], base=regs64[base]) + return [rexprefix], [mod_rm, si_b] + imm8(disp) + elif len(rm64) == 2: + base, offset = rm64 + assert(type(offset) is int) + if base == 'RIP': + # RIP pointer relative addressing mode! + rexprefix = rex(w=1, r=rexbit[r64]) + mod_rm = modrm(mod=0, rm=5, reg=regs64[r64]) + return [rexprefix], [mod_rm] + imm32(offset) + else: + assert(base in regs64) + + if base == 'rsp' or base == 'r12': + # extended function that uses SIB byte + rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) + # rm=4 indicates a SIB byte follows + mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) + # index=4 indicates that index is not used + si_b = sib(ss=0, index=4, base=regs64[base]) + return [rexprefix], [mod_rm, si_b] + imm8(offset) + else: + rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) + mod_rm = modrm(mod=1, rm=regs64[base], reg=regs64[r64]) + return [rexprefix], [mod_rm] + imm8(offset) + elif len(rm64) == 1: + offset = rm64[0] + if type(offset) is int: + rexprefix = rex(w=1, r=rexbit[r64]) + mod_rm = modrm(mod=0, rm=4,reg=regs64[r64]) + si_b = sib(ss=0, index=4,base=5) # 0x25 + return [rexprefix], [mod_rm, si_b] + imm32(offset) + else: + Error('Memory reference of type {0} not implemented'.format(offset)) + else: + Error('Memory reference not implemented') + elif rm64 in regs64: + rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64]) + mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64]) + return [rexprefix], [mod_rm] + + +def leareg64(rega, m): + opcode = 0x8d # lea r64, m + pre, post = prepost(rega, m) + return pre + [opcode] + post + + +class Mov1(Instruction): + """ Mov r64 to r64 """ + def __init__(self, dst, src): + assert src in regs64, str(src) + assert dst in regs64, str(dst) + self.rex = RexToken(w=1, r=dst.rexbit, b=src.rexbit) + self.mod_rm = ModRmToken(mod=3, rm=dst.regbits, reg=src.regbits) + + def encode(self): + opcode = 0x89 # mov r/m64, r64 + code = bytes([opcode]) + return self.rex.encode() + code + self.mod_rm.encode() + + +def Mov(dst, src): + if type(src) is int: + pre = [rex(w=1, b=rexbit[rega])] + opcode = 0xb8 + regs64[rega] + post = imm64(regb) + elif type(src) is X86Register: + return Mov1(dst, src) + elif type(src) is str: + if rega in regs64: + opcode = 0x8b # mov r64, r/m64 + pre, post = prepost(rega, regb) + else: + raise Exception('Unknown register {0}'.format(rega)) + else: + raise Exception('Move of this kind {0}, {1} not implemented'.format(rega, regb)) + return pre + [opcode] + post + + +def Xor(rega, regb): + return Xor1(rega, regb) + + +class Xor1(Instruction): + def __init__(self, a, b): + self.rex = RexToken(w=1, r=b.rexbit, b=a.rexbit) + self.mod_rm = ModRmToken(mod=3, rm=a.regbits, reg=b.regbits) + + def encode(self): + opcode = 0x31 # XOR r/m64, r64 + # Alternative is 0x33 XOR r64, r/m64 + code = bytes([opcode]) + return self.rex.encode() + code + self.mod_rm.encode() + + +# integer arithmatic: +def addreg64(rega, regb): + if regb in regs64: + pre, post = prepost(regb, rega) + opcode = 0x01 # ADD r/m64, r64 + return pre + [opcode] + post + elif type(regb) is int: + if regb < 100: + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x83 # add r/m, imm8 + mod_rm = modrm(3, rm=regs64[rega], reg=0) + return [rexprefix, opcode, mod_rm]+imm8(regb) + elif regb < (1<<31): + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x81 # add r/m64, imm32 + mod_rm = modrm(3, rm=regs64[rega], reg=0) + return [rexprefix, opcode, mod_rm]+imm32(regb) + else: + Error('Constant value too large!') + else: + Error('unknown second operand!'.format(regb)) + +def subreg64(rega, regb): + if regb in regs64: + pre, post = prepost(regb, rega) + opcode = 0x29 # SUB r/m64, r64 + return pre + [opcode] + post + elif type(regb) is int: + if regb < 100: + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x83 # sub r/m, imm8 + mod_rm = modrm(3, rm=regs64[rega], reg=5) + return [rexprefix, opcode, mod_rm]+imm8(regb) + elif regb < (1<<31): + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x81 # sub r/m64, imm32 + mod_rm = modrm(3, rm=regs64[rega], reg=5) + return [rexprefix, opcode, mod_rm]+imm32(regb) + else: + Error('Constant value too large!') + + else: + Error('unknown second operand!'.format(regb)) + +def idivreg64(reg): + rexprefix = rex(w=1, b=rexbit[reg]) + opcode = 0xf7 # IDIV r/m64 + mod_rm = modrm(3, rm=regs64[reg], reg=7) + return [rexprefix, opcode, mod_rm] + +def imulreg64_rax(reg): + rexprefix = rex(w=1, b=rexbit[reg]) + opcode = 0xf7 # IMUL r/m64 + mod_rm = modrm(3, rm=regs64[reg], reg=5) + return [rexprefix, opcode, mod_rm] + +def imulreg64(rega, regb): + pre, post = prepost(rega, regb) + opcode = 0x0f # IMUL r64, r/m64 + opcode2 = 0xaf + return pre + [opcode, opcode2] + post + + +def cmpreg64(rega, regb): + if regb in regs64: + pre, post = prepost(regb, rega) + opcode = 0x39 # CMP r/m64, r64 + return pre + [opcode] + post + elif type(regb) is int: + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x83 # CMP r/m64, imm8 + mod_rm = modrm(3, rm=regs64[rega], reg=7) + return [rexprefix, opcode, mod_rm] + imm8(regb) + else: + Error('not implemented cmp64')