Mercurial > paraspace
view paraspace/dalvik_opcodes.py @ 103:8a53e6f7f517
Provide convenience API to convert str block into opcode vectors
author | Thinker K.F. Li <thinker@codemud.net> |
---|---|
date | Wed, 27 Jul 2011 12:09:19 +0800 |
parents | fd668c00e7e0 |
children | 61cef1662035 |
line wrap: on
line source
## \file # Define constants about opcodes of Dalvik. Constants are opcodes, # names, formats, and maps. # class all_opcodes(object): OP_NOP = 0x00 OP_MOVE = 0x01 OP_MOVE_FROM16 = 0x02 OP_MOVE_16 = 0x03 OP_MOVE_WIDE = 0x04 OP_MOVE_WIDE_FROM16 = 0x05 OP_MOVE_WIDE_16 = 0x06 OP_MOVE_OBJECT = 0x07 OP_MOVE_OBJECT_FROM16 = 0x08 OP_MOVE_OBJECT_16 = 0x09 OP_MOVE_RESULT = 0x0a OP_MOVE_RESULT_WIDE = 0x0b OP_MOVE_RESULT_OBJECT = 0x0c OP_MOVE_EXCEPTION = 0x0d OP_RETURN_VOID = 0x0e OP_RETURN = 0x0f OP_RETURN_WIDE = 0x10 OP_RETURN_OBJECT = 0x11 OP_CONST_4 = 0x12 OP_CONST_16 = 0x13 OP_CONST = 0x14 OP_CONST_HIGH16 = 0x15 OP_CONST_WIDE_16 = 0x16 OP_CONST_WIDE_32 = 0x17 OP_CONST_WIDE = 0x18 OP_CONST_WIDE_HIGH16 = 0x19 OP_CONST_STRING = 0x1a OP_CONST_STRING_JUMBO = 0x1b OP_CONST_CLASS = 0x1c OP_MONITOR_ENTER = 0x1d OP_MONITOR_EXIT = 0x1e OP_CHECK_CAST = 0x1f OP_INSTANCE_OF = 0x20 OP_ARRAY_LENGTH = 0x21 OP_NEW_INSTANCE = 0x22 OP_NEW_ARRAY = 0x23 OP_FILLED_NEW_ARRAY = 0x24 OP_FILLED_NEW_ARRAY_RANGE = 0x25 OP_FILL_ARRAY_DATA = 0x26 OP_THROW = 0x27 OP_GOTO = 0x28 OP_GOTO_16 = 0x29 OP_GOTO_32 = 0x2a OP_PACKED_SWITCH = 0x2b OP_SPARSE_SWITCH = 0x2c OP_CMPL_FLOAT = 0x2d OP_CMPG_FLOAT = 0x2e OP_CMPL_DOUBLE = 0x2f OP_CMPG_DOUBLE = 0x30 OP_CMP_LONG = 0x31 OP_IF_EQ = 0x32 OP_IF_NE = 0x33 OP_IF_LT = 0x34 OP_IF_GE = 0x35 OP_IF_GT = 0x36 OP_IF_LE = 0x37 OP_IF_EQZ = 0x38 OP_IF_NEZ = 0x39 OP_IF_LTZ = 0x3a OP_IF_GEZ = 0x3b OP_IF_GTZ = 0x3c OP_IF_LEZ = 0x3d OP_UNUSED_3E = 0x3e OP_UNUSED_3F = 0x3f OP_UNUSED_40 = 0x40 OP_UNUSED_41 = 0x41 OP_UNUSED_42 = 0x42 OP_UNUSED_43 = 0x43 OP_AGET = 0x44 OP_AGET_WIDE = 0x45 OP_AGET_OBJECT = 0x46 OP_AGET_BOOLEAN = 0x47 OP_AGET_BYTE = 0x48 OP_AGET_CHAR = 0x49 OP_AGET_SHORT = 0x4a OP_APUT = 0x4b OP_APUT_WIDE = 0x4c OP_APUT_OBJECT = 0x4d OP_APUT_BOOLEAN = 0x4e OP_APUT_BYTE = 0x4f OP_APUT_CHAR = 0x50 OP_APUT_SHORT = 0x51 OP_IGET = 0x52 OP_IGET_WIDE = 0x53 OP_IGET_OBJECT = 0x54 OP_IGET_BOOLEAN = 0x55 OP_IGET_BYTE = 0x56 OP_IGET_CHAR = 0x57 OP_IGET_SHORT = 0x58 OP_IPUT = 
0x59 OP_IPUT_WIDE = 0x5a OP_IPUT_OBJECT = 0x5b OP_IPUT_BOOLEAN = 0x5c OP_IPUT_BYTE = 0x5d OP_IPUT_CHAR = 0x5e OP_IPUT_SHORT = 0x5f OP_SGET = 0x60 OP_SGET_WIDE = 0x61 OP_SGET_OBJECT = 0x62 OP_SGET_BOOLEAN = 0x63 OP_SGET_BYTE = 0x64 OP_SGET_CHAR = 0x65 OP_SGET_SHORT = 0x66 OP_SPUT = 0x67 OP_SPUT_WIDE = 0x68 OP_SPUT_OBJECT = 0x69 OP_SPUT_BOOLEAN = 0x6a OP_SPUT_BYTE = 0x6b OP_SPUT_CHAR = 0x6c OP_SPUT_SHORT = 0x6d OP_INVOKE_VIRTUAL = 0x6e OP_INVOKE_SUPER = 0x6f OP_INVOKE_DIRECT = 0x70 OP_INVOKE_STATIC = 0x71 OP_INVOKE_INTERFACE = 0x72 OP_UNUSED_73 = 0x73 OP_INVOKE_VIRTUAL_RANGE = 0x74 OP_INVOKE_SUPER_RANGE = 0x75 OP_INVOKE_DIRECT_RANGE = 0x76 OP_INVOKE_STATIC_RANGE = 0x77 OP_INVOKE_INTERFACE_RANGE = 0x78 OP_UNUSED_79 = 0x79 OP_UNUSED_7A = 0x7a OP_NEG_INT = 0x7b OP_NOT_INT = 0x7c OP_NEG_LONG = 0x7d OP_NOT_LONG = 0x7e OP_NEG_FLOAT = 0x7f OP_NEG_DOUBLE = 0x80 OP_INT_TO_LONG = 0x81 OP_INT_TO_FLOAT = 0x82 OP_INT_TO_DOUBLE = 0x83 OP_LONG_TO_INT = 0x84 OP_LONG_TO_FLOAT = 0x85 OP_LONG_TO_DOUBLE = 0x86 OP_FLOAT_TO_INT = 0x87 OP_FLOAT_TO_LONG = 0x88 OP_FLOAT_TO_DOUBLE = 0x89 OP_DOUBLE_TO_INT = 0x8a OP_DOUBLE_TO_LONG = 0x8b OP_DOUBLE_TO_FLOAT = 0x8c OP_INT_TO_BYTE = 0x8d OP_INT_TO_CHAR = 0x8e OP_INT_TO_SHORT = 0x8f OP_ADD_INT = 0x90 OP_SUB_INT = 0x91 OP_MUL_INT = 0x92 OP_DIV_INT = 0x93 OP_REM_INT = 0x94 OP_AND_INT = 0x95 OP_OR_INT = 0x96 OP_XOR_INT = 0x97 OP_SHL_INT = 0x98 OP_SHR_INT = 0x99 OP_USHR_INT = 0x9a OP_ADD_LONG = 0x9b OP_SUB_LONG = 0x9c OP_MUL_LONG = 0x9d OP_DIV_LONG = 0x9e OP_REM_LONG = 0x9f OP_AND_LONG = 0xa0 OP_OR_LONG = 0xa1 OP_XOR_LONG = 0xa2 OP_SHL_LONG = 0xa3 OP_SHR_LONG = 0xa4 OP_USHR_LONG = 0xa5 OP_ADD_FLOAT = 0xa6 OP_SUB_FLOAT = 0xa7 OP_MUL_FLOAT = 0xa8 OP_DIV_FLOAT = 0xa9 OP_REM_FLOAT = 0xaa OP_ADD_DOUBLE = 0xab OP_SUB_DOUBLE = 0xac OP_MUL_DOUBLE = 0xad OP_DIV_DOUBLE = 0xae OP_REM_DOUBLE = 0xaf OP_ADD_INT_2ADDR = 0xb0 OP_SUB_INT_2ADDR = 0xb1 OP_MUL_INT_2ADDR = 0xb2 OP_DIV_INT_2ADDR = 0xb3 OP_REM_INT_2ADDR = 0xb4 OP_AND_INT_2ADDR = 0xb5 OP_OR_INT_2ADDR = 0xb6 
OP_XOR_INT_2ADDR = 0xb7 OP_SHL_INT_2ADDR = 0xb8 OP_SHR_INT_2ADDR = 0xb9 OP_USHR_INT_2ADDR = 0xba OP_ADD_LONG_2ADDR = 0xbb OP_SUB_LONG_2ADDR = 0xbc OP_MUL_LONG_2ADDR = 0xbd OP_DIV_LONG_2ADDR = 0xbe OP_REM_LONG_2ADDR = 0xbf OP_AND_LONG_2ADDR = 0xc0 OP_OR_LONG_2ADDR = 0xc1 OP_XOR_LONG_2ADDR = 0xc2 OP_SHL_LONG_2ADDR = 0xc3 OP_SHR_LONG_2ADDR = 0xc4 OP_USHR_LONG_2ADDR = 0xc5 OP_ADD_FLOAT_2ADDR = 0xc6 OP_SUB_FLOAT_2ADDR = 0xc7 OP_MUL_FLOAT_2ADDR = 0xc8 OP_DIV_FLOAT_2ADDR = 0xc9 OP_REM_FLOAT_2ADDR = 0xca OP_ADD_DOUBLE_2ADDR = 0xcb OP_SUB_DOUBLE_2ADDR = 0xcc OP_MUL_DOUBLE_2ADDR = 0xcd OP_DIV_DOUBLE_2ADDR = 0xce OP_REM_DOUBLE_2ADDR = 0xcf OP_ADD_INT_LIT16 = 0xd0 OP_RSUB_INT = 0xd1 OP_MUL_INT_LIT16 = 0xd2 OP_DIV_INT_LIT16 = 0xd3 OP_REM_INT_LIT16 = 0xd4 OP_AND_INT_LIT16 = 0xd5 OP_OR_INT_LIT16 = 0xd6 OP_XOR_INT_LIT16 = 0xd7 OP_ADD_INT_LIT8 = 0xd8 OP_RSUB_INT_LIT8 = 0xd9 OP_MUL_INT_LIT8 = 0xda OP_DIV_INT_LIT8 = 0xdb OP_REM_INT_LIT8 = 0xdc OP_AND_INT_LIT8 = 0xdd OP_OR_INT_LIT8 = 0xde OP_XOR_INT_LIT8 = 0xdf OP_SHL_INT_LIT8 = 0xe0 OP_SHR_INT_LIT8 = 0xe1 OP_USHR_INT_LIT8 = 0xe2 OP_UNUSED_E3 = 0xe3 OP_UNUSED_E4 = 0xe4 OP_UNUSED_E5 = 0xe5 OP_UNUSED_E6 = 0xe6 OP_UNUSED_E7 = 0xe7 OP_UNUSED_E8 = 0xe8 OP_UNUSED_E9 = 0xe9 OP_UNUSED_EA = 0xea OP_UNUSED_EB = 0xeb OP_UNUSED_EC = 0xec OP_UNUSED_ED = 0xed OP_EXECUTE_INLINE = 0xee OP_UNUSED_EF = 0xef OP_INVOKE_DIRECT_EMPTY = 0xf0 OP_UNUSED_F1 = 0xf1 OP_IGET_QUICK = 0xf2 OP_IGET_WIDE_QUICK = 0xf3 OP_IGET_OBJECT_QUICK = 0xf4 OP_IPUT_QUICK = 0xf5 OP_IPUT_WIDE_QUICK = 0xf6 OP_IPUT_OBJECT_QUICK = 0xf7 OP_INVOKE_VIRTUAL_QUICK = 0xf8 OP_INVOKE_VIRTUAL_QUICK_RANGE = 0xf9 OP_INVOKE_SUPER_QUICK = 0xfa OP_INVOKE_SUPER_QUICK_RANGE = 0xfb OP_UNUSED_FC = 0xfc OP_UNUSED_FD = 0xfd OP_UNUSED_FE = 0xfe OP_UNUSED_FF = 0xff pass ## \brief List of names of opcodes opcode_names = sorted([name for name in all_opcodes.__dict__.keys() if name.startswith('OP_')], key=lambda name: all_opcodes.__dict__[name]) ## \brief Map opcode names to opcodes name_2_opcodes = 
dict([(name, code) for name, code in all_opcodes.__dict__.items()]) ## \brief Length of instructions of opcodes in bytes opcode_widths = [0] * 256 _w1_ops = '''OP_NOP OP_MOVE OP_MOVE_WIDE OP_MOVE_OBJECT OP_MOVE_RESULT OP_MOVE_RESULT_WIDE OP_MOVE_RESULT_OBJECT OP_MOVE_EXCEPTION OP_RETURN_VOID OP_RETURN OP_RETURN_WIDE OP_RETURN_OBJECT OP_CONST_4 OP_MONITOR_ENTER OP_MONITOR_EXIT OP_ARRAY_LENGTH OP_THROW OP_GOTO OP_NEG_INT OP_NOT_INT OP_NEG_LONG OP_NOT_LONG OP_NEG_FLOAT OP_NEG_DOUBLE OP_INT_TO_LONG OP_INT_TO_FLOAT OP_INT_TO_DOUBLE OP_LONG_TO_INT OP_LONG_TO_FLOAT OP_LONG_TO_DOUBLE OP_FLOAT_TO_INT OP_FLOAT_TO_LONG OP_FLOAT_TO_DOUBLE OP_DOUBLE_TO_INT OP_DOUBLE_TO_LONG OP_DOUBLE_TO_FLOAT OP_INT_TO_BYTE OP_INT_TO_CHAR OP_INT_TO_SHORT OP_ADD_INT_2ADDR OP_SUB_INT_2ADDR OP_MUL_INT_2ADDR OP_DIV_INT_2ADDR OP_REM_INT_2ADDR OP_AND_INT_2ADDR OP_OR_INT_2ADDR OP_XOR_INT_2ADDR OP_SHL_INT_2ADDR OP_SHR_INT_2ADDR OP_USHR_INT_2ADDR OP_ADD_LONG_2ADDR OP_SUB_LONG_2ADDR OP_MUL_LONG_2ADDR OP_DIV_LONG_2ADDR OP_REM_LONG_2ADDR OP_AND_LONG_2ADDR OP_OR_LONG_2ADDR OP_XOR_LONG_2ADDR OP_SHL_LONG_2ADDR OP_SHR_LONG_2ADDR OP_USHR_LONG_2ADDR OP_ADD_FLOAT_2ADDR OP_SUB_FLOAT_2ADDR OP_MUL_FLOAT_2ADDR OP_DIV_FLOAT_2ADDR OP_REM_FLOAT_2ADDR OP_ADD_DOUBLE_2ADDR OP_SUB_DOUBLE_2ADDR OP_MUL_DOUBLE_2ADDR OP_DIV_DOUBLE_2ADDR OP_REM_DOUBLE_2ADDR'''.split() for name in _w1_ops: _opcode = name_2_opcodes[name] opcode_widths[_opcode] = 1 pass _w2_ops = '''OP_MOVE_FROM16 OP_MOVE_WIDE_FROM16 OP_MOVE_OBJECT_FROM16 OP_CONST_16 OP_CONST_HIGH16 OP_CONST_WIDE_16 OP_CONST_WIDE_HIGH16 OP_CONST_STRING OP_CONST_CLASS OP_CHECK_CAST OP_INSTANCE_OF OP_NEW_INSTANCE OP_NEW_ARRAY OP_CMPL_FLOAT OP_CMPG_FLOAT OP_CMPL_DOUBLE OP_CMPG_DOUBLE OP_CMP_LONG OP_GOTO_16 OP_IF_EQ OP_IF_NE OP_IF_LT OP_IF_GE OP_IF_GT OP_IF_LE OP_IF_EQZ OP_IF_NEZ OP_IF_LTZ OP_IF_GEZ OP_IF_GTZ OP_IF_LEZ OP_AGET OP_AGET_WIDE OP_AGET_OBJECT OP_AGET_BOOLEAN OP_AGET_BYTE OP_AGET_CHAR OP_AGET_SHORT OP_APUT OP_APUT_WIDE OP_APUT_OBJECT OP_APUT_BOOLEAN OP_APUT_BYTE OP_APUT_CHAR 
OP_APUT_SHORT OP_IGET OP_IGET_WIDE OP_IGET_OBJECT OP_IGET_BOOLEAN OP_IGET_BYTE OP_IGET_CHAR OP_IGET_SHORT OP_IPUT OP_IPUT_WIDE OP_IPUT_OBJECT OP_IPUT_BOOLEAN OP_IPUT_BYTE OP_IPUT_CHAR OP_IPUT_SHORT OP_SGET OP_SGET_WIDE OP_SGET_OBJECT OP_SGET_BOOLEAN OP_SGET_BYTE OP_SGET_CHAR OP_SGET_SHORT OP_SPUT OP_SPUT_WIDE OP_SPUT_OBJECT OP_SPUT_BOOLEAN OP_SPUT_BYTE OP_SPUT_CHAR OP_SPUT_SHORT OP_ADD_INT OP_SUB_INT OP_MUL_INT OP_DIV_INT OP_REM_INT OP_AND_INT OP_OR_INT OP_XOR_INT OP_SHL_INT OP_SHR_INT OP_USHR_INT OP_ADD_LONG OP_SUB_LONG OP_MUL_LONG OP_DIV_LONG OP_REM_LONG OP_AND_LONG OP_OR_LONG OP_XOR_LONG OP_SHL_LONG OP_SHR_LONG OP_USHR_LONG OP_ADD_FLOAT OP_SUB_FLOAT OP_MUL_FLOAT OP_DIV_FLOAT OP_REM_FLOAT OP_ADD_DOUBLE OP_SUB_DOUBLE OP_MUL_DOUBLE OP_DIV_DOUBLE OP_REM_DOUBLE OP_ADD_INT_LIT16 OP_RSUB_INT OP_MUL_INT_LIT16 OP_DIV_INT_LIT16 OP_REM_INT_LIT16 OP_AND_INT_LIT16 OP_OR_INT_LIT16 OP_XOR_INT_LIT16 OP_ADD_INT_LIT8 OP_RSUB_INT_LIT8 OP_MUL_INT_LIT8 OP_DIV_INT_LIT8 OP_REM_INT_LIT8 OP_AND_INT_LIT8 OP_OR_INT_LIT8 OP_XOR_INT_LIT8 OP_SHL_INT_LIT8 OP_SHR_INT_LIT8 OP_USHR_INT_LIT8'''.split() for name in _w2_ops: _opcode = name_2_opcodes[name] opcode_widths[_opcode] = 2 pass _w3_ops = '''OP_MOVE_16 OP_MOVE_WIDE_16 OP_MOVE_OBJECT_16 OP_CONST OP_CONST_WIDE_32 OP_CONST_STRING_JUMBO OP_GOTO_32 OP_FILLED_NEW_ARRAY OP_FILLED_NEW_ARRAY_RANGE OP_FILL_ARRAY_DATA OP_PACKED_SWITCH OP_SPARSE_SWITCH OP_INVOKE_VIRTUAL OP_INVOKE_SUPER OP_INVOKE_DIRECT OP_INVOKE_STATIC OP_INVOKE_INTERFACE OP_INVOKE_VIRTUAL_RANGE OP_INVOKE_SUPER_RANGE OP_INVOKE_DIRECT_RANGE OP_INVOKE_STATIC_RANGE OP_INVOKE_INTERFACE_RANGE'''.split() for name in _w3_ops: _opcode = name_2_opcodes[name] opcode_widths[_opcode] = 3 pass opcode_widths[all_opcodes.OP_CONST_WIDE] = 5 # # Optimized instructions. We return negative size values for these # to distinguish them. 
# _w_2_ops = '''OP_IGET_QUICK OP_IGET_WIDE_QUICK OP_IGET_OBJECT_QUICK OP_IPUT_QUICK OP_IPUT_WIDE_QUICK OP_IPUT_OBJECT_QUICK'''.split() for name in _w_2_ops: _opcode = name_2_opcodes[name] opcode_widths[_opcode] = -2 pass _w_3_ops = '''OP_INVOKE_VIRTUAL_QUICK OP_INVOKE_VIRTUAL_QUICK_RANGE OP_INVOKE_SUPER_QUICK OP_INVOKE_SUPER_QUICK_RANGE OP_EXECUTE_INLINE OP_INVOKE_DIRECT_EMPTY'''.split() for name in _w_3_ops: _opcode = name_2_opcodes[name] opcode_widths[_opcode] = -3 pass _w0_ops = '''OP_UNUSED_3E OP_UNUSED_3F OP_UNUSED_40 OP_UNUSED_41 OP_UNUSED_42 OP_UNUSED_43 OP_UNUSED_73 OP_UNUSED_79 OP_UNUSED_7A OP_UNUSED_E3 OP_UNUSED_E4 OP_UNUSED_E5 OP_UNUSED_E6 OP_UNUSED_E7 OP_UNUSED_E8 OP_UNUSED_E9 OP_UNUSED_EA OP_UNUSED_EB OP_UNUSED_EC OP_UNUSED_ED OP_UNUSED_EF OP_UNUSED_F1 OP_UNUSED_FC OP_UNUSED_FD OP_UNUSED_FE OP_UNUSED_FF'''.split() class all_opcode_fmts(object): kFmtUnknown = 0 kFmt10x = 1 kFmt12x = 2 kFmt11n = 3 kFmt11x = 4 kFmt10t = 5 kFmt20t = 6 kFmt22x = 7 kFmt21t = 8 kFmt21s = 9 kFmt21h = 10 kFmt21c = 11 kFmt23x = 12 kFmt22b = 13 kFmt22t = 14 kFmt22s = 15 kFmt22c = 16 kFmt22cs = 17 kFmt32x = 18 kFmt30t = 19 kFmt31t = 20 kFmt31i = 21 kFmt31c = 22 kFmt35c = 23 kFmt35ms = 24 kFmt35fs = 25 kFmt3rc = 26 kFmt3rms = 27 kFmt3rfs = 28 kFmt3inline = 29 kFmt51l = 30 pass fmt_names = '''kFmtUnknown kFmt10x kFmt12x kFmt11n kFmt11x kFmt10t kFmt20t kFmt22x kFmt21t kFmt21s kFmt21h kFmt21c kFmt23x kFmt22b kFmt22t kFmt22s kFmt22c kFmt22cs kFmt32x kFmt30t kFmt31t kFmt31i kFmt31c kFmt35c kFmt35ms kFmt35fs kFmt3rc kFmt3rms kFmt3rfs kFmt3inline kFmt51l '''.split() ## \brief Map opcodes to their formats opcode_fmts = [0] * 256 opcode_fmts[all_opcodes.OP_GOTO] = all_opcode_fmts.kFmt10t _names = '''OP_NOP OP_RETURN_VOID'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt10x pass opcode_fmts[all_opcodes.OP_CONST_4] = all_opcode_fmts.kFmt11n _names = '''OP_CONST_HIGH16 OP_CONST_WIDE_HIGH16'''.split() for name in _names: _opcode = 
name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt21h pass _names = '''OP_MOVE_RESULT OP_MOVE_RESULT_WIDE OP_MOVE_RESULT_OBJECT OP_MOVE_EXCEPTION OP_RETURN OP_RETURN_WIDE OP_RETURN_OBJECT OP_MONITOR_ENTER OP_MONITOR_EXIT OP_THROW'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt11x pass _names = '''OP_MOVE OP_MOVE_WIDE OP_MOVE_OBJECT OP_ARRAY_LENGTH OP_NEG_INT OP_NOT_INT OP_NEG_LONG OP_NOT_LONG OP_NEG_FLOAT OP_NEG_DOUBLE OP_INT_TO_LONG OP_INT_TO_FLOAT OP_INT_TO_DOUBLE OP_LONG_TO_INT OP_LONG_TO_FLOAT OP_LONG_TO_DOUBLE OP_FLOAT_TO_INT OP_FLOAT_TO_LONG OP_FLOAT_TO_DOUBLE OP_DOUBLE_TO_INT OP_DOUBLE_TO_LONG OP_DOUBLE_TO_FLOAT OP_INT_TO_BYTE OP_INT_TO_CHAR OP_INT_TO_SHORT OP_ADD_INT_2ADDR OP_SUB_INT_2ADDR OP_MUL_INT_2ADDR OP_DIV_INT_2ADDR OP_REM_INT_2ADDR OP_AND_INT_2ADDR OP_OR_INT_2ADDR OP_XOR_INT_2ADDR OP_SHL_INT_2ADDR OP_SHR_INT_2ADDR OP_USHR_INT_2ADDR OP_ADD_LONG_2ADDR OP_SUB_LONG_2ADDR OP_MUL_LONG_2ADDR OP_DIV_LONG_2ADDR OP_REM_LONG_2ADDR OP_AND_LONG_2ADDR OP_OR_LONG_2ADDR OP_XOR_LONG_2ADDR OP_SHL_LONG_2ADDR OP_SHR_LONG_2ADDR OP_USHR_LONG_2ADDR OP_ADD_FLOAT_2ADDR OP_SUB_FLOAT_2ADDR OP_MUL_FLOAT_2ADDR OP_DIV_FLOAT_2ADDR OP_REM_FLOAT_2ADDR OP_ADD_DOUBLE_2ADDR OP_SUB_DOUBLE_2ADDR OP_MUL_DOUBLE_2ADDR OP_DIV_DOUBLE_2ADDR OP_REM_DOUBLE_2ADDR'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt12x pass opcode_fmts[all_opcodes.OP_GOTO_16] = all_opcode_fmts.kFmt20t opcode_fmts[all_opcodes.OP_GOTO_32] = all_opcode_fmts.kFmt30t _names = '''OP_CONST_STRING OP_CONST_CLASS OP_CHECK_CAST OP_NEW_INSTANCE OP_SGET OP_SGET_WIDE OP_SGET_OBJECT OP_SGET_BOOLEAN OP_SGET_BYTE OP_SGET_CHAR OP_SGET_SHORT OP_SPUT OP_SPUT_WIDE OP_SPUT_OBJECT OP_SPUT_BOOLEAN OP_SPUT_BYTE OP_SPUT_CHAR OP_SPUT_SHORT'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt21c pass _names = '''OP_CONST_16 OP_CONST_WIDE_16'''.split() for name in 
_names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt21s pass _names = '''OP_IF_EQZ OP_IF_NEZ OP_IF_LTZ OP_IF_GEZ OP_IF_GTZ OP_IF_LEZ'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt21t pass _names = '''OP_FILL_ARRAY_DATA OP_PACKED_SWITCH OP_SPARSE_SWITCH'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt31t pass _names = '''OP_ADD_INT_LIT8 OP_RSUB_INT_LIT8 OP_MUL_INT_LIT8 OP_DIV_INT_LIT8 OP_REM_INT_LIT8 OP_AND_INT_LIT8 OP_OR_INT_LIT8 OP_XOR_INT_LIT8 OP_SHL_INT_LIT8 OP_SHR_INT_LIT8 OP_USHR_INT_LIT8'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt22b pass _names = '''OP_INSTANCE_OF OP_NEW_ARRAY OP_IGET OP_IGET_WIDE OP_IGET_OBJECT OP_IGET_BOOLEAN OP_IGET_BYTE OP_IGET_CHAR OP_IGET_SHORT OP_IPUT OP_IPUT_WIDE OP_IPUT_OBJECT OP_IPUT_BOOLEAN OP_IPUT_BYTE OP_IPUT_CHAR OP_IPUT_SHORT'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt22c pass _names = '''OP_ADD_INT_LIT16 OP_RSUB_INT OP_MUL_INT_LIT16 OP_DIV_INT_LIT16 OP_REM_INT_LIT16 OP_AND_INT_LIT16 OP_OR_INT_LIT16 OP_XOR_INT_LIT16'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt22s pass _names = '''OP_IF_EQ OP_IF_NE OP_IF_LT OP_IF_GE OP_IF_GT OP_IF_LE'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt22t pass _names = '''OP_MOVE_FROM16 OP_MOVE_WIDE_FROM16 OP_MOVE_OBJECT_FROM16'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt22x pass _names = '''OP_CMPL_FLOAT OP_CMPG_FLOAT OP_CMPL_DOUBLE OP_CMPG_DOUBLE OP_CMP_LONG OP_AGET OP_AGET_WIDE OP_AGET_OBJECT OP_AGET_BOOLEAN OP_AGET_BYTE OP_AGET_CHAR OP_AGET_SHORT OP_APUT OP_APUT_WIDE OP_APUT_OBJECT OP_APUT_BOOLEAN OP_APUT_BYTE OP_APUT_CHAR OP_APUT_SHORT OP_ADD_INT 
OP_SUB_INT OP_MUL_INT OP_DIV_INT OP_REM_INT OP_AND_INT OP_OR_INT OP_XOR_INT OP_SHL_INT OP_SHR_INT OP_USHR_INT OP_ADD_LONG OP_SUB_LONG OP_MUL_LONG OP_DIV_LONG OP_REM_LONG OP_AND_LONG OP_OR_LONG OP_XOR_LONG OP_SHL_LONG OP_SHR_LONG OP_USHR_LONG OP_ADD_FLOAT OP_SUB_FLOAT OP_MUL_FLOAT OP_DIV_FLOAT OP_REM_FLOAT OP_ADD_DOUBLE OP_SUB_DOUBLE OP_MUL_DOUBLE OP_DIV_DOUBLE OP_REM_DOUBLE'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt23x pass _names = '''OP_CONST OP_CONST_WIDE_32'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt31i pass opcode_fmts[all_opcodes.OP_CONST_STRING_JUMBO] = all_opcode_fmts.kFmt31c _names = '''OP_MOVE_16 OP_MOVE_WIDE_16 OP_MOVE_OBJECT_16'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt32x pass _names = '''OP_FILLED_NEW_ARRAY OP_INVOKE_VIRTUAL OP_INVOKE_SUPER OP_INVOKE_DIRECT OP_INVOKE_STATIC OP_INVOKE_INTERFACE'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt35c pass _names = '''OP_FILLED_NEW_ARRAY_RANGE OP_INVOKE_VIRTUAL_RANGE OP_INVOKE_SUPER_RANGE OP_INVOKE_DIRECT_RANGE OP_INVOKE_STATIC_RANGE OP_INVOKE_INTERFACE_RANGE'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt3rc pass opcode_fmts[all_opcodes.OP_CONST_WIDE] = all_opcode_fmts.kFmt51l _names = '''OP_IGET_QUICK OP_IGET_WIDE_QUICK OP_IGET_OBJECT_QUICK OP_IPUT_QUICK OP_IPUT_WIDE_QUICK OP_IPUT_OBJECT_QUICK'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt22cs pass _names = '''OP_INVOKE_VIRTUAL_QUICK OP_INVOKE_SUPER_QUICK'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmt35ms pass _names = '''OP_INVOKE_VIRTUAL_QUICK_RANGE OP_INVOKE_SUPER_QUICK_RANGE'''.split() for name in _names: _opcode = name_2_opcodes[name] 
opcode_fmts[_opcode] = all_opcode_fmts.kFmt3rms pass opcode_fmts[all_opcodes.OP_EXECUTE_INLINE] = all_opcode_fmts.kFmt3inline opcode_fmts[all_opcodes.OP_INVOKE_DIRECT_EMPTY] = all_opcode_fmts.kFmt35c _names = '''OP_UNUSED_3E OP_UNUSED_3F OP_UNUSED_40 OP_UNUSED_41 OP_UNUSED_42 OP_UNUSED_43 OP_UNUSED_73 OP_UNUSED_79 OP_UNUSED_7A OP_UNUSED_E3 OP_UNUSED_E4 OP_UNUSED_E5 OP_UNUSED_E6 OP_UNUSED_E7 OP_UNUSED_E8 OP_UNUSED_E9 OP_UNUSED_EA OP_UNUSED_EB OP_UNUSED_EC OP_UNUSED_ED OP_UNUSED_EF OP_UNUSED_F1 OP_UNUSED_FC OP_UNUSED_FD OP_UNUSED_FE OP_UNUSED_FF'''.split() for name in _names: _opcode = name_2_opcodes[name] opcode_fmts[_opcode] = all_opcode_fmts.kFmtUnknown pass ## \brief String for string format from name of format. fmt_fmtstr = { 'kFmtUnknown': '', 'kFmt10x': '', 'kFmt12x': 'v{0}, v{1}', 'kFmt11n': 'v{0}, #+{1}', 'kFmt11x': 'v{0}', 'kFmt10t': '+{0}', 'kFmt20t': '+{0}', 'kFmt22x': 'v{0}, {1:04x}', 'kFmt21t': 'v{0}, +{1:04x}', 'kFmt21s': 'v{0}, #+{1:04x}', 'kFmt21h': 'v{0}, #+{1:04x}00000[00000000]', 'kFmt21c': 'v{0}, thing@{1:04x}', 'kFmt23x': 'v{0}, v{1}, v{2}', 'kFmt22b': 'v{0}, v{1}, #+{2:02x}', 'kFmt22t': 'v{0}, v{1}, +{2:04x}', 'kFmt22s': 'v{0}, v{1}, #+{2:04x}', 'kFmt22c': 'v{0}, v{1}, thing@{2:04x}', 'kFmt22cs': 'v0{}, v{1}, field offset {2:04x}', 'kFmt32x': 'v{0:04x}, v{1:04x}', 'kFmt30t': '+{0:08x}', 'kFmt31t': 'v{0}, +{1:08x}', 'kFmt31i': 'v{0}, #+{1:08x}', 'kFmt31c': 'v{0}, thing@{1:08x}', 'kFmt35c': '{{v{3}, v{4}, v{5}, v{6}, v{0}}}, meth@{2:04x} ({1}: count, A: vG)', 'kFmt35ms': '{{v{3}, v{4}, v{5}, v{6}, v{0}}}, meth@{2:04x} ({1}: count, A: vG)', 'kFmt35fs': 'invoke-interface', 'kFmt3rc': '{{v{2:04x} .. v({2:04x}+{0:02x}-1)}}, meth@{1:04x}', 'kFmt3rms': '{{v{2:04x} .. 
v({2:04x}+{0:02x}-1)}}, meth@{1:04x}', 'kFmt3rfs': 'invoke-interface/range', 'kFmt3inline': '{{v{3}, v{4}, v{5}, v{6}}} thing@{2:04x} ({1}: count)', 'kFmt51l': 'v{0}, #+{1:016x}' } ## \brief Map from names of opcode formats to tuples of fields size fmt_parse_cfgs = { 'kFmtUnknown': (), # '' 'kFmt10x': (), # '' 'kFmt12x': (1, 1), # 'vA, vB' 'kFmt11n': (1, 1), # 'vA, #+B' 'kFmt11x': (2,), # 'vAA' 'kFmt10t': (2,), # '+AA' 'kFmt20t': (4,), # '+AAAA' 'kFmt22x': (2, 4), # 'vAA, vBBBB' 'kFmt21t': (2, 4), # 'vAA, +BBBB' 'kFmt21s': (2, 4), # 'vAA, #+BBBB' 'kFmt21h': (2, 4), # 'vAA, #+BBBB00000[00000000]' 'kFmt21c': (2, 4), # 'vAA, thing@BBBB' 'kFmt23x': (2, 2, 2), # 'vAA, vBB, vCC' 'kFmt22b': (2, 2, 2), # 'vAA, vBB, #+CC' 'kFmt22t': (1, 1, 4), # 'vA, vB, +CCCC' 'kFmt22s': (1, 1, 4), # 'vA, vB, #+CCCC' 'kFmt22c': (1, 1, 4), # 'vA, vB, thing@CCCC' 'kFmt22cs': (1, 1, 4), # 'vA, vB, field offset CCCC' 'kFmt32x': (4, 4), # 'vAAAA, vBBBB' 'kFmt30t': (8,), # '+AAAAAAAA' 'kFmt31t': (2, 8), # 'vAA, +BBBBBBBB' 'kFmt31i': (2, 8), # 'vAA, #+BBBBBBBB' 'kFmt31c': (2, 8), # 'vAA, thing@BBBBBBBB' # '{vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG)' 'kFmt35c': (1, 1, 4, 1, 1, 1, 1), 'kFmt35ms': (1, 1, 4, 1, 1, 1, 1), # 'invoke-virtual+super' 'kFmt35fs': (), # 'invoke-interface' 'kFmt3rc': (2, 4, 4), # '{vCCCC .. 
v(CCCC+AA-1)}, meth@BBBB' 'kFmt3rms': (2, 4, 4), # 'invoke-virtual+super/range' 'kFmt3rfs': (), # 'invoke-interface/range' 'kFmt3inline': (1, 1, 4, 1, 1, 1, 1), # 'inline invoke' 'kFmt51l': (2, 16) # 'vAA, #+BBBBBBBBBBBBBBBB' } ## \brief Decode an instruction def decode_inst(insn): def build_arg(sz): if sz == 1: if build_arg.off & 0x1: arg = ord(insn[build_arg.off / 2]) >> 4 else: arg = ord(insn[build_arg.off / 2]) & 0xf pass build_arg.off = build_arg.off + 1 pass elif sz == 2: build_arg.off = (build_arg.off + 1) & ~0x1 arg = ord(insn[build_arg.off / 2]) build_arg.off = build_arg.off + 2 elif sz == 4: build_arg.off = (build_arg.off + 3) & ~0x3 arg = ord(insn[build_arg.off / 2]) | \ (ord(insn[build_arg.off / 2 + 1]) << 8) build_arg.off = build_arg.off + 4 elif sz == 8: build_arg.off = (build_arg.off + 3) & ~0x3 arg = ord(insn[build_arg.off / 2]) | \ (ord(insn[build_arg.off / 2 + 1]) << 8) | \ (ord(insn[build_arg.off / 2 + 2]) << 16)| \ (ord(insn[build_arg.off / 2 + 3]) << 24) build_arg.off = build_arg.off + 8 elif sz == 16: build_arg.off = (build_arg.off + 3) & ~0x3 arg = ord(insn[build_arg.off / 2]) | \ (ord(insn[build_arg.off / 2 + 1]) << 8) | \ (ord(insn[build_arg.off / 2 + 2]) << 16)| \ (ord(insn[build_arg.off / 2 + 3]) << 24)| \ (ord(insn[build_arg.off / 2 + 4]) << 32)| \ (ord(insn[build_arg.off / 2 + 5]) << 40)| \ (ord(insn[build_arg.off / 2 + 6]) << 48)| \ (ord(insn[build_arg.off / 2 + 7]) << 52) build_arg.off = build_arg.off + 16 else: raise ValueError, 'Invalid argument size %d' % (sz) return arg build_arg.off = 2 opcode = ord(insn[0]) fmt = opcode_fmts[opcode] fmt_name = fmt_names[fmt] fmt_parse_cfg = fmt_parse_cfgs[fmt_name] args = tuple([build_arg(sz) for sz in fmt_parse_cfg]) op_vector = (opcode, args) return op_vector ## \brief Encode a opcode and arguments vector to an instruction def encode_inst(op_vector): def encode_arg(arg, sz): if sz == 1: if encode_arg.off & 0x1: arg_txt = chr((arg << 4) | encode_arg.value) else: arg_txt = '' encode_arg.value = 
arg pass encode_arg.off = encode_arg.off + 1 pass elif sz in (2, 4, 8, 16): if encode_arg.off & 0x1: arg_txt = chr(encode_arg.value) encode_arg.off = encode_arg.off + 1 else: arg_txt = '' pass if sz == 2: arg_txt = arg_txt + chr(arg) encode_arg.off = encode_arg.off + 2 elif sz in (4, 8, 16): while encode_arg.off % 4: encode_arg.off = encoe_arg.off + 1 arg_txt = arg_txt + chr(0) pass for i in range(sz / 4): arg_txt = arg_txt + \ chr(arg & 0xff) + chr((arg >> 8) & 0xff) pass pass pass else: raise ValueError, 'invalid argument size %d' % (sz) return arg_txt encode_arg.off = 2 encode_arg.value = 0 opcode, args = op_vector fmt = opcode_fmts[opcode] fmt_name = fmt_names[fmt] fmt_parse_cfg = fmt_parse_cfgs[fmt_name] arg_part = ''.join([encode_arg(arg, sz) for arg, sz in map(None, args, fmt_parse_cfg)]) while encode_arg.off % 4: arg_part = arg_part + encode_arg(0, 1) pass inst = chr(opcode) + arg_part return inst ## \brief Generate formated string for given opcode and arguments vector def format_inst(op_vector): opcode, args = op_vector opname = opcode_names[opcode] fmt = opcode_fmts[opcode] fmt_name = fmt_names[fmt] fmt_str = fmt_fmtstr[fmt_name] if fmt_str: line = opname + ' ' + fmt_str.format(*args) else: line = opname pass return line ## \brief Split a block of code into instructions. def split_blk_2_insns(insn_blk): start = 0 while start < len(insn_blk): opcode = ord(insn_blk[start]) width = opcode_widths[opcode] stop = start + width * 2 inst = insn_blk[start:stop] yield inst start = stop pass pass ## \brief Decode a block of instructions. # # Split and docode a block of instructions into a list of opcode # vectors. # # \param insn_blk is a string that is block of instructions. # \return a list of (opcode, args) vectors. # def decode_insn_blk(insn_blk): insns = split_blk_2_insns(insn_blk) decoded_insns = [decode_inst(insn) for insn in insns] return decoded_insns del name del _names del _opcode