#!/usr/bin/env python # -*- coding: utf-8 -*- # $Id: IEMAllInstructionsPython.py $ """ IEM instruction extractor. This script/module parses the IEMAllInstruction*.cpp.h files next to it and collects information about the instructions. It can then be used to generate disassembler tables and tests. """ __copyright__ = \ """ Copyright (C) 2017-2023 Oracle and/or its affiliates. This file is part of VirtualBox base platform packages, as available from https://www.virtualbox.org. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, in version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . The contents of this file may alternatively be used under the terms of the Common Development and Distribution License Version 1.0 (CDDL), a copy of it is provided in the "COPYING.CDDL" file included in the VirtualBox distribution, in which case the provisions of the CDDL are applicable instead of those of the GPL. You may elect to license modified versions of this file under the terms and conditions of either the GPL or the CDDL or both. SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 """ __version__ = "$Revision: 155244 $" # pylint: disable=anomalous-backslash-in-string # Standard python imports. import os import re import sys ## Only the main script needs to modify the path. #g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), # 'ValidationKit'); #sys.path.append(g_ksValidationKitDir); # #from common import utils; - Windows build boxes doesn't have pywin32. # Python 3 hacks: if sys.version_info[0] >= 3: long = int; # pylint: disable=redefined-builtin,invalid-name g_kdX86EFlagsConstants = { 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0) 'X86_EFL_1': 0x00000002, # RT_BIT_32(1) 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2) 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4) 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6) 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7) 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8) 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9) 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10) 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11) 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13)) 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14) 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16) 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17) 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18) 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19) 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20) 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21) 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5) 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1) }; ## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear. g_kdEFlagsMnemonics = { # Debugger flag notation (sorted by value): 'cf': 'X86_EFL_CF', ##< Carry Flag. 'nc': '!X86_EFL_CF', ##< No Carry. 'po': 'X86_EFL_PF', ##< Parity Pdd. 'pe': '!X86_EFL_PF', ##< Parity Even. 'af': 'X86_EFL_AF', ##< Aux Flag. 'na': '!X86_EFL_AF', ##< No Aux. 'zr': 'X86_EFL_ZF', ##< ZeRo. 'nz': '!X86_EFL_ZF', ##< No Zero. 'ng': 'X86_EFL_SF', ##< NeGative (sign). 'pl': '!X86_EFL_SF', ##< PLuss (sign). 'tf': 'X86_EFL_TF', ##< Trap flag. 'ei': 'X86_EFL_IF', ##< Enabled Interrupts. 'di': '!X86_EFL_IF', ##< Disabled Interrupts. 'dn': 'X86_EFL_DF', ##< DowN (string op direction). 'up': '!X86_EFL_DF', ##< UP (string op direction). 'ov': 'X86_EFL_OF', ##< OVerflow. 'nv': '!X86_EFL_OF', ##< No Overflow. 'nt': 'X86_EFL_NT', ##< Nested Task. 'rf': 'X86_EFL_RF', ##< Resume Flag. 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode. 'ac': 'X86_EFL_AC', ##< Alignment Check. 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag. 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending. # Reference manual notation not covered above (sorted by value): 'pf': 'X86_EFL_PF', 'zf': 'X86_EFL_ZF', 'sf': 'X86_EFL_SF', 'if': 'X86_EFL_IF', 'df': 'X86_EFL_DF', 'of': 'X86_EFL_OF', 'iopl': 'X86_EFL_IOPL', 'id': 'X86_EFL_ID', }; ## Constants and values for CR0. g_kdX86Cr0Constants = { 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0) 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1) 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2) 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3) 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4) 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5) 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16) 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18) 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29) 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30) 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31) }; ## Constants and values for CR4. g_kdX86Cr4Constants = { 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0) 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1) 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2) 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3) 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4) 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5) 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6) 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7) 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8) 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9) 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10) 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13) 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14) 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17) 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18) 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20) 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21) 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22) }; ## XSAVE components (XCR0). g_kdX86XSaveCConstants = { 'XSAVE_C_X87': 0x00000001, 'XSAVE_C_SSE': 0x00000002, 'XSAVE_C_YMM': 0x00000004, 'XSAVE_C_BNDREGS': 0x00000008, 'XSAVE_C_BNDCSR': 0x00000010, 'XSAVE_C_OPMASK': 0x00000020, 'XSAVE_C_ZMM_HI256': 0x00000040, 'XSAVE_C_ZMM_16HI': 0x00000080, 'XSAVE_C_PKRU': 0x00000200, 'XSAVE_C_LWP': 0x4000000000000000, 'XSAVE_C_X': 0x8000000000000000, 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits. 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits. }; ## \@op[1-4] locations g_kdOpLocations = { 'reg': [], ## modrm.reg 'rm': [], ## modrm.rm 'imm': [], ## immediate instruction data 'vvvv': [], ## VEX.vvvv # fixed registers. 'AL': [], 'rAX': [], 'rDX': [], 'rSI': [], 'rDI': [], 'rFLAGS': [], 'CS': [], 'DS': [], 'ES': [], 'FS': [], 'GS': [], 'SS': [], }; ## \@op[1-4] types ## ## Value fields: ## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM). ## - 1: the location (g_kdOpLocations). ## - 2: disassembler format string version of the type. ## - 3: disassembler OP_PARAM_XXX (XXX only). ## - 4: IEM form matching instruction. ## ## Note! See the A.2.1 in SDM vol 2 for the type names. g_kdOpTypes = { # Fixed addresses 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ), # ModR/M.rm 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ), 'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ), 'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ), 'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ), 'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ), 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ), 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ), 'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ), 'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ), 'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ), 'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ), 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ), 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ), 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ), 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ), 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ), 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ), 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ), 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ), 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ), 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ), 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ), 'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ), 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ), 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ), 'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ), # ModR/M.rm - register only. 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ), 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ), 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ), 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ), 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ), 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ), 'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ), 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ), # ModR/M.rm - memory only. 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND. 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ), 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ), 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ), 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ), 'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ), 'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ), 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ), 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ), 'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ), 'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ), 'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ), 'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ), 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ), 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ), # ModR/M.reg 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ), 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ), 'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ), 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ), 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ), 'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ), 'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ), 'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ), 'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ), 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ), 'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ), 'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ), 'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ), 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ), 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ), 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ), 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ), 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ), 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ), 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ), 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ), 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ), 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ), 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ), 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ), 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ), 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ), 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ), 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ), 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ), 'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ), 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ), # VEX.vvvv 'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ), 'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ), 'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ), 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ), 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ), 'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ), 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ), 'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ), # Immediate values. 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs. 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ), 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ), 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ), 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword # Address operands (no ModR/M). 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ), 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ), # Relative jump targets 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ), 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ), # DS:rSI 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ), 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ), # ES:rDI 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ), 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ), 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ), # Fixed registers. 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ), 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ), 'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ), 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ), 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ), 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ), 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ), 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ), }; # IDX_ParseFixedReg # IDX_ParseVexDest ## IEMFORM_XXX mappings. g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ), 'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ), 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ), 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ), 'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ), 'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ), 'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ), 'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ), 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ), 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ), 'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ), 'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ), 'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ), 'M': ( 'ModR/M', [ 'rm', ], '', ), 'M_REG': ( 'ModR/M', [ 'rm', ], '', ), 'M_MEM': ( 'ModR/M', [ 'rm', ], '', ), 'R': ( 'ModR/M', [ 'reg', ], '', ), 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ), 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ), 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ), 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ), 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ), 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ), 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ), 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ), 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ), 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ), 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ), 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ), 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ), 'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ), 'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ), 'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ), 'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ), 'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ), 'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ), 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ), 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ), 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ), 'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ), 'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ), 'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ), 'FIXED': ( 'fixed', None, '', ), }; ## \@oppfx values. g_kdPrefixes = { 'none': [], '0x66': [], '0xf3': [], '0xf2': [], }; ## Special \@opcode tag values. g_kdSpecialOpcodes = { '/reg': [], 'mr/reg': [], '11 /reg': [], '!11 /reg': [], '11 mr/reg': [], '!11 mr/reg': [], }; ## Special \@opcodesub tag values. ## The first value is the real value for aliases. ## The second value is for bs3cg1. g_kdSubOpcodes = { 'none': [ None, '', ], '11 mr/reg': [ '11 mr/reg', '', ], '11': [ '11 mr/reg', '', ], ##< alias '!11 mr/reg': [ '!11 mr/reg', '', ], '!11': [ '!11 mr/reg', '', ], ##< alias 'rex.w=0': [ 'rex.w=0', 'WZ', ], 'w=0': [ 'rex.w=0', '', ], ##< alias 'rex.w=1': [ 'rex.w=1', 'WNZ', ], 'w=1': [ 'rex.w=1', '', ], ##< alias 'vex.l=0': [ 'vex.l=0', 'L0', ], 'vex.l=1': [ 'vex.l=0', 'L1', ], '11 mr/reg vex.l=0': [ '11 mr/reg vex.l=0', 'L0', ], '11 mr/reg vex.l=1': [ '11 mr/reg vex.l=1', 'L1', ], '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0', ], '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1', ], }; ## Valid values for \@openc g_kdEncodings = { 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc). 'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc). 'prefix': [ None, ], ##< Prefix }; ## \@opunused, \@opinvalid, \@opinvlstyle g_kdInvalidStyles = { 'immediate': [], ##< CPU stops decoding immediately after the opcode. 'vex.modrm': [], ##< VEX+ModR/M, everyone. 'intel-modrm': [], ##< Intel decodes ModR/M. 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-byte immediate. 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.) 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-byte immediate. }; g_kdCpuNames = { '8086': (), '80186': (), '80286': (), '80386': (), '80486': (), }; ## \@opcpuid g_kdCpuIdFlags = { 'vme': 'X86_CPUID_FEATURE_EDX_VME', 'tsc': 'X86_CPUID_FEATURE_EDX_TSC', 'msr': 'X86_CPUID_FEATURE_EDX_MSR', 'cx8': 'X86_CPUID_FEATURE_EDX_CX8', 'sep': 'X86_CPUID_FEATURE_EDX_SEP', 'cmov': 'X86_CPUID_FEATURE_EDX_CMOV', 'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH', 'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT', 'mmx': 'X86_CPUID_FEATURE_EDX_MMX', 'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR', 'sse': 'X86_CPUID_FEATURE_EDX_SSE', 'sse2': 'X86_CPUID_FEATURE_EDX_SSE2', 'sse3': 'X86_CPUID_FEATURE_ECX_SSE3', 'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64', 'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS', 'vmx': 'X86_CPUID_FEATURE_ECX_VMX', 'smx': 'X86_CPUID_FEATURE_ECX_TM2', 'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3', 'fma': 'X86_CPUID_FEATURE_ECX_FMA', 'cx16': 'X86_CPUID_FEATURE_ECX_CX16', 'pcid': 'X86_CPUID_FEATURE_ECX_PCID', 'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1', 'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2', 'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE', 'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT', 'aes': 'X86_CPUID_FEATURE_ECX_AES', 'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE', 'avx': 'X86_CPUID_FEATURE_ECX_AVX', 'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2', 'f16c': 'X86_CPUID_FEATURE_ECX_F16C', 'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND', 'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX', '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX', '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW', 'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM', 'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L', 'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM', 'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A', '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF', 'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP', 'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4', }; ## \@ophints values. g_kdHints = { 'invalid': 'DISOPTYPE_INVALID', ##< 'harmless': 'DISOPTYPE_HARMLESS', ##< 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##< 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##< 'dangerous': 'DISOPTYPE_DANGEROUS', ##< 'portio': 'DISOPTYPE_PORTIO', ##< 'privileged': 'DISOPTYPE_PRIVILEGED', ##< 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##< 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##< 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##< 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##< 'interrupt': 'DISOPTYPE_INTERRUPT', ##< 'illegal': 'DISOPTYPE_ILLEGAL', ##< 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0. 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0. 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss) */ 'portio_read': 'DISOPTYPE_PORTIO_READ', ##< 'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##< 'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode 'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode 'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size 'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes 'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte 'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b 'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes ## (only in 16 & 32 bits mode!) 'avx': 'DISOPTYPE_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet! 'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet! 'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet! 'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet! 'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h. 'ignores_rexw': '', ##< Ignores REX.W. 'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_op_sizes". 'vex_l_zero': '', ##< VEX.L must be 0. 'vex_l_ignored': '', ##< VEX.L is ignored. 'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?) 'lock_allowed': '', ##< Lock prefix allowed. }; ## \@opxcpttype values (see SDMv2 2.4, 2.7). g_kdXcptTypes = { 'none': [], '1': [], '2': [], '3': [], '4': [], '4UA': [], '5': [], '5LZ': [], # LZ = VEX.L must be zero. '6': [], '7': [], '7LZ': [], '8': [], '11': [], '12': [], 'E1': [], 'E1NF': [], 'E2': [], 'E3': [], 'E3NF': [], 'E4': [], 'E4NF': [], 'E5': [], 'E5NF': [], 'E6': [], 'E6NF': [], 'E7NF': [], 'E9': [], 'E9NF': [], 'E10': [], 'E11': [], 'E12': [], 'E12NF': [], }; def _isValidOpcodeByte(sOpcode): """ Checks if sOpcode is a valid lower case opcode byte. Returns true/false. """ if len(sOpcode) == 4: if sOpcode[:2] == '0x': if sOpcode[2] in '0123456789abcdef': if sOpcode[3] in '0123456789abcdef': return True; return False; class InstructionMap(object): """ Instruction map. The opcode map provides the lead opcode bytes (empty for the one byte opcode map). An instruction can be member of multiple opcode maps as long as it uses the same opcode value within the map (because of VEX). """ kdEncodings = { 'legacy': [], 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3 'xop8': [], ##< XOP prefix with vvvvv = 8 'xop9': [], ##< XOP prefix with vvvvv = 9 'xop10': [], ##< XOP prefix with vvvvv = 10 }; ## Selectors. ## 1. The first value is the number of table entries required by a ## decoder or disassembler for this type of selector. ## 2. The second value is how many entries per opcode byte if applicable. kdSelectors = { 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default). 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes. '/r': [ 8, 1, ], ##< modrm.reg selects the instruction. 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction. 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction. '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11. '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11. '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11. }; ## Define the subentry number according to the Instruction::sPrefix ## value for 'byte+pfx' selected tables. kiPrefixOrder = { 'none': 0, '0x66': 1, '0xf3': 2, '0xf2': 3, }; def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx', sEncoding = 'legacy', sDisParse = None): assert sSelector in self.kdSelectors; assert sEncoding in self.kdEncodings; if asLeadOpcodes is None: asLeadOpcodes = []; else: for sOpcode in asLeadOpcodes: assert _isValidOpcodeByte(sOpcode); assert sDisParse is None or sDisParse.startswith('IDX_Parse'); self.sName = sName; self.sIemName = sIemName; self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'. self.sSelector = sSelector; ##< The member selector, see kdSelectors. self.sEncoding = sEncoding; ##< The encoding, see kdSelectors. self.aoInstructions = [] # type: Instruction self.sDisParse = sDisParse; ##< IDX_ParseXXX. def copy(self, sNewName, sPrefixFilter = None): """ Copies the table with filtering instruction by sPrefix if not None. """ oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes, sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx' else self.sSelector, sEncoding = self.sEncoding, sDisParse = self.sDisParse); if sPrefixFilter is None: oCopy.aoInstructions = list(self.aoInstructions); else: oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter]; return oCopy; def getTableSize(self): """ Number of table entries. This corresponds directly to the selector. """ return self.kdSelectors[self.sSelector][0]; def getEntriesPerByte(self): """ Number of table entries per opcode bytes. This only really makes sense for the 'byte' and 'byte+pfx' selectors, for the others it will just return 1. """ return self.kdSelectors[self.sSelector][1]; def getInstructionIndex(self, oInstr): """ Returns the table index for the instruction. """ bOpcode = oInstr.getOpcodeByte(); # The byte selectors are simple. We need a full opcode byte and need just return it. if self.sSelector == 'byte': assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr); return bOpcode; # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode. if self.sSelector == 'byte+pfx': assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr); assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0; return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384); # The other selectors needs masking and shifting. if self.sSelector == '/r': return (bOpcode >> 3) & 0x7; if self.sSelector == 'mod /r': return (bOpcode >> 3) & 0x1f; if self.sSelector == 'memreg /r': return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3); if self.sSelector == '!11 /r': assert (bOpcode & 0xc0) != 0xc, str(oInstr); return (bOpcode >> 3) & 0x7; if self.sSelector == '11 /r': assert (bOpcode & 0xc0) == 0xc, str(oInstr); return (bOpcode >> 3) & 0x7; if self.sSelector == '11': assert (bOpcode & 0xc0) == 0xc, str(oInstr); return bOpcode & 0x3f; assert False, self.sSelector; return -1; def getInstructionsInTableOrder(self): """ Get instructions in table order. Returns array of instructions. Normally there is exactly one instruction per entry. However the entry could also be None if not instruction was specified for that opcode value. Or there could be a list of instructions to deal with special encodings where for instance prefix (e.g. REX.W) encodes a different instruction or different CPUs have different instructions or prefixes in the same place. """ # Start with empty table. cTable = self.getTableSize(); aoTable = [None] * cTable; # Insert the instructions. for oInstr in self.aoInstructions: if oInstr.sOpcode: idxOpcode = self.getInstructionIndex(oInstr); assert idxOpcode < cTable, str(idxOpcode); oExisting = aoTable[idxOpcode]; if oExisting is None: aoTable[idxOpcode] = oInstr; elif not isinstance(oExisting, list): aoTable[idxOpcode] = list([oExisting, oInstr]); else: oExisting.append(oInstr); return aoTable; def getDisasTableName(self): """ Returns the disassembler table name for this map. """ sName = 'g_aDisas'; for sWord in self.sName.split('_'): if sWord == 'm': # suffix indicating modrm.mod==mem sName += '_m'; elif sWord == 'r': # suffix indicating modrm.mod==reg sName += '_r'; elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord): sName += '_' + sWord; else: sWord = sWord.replace('grp', 'Grp'); sWord = sWord.replace('map', 'Map'); sName += sWord[0].upper() + sWord[1:]; return sName; def getDisasRangeName(self): """ Returns the disassembler table range name for this map. """ return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range'; def isVexMap(self): """ Returns True if a VEX map. """ return self.sEncoding.startswith('vex'); class TestType(object): """ Test value type. This base class deals with integer like values. The fUnsigned constructor parameter indicates the default stance on zero vs sign extending. It is possible to override fUnsigned=True by prefixing the value with '+' or '-'. """ def __init__(self, sName, acbSizes = None, fUnsigned = True): self.sName = sName; self.acbSizes = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes; # Normal sizes. self.fUnsigned = fUnsigned; class BadValue(Exception): """ Bad value exception. """ def __init__(self, sMessage): Exception.__init__(self, sMessage); self.sMessage = sMessage; ## For ascii ~ operator. kdHexInv = { '0': 'f', '1': 'e', '2': 'd', '3': 'c', '4': 'b', '5': 'a', '6': '9', '7': '8', '8': '7', '9': '6', 'a': '5', 'b': '4', 'c': '3', 'd': '2', 'e': '1', 'f': '0', }; def get(self, sValue): """ Get the shortest normal sized byte representation of oValue. Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ). The latter form is for AND+OR pairs where the first entry is what to AND with the field and the second the one or OR with. Raises BadValue if invalid value. """ if not sValue: raise TestType.BadValue('empty value'); # Deal with sign and detect hexadecimal or decimal. fSignExtend = not self.fUnsigned; if sValue[0] == '-' or sValue[0] == '+': fSignExtend = True; fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x'; else: fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x'; # try convert it to long integer. try: iValue = long(sValue, 16 if fHex else 10); except Exception as oXcpt: raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt)); # Convert the hex string and pad it to a decent value. Negative values # needs to be manually converted to something non-negative (~-n + 1). if iValue >= 0: sHex = hex(iValue); if sys.version_info[0] < 3: assert sHex[-1] == 'L'; sHex = sHex[:-1]; assert sHex[:2] == '0x'; sHex = sHex[2:]; else: sHex = hex(-iValue - 1); if sys.version_info[0] < 3: assert sHex[-1] == 'L'; sHex = sHex[:-1]; assert sHex[:2] == '0x'; sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex[2:]]); if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']: sHex = 'f' + sHex; cDigits = len(sHex); if cDigits <= self.acbSizes[-1] * 2: for cb in self.acbSizes: cNaturalDigits = cb * 2; if cDigits <= cNaturalDigits: break; else: cNaturalDigits = self.acbSizes[-1] * 2; cNaturalDigits = int((cDigits + cNaturalDigits - 1) / cNaturalDigits) * cNaturalDigits; assert isinstance(cNaturalDigits, int) if cNaturalDigits != cDigits: cNeeded = cNaturalDigits - cDigits; if iValue >= 0: sHex = ('0' * cNeeded) + sHex; else: sHex = ('f' * cNeeded) + sHex; # Invert and convert to bytearray and return it. abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]); return ((fSignExtend, abValue),); def validate(self, sValue): """ Returns True if value is okay, error message on failure. """ try: self.get(sValue); except TestType.BadValue as oXcpt: return oXcpt.sMessage; return True; def isAndOrPair(self, sValue): """ Checks if sValue is a pair. """ _ = sValue; return False; class TestTypeEflags(TestType): """ Special value parsing for EFLAGS/RFLAGS/FLAGS. """ kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }; def __init__(self, sName): TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True); def get(self, sValue): fClear = 0; fSet = 0; for sFlag in sValue.split(','): sConstant = g_kdEFlagsMnemonics.get(sFlag, None); if sConstant is None: raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue)) if sConstant[0] == '!': fClear |= g_kdX86EFlagsConstants[sConstant[1:]]; else: fSet |= g_kdX86EFlagsConstants[sConstant]; aoSet = TestType.get(self, '0x%x' % (fSet,)); if fClear != 0: aoClear = TestType.get(self, '%#x' % (fClear,)) assert self.isAndOrPair(sValue) is True; return (aoClear[0], aoSet[0]); assert self.isAndOrPair(sValue) is False; return aoSet; def isAndOrPair(self, sValue): for sZeroFlag in self.kdZeroValueFlags: if sValue.find(sZeroFlag) >= 0: return True; return False; class TestTypeFromDict(TestType): """ Special value parsing for CR0. """ kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 }; def __init__(self, sName, kdConstantsAndValues, sConstantPrefix): TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True); self.kdConstantsAndValues = kdConstantsAndValues; self.sConstantPrefix = sConstantPrefix; def get(self, sValue): fValue = 0; for sFlag in sValue.split(','): fFlagValue = self.kdConstantsAndValues.get(self.sConstantPrefix + sFlag.upper(), None); if fFlagValue is None: raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue)) fValue |= fFlagValue; return TestType.get(self, '0x%x' % (fValue,)); class TestInOut(object): """ One input or output state modifier. This should be thought as values to modify BS3REGCTX and extended (needs to be structured) state. """ ## Assigned operators. kasOperators = [ '&|=', # Special AND(INV)+OR operator for use with EFLAGS. '&~=', '&=', '|=', '=' ]; ## Types kdTypes = { 'uint': TestType('uint', fUnsigned = True), 'int': TestType('int'), 'efl': TestTypeEflags('efl'), 'cr0': TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'), 'cr4': TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'), 'xcr0': TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'), }; ## CPU context fields. kdFields = { # name: ( default type, [both|input|output], ) # Operands. 'op1': ( 'uint', 'both', ), ## \@op1 'op2': ( 'uint', 'both', ), ## \@op2 'op3': ( 'uint', 'both', ), ## \@op3 'op4': ( 'uint', 'both', ), ## \@op4 # Flags. 'efl': ( 'efl', 'both', ), 'efl_undef': ( 'uint', 'output', ), # 8-bit GPRs. 'al': ( 'uint', 'both', ), 'cl': ( 'uint', 'both', ), 'dl': ( 'uint', 'both', ), 'bl': ( 'uint', 'both', ), 'ah': ( 'uint', 'both', ), 'ch': ( 'uint', 'both', ), 'dh': ( 'uint', 'both', ), 'bh': ( 'uint', 'both', ), 'r8l': ( 'uint', 'both', ), 'r9l': ( 'uint', 'both', ), 'r10l': ( 'uint', 'both', ), 'r11l': ( 'uint', 'both', ), 'r12l': ( 'uint', 'both', ), 'r13l': ( 'uint', 'both', ), 'r14l': ( 'uint', 'both', ), 'r15l': ( 'uint', 'both', ), # 16-bit GPRs. 'ax': ( 'uint', 'both', ), 'dx': ( 'uint', 'both', ), 'cx': ( 'uint', 'both', ), 'bx': ( 'uint', 'both', ), 'sp': ( 'uint', 'both', ), 'bp': ( 'uint', 'both', ), 'si': ( 'uint', 'both', ), 'di': ( 'uint', 'both', ), 'r8w': ( 'uint', 'both', ), 'r9w': ( 'uint', 'both', ), 'r10w': ( 'uint', 'both', ), 'r11w': ( 'uint', 'both', ), 'r12w': ( 'uint', 'both', ), 'r13w': ( 'uint', 'both', ), 'r14w': ( 'uint', 'both', ), 'r15w': ( 'uint', 'both', ), # 32-bit GPRs. 'eax': ( 'uint', 'both', ), 'edx': ( 'uint', 'both', ), 'ecx': ( 'uint', 'both', ), 'ebx': ( 'uint', 'both', ), 'esp': ( 'uint', 'both', ), 'ebp': ( 'uint', 'both', ), 'esi': ( 'uint', 'both', ), 'edi': ( 'uint', 'both', ), 'r8d': ( 'uint', 'both', ), 'r9d': ( 'uint', 'both', ), 'r10d': ( 'uint', 'both', ), 'r11d': ( 'uint', 'both', ), 'r12d': ( 'uint', 'both', ), 'r13d': ( 'uint', 'both', ), 'r14d': ( 'uint', 'both', ), 'r15d': ( 'uint', 'both', ), # 64-bit GPRs. 'rax': ( 'uint', 'both', ), 'rdx': ( 'uint', 'both', ), 'rcx': ( 'uint', 'both', ), 'rbx': ( 'uint', 'both', ), 'rsp': ( 'uint', 'both', ), 'rbp': ( 'uint', 'both', ), 'rsi': ( 'uint', 'both', ), 'rdi': ( 'uint', 'both', ), 'r8': ( 'uint', 'both', ), 'r9': ( 'uint', 'both', ), 'r10': ( 'uint', 'both', ), 'r11': ( 'uint', 'both', ), 'r12': ( 'uint', 'both', ), 'r13': ( 'uint', 'both', ), 'r14': ( 'uint', 'both', ), 'r15': ( 'uint', 'both', ), # 16-bit, 32-bit or 64-bit registers according to operand size. 'oz.rax': ( 'uint', 'both', ), 'oz.rdx': ( 'uint', 'both', ), 'oz.rcx': ( 'uint', 'both', ), 'oz.rbx': ( 'uint', 'both', ), 'oz.rsp': ( 'uint', 'both', ), 'oz.rbp': ( 'uint', 'both', ), 'oz.rsi': ( 'uint', 'both', ), 'oz.rdi': ( 'uint', 'both', ), 'oz.r8': ( 'uint', 'both', ), 'oz.r9': ( 'uint', 'both', ), 'oz.r10': ( 'uint', 'both', ), 'oz.r11': ( 'uint', 'both', ), 'oz.r12': ( 'uint', 'both', ), 'oz.r13': ( 'uint', 'both', ), 'oz.r14': ( 'uint', 'both', ), 'oz.r15': ( 'uint', 'both', ), # Control registers. 'cr0': ( 'cr0', 'both', ), 'cr4': ( 'cr4', 'both', ), 'xcr0': ( 'xcr0', 'both', ), # FPU Registers 'fcw': ( 'uint', 'both', ), 'fsw': ( 'uint', 'both', ), 'ftw': ( 'uint', 'both', ), 'fop': ( 'uint', 'both', ), 'fpuip': ( 'uint', 'both', ), 'fpucs': ( 'uint', 'both', ), 'fpudp': ( 'uint', 'both', ), 'fpuds': ( 'uint', 'both', ), 'mxcsr': ( 'uint', 'both', ), 'st0': ( 'uint', 'both', ), 'st1': ( 'uint', 'both', ), 'st2': ( 'uint', 'both', ), 'st3': ( 'uint', 'both', ), 'st4': ( 'uint', 'both', ), 'st5': ( 'uint', 'both', ), 'st6': ( 'uint', 'both', ), 'st7': ( 'uint', 'both', ), # MMX registers. 'mm0': ( 'uint', 'both', ), 'mm1': ( 'uint', 'both', ), 'mm2': ( 'uint', 'both', ), 'mm3': ( 'uint', 'both', ), 'mm4': ( 'uint', 'both', ), 'mm5': ( 'uint', 'both', ), 'mm6': ( 'uint', 'both', ), 'mm7': ( 'uint', 'both', ), # SSE registers. 'xmm0': ( 'uint', 'both', ), 'xmm1': ( 'uint', 'both', ), 'xmm2': ( 'uint', 'both', ), 'xmm3': ( 'uint', 'both', ), 'xmm4': ( 'uint', 'both', ), 'xmm5': ( 'uint', 'both', ), 'xmm6': ( 'uint', 'both', ), 'xmm7': ( 'uint', 'both', ), 'xmm8': ( 'uint', 'both', ), 'xmm9': ( 'uint', 'both', ), 'xmm10': ( 'uint', 'both', ), 'xmm11': ( 'uint', 'both', ), 'xmm12': ( 'uint', 'both', ), 'xmm13': ( 'uint', 'both', ), 'xmm14': ( 'uint', 'both', ), 'xmm15': ( 'uint', 'both', ), 'xmm0.lo': ( 'uint', 'both', ), 'xmm1.lo': ( 'uint', 'both', ), 'xmm2.lo': ( 'uint', 'both', ), 'xmm3.lo': ( 'uint', 'both', ), 'xmm4.lo': ( 'uint', 'both', ), 'xmm5.lo': ( 'uint', 'both', ), 'xmm6.lo': ( 'uint', 'both', ), 'xmm7.lo': ( 'uint', 'both', ), 'xmm8.lo': ( 'uint', 'both', ), 'xmm9.lo': ( 'uint', 'both', ), 'xmm10.lo': ( 'uint', 'both', ), 'xmm11.lo': ( 'uint', 'both', ), 'xmm12.lo': ( 'uint', 'both', ), 'xmm13.lo': ( 'uint', 'both', ), 'xmm14.lo': ( 'uint', 'both', ), 'xmm15.lo': ( 'uint', 'both', ), 'xmm0.hi': ( 'uint', 'both', ), 'xmm1.hi': ( 'uint', 'both', ), 'xmm2.hi': ( 'uint', 'both', ), 'xmm3.hi': ( 'uint', 'both', ), 'xmm4.hi': ( 'uint', 'both', ), 'xmm5.hi': ( 'uint', 'both', ), 'xmm6.hi': ( 'uint', 'both', ), 'xmm7.hi': ( 'uint', 'both', ), 'xmm8.hi': ( 'uint', 'both', ), 'xmm9.hi': ( 'uint', 'both', ), 'xmm10.hi': ( 'uint', 'both', ), 'xmm11.hi': ( 'uint', 'both', ), 'xmm12.hi': ( 'uint', 'both', ), 'xmm13.hi': ( 'uint', 'both', ), 'xmm14.hi': ( 'uint', 'both', ), 'xmm15.hi': ( 'uint', 'both', ), 'xmm0.lo.zx': ( 'uint', 'both', ), 'xmm1.lo.zx': ( 'uint', 'both', ), 'xmm2.lo.zx': ( 'uint', 'both', ), 'xmm3.lo.zx': ( 'uint', 'both', ), 'xmm4.lo.zx': ( 'uint', 'both', ), 'xmm5.lo.zx': ( 'uint', 'both', ), 'xmm6.lo.zx': ( 'uint', 'both', ), 'xmm7.lo.zx': ( 'uint', 'both', ), 'xmm8.lo.zx': ( 'uint', 'both', ), 'xmm9.lo.zx': ( 'uint', 'both', ), 'xmm10.lo.zx': ( 'uint', 'both', ), 'xmm11.lo.zx': ( 'uint', 'both', ), 'xmm12.lo.zx': ( 'uint', 'both', ), 'xmm13.lo.zx': ( 'uint', 'both', ), 'xmm14.lo.zx': ( 'uint', 'both', ), 'xmm15.lo.zx': ( 'uint', 'both', ), 'xmm0.dw0': ( 'uint', 'both', ), 'xmm1.dw0': ( 'uint', 'both', ), 'xmm2.dw0': ( 'uint', 'both', ), 'xmm3.dw0': ( 'uint', 'both', ), 'xmm4.dw0': ( 'uint', 'both', ), 'xmm5.dw0': ( 'uint', 'both', ), 'xmm6.dw0': ( 'uint', 'both', ), 'xmm7.dw0': ( 'uint', 'both', ), 'xmm8.dw0': ( 'uint', 'both', ), 'xmm9.dw0': ( 'uint', 'both', ), 'xmm10.dw0': ( 'uint', 'both', ), 'xmm11.dw0': ( 'uint', 'both', ), 'xmm12.dw0': ( 'uint', 'both', ), 'xmm13.dw0': ( 'uint', 'both', ), 'xmm14.dw0': ( 'uint', 'both', ), 'xmm15_dw0': ( 'uint', 'both', ), # AVX registers. 'ymm0': ( 'uint', 'both', ), 'ymm1': ( 'uint', 'both', ), 'ymm2': ( 'uint', 'both', ), 'ymm3': ( 'uint', 'both', ), 'ymm4': ( 'uint', 'both', ), 'ymm5': ( 'uint', 'both', ), 'ymm6': ( 'uint', 'both', ), 'ymm7': ( 'uint', 'both', ), 'ymm8': ( 'uint', 'both', ), 'ymm9': ( 'uint', 'both', ), 'ymm10': ( 'uint', 'both', ), 'ymm11': ( 'uint', 'both', ), 'ymm12': ( 'uint', 'both', ), 'ymm13': ( 'uint', 'both', ), 'ymm14': ( 'uint', 'both', ), 'ymm15': ( 'uint', 'both', ), # Special ones. 'value.xcpt': ( 'uint', 'output', ), }; def __init__(self, sField, sOp, sValue, sType): assert sField in self.kdFields; assert sOp in self.kasOperators; self.sField = sField; self.sOp = sOp; self.sValue = sValue; self.sType = sType; assert isinstance(sField, str); assert isinstance(sOp, str); assert isinstance(sType, str); assert isinstance(sValue, str); class TestSelector(object): """ One selector for an instruction test. """ ## Selector compare operators. kasCompareOps = [ '==', '!=' ]; ## Selector variables and their valid values. kdVariables = { # Operand size. 'size': { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', }, # VEX.L value. 'vex.l': { '0': 'vexl_0', '1': 'vexl_1', }, # Execution ring. 'ring': { '0': 'ring_0', '1': 'ring_1', '2': 'ring_2', '3': 'ring_3', '0..2': 'ring_0_thru_2', '1..3': 'ring_1_thru_3', }, # Basic code mode. 'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', }, # cpu modes. 'mode': { 'real': 'mode_real', 'prot': 'mode_prot', 'long': 'mode_long', 'v86': 'mode_v86', 'smm': 'mode_smm', 'vmx': 'mode_vmx', 'svm': 'mode_svm', }, # paging on/off 'paging': { 'on': 'paging_on', 'off': 'paging_off', }, # CPU vendor 'vendor': { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', }, }; ## Selector shorthand predicates. ## These translates into variable expressions. kdPredicates = { 'o16': 'size==o16', 'o32': 'size==o32', 'o64': 'size==o64', 'ring0': 'ring==0', '!ring0': 'ring==1..3', 'ring1': 'ring==1', 'ring2': 'ring==2', 'ring3': 'ring==3', 'user': 'ring==3', 'supervisor': 'ring==0..2', '16-bit': 'codebits==16', '32-bit': 'codebits==32', '64-bit': 'codebits==64', 'real': 'mode==real', 'prot': 'mode==prot', 'long': 'mode==long', 'v86': 'mode==v86', 'smm': 'mode==smm', 'vmx': 'mode==vmx', 'svm': 'mode==svm', 'paging': 'paging==on', '!paging': 'paging==off', 'amd': 'vendor==amd', '!amd': 'vendor!=amd', 'intel': 'vendor==intel', '!intel': 'vendor!=intel', 'via': 'vendor==via', '!via': 'vendor!=via', }; def __init__(self, sVariable, sOp, sValue): assert sVariable in self.kdVariables; assert sOp in self.kasCompareOps; assert sValue in self.kdVariables[sVariable]; self.sVariable = sVariable; self.sOp = sOp; self.sValue = sValue; class InstructionTest(object): """ Instruction test. """ def __init__(self, oInstr): # type: (InstructionTest, Instruction) self.oInstr = oInstr # type: InstructionTest self.aoInputs = [] # type: list(TestInOut) self.aoOutputs = [] # type: list(TestInOut) self.aoSelectors = [] # type: list(TestSelector) def toString(self, fRepr = False): """ Converts it to string representation. """ asWords = []; if self.aoSelectors: for oSelector in self.aoSelectors: asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,)); asWords.append('/'); for oModifier in self.aoInputs: asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,)); asWords.append('->'); for oModifier in self.aoOutputs: asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,)); if fRepr: return '<' + ' '.join(asWords) + '>'; return ' '.join(asWords); def __str__(self): """ Provide string represenation. """ return self.toString(False); def __repr__(self): """ Provide unambigious string representation. """ return self.toString(True); class Operand(object): """ Instruction operand. """ def __init__(self, sWhere, sType): assert sWhere in g_kdOpLocations, sWhere; assert sType in g_kdOpTypes, sType; self.sWhere = sWhere; ##< g_kdOpLocations self.sType = sType; ##< g_kdOpTypes def usesModRM(self): """ Returns True if using some form of ModR/M encoding. """ return self.sType[0] in ['E', 'G', 'M']; class Instruction(object): # pylint: disable=too-many-instance-attributes """ Instruction. """ def __init__(self, sSrcFile, iLine): ## @name Core attributes. ## @{ self.oParent = None # type: Instruction self.sMnemonic = None; self.sBrief = None; self.asDescSections = [] # type: list(str) self.aoMaps = [] # type: list(InstructionMap) self.aoOperands = [] # type: list(Operand) self.sPrefix = None; ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2 self.sOpcode = None # type: str self.sSubOpcode = None # type: str self.sEncoding = None; self.asFlTest = None; self.asFlModify = None; self.asFlUndefined = None; self.asFlSet = None; self.asFlClear = None; self.dHints = {}; ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value). self.sDisEnum = None; ##< OP_XXXX value. Default is based on the uppercased mnemonic. self.asCpuIds = []; ##< The CPUID feature bit names for this instruction. If multiple, assume AND. self.asReqFeatures = []; ##< Which features are required to be enabled to run this instruction. self.aoTests = [] # type: list(InstructionTest) self.sMinCpu = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is. self.oCpuExpr = None; ##< Some CPU restriction expression... self.sGroup = None; self.fUnused = False; ##< Unused instruction. self.fInvalid = False; ##< Invalid instruction (like UD2). self.sInvalidStyle = None; ##< Invalid behviour style (g_kdInvalidStyles), self.sXcptType = None; ##< Exception type (g_kdXcptTypes). ## @} ## @name Implementation attributes. ## @{ self.sStats = None; self.sFunction = None; self.fStub = False; self.fUdStub = False; ## @} ## @name Decoding info ## @{ self.sSrcFile = sSrcFile; self.iLineCreated = iLine; self.iLineCompleted = None; self.cOpTags = 0; self.iLineFnIemOpMacro = -1; self.iLineMnemonicMacro = -1; ## @} ## @name Intermediate input fields. ## @{ self.sRawDisOpNo = None; self.asRawDisParams = []; self.sRawIemOpFlags = None; self.sRawOldOpcodes = None; self.asCopyTests = []; ## @} def toString(self, fRepr = False): """ Turn object into a string. """ aasFields = []; aasFields.append(['opcode', self.sOpcode]); if self.sPrefix: aasFields.append(['prefix', self.sPrefix]); aasFields.append(['mnemonic', self.sMnemonic]); for iOperand, oOperand in enumerate(self.aoOperands): aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]); if self.aoMaps: aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]); aasFields.append(['encoding', self.sEncoding]); if self.dHints: aasFields.append(['hints', ','.join(self.dHints.keys())]); aasFields.append(['disenum', self.sDisEnum]); if self.asCpuIds: aasFields.append(['cpuid', ','.join(self.asCpuIds)]); aasFields.append(['group', self.sGroup]); if self.fUnused: aasFields.append(['unused', 'True']); if self.fInvalid: aasFields.append(['invalid', 'True']); aasFields.append(['invlstyle', self.sInvalidStyle]); aasFields.append(['fltest', self.asFlTest]); aasFields.append(['flmodify', self.asFlModify]); aasFields.append(['flundef', self.asFlUndefined]); aasFields.append(['flset', self.asFlSet]); aasFields.append(['flclear', self.asFlClear]); aasFields.append(['mincpu', self.sMinCpu]); aasFields.append(['stats', self.sStats]); aasFields.append(['sFunction', self.sFunction]); if self.fStub: aasFields.append(['fStub', 'True']); if self.fUdStub: aasFields.append(['fUdStub', 'True']); if self.cOpTags: aasFields.append(['optags', str(self.cOpTags)]); if self.iLineFnIemOpMacro != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]); if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]); sRet = '<' if fRepr else ''; for sField, sValue in aasFields: if sValue is not None: if len(sRet) > 1: sRet += '; '; sRet += '%s=%s' % (sField, sValue,); if fRepr: sRet += '>'; return sRet; def __str__(self): """ Provide string represenation. """ return self.toString(False); def __repr__(self): """ Provide unambigious string representation. """ return self.toString(True); def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None): """ Makes a copy of the object for the purpose of putting in a different map or a different place in the current map. """ oCopy = Instruction(self.sSrcFile, self.iLineCreated); oCopy.oParent = self; oCopy.sMnemonic = self.sMnemonic; oCopy.sBrief = self.sBrief; oCopy.asDescSections = list(self.asDescSections); oCopy.aoMaps = [oMap,] if oMap else list(self.aoMaps); oCopy.aoOperands = list(self.aoOperands); ## Deeper copy? oCopy.sPrefix = sPrefix if sPrefix else self.sPrefix; oCopy.sOpcode = sOpcode if sOpcode else self.sOpcode; oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode; oCopy.sEncoding = self.sEncoding; oCopy.asFlTest = self.asFlTest; oCopy.asFlModify = self.asFlModify; oCopy.asFlUndefined = self.asFlUndefined; oCopy.asFlSet = self.asFlSet; oCopy.asFlClear = self.asFlClear; oCopy.dHints = dict(self.dHints); oCopy.sDisEnum = self.sDisEnum; oCopy.asCpuIds = list(self.asCpuIds); oCopy.asReqFeatures = list(self.asReqFeatures); oCopy.aoTests = list(self.aoTests); ## Deeper copy? oCopy.sMinCpu = self.sMinCpu; oCopy.oCpuExpr = self.oCpuExpr; oCopy.sGroup = self.sGroup; oCopy.fUnused = self.fUnused; oCopy.fInvalid = self.fInvalid; oCopy.sInvalidStyle = self.sInvalidStyle; oCopy.sXcptType = self.sXcptType; oCopy.sStats = self.sStats; oCopy.sFunction = self.sFunction; oCopy.fStub = self.fStub; oCopy.fUdStub = self.fUdStub; oCopy.iLineCompleted = self.iLineCompleted; oCopy.cOpTags = self.cOpTags; oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro; oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro; oCopy.sRawDisOpNo = self.sRawDisOpNo; oCopy.asRawDisParams = list(self.asRawDisParams); oCopy.sRawIemOpFlags = self.sRawIemOpFlags; oCopy.sRawOldOpcodes = self.sRawOldOpcodes; oCopy.asCopyTests = list(self.asCopyTests); return oCopy; def getOpcodeByte(self): """ Decodes sOpcode into a byte range integer value. Raises exception if sOpcode is None or invalid. """ if self.sOpcode is None: raise Exception('No opcode byte for %s!' % (self,)); sOpcode = str(self.sOpcode); # pylint type confusion workaround. # Full hex byte form. if sOpcode[:2] == '0x': return int(sOpcode, 16); # The /r form: if len(sOpcode) == 4 and sOpcode.startswith('/') and sOpcode[-1].isdigit(): return int(sOpcode[-1:]) << 3; # The 11/r form: if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit(): return (int(sOpcode[-1:]) << 3) | 0xc0; # The !11/r form (returns mod=1): ## @todo this doesn't really work... if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit(): return (int(sOpcode[-1:]) << 3) | 0x80; raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,)); @staticmethod def _flagsToIntegerMask(asFlags): """ Returns the integer mask value for asFlags. """ uRet = 0; if asFlags: for sFlag in asFlags: sConstant = g_kdEFlagsMnemonics[sFlag]; assert sConstant[0] != '!', sConstant uRet |= g_kdX86EFlagsConstants[sConstant]; return uRet; def getTestedFlagsMask(self): """ Returns asFlTest into a integer mask value """ return self._flagsToIntegerMask(self.asFlTest); def getModifiedFlagsMask(self): """ Returns asFlModify into a integer mask value """ return self._flagsToIntegerMask(self.asFlModify); def getUndefinedFlagsMask(self): """ Returns asFlUndefined into a integer mask value """ return self._flagsToIntegerMask(self.asFlUndefined); def getSetFlagsMask(self): """ Returns asFlSet into a integer mask value """ return self._flagsToIntegerMask(self.asFlSet); def getClearedFlagsMask(self): """ Returns asFlClear into a integer mask value """ return self._flagsToIntegerMask(self.asFlClear); def onlyInVexMaps(self): """ Returns True if only in VEX maps, otherwise False. (No maps -> False) """ if not self.aoMaps: return False; for oMap in self.aoMaps: if not oMap.isVexMap(): return False; return True; ## All the instructions. g_aoAllInstructions = [] # type: list(Instruction) ## All the instructions indexed by statistics name (opstat). g_dAllInstructionsByStat = {} # type: dict(Instruction) ## All the instructions indexed by function name (opfunction). g_dAllInstructionsByFunction = {} # type: dict(list(Instruction)) ## Instructions tagged by oponlytest g_aoOnlyTestInstructions = [] # type: list(Instruction) ## Instruction maps. g_aoInstructionMaps = [ InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'), InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'), InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'), InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'), InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'), InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'), InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'), InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'), InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'), InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'), InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'), InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'), ## @todo g_apfnEscF1_E0toFF InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'), InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'), InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'), InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'), InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'), InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'), InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'), InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'), InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'), InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'), InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'), InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'), ## @todo What about g_apfnGroup9MemReg? InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'), InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'), InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'), InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'), ## @todo What about g_apfnGroup15RegReg? InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'), InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]), InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]), InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'), InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'), InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'), InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'), InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'), InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'), InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'), InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'), InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]), InstructionMap('xopmap8', sEncoding = 'xop8'), InstructionMap('xopmap9', sEncoding = 'xop9'), InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'), InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'), InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'), InstructionMap('xopmap10', sEncoding = 'xop10'), InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'), ]; g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps }; g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps }; class ParserException(Exception): """ Parser exception """ def __init__(self, sMessage): Exception.__init__(self, sMessage); class SimpleParser(object): """ Parser of IEMAllInstruction*.cpp.h instruction specifications. """ ## @name Parser state. ## @{ kiCode = 0; kiCommentMulti = 1; ## @} def __init__(self, sSrcFile, asLines, sDefaultMap): self.sSrcFile = sSrcFile; self.asLines = asLines; self.iLine = 0; self.iState = self.kiCode; self.sComment = ''; self.iCommentLine = 0; self.aoCurInstrs = []; assert sDefaultMap in g_dInstructionMaps; self.oDefaultMap = g_dInstructionMaps[sDefaultMap]; self.cTotalInstr = 0; self.cTotalStubs = 0; self.cTotalTagged = 0; self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$'); self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$'); self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$'); self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$'); self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$'); self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$'); self.oReFunTable = re.compile('^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$'); self.oReComment = re.compile('//.*?$|/\*.*?\*/'); ## Full comments. self.fDebug = True; self.dTagHandlers = { '@opbrief': self.parseTagOpBrief, '@opdesc': self.parseTagOpDesc, '@opmnemonic': self.parseTagOpMnemonic, '@op1': self.parseTagOpOperandN, '@op2': self.parseTagOpOperandN, '@op3': self.parseTagOpOperandN, '@op4': self.parseTagOpOperandN, '@oppfx': self.parseTagOpPfx, '@opmaps': self.parseTagOpMaps, '@opcode': self.parseTagOpcode, '@opcodesub': self.parseTagOpcodeSub, '@openc': self.parseTagOpEnc, '@opfltest': self.parseTagOpEFlags, '@opflmodify': self.parseTagOpEFlags, '@opflundef': self.parseTagOpEFlags, '@opflset': self.parseTagOpEFlags, '@opflclear': self.parseTagOpEFlags, '@ophints': self.parseTagOpHints, '@opdisenum': self.parseTagOpDisEnum, '@opmincpu': self.parseTagOpMinCpu, '@opcpuid': self.parseTagOpCpuId, '@opgroup': self.parseTagOpGroup, '@opunused': self.parseTagOpUnusedInvalid, '@opinvalid': self.parseTagOpUnusedInvalid, '@opinvlstyle': self.parseTagOpUnusedInvalid, '@optest': self.parseTagOpTest, '@optestign': self.parseTagOpTestIgnore, '@optestignore': self.parseTagOpTestIgnore, '@opcopytests': self.parseTagOpCopyTests, '@oponly': self.parseTagOpOnlyTest, '@oponlytest': self.parseTagOpOnlyTest, '@opxcpttype': self.parseTagOpXcptType, '@opstats': self.parseTagOpStats, '@opfunction': self.parseTagOpFunction, '@opdone': self.parseTagOpDone, }; for i in range(48): self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum; self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum; self.asErrors = []; def raiseError(self, sMessage): """ Raise error prefixed with the source and line number. """ raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,)); def raiseCommentError(self, iLineInComment, sMessage): """ Similar to raiseError, but the line number is iLineInComment + self.iCommentLine. """ raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,)); def error(self, sMessage): """ Adds an error. returns False; """ self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,)); return False; def errorOnLine(self, iLine, sMessage): """ Adds an error. returns False; """ self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,)); return False; def errorComment(self, iLineInComment, sMessage): """ Adds a comment error. returns False; """ self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,)); return False; def printErrors(self): """ Print the errors to stderr. Returns number of errors. """ if self.asErrors: sys.stderr.write(u''.join(self.asErrors)); return len(self.asErrors); def debug(self, sMessage): """ For debugging. """ if self.fDebug: print('debug: %s' % (sMessage,)); def stripComments(self, sLine): """ Returns sLine with comments stripped. Complains if traces of incomplete multi-line comments are encountered. """ sLine = self.oReComment.sub(" ", sLine); if sLine.find('/*') >= 0 or sLine.find('*/') >= 0: self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.'); return sLine; def parseFunctionTable(self, sLine): """ Parses a PFNIEMOP table, updating/checking the @oppfx value. Note! Updates iLine as it consumes the whole table. """ # # Extract the table name. # sName = re.search(' *([a-zA-Z_0-9]+) *\[', sLine).group(1); oMap = g_dInstructionMapsByIemName.get(sName); if not oMap: self.debug('No map for PFNIEMOP table: %s' % (sName,)); oMap = self.oDefaultMap; # This is wrong wrong wrong. # # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four # entries per byte: # no prefix, 066h prefix, f3h prefix, f2h prefix # Those tables has 256 & 32 entries respectively. # cEntriesPerByte = 4; cValidTableLength = 1024; asPrefixes = ('none', '0x66', '0xf3', '0xf2'); oEntriesMatch = re.search('\[ *(256|32) *\]', sLine); if oEntriesMatch: cEntriesPerByte = 1; cValidTableLength = int(oEntriesMatch.group(1)); asPrefixes = (None,); # # The next line should be '{' and nothing else. # if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]): return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, )); self.iLine += 1; # # Parse till we find the end of the table. # iEntry = 0; while self.iLine < len(self.asLines): # Get the next line and strip comments and spaces (assumes no # multi-line comments). sLine = self.asLines[self.iLine]; self.iLine += 1; sLine = self.stripComments(sLine).strip(); # Split the line up into entries, expanding IEMOP_X4 usage. asEntries = sLine.split(','); for i in range(len(asEntries) - 1, -1, -1): sEntry = asEntries[i].strip(); if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')': sEntry = (sEntry[len('IEMOP_X4('):-1]).strip(); asEntries.insert(i + 1, sEntry); asEntries.insert(i + 1, sEntry); asEntries.insert(i + 1, sEntry); if sEntry: asEntries[i] = sEntry; else: del asEntries[i]; # Process the entries. for sEntry in asEntries: if sEntry in ('};', '}'): if iEntry != cValidTableLength: return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, )); return True; if sEntry.startswith('iemOp_Invalid'): pass; # skip else: # Look up matching instruction by function. sPrefix = asPrefixes[iEntry % cEntriesPerByte]; sOpcode = '%#04x' % (iEntry // cEntriesPerByte); aoInstr = g_dAllInstructionsByFunction.get(sEntry); if aoInstr: if not isinstance(aoInstr, list): aoInstr = [aoInstr,]; oInstr = None; for oCurInstr in aoInstr: if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix: pass; elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None: oCurInstr.sPrefix = sPrefix; elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None: oCurInstr.sOpcode = sOpcode; oCurInstr.sPrefix = sPrefix; else: continue; oInstr = oCurInstr; break; if not oInstr: oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix); aoInstr.append(oInstr); g_dAllInstructionsByFunction[sEntry] = aoInstr; g_aoAllInstructions.append(oInstr); oMap.aoInstructions.append(oInstr); else: self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.' % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,)); iEntry += 1; return self.error('Unexpected end of file in PFNIEMOP table'); def addInstruction(self, iLine = None): """ Adds an instruction. """ oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine); g_aoAllInstructions.append(oInstr); self.aoCurInstrs.append(oInstr); return oInstr; def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats): """ Derives the mnemonic and operands from a IEM stats base name like string. """ if oInstr.sMnemonic is None: asWords = sStats.split('_'); oInstr.sMnemonic = asWords[0].lower(); if len(asWords) > 1 and not oInstr.aoOperands: for sType in asWords[1:]: if sType in g_kdOpTypes: oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType)); else: #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr)) return False; return True; def doneInstructionOne(self, oInstr, iLine): """ Complete the parsing by processing, validating and expanding raw inputs. """ assert oInstr.iLineCompleted is None; oInstr.iLineCompleted = iLine; # # Specified instructions. # if oInstr.cOpTags > 0: if oInstr.sStats is None: pass; # # Unspecified legacy stuff. We generally only got a few things to go on here. # /** Opcode 0x0f 0x00 /0. */ # FNIEMOPRM_DEF(iemOp_Grp6_sldt) # else: #if oInstr.sRawOldOpcodes: # #if oInstr.sMnemonic: pass; # # Common defaults. # # Guess mnemonic and operands from stats if the former is missing. if oInstr.sMnemonic is None: if oInstr.sStats is not None: self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats); elif oInstr.sFunction is not None: self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', '')); # Derive the disassembler op enum constant from the mnemonic. if oInstr.sDisEnum is None and oInstr.sMnemonic is not None: oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper(); # Derive the IEM statistics base name from mnemonic and operand types. if oInstr.sStats is None: if oInstr.sFunction is not None: oInstr.sStats = oInstr.sFunction.replace('iemOp_', ''); elif oInstr.sMnemonic is not None: oInstr.sStats = oInstr.sMnemonic; for oOperand in oInstr.aoOperands: if oOperand.sType: oInstr.sStats += '_' + oOperand.sType; # Derive the IEM function name from mnemonic and operand types. if oInstr.sFunction is None: if oInstr.sMnemonic is not None: oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic; for oOperand in oInstr.aoOperands: if oOperand.sType: oInstr.sFunction += '_' + oOperand.sType; elif oInstr.sStats: oInstr.sFunction = 'iemOp_' + oInstr.sStats; # # Apply default map and then add the instruction to all it's groups. # if not oInstr.aoMaps: oInstr.aoMaps = [ self.oDefaultMap, ]; for oMap in oInstr.aoMaps: oMap.aoInstructions.append(oInstr); # # Derive encoding from operands and maps. # if oInstr.sEncoding is None: if not oInstr.aoOperands: if oInstr.fUnused and oInstr.sSubOpcode: oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M'; else: oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed'; elif oInstr.aoOperands[0].usesModRM(): if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \ or oInstr.onlyInVexMaps(): oInstr.sEncoding = 'VEX.ModR/M'; else: oInstr.sEncoding = 'ModR/M'; # # Check the opstat value and add it to the opstat indexed dictionary. # if oInstr.sStats: if oInstr.sStats not in g_dAllInstructionsByStat: g_dAllInstructionsByStat[oInstr.sStats] = oInstr; else: self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s' % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],)); # # Add to function indexed dictionary. We allow multiple instructions per function. # if oInstr.sFunction: if oInstr.sFunction not in g_dAllInstructionsByFunction: g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,]; else: g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr); #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags)); return True; def doneInstructions(self, iLineInComment = None): """ Done with current instruction. """ for oInstr in self.aoCurInstrs: self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment); if oInstr.fStub: self.cTotalStubs += 1; self.cTotalInstr += len(self.aoCurInstrs); self.sComment = ''; self.aoCurInstrs = []; return True; def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False): """ Sets the sAttrib of all current instruction to oValue. If fOverwrite is False, only None values and empty strings are replaced. """ for oInstr in self.aoCurInstrs: if fOverwrite is not True: oOldValue = getattr(oInstr, sAttrib); if oOldValue is not None: continue; setattr(oInstr, sAttrib, oValue); def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False): """ Sets the iEntry of the array sAttrib of all current instruction to oValue. If fOverwrite is False, only None values and empty strings are replaced. """ for oInstr in self.aoCurInstrs: aoArray = getattr(oInstr, sAttrib); while len(aoArray) <= iEntry: aoArray.append(None); if fOverwrite is True or aoArray[iEntry] is None: aoArray[iEntry] = oValue; def parseCommentOldOpcode(self, asLines): """ Deals with 'Opcode 0xff /4' like comments """ asWords = asLines[0].split(); if len(asWords) >= 2 \ and asWords[0] == 'Opcode' \ and ( asWords[1].startswith('0x') or asWords[1].startswith('0X')): asWords = asWords[:1]; for iWord, sWord in enumerate(asWords): if sWord.startswith('0X'): sWord = '0x' + sWord[:2]; asWords[iWord] = asWords; self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords)); return False; def ensureInstructionForOpTag(self, iTagLine): """ Ensure there is an instruction for the op-tag being parsed. """ if not self.aoCurInstrs: self.addInstruction(self.iCommentLine + iTagLine); for oInstr in self.aoCurInstrs: oInstr.cOpTags += 1; if oInstr.cOpTags == 1: self.cTotalTagged += 1; return self.aoCurInstrs[-1]; @staticmethod def flattenSections(aasSections): """ Flattens multiline sections into stripped single strings. Returns list of strings, on section per string. """ asRet = []; for asLines in aasSections: if asLines: asRet.append(' '.join([sLine.strip() for sLine in asLines])); return asRet; @staticmethod def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'): """ Flattens sections into a simple stripped string with newlines as section breaks. The final section does not sport a trailing newline. """ # Typical: One section with a single line. if len(aasSections) == 1 and len(aasSections[0]) == 1: return aasSections[0][0].strip(); sRet = ''; for iSection, asLines in enumerate(aasSections): if asLines: if iSection > 0: sRet += sSectionSep; sRet += sLineSep.join([sLine.strip() for sLine in asLines]); return sRet; ## @name Tag parsers ## @{ def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opbrief Value: Text description, multiple sections, appended. Brief description. If not given, it's the first sentence from @opdesc. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sBrief = self.flattenAllSections(aasSections); if not sBrief: return self.errorComment(iTagLine, '%s: value required' % (sTag,)); if sBrief[-1] != '.': sBrief = sBrief + '.'; if len(sBrief) > 180: return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief)); offDot = sBrief.find('.'); while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ': offDot = sBrief.find('.', offDot + 1); if offDot >= 0 and offDot != len(sBrief) - 1: return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief)); # Update the instruction. if oInstr.sBrief is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"' % (sTag, oInstr.sBrief, sBrief,)); _ = iEndLine; return True; def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opdesc Value: Text description, multiple sections, appended. It is used to describe instructions. """ oInstr = self.ensureInstructionForOpTag(iTagLine); if aasSections: oInstr.asDescSections.extend(self.flattenSections(aasSections)); return True; _ = sTag; _ = iEndLine; return True; def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: @opmenmonic Value: mnemonic The 'mnemonic' value must be a valid C identifier string. Because of prefixes, groups and whatnot, there times when the mnemonic isn't that of an actual assembler mnemonic. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sMnemonic = self.flattenAllSections(aasSections); if not self.oReMnemonic.match(sMnemonic): return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,)); if oInstr.sMnemonic is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,)); oInstr.sMnemonic = sMnemonic _ = iEndLine; return True; def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine): """ Tags: \@op1, \@op2, \@op3, \@op4 Value: [where:]type The 'where' value indicates where the operand is found, like the 'reg' part of the ModR/M encoding. See Instruction.kdOperandLocations for a list. The 'type' value indicates the operand type. These follow the types given in the opcode tables in the CPU reference manuals. See Instruction.kdOperandTypes for a list. """ oInstr = self.ensureInstructionForOpTag(iTagLine); idxOp = int(sTag[-1]) - 1; assert 0 <= idxOp < 4; # flatten, split up, and validate the "where:type" value. sFlattened = self.flattenAllSections(aasSections); asSplit = sFlattened.split(':'); if len(asSplit) == 1: sType = asSplit[0]; sWhere = None; elif len(asSplit) == 2: (sWhere, sType) = asSplit; else: return self.errorComment(iTagLine, 'expected %s value on format "[:]" not "%s"' % (sTag, sFlattened,)); if sType not in g_kdOpTypes: return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s' % (sTag, sType, ', '.join(g_kdOpTypes.keys()),)); if sWhere is None: sWhere = g_kdOpTypes[sType][1]; elif sWhere not in g_kdOpLocations: return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s' % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),)); # Insert the operand, refusing to overwrite an existing one. while idxOp >= len(oInstr.aoOperands): oInstr.aoOperands.append(None); if oInstr.aoOperands[idxOp] is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"' % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType, sWhere, sType,)); oInstr.aoOperands[idxOp] = Operand(sWhere, sType); _ = iEndLine; return True; def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opmaps Value: map[,map2] Indicates which maps the instruction is in. There is a default map associated with each input file. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten, split up and validate the value. sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ','); asMaps = sFlattened.split(','); if not asMaps: return self.errorComment(iTagLine, '%s: value required' % (sTag,)); for sMap in asMaps: if sMap not in g_dInstructionMaps: return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)' % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),)); # Add the maps to the current list. Throw errors on duplicates. for oMap in oInstr.aoMaps: if oMap.sName in asMaps: return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName)); for sMap in asMaps: oMap = g_dInstructionMaps[sMap]; if oMap not in oInstr.aoMaps: oInstr.aoMaps.append(oMap); else: self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap)); _ = iEndLine; return True; def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@oppfx Value: n/a|none|0x66|0xf3|0xf2 Required prefix for the instruction. (In a (E)VEX context this is the value of the 'pp' field rather than an actual prefix.) """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sFlattened = self.flattenAllSections(aasSections); asPrefixes = sFlattened.split(); if len(asPrefixes) > 1: return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,)); sPrefix = asPrefixes[0].lower(); if sPrefix == 'none': sPrefix = 'none'; elif sPrefix == 'n/a': sPrefix = None; else: if len(sPrefix) == 2: sPrefix = '0x' + sPrefix; if not _isValidOpcodeByte(sPrefix): return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,)); if sPrefix is not None and sPrefix not in g_kdPrefixes: return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,)); # Set it. if oInstr.sPrefix is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,)); oInstr.sPrefix = sPrefix; _ = iEndLine; return True; def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opcode Value: 0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg) The opcode byte or sub-byte for the instruction in the context of a map. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sOpcode = self.flattenAllSections(aasSections); if _isValidOpcodeByte(sOpcode): pass; elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in '012345678': pass; elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in '012345678': pass; elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '012345678': pass; else: return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,)); # Set it. if oInstr.sOpcode is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,)); oInstr.sOpcode = sOpcode; _ = iEndLine; return True; def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opcodesub Value: none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1 | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1 This is a simple way of dealing with encodings where the mod=3 and mod!=3 represents exactly two different instructions. The more proper way would be to go via maps with two members, but this is faster. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sSubOpcode = self.flattenAllSections(aasSections); if sSubOpcode not in g_kdSubOpcodes: return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sSubOpcode,)); sSubOpcode = g_kdSubOpcodes[sSubOpcode][0]; # Set it. if oInstr.sSubOpcode is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sSubOpcode, sSubOpcode,)); oInstr.sSubOpcode = sSubOpcode; _ = iEndLine; return True; def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@openc Value: ModR/M|fixed|prefix| The instruction operand encoding style. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sEncoding = self.flattenAllSections(aasSections); if sEncoding in g_kdEncodings: pass; elif sEncoding in g_dInstructionMaps: pass; elif not _isValidOpcodeByte(sEncoding): return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,)); # Set it. if oInstr.sEncoding is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sEncoding, sEncoding,)); oInstr.sEncoding = sEncoding; _ = iEndLine; return True; ## EFlags tag to Instruction attribute name. kdOpFlagToAttr = { '@opfltest': 'asFlTest', '@opflmodify': 'asFlModify', '@opflundef': 'asFlUndefined', '@opflset': 'asFlSet', '@opflclear': 'asFlClear', }; def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine): """ Tags: \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear Value: """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten, split up and validate the values. asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(','); if len(asFlags) == 1 and asFlags[0].lower() == 'none': asFlags = []; else: fRc = True; for iFlag, sFlag in enumerate(asFlags): if sFlag not in g_kdEFlagsMnemonics: if sFlag.strip() in g_kdEFlagsMnemonics: asFlags[iFlag] = sFlag.strip(); else: fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,)); if not fRc: return False; # Set them. asOld = getattr(oInstr, self.kdOpFlagToAttr[sTag]); if asOld is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,)); setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags); _ = iEndLine; return True; def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@ophints Value: Comma or space separated list of flags and hints. This covers the disassembler flags table and more. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split(); if len(asHints) == 1 and asHints[0].lower() == 'none': asHints = []; else: fRc = True; for iHint, sHint in enumerate(asHints): if sHint not in g_kdHints: if sHint.strip() in g_kdHints: sHint[iHint] = sHint.strip(); else: fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,)); if not fRc: return False; # Append them. for sHint in asHints: if sHint not in oInstr.dHints: oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed) else: self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,)); _ = iEndLine; return True; def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opdisenum Value: OP_XXXX This is for select a specific (legacy) disassembler enum value for the instruction. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and split. asWords = self.flattenAllSections(aasSections).split(); if len(asWords) != 1: self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,)); if not asWords: return False; sDisEnum = asWords[0]; if not self.oReDisEnum.match(sDisEnum): return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)' % (sTag, sDisEnum, self.oReDisEnum.pattern)); # Set it. if oInstr.sDisEnum is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,)); oInstr.sDisEnum = sDisEnum; _ = iEndLine; return True; def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opmincpu Value: Indicates when this instruction was introduced. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten the value, split into words, make sure there's just one, valid it. asCpus = self.flattenAllSections(aasSections).split(); if len(asCpus) > 1: self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),)); sMinCpu = asCpus[0]; if sMinCpu in g_kdCpuNames: oInstr.sMinCpu = sMinCpu; else: return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)' % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),)); # Set it. if oInstr.sMinCpu is None: oInstr.sMinCpu = sMinCpu; elif oInstr.sMinCpu != sMinCpu: self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,)); _ = iEndLine; return True; def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opcpuid Value: none | CPUID feature bit which is required for the instruction to be present. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split(); if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none': asCpuIds = []; else: fRc = True; for iCpuId, sCpuId in enumerate(asCpuIds): if sCpuId not in g_kdCpuIdFlags: if sCpuId.strip() in g_kdCpuIdFlags: sCpuId[iCpuId] = sCpuId.strip(); else: fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,)); if not fRc: return False; # Append them. for sCpuId in asCpuIds: if sCpuId not in oInstr.asCpuIds: oInstr.asCpuIds.append(sCpuId); else: self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,)); _ = iEndLine; return True; def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opgroup Value: op_grp1[_subgrp2[_subsubgrp3]] Instruction grouping. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asGroups = self.flattenAllSections(aasSections).split(); if len(asGroups) != 1: return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,)); sGroup = asGroups[0]; if not self.oReGroupName.match(sGroup): return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)' % (sTag, sGroup, self.oReGroupName.pattern)); # Set it. if oInstr.sGroup is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,)); oInstr.sGroup = sGroup; _ = iEndLine; return True; def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opunused, \@opinvalid, \@opinvlstyle Value: The \@opunused indicates the specification is for a currently unused instruction encoding. The \@opinvalid indicates the specification is for an invalid currently instruction encoding (like UD2). The \@opinvlstyle just indicates how CPUs decode the instruction when not supported (\@opcpuid, \@opmincpu) or disabled. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asStyles = self.flattenAllSections(aasSections).split(); if len(asStyles) != 1: return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,)); sStyle = asStyles[0]; if sStyle not in g_kdInvalidStyles: return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)' % (sTag, sStyle, g_kdInvalidStyles.keys(),)); # Set it. if oInstr.sInvalidStyle is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)' % ( sTag, oInstr.sInvalidStyle, sStyle,)); oInstr.sInvalidStyle = sStyle; if sTag == '@opunused': oInstr.fUnused = True; elif sTag == '@opinvalid': oInstr.fInvalid = True; _ = iEndLine; return True; def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals """ Tag: \@optest Value: [[ ]?] -> Example: mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p? The main idea here is to generate basic instruction tests. The probably simplest way of handling the diverse input, would be to use it to produce size optimized byte code for a simple interpreter that modifies the register input and output states. An alternative to the interpreter would be creating multiple tables, but that becomes rather complicated wrt what goes where and then to use them in an efficient manner. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # # Do it section by section. # for asSectionLines in aasSections: # # Sort the input into outputs, inputs and selector conditions. # sFlatSection = self.flattenAllSections([asSectionLines,]); if not sFlatSection: self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections)); continue; oTest = InstructionTest(oInstr); asSelectors = []; asInputs = []; asOutputs = []; asCur = asOutputs; fRc = True; asWords = sFlatSection.split(); for iWord in range(len(asWords) - 1, -1, -1): sWord = asWords[iWord]; # Check for array switchers. if sWord == '->': if asCur != asOutputs: fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,)); break; asCur = asInputs; elif sWord == '/': if asCur != asInputs: fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,)); break; asCur = asSelectors; else: asCur.insert(0, sWord); # # Validate and add selectors. # for sCond in asSelectors: sCondExp = TestSelector.kdPredicates.get(sCond, sCond); oSelector = None; for sOp in TestSelector.kasCompareOps: off = sCondExp.find(sOp); if off >= 0: sVariable = sCondExp[:off]; sValue = sCondExp[off + len(sOp):]; if sVariable in TestSelector.kdVariables: if sValue in TestSelector.kdVariables[sVariable]: oSelector = TestSelector(sVariable, sOp, sValue); else: self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)' % ( sTag, sValue, sCond, TestSelector.kdVariables[sVariable].keys(),)); else: self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)' % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),)); break; if oSelector is not None: for oExisting in oTest.aoSelectors: if oExisting.sVariable == oSelector.sVariable: self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)' % ( sTag, oSelector.sVariable, oExisting, oSelector,)); oTest.aoSelectors.append(oSelector); else: fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,)); # # Validate outputs and inputs, adding them to the test as we go along. # for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]: asValidFieldKinds = [ 'both', sDesc, ]; for sItem in asItems: oItem = None; for sOp in TestInOut.kasOperators: off = sItem.find(sOp); if off < 0: continue; sField = sItem[:off]; sValueType = sItem[off + len(sOp):]; if sField in TestInOut.kdFields \ and TestInOut.kdFields[sField][1] in asValidFieldKinds: asSplit = sValueType.split(':', 1); sValue = asSplit[0]; sType = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0]; if sType in TestInOut.kdTypes: oValid = TestInOut.kdTypes[sType].validate(sValue); if oValid is True: if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=': oItem = TestInOut(sField, sOp, sValue, sType); else: self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="' % ( sTag, sDesc, sItem, )); else: self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s' % ( sTag, sDesc, sValue, sItem, sType, oValid, )); else: self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)' % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),)); else: self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s' % ( sTag, sDesc, sField, sItem, ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items() if asVal[1] in asValidFieldKinds]),)); break; if oItem is not None: for oExisting in aoDst: if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp: self.errorComment(iTagLine, '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)' % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,)); aoDst.append(oItem); else: fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,)); # # . # if fRc: oInstr.aoTests.append(oTest); else: self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),)); self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s' % (sTag, asSelectors, asInputs, asOutputs,)); _ = iEndLine; return True; def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine): """ Numbered \@optest tag. Either \@optest42 or \@optest[42]. """ oInstr = self.ensureInstructionForOpTag(iTagLine); iTest = 0; if sTag[-1] == ']': iTest = int(sTag[8:-1]); else: iTest = int(sTag[7:]); if iTest != len(oInstr.aoTests): self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),)); return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine); def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@optestign | \@optestignore Value: This is a simple trick to ignore a test while debugging another. See also \@oponlytest. """ _ = sTag; _ = aasSections; _ = iTagLine; _ = iEndLine; return True; def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opcopytests Value: [..] Example: \@opcopytests add_Eb_Gb Trick to avoid duplicating tests for different encodings of the same operation. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten, validate and append the copy job to the instruction. We execute # them after parsing all the input so we can handle forward references. asToCopy = self.flattenAllSections(aasSections).split(); if not asToCopy: return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,)); for sToCopy in asToCopy: if sToCopy not in oInstr.asCopyTests: if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy): oInstr.asCopyTests.append(sToCopy); else: self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)' % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern)); else: self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,)); _ = iEndLine; return True; def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@oponlytest | \@oponly Value: none Only test instructions with this tag. This is a trick that is handy for singling out one or two new instructions or tests. See also \@optestignore. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Validate and add instruction to only test dictionary. sValue = self.flattenAllSections(aasSections).strip(); if sValue: return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue)); if oInstr not in g_aoOnlyTestInstructions: g_aoOnlyTestInstructions.append(oInstr); _ = iEndLine; return True; def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opxcpttype Value: [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF] Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7). """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten as a space separated list, split it up and validate the values. asTypes = self.flattenAllSections(aasSections).split(); if len(asTypes) != 1: return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,)); sType = asTypes[0]; if sType not in g_kdXcptTypes: return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)' % (sTag, sType, sorted(g_kdXcptTypes.keys()),)); # Set it. if oInstr.sXcptType is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)' % ( sTag, oInstr.sXcptType, sType,)); oInstr.sXcptType = sType; _ = iEndLine; return True; def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opfunction Value: This is for explicitly setting the IEM function name. Normally we pick this up from the FNIEMOP_XXX macro invocation after the description, or generate it from the mnemonic and operands. It it thought it maybe necessary to set it when specifying instructions which implementation isn't following immediately or aren't implemented yet. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sFunction = self.flattenAllSections(aasSections); if not self.oReFunctionName.match(sFunction): return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sFunction, self.oReFunctionName.pattern)); if oInstr.sFunction is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sFunction,)); oInstr.sFunction = sFunction; _ = iEndLine; return True; def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opstats Value: This is for explicitly setting the statistics name. Normally we pick this up from the IEMOP_MNEMONIC macro invocation, or generate it from the mnemonic and operands. It it thought it maybe necessary to set it when specifying instructions which implementation isn't following immediately or aren't implemented yet. """ oInstr = self.ensureInstructionForOpTag(iTagLine); # Flatten and validate the value. sStats = self.flattenAllSections(aasSections); if not self.oReStatsName.match(sStats): return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sStats, self.oReStatsName.pattern)); if oInstr.sStats is not None: return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sStats,)); oInstr.sStats = sStats; _ = iEndLine; return True; def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine): """ Tag: \@opdone Value: none Used to explictily flush the instructions that have been specified. """ sFlattened = self.flattenAllSections(aasSections); if sFlattened != '': return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sFlattened,)); _ = sTag; _ = iEndLine; return self.doneInstructions(); ## @} def parseComment(self): """ Parse the current comment (self.sComment). If it's a opcode specifiying comment, we reset the macro stuff. """ # # Reject if comment doesn't seem to contain anything interesting. # if self.sComment.find('Opcode') < 0 \ and self.sComment.find('@') < 0: return False; # # Split the comment into lines, removing leading asterisks and spaces. # Also remove leading and trailing empty lines. # asLines = self.sComment.split('\n'); for iLine, sLine in enumerate(asLines): asLines[iLine] = sLine.lstrip().lstrip('*').lstrip(); while asLines and not asLines[0]: self.iCommentLine += 1; asLines.pop(0); while asLines and not asLines[-1]: asLines.pop(len(asLines) - 1); # # Check for old style: Opcode 0x0f 0x12 # if asLines[0].startswith('Opcode '): self.parseCommentOldOpcode(asLines); # # Look for @op* tagged data. # cOpTags = 0; sFlatDefault = None; sCurTag = '@default'; iCurTagLine = 0; asCurSection = []; aasSections = [ asCurSection, ]; for iLine, sLine in enumerate(asLines): if not sLine.startswith('@'): if sLine: asCurSection.append(sLine); elif asCurSection: asCurSection = []; aasSections.append(asCurSection); else: # # Process the previous tag. # if not asCurSection and len(aasSections) > 1: aasSections.pop(-1); if sCurTag in self.dTagHandlers: self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine); cOpTags += 1; elif sCurTag.startswith('@op'): self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag)); elif sCurTag == '@default': sFlatDefault = self.flattenAllSections(aasSections); elif '@op' + sCurTag[1:] in self.dTagHandlers: self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag)); elif sCurTag in ['@encoding', '@opencoding']: self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,)); # # New tag. # asSplit = sLine.split(None, 1); sCurTag = asSplit[0].lower(); if len(asSplit) > 1: asCurSection = [asSplit[1],]; else: asCurSection = []; aasSections = [asCurSection, ]; iCurTagLine = iLine; # # Process the final tag. # if not asCurSection and len(aasSections) > 1: aasSections.pop(-1); if sCurTag in self.dTagHandlers: self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine); cOpTags += 1; elif sCurTag.startswith('@op'): self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag)); elif sCurTag == '@default': sFlatDefault = self.flattenAllSections(aasSections); # # Don't allow default text in blocks containing @op*. # if cOpTags > 0 and sFlatDefault: self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,)); return True; def parseMacroInvocation(self, sInvocation): """ Parses a macro invocation. Returns a tuple, first element is the offset following the macro invocation. The second element is a list of macro arguments, where the zero'th is the macro name. """ # First the name. offOpen = sInvocation.find('('); if offOpen <= 0: self.raiseError("macro invocation open parenthesis not found"); sName = sInvocation[:offOpen].strip(); if not self.oReMacroName.match(sName): return self.error("invalid macro name '%s'" % (sName,)); asRet = [sName, ]; # Arguments. iLine = self.iLine; cDepth = 1; off = offOpen + 1; offStart = off; chQuote = None; while cDepth > 0: if off >= len(sInvocation): if iLine >= len(self.asLines): self.error('macro invocation beyond end of file'); return (off, asRet); sInvocation += self.asLines[iLine]; iLine += 1; ch = sInvocation[off]; if chQuote: if ch == '\\' and off + 1 < len(sInvocation): off += 1; elif ch == chQuote: chQuote = None; elif ch in ('"', '\'',): chQuote = ch; elif ch in (',', ')',): if cDepth == 1: asRet.append(sInvocation[offStart:off].strip()); offStart = off + 1; if ch == ')': cDepth -= 1; elif ch == '(': cDepth += 1; off += 1; return (off, asRet); def findAndParseMacroInvocationEx(self, sCode, sMacro): """ Returns (len(sCode), None) if not found, parseMacroInvocation result if found. """ offHit = sCode.find(sMacro); if offHit >= 0 and sCode[offHit + len(sMacro):].strip()[0] == '(': offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]) return (offHit + offAfter, asRet); return (len(sCode), None); def findAndParseMacroInvocation(self, sCode, sMacro): """ Returns None if not found, arguments as per parseMacroInvocation if found. """ return self.findAndParseMacroInvocationEx(sCode, sMacro)[1]; def findAndParseFirstMacroInvocation(self, sCode, asMacro): """ Returns same as findAndParseMacroInvocation. """ for sMacro in asMacro: asRet = self.findAndParseMacroInvocation(sCode, sMacro); if asRet is not None: return asRet; return None; def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments sDisHints, sIemHints, asOperands): """ Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX, IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros. """ # # Some invocation checks. # if sUpper != sUpper.upper(): self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,)); if sLower != sLower.lower(): self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,)); if sUpper.lower() != sLower: self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,)); if not self.oReMnemonic.match(sLower): self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,)); # # Check if sIemHints tells us to not consider this macro invocation. # if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0: return True; # Apply to the last instruction only for now. if not self.aoCurInstrs: self.addInstruction(); oInstr = self.aoCurInstrs[-1]; if oInstr.iLineMnemonicMacro == -1: oInstr.iLineMnemonicMacro = self.iLine; else: self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction' % (sMacro, oInstr.iLineMnemonicMacro,)); # Mnemonic if oInstr.sMnemonic is None: oInstr.sMnemonic = sLower; elif oInstr.sMnemonic != sLower: self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,)); # Process operands. if len(oInstr.aoOperands) not in [0, len(asOperands)]: self.error('%s: number of operands given by @opN does not match macro: %s vs %s' % (sMacro, len(oInstr.aoOperands), len(asOperands),)); for iOperand, sType in enumerate(asOperands): sWhere = g_kdOpTypes.get(sType, [None, None])[1]; if sWhere is None: self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType)); if iOperand < len(oInstr.aoOperands): # error recovery. sWhere = oInstr.aoOperands[iOperand].sWhere; sType = oInstr.aoOperands[iOperand].sType; else: sWhere = 'reg'; sType = 'Gb'; if iOperand == len(oInstr.aoOperands): oInstr.aoOperands.append(Operand(sWhere, sType)) elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType: self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s' % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, oInstr.aoOperands[iOperand].sType, sWhere, sType,)); # Encoding. if sForm not in g_kdIemForms: self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,)); else: if oInstr.sEncoding is None: oInstr.sEncoding = g_kdIemForms[sForm][0]; elif g_kdIemForms[sForm][0] != oInstr.sEncoding: self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)' % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm)); # Check the parameter locations for the encoding. if g_kdIemForms[sForm][1] is not None: if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands): self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s' % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) )); else: for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]): if oInstr.aoOperands[iOperand].sWhere != sWhere: self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)' % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,)); sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4]; if (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \ or (sOpFormMatch in [ 'FIXED', ] and sForm.find(sOpFormMatch) < 0) \ or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \ or (sOpFormMatch == 'V' and ( not (sForm.find('VEX') > 0 or sForm.find('XOP')) \ or sForm.replace('VEX','').find('V') < 0) ): self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s' % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, )); if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands): for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)): if oInstr.aoOperands[iOperand].sType != 'FIXED' \ and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg': self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)' % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType, oInstr.aoOperands[iOperand].sWhere)); # Check @opcodesub if oInstr.sSubOpcode \ and g_kdIemForms[sForm][2] \ and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0: self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)' % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,)); # Stats. if not self.oReStatsName.match(sStats): self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,)); elif oInstr.sStats is None: oInstr.sStats = sStats; elif oInstr.sStats != sStats: self.error('%s: mismatching @opstats and a_Stats value: %s vs %s' % (sMacro, oInstr.sStats, sStats,)); # Process the hints (simply merge with @ophints w/o checking anything). for sHint in sDisHints.split('|'): sHint = sHint.strip(); if sHint.startswith('DISOPTYPE_'): sShortHint = sHint[len('DISOPTYPE_'):].lower(); if sShortHint in g_kdHints: oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed) else: self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,)); elif sHint != '0': self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,)); for sHint in sIemHints.split('|'): sHint = sHint.strip(); if sHint.startswith('IEMOPHINT_'): sShortHint = sHint[len('IEMOPHINT_'):].lower(); if sShortHint in g_kdHints: oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed) else: self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,)); elif sHint != '0': self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,)); _ = sAsm; return True; def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands): """ Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2, IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros. """ if not asOperands: return self.workerIemOpMnemonicEx(sMacro, sLower, sLower, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands); return self.workerIemOpMnemonicEx(sMacro, sLower + '_' + '_'.join(asOperands), sLower + ' ' + ','.join(asOperands), sForm, sUpper, sLower, sDisHints, sIemHints, asOperands); def checkCodeForMacro(self, sCode): """ Checks code for relevant macro invocation. """ # # Scan macro invocations. # if sCode.find('(') > 0: # Look for instruction decoder function definitions. ASSUME single line. asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'FNIEMOP_DEF', 'FNIEMOP_STUB', 'FNIEMOP_STUB_1', 'FNIEMOP_UD_STUB', 'FNIEMOP_UD_STUB_1' ]); if asArgs is not None: sFunction = asArgs[1]; if not self.aoCurInstrs: self.addInstruction(); for oInstr in self.aoCurInstrs: if oInstr.iLineFnIemOpMacro == -1: oInstr.iLineFnIemOpMacro = self.iLine; else: self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) ); self.setInstrunctionAttrib('sFunction', sFunction); self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True); self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True); if asArgs[0].find('STUB') > 0: self.doneInstructions(); return True; # IEMOP_HLP_DONE_VEX_DECODING_* asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'IEMOP_HLP_DONE_VEX_DECODING', 'IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ]); if asArgs is not None: sMacro = asArgs[0]; if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ): for oInstr in self.aoCurInstrs: if 'vex_l_zero' not in oInstr.dHints: if oInstr.iLineMnemonicMacro >= 0: self.errorOnLine(oInstr.iLineMnemonicMacro, 'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,)); oInstr.dHints['vex_l_zero'] = True; return True; # # IEMOP_MNEMONIC* # # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC'); if asArgs is not None: if len(self.aoCurInstrs) == 1: oInstr = self.aoCurInstrs[0]; if oInstr.sStats is None: oInstr.sStats = asArgs[1]; self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]); # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7], []); # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8], [asArgs[6],]); # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9], [asArgs[6], asArgs[7]]); # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9], asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]); # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, # a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX'); if asArgs is not None: self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10], asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]); # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []); # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]); # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7], [asArgs[4], asArgs[5],]); # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8], [asArgs[4], asArgs[5], asArgs[6],]); # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints) asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4'); if asArgs is not None: self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9], [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]); return False; def parse(self): """ Parses the given file. Returns number or errors. Raises exception on fatal trouble. """ #self.debug('Parsing %s' % (self.sSrcFile,)); while self.iLine < len(self.asLines): sLine = self.asLines[self.iLine]; self.iLine += 1; # We only look for comments, so only lines with a slash might possibly # influence the parser state. offSlash = sLine.find('/'); if offSlash >= 0: if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode: offLine = 0; while offLine < len(sLine): if self.iState == self.kiCode: offHit = sLine.find('/*', offLine); # only multiline comments for now. if offHit >= 0: self.checkCodeForMacro(sLine[offLine:offHit]); self.sComment = ''; self.iCommentLine = self.iLine; self.iState = self.kiCommentMulti; offLine = offHit + 2; else: self.checkCodeForMacro(sLine[offLine:]); offLine = len(sLine); elif self.iState == self.kiCommentMulti: offHit = sLine.find('*/', offLine); if offHit >= 0: self.sComment += sLine[offLine:offHit]; self.iState = self.kiCode; offLine = offHit + 2; self.parseComment(); else: self.sComment += sLine[offLine:]; offLine = len(sLine); else: assert False; # C++ line comment. elif offSlash > 0: self.checkCodeForMacro(sLine[:offSlash]); # No slash, but append the line if in multi-line comment. elif self.iState == self.kiCommentMulti: #self.debug('line %d: multi' % (self.iLine,)); self.sComment += sLine; # No slash, but check code line for relevant macro. elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0: #self.debug('line %d: macro' % (self.iLine,)); self.checkCodeForMacro(sLine); # If the line is a '}' in the first position, complete the instructions. elif self.iState == self.kiCode and sLine[0] == '}': #self.debug('line %d: }' % (self.iLine,)); self.doneInstructions(); # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3' # so we can check/add @oppfx info from it. elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine): self.parseFunctionTable(sLine); self.doneInstructions(); self.debug('%3s%% / %3s stubs out of %4s instructions in %s' % (self.cTotalStubs * 100 // self.cTotalInstr, self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),)); return self.printErrors(); def __parseFileByName(sSrcFile, sDefaultMap): """ Parses one source file for instruction specfications. """ # # Read sSrcFile into a line array. # try: oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with except Exception as oXcpt: raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,)); try: asLines = oFile.readlines(); except Exception as oXcpt: raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,)); finally: oFile.close(); # # Do the parsing. # try: cErrors = SimpleParser(sSrcFile, asLines, sDefaultMap).parse(); except ParserException as oXcpt: print(str(oXcpt)); raise; return cErrors; def __doTestCopying(): """ Executes the asCopyTests instructions. """ asErrors = []; for oDstInstr in g_aoAllInstructions: if oDstInstr.asCopyTests: for sSrcInstr in oDstInstr.asCopyTests: oSrcInstr = g_dAllInstructionsByStat.get(sSrcInstr, None); if oSrcInstr: aoSrcInstrs = [oSrcInstr,]; else: aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []); if aoSrcInstrs: for oSrcInstr in aoSrcInstrs: if oSrcInstr != oDstInstr: oDstInstr.aoTests.extend(oSrcInstr.aoTests); else: asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n' % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr)); else: asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n' % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr)); if asErrors: sys.stderr.write(u''.join(asErrors)); return len(asErrors); def __applyOnlyTest(): """ If g_aoOnlyTestInstructions contains any instructions, drop aoTests from all other instructions so that only these get tested. """ if g_aoOnlyTestInstructions: for oInstr in g_aoAllInstructions: if oInstr.aoTests: if oInstr not in g_aoOnlyTestInstructions: oInstr.aoTests = []; return 0; def __parseAll(): """ Parses all the IEMAllInstruction*.cpp.h files. Raises exception on failure. """ sSrcDir = os.path.dirname(os.path.abspath(__file__)); cErrors = 0; for sDefaultMap, sName in [ ( 'one', 'IEMAllInstructionsOneByte.cpp.h'), ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'), ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'), ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'), ( 'vexmap1', 'IEMAllInstructionsVexMap1.cpp.h'), ( 'vexmap2', 'IEMAllInstructionsVexMap2.cpp.h'), ( 'vexmap3', 'IEMAllInstructionsVexMap3.cpp.h'), ( '3dnow', 'IEMAllInstructions3DNow.cpp.h'), ]: cErrors += __parseFileByName(os.path.join(sSrcDir, sName), sDefaultMap); cErrors += __doTestCopying(); cErrors += __applyOnlyTest(); # Total stub stats: cTotalStubs = 0; for oInstr in g_aoAllInstructions: cTotalStubs += oInstr.fStub; print('debug: %3s%% / %3s stubs out of %4s instructions in total' % (cTotalStubs * 100 // len(g_aoAllInstructions), cTotalStubs, len(g_aoAllInstructions),)); if cErrors != 0: #raise Exception('%d parse errors' % (cErrors,)); sys.exit(1); return True; __parseAll(); # # Generators (may perhaps move later). # def __formatDisassemblerTableEntry(oInstr): """ """ sMacro = 'OP'; cMaxOperands = 3; if len(oInstr.aoOperands) > 3: sMacro = 'OPVEX' cMaxOperands = 4; assert len(oInstr.aoOperands) <= cMaxOperands; # # Format string. # sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,); for iOperand, oOperand in enumerate(oInstr.aoOperands): sTmp += ' ' if iOperand == 0 else ','; if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later. sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later. else: sTmp += g_kdOpTypes[oOperand.sType][2]; sTmp += '",'; asColumns = [ sTmp, ]; # # Decoders. # iStart = len(asColumns); if oInstr.sEncoding is None: pass; elif oInstr.sEncoding == 'ModR/M': # ASSUME the first operand is using the ModR/M encoding assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(); asColumns.append('IDX_ParseModRM,'); elif oInstr.sEncoding in [ 'prefix', ]: for oOperand in oInstr.aoOperands: asColumns.append('0,'); elif oInstr.sEncoding in [ 'fixed', 'VEX.fixed' ]: pass; elif oInstr.sEncoding == 'VEX.ModR/M': asColumns.append('IDX_ParseModRM,'); elif oInstr.sEncoding == 'vex2': asColumns.append('IDX_ParseVex2b,') elif oInstr.sEncoding == 'vex3': asColumns.append('IDX_ParseVex3b,') elif oInstr.sEncoding in g_dInstructionMaps: asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ','); else: ## @todo #IDX_ParseTwoByteEsc, #IDX_ParseGrp1, #IDX_ParseShiftGrp2, #IDX_ParseGrp3, #IDX_ParseGrp4, #IDX_ParseGrp5, #IDX_Parse3DNow, #IDX_ParseGrp6, #IDX_ParseGrp7, #IDX_ParseGrp8, #IDX_ParseGrp9, #IDX_ParseGrp10, #IDX_ParseGrp12, #IDX_ParseGrp13, #IDX_ParseGrp14, #IDX_ParseGrp15, #IDX_ParseGrp16, #IDX_ParseThreeByteEsc4, #IDX_ParseThreeByteEsc5, #IDX_ParseModFence, #IDX_ParseEscFP, #IDX_ParseNopPause, #IDX_ParseInvOpModRM, assert False, str(oInstr); # Check for immediates and stuff in the remaining operands. for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]: sIdx = g_kdOpTypes[oOperand.sType][0]; #if sIdx != 'IDX_UseModRM': asColumns.append(sIdx + ','); asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart))); # # Opcode and operands. # assert oInstr.sDisEnum, str(oInstr); asColumns.append(oInstr.sDisEnum + ','); iStart = len(asColumns) for oOperand in oInstr.aoOperands: asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ','); asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart))); # # Flags. # sTmp = ''; for sHint in sorted(oInstr.dHints.keys()): sDefine = g_kdHints[sHint]; if sDefine.startswith('DISOPTYPE_'): if sTmp: sTmp += ' | ' + sDefine; else: sTmp += sDefine; if sTmp: sTmp += '),'; else: sTmp += '0),'; asColumns.append(sTmp); # # Format the columns into a line. # aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199]; sLine = ''; for i, s in enumerate(asColumns): if len(sLine) < aoffColumns[i]: sLine += ' ' * (aoffColumns[i] - len(sLine)); else: sLine += ' '; sLine += s; # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE, # DISOPTYPE_HARMLESS), # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \ # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype } return sLine; def __checkIfShortTable(aoTableOrdered, oMap): """ Returns (iInstr, cInstructions, fShortTable) """ # Determin how much we can trim off. cInstructions = len(aoTableOrdered); while cInstructions > 0 and aoTableOrdered[cInstructions - 1] is None: cInstructions -= 1; iInstr = 0; while iInstr < cInstructions and aoTableOrdered[iInstr] is None: iInstr += 1; # If we can save more than 30%, we go for the short table version. if iInstr + len(aoTableOrdered) - cInstructions >= len(aoTableOrdered) // 30: return (iInstr, cInstructions, True); _ = oMap; # Use this for overriding. # Output the full table. return (0, len(aoTableOrdered), False); def generateDisassemblerTables(oDstFile = sys.stdout): """ Generates disassembler tables. """ # # The disassembler uses a slightly different table layout to save space, # since several of the prefix varia # aoDisasmMaps = []; for sName, oMap in sorted(iter(g_dInstructionMaps.items()), key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)): if oMap.sSelector != 'byte+pfx': aoDisasmMaps.append(oMap); else: # Split the map by prefix. aoDisasmMaps.append(oMap.copy(oMap.sName, 'none')); aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66')); aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3')); aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2')); # # Dump each map. # asHeaderLines = []; print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),)); for oMap in aoDisasmMaps: sName = oMap.sName; if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment. # # Get the instructions for the map and see if we can do a short version or not. # aoTableOrder = oMap.getInstructionsInTableOrder(); cEntriesPerByte = oMap.getEntriesPerByte(); (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap); # # Output the table start. # Note! Short tables are static and only accessible via the map range record. # asLines = []; asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */' % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), )); if fShortTable: asLines.append('%sconst DISOPCODE %s[] =' % ('static ' if fShortTable else '', oMap.getDisasTableName(),)); else: asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,)); asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,)); asLines.append('{'); if fShortTable and (iInstrStart & ((0x10 * cEntriesPerByte) - 1)) != 0: asLines.append(' /* %#04x: */' % (iInstrStart,)); # # Output the instructions. # iInstr = iInstrStart; while iInstr < iInstrEnd: oInstr = aoTableOrder[iInstr]; if (iInstr & ((0x10 * cEntriesPerByte) - 1)) == 0: if iInstr != iInstrStart: asLines.append(''); asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,)); if oInstr is None: # Invalid. Optimize blocks of invalid instructions. cInvalidInstrs = 1; while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None: cInvalidInstrs += 1; if (iInstr & (0x10 * cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte: asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,)); iInstr += 0x10 * cEntriesPerByte - 1; elif cEntriesPerByte > 1: if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte: asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,)); iInstr += 3; else: asLines.append(' /* %#04x/%d */ INVALID_OPCODE,' % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte)); else: asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr)); elif isinstance(oInstr, list): if len(oInstr) != 0: asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */' % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),)); else: asLines.append(__formatDisassemblerTableEntry(oInstr)); else: asLines.append(__formatDisassemblerTableEntry(oInstr)); iInstr += 1; if iInstrStart >= iInstrEnd: asLines.append(' /* dummy */ INVALID_OPCODE'); asLines.append('};'); asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,)); # # We always emit a map range record, assuming the linker will eliminate the unnecessary ones. # asHeaderLines.append('extern const DISOPMAPDESC %sRange;' % (oMap.getDisasRangeName())); asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };' % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),)); # # Write out the lines. # oDstFile.write('\n'.join(asLines)); oDstFile.write('\n'); oDstFile.write('\n'); #break; #for now if __name__ == '__main__': generateDisassemblerTables();