diff options
Diffstat (limited to 'client/asmjit/x86/x86opcode_p.h')
| -rw-r--r-- | client/asmjit/x86/x86opcode_p.h | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/client/asmjit/x86/x86opcode_p.h b/client/asmjit/x86/x86opcode_p.h new file mode 100644 index 0000000..5f936bf --- /dev/null +++ b/client/asmjit/x86/x86opcode_p.h @@ -0,0 +1,478 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_X86_X86OPCODE_P_H_INCLUDED +#define ASMJIT_X86_X86OPCODE_P_H_INCLUDED + +#include "../x86/x86globals.h" + +ASMJIT_BEGIN_SUB_NAMESPACE(x86) + +//! \cond INTERNAL +//! \addtogroup asmjit_x86 +//! \{ + +// ============================================================================ +// [asmjit::x86::Opcode] +// ============================================================================ + +//! Helper class to store and manipulate X86 opcodes. +//! +//! The first 8 least significant bits describe the opcode byte as defined in +//! ISA manuals, all other bits describe other properties like prefixes, see +//! `Opcode::Bits` for more information. +struct Opcode { + uint32_t v; //!< Raw 32-bit value: opcode byte in bits [7:0], flag fields (see `Bits`) above it. + + //! 
Describes a meaning of all bits of AsmJit's 32-bit opcode value. + //! + //! This schema is AsmJit specific and has been designed to allow encoding of + //! all X86 instructions available. X86, MMX, and SSE+ instructions always use + //! `MM` and `PP` fields, which are encoded to corresponding prefixes needed + //! by X86 or SIMD instructions. AVX+ instructions embed `MMMMM` and `PP` fields + //! in a VEX prefix, and AVX-512 instructions embed `MM` and `PP` in EVEX prefix. + //! + //! The instruction opcode definition uses 1 or 2 bytes as an opcode value. 1 + //! byte is needed by most of the instructions, 2 bytes are only used by legacy + //! X87-FPU instructions. This means that a second byte is free to be used by + //! instructions encoded by using VEX and/or EVEX prefix. + //! + //! The fields description: + //! + //! - `MM` field is used to encode prefixes needed by the instruction or as + //! a part of VEX/EVEX prefix. Described as `mm` and `mmmmm` in instruction + //! manuals. + //! + //! NOTE: Since `MM` field is defined as `mmmmm` (5 bits), but only 2 least + //! significant bits are used by VEX and EVEX prefixes, and an additional 4th + //! bit is used by XOP prefix, AsmJit uses the 3rd and 5th bit for its own + //! purposes. These bits will probably never be used in future encodings as + //! AVX512 uses only `000mm` from `mmmmm`. + //! + //! - `PP` field is used to encode prefixes needed by the instruction or as a + //! part of VEX/EVEX prefix. Described as `pp` in instruction manuals. + //! + //! - `LL` field is used exclusively by AVX+ and AVX512+ instruction sets. It + //! describes vector size, which is `L.128` for XMM register, `L.256` for + //! YMM register, and `L.512` for ZMM register. The `LL` field is omitted + //! in case that instruction supports multiple vector lengths, however, if the + //! instruction requires specific `L` value it must be specified as a part of + //! the opcode. + //! + //! 
NOTE: `LL` having value `11` is not defined yet. + //! + //! - `W` field is the most complicated. It was added by 64-bit architecture + //! to promote default operation width (instructions that perform 32-bit + //! operation by default require to override the width to 64-bit explicitly). + //! There is nothing wrong with this, however, some instructions introduced + //! implicit `W` override, for example a `cdqe` instruction is basically a + //! `cwde` instruction with overridden `W` (set to 1). There are some others + //! in the base X86 instruction set. More recent instruction sets started + //! using `W` field more often: + //! + //! - AVX instructions started using `W` field as an extended opcode for FMA, + //! GATHER, PERM, and other instructions. It also uses `W` field to override + //! the default operation width in instructions like `vmovq`. + //! + //! - AVX-512 instructions started using `W` field as an extended opcode for + //! all new instructions. This wouldn't have been an issue if the `W` field + //! of AVX-512 had matched AVX, but this is not always the case. + //! + //! - `O` field is an extended opcode field (3 bits) embedded in ModR/M BYTE. + //! + //! - `CDSHL` and `CDTT` fields describe 'compressed-displacement'. `CDSHL` is + //! defined for each instruction that is AVX-512 encodable (EVEX) and contains + //! a base N shift (base shift to perform the calculation). The `CDTT` field + //! is derived from instruction specification and describes additional shift + //! to calculate the final `CDSHL` that will be used in SIB byte. + //! + //! \note Don't reorder any fields here, the shifts and masks were defined + //! carefully to make encoding of X86 instructions fast, especially to construct + //! REX, VEX, and EVEX prefixes in the most efficient way. Changing values defined + //! by these enums may cause AsmJit to emit invalid binary representations of + //! instructions passed to `x86::Assembler::_emit`. 
+ enum Bits : uint32_t { + // MM & VEX & EVEX & XOP + // --------------------- + // + // Two meanings: + // * Part of a legacy opcode (prefixes emitted before the main opcode byte). + // * `MMMMM` field in VEX|EVEX|XOP instruction. + // + // AVX reserves 5 bits for `MMMMM` field, however AVX instructions only use + // 2 bits and XOP 3 bits. AVX-512 shrinks `MMMMM` field into `MM` so it's + // safe to assume that bits [4:2] of `MM` field won't be used in future + // extensions, which will most probably use EVEX encoding. AsmJit divides + // MM field into this layout: + // + // [1:0] - Used to describe 0F, 0F38 and 0F3A legacy prefix bytes and + // 2 bits of MM field. + // [2] - Used to force 3-BYTE VEX prefix, but then cleared to zero before + // the prefix is emitted. This bit is not used by any instruction + // so it can be used for any purpose by AsmJit. Also, this bit is + // used as an extension to `MM` field describing 0F|0F38|0F3A to also + // describe 0F01 as used by some legacy instructions (instructions + // not using VEX/EVEX prefix). + // [3] - Required by XOP instructions, so we use this bit also to indicate + // that this is a XOP opcode. Bit [4] is `kMM_ForceEvex` (0x10). + kMM_Shift = 8, + kMM_Mask = 0x1Fu << kMM_Shift, + kMM_00 = 0x00u << kMM_Shift, + kMM_0F = 0x01u << kMM_Shift, + kMM_0F38 = 0x02u << kMM_Shift, + kMM_0F3A = 0x03u << kMM_Shift, // Described also as XOP.M3 in AMD manuals. + kMM_0F01 = 0x04u << kMM_Shift, // AsmJit way to describe 0F01 (never VEX/EVEX). + + // `XOP` field is only used to force XOP prefix instead of VEX3 prefix. We + // know that only XOP encoding uses bit 0b1000 of MM field and that no VEX + // and EVEX instruction uses such bit, so we can use this bit to force XOP + // prefix to be emitted instead of VEX3 prefix. See `x86VEXPrefix` defined + // in `x86assembler.cpp`. + kMM_XOP08 = 0x08u << kMM_Shift, // XOP.M8. + kMM_XOP09 = 0x09u << kMM_Shift, // XOP.M9. + kMM_XOP0A = 0x0Au << kMM_Shift, // XOP.MA. 
+ + kMM_IsXOP_Shift= kMM_Shift + 3, + kMM_IsXOP = kMM_XOP08, + + // NOTE: Force VEX3 allows to force to emit VEX3 instead of VEX2 in some + // cases (similar to forcing REX prefix). Force EVEX will force emitting + // EVEX prefix instead of VEX2|VEX3. EVEX-only instructions will have + // ForceEvex always set, however, instructions that can be encoded by + // either VEX or EVEX prefix should not have ForceEvex set. + + kMM_ForceVex3 = 0x04u << kMM_Shift, // Force 3-BYTE VEX prefix. + kMM_ForceEvex = 0x10u << kMM_Shift, // Force 4-BYTE EVEX prefix. + + // FPU_2B - Second-Byte of the Opcode used by FPU + // ---------------------------------------------- + // + // Second byte opcode. This BYTE is ONLY used by FPU instructions and + // collides with 3 bits from `MM` and 5 bits from 'CDSHL' and 'CDTT'. + // It's fine as FPU and AVX512 flags are never used at the same time. + kFPU_2B_Shift = 10, + kFPU_2B_Mask = 0xFF << kFPU_2B_Shift, + + // CDSHL & CDTT + // ------------ + // + // Compressed displacement bits. + // + // Each opcode defines the base size (N) shift: + // [0]: BYTE (1 byte). + // [1]: WORD (2 bytes). + // [2]: DWORD (4 bytes - float/int32). + // [3]: QWORD (8 bytes - double/int64). + // [4]: OWORD (16 bytes - used by FV|FVM|M128). + // + // Which is then scaled by the instruction's TT (TupleType) into possible: + // [5]: YWORD (32 bytes) + // [6]: ZWORD (64 bytes) + // + // These bits are then adjusted before calling EmitModSib or EmitModVSib. + kCDSHL_Shift = 13, + kCDSHL_Mask = 0x7u << kCDSHL_Shift, + + kCDSHL__ = 0x0u << kCDSHL_Shift, // Base element size not used. + kCDSHL_0 = 0x0u << kCDSHL_Shift, // N << 0. + kCDSHL_1 = 0x1u << kCDSHL_Shift, // N << 1. + kCDSHL_2 = 0x2u << kCDSHL_Shift, // N << 2. + kCDSHL_3 = 0x3u << kCDSHL_Shift, // N << 3. + kCDSHL_4 = 0x4u << kCDSHL_Shift, // N << 4. + kCDSHL_5 = 0x5u << kCDSHL_Shift, // N << 5. + + // Compressed displacement tuple-type (specific to AsmJit). 
+ // + // Since we store the base offset independently of CDTT we can simplify the + // number of 'TUPLE_TYPE' groups significantly and just handle special cases. + kCDTT_Shift = 16, + kCDTT_Mask = 0x3u << kCDTT_Shift, + kCDTT_None = 0x0u << kCDTT_Shift, // Does nothing. + kCDTT_ByLL = 0x1u << kCDTT_Shift, // Scales by LL (1x 2x 4x). + kCDTT_T1W = 0x2u << kCDTT_Shift, // Used to add 'W' to the shift. + kCDTT_DUP = 0x3u << kCDTT_Shift, // Special 'VMOVDDUP' case. + + // Aliases that match names used in instruction manuals. + kCDTT__ = kCDTT_None, + kCDTT_FV = kCDTT_ByLL, + kCDTT_HV = kCDTT_ByLL, + kCDTT_FVM = kCDTT_ByLL, + kCDTT_T1S = kCDTT_None, + kCDTT_T1F = kCDTT_None, + kCDTT_T1_4X = kCDTT_None, + kCDTT_T2 = kCDTT_None, + kCDTT_T4 = kCDTT_None, + kCDTT_T8 = kCDTT_None, + kCDTT_HVM = kCDTT_ByLL, + kCDTT_QVM = kCDTT_ByLL, + kCDTT_OVM = kCDTT_ByLL, + kCDTT_128 = kCDTT_None, + + kCDTT_T4X = kCDTT_T1_4X, // Alias to have only 3 letters. + + // `O` Field in ModR/M (??:xxx:???) + // -------------------------------- + + kModO_Shift = 18, + kModO_Mask = 0x7u << kModO_Shift, + + kModO__ = 0x0u, + kModO_0 = 0x0u << kModO_Shift, + kModO_1 = 0x1u << kModO_Shift, + kModO_2 = 0x2u << kModO_Shift, + kModO_3 = 0x3u << kModO_Shift, + kModO_4 = 0x4u << kModO_Shift, + kModO_5 = 0x5u << kModO_Shift, + kModO_6 = 0x6u << kModO_Shift, + kModO_7 = 0x7u << kModO_Shift, + + // `RM` Field in ModR/M (??:???:xxx) + // --------------------------------- + // + // Second data field used by ModR/M byte. This is only used by few + // instructions that use OPCODE+MOD/RM where both values in Mod/RM + // are part of the opcode. NOTE(review): with kModRM_Shift == 10 this field occupies bits 10..12, the same bits as the upper three `MM` bits and the start of FPU_2B - presumably never combined, confirm. 
+ + kModRM_Shift = 10, + kModRM_Mask = 0x7u << kModRM_Shift, + + kModRM__ = 0x0u, + kModRM_0 = 0x0u << kModRM_Shift, + kModRM_1 = 0x1u << kModRM_Shift, + kModRM_2 = 0x2u << kModRM_Shift, + kModRM_3 = 0x3u << kModRM_Shift, + kModRM_4 = 0x4u << kModRM_Shift, + kModRM_5 = 0x5u << kModRM_Shift, + kModRM_6 = 0x6u << kModRM_Shift, + kModRM_7 = 0x7u << kModRM_Shift, + + // `PP` Field + // ---------- + // + // These fields are stored deliberately right after each other as it makes + // it easier to construct VEX prefix from the opcode value stored in the + // instruction database. + // + // Two meanings: + // * "PP" field in AVX/XOP/AVX-512 instruction. + // * Mandatory Prefix in legacy encoding. + // + // AVX reserves 2 bits for `PP` field, but AsmJit extends the storage by 1 + // more bit that is used to emit 9B prefix for some X87-FPU instructions. + + kPP_Shift = 21, + kPP_VEXMask = 0x03u << kPP_Shift, // PP field mask used by VEX/EVEX. + kPP_FPUMask = 0x07u << kPP_Shift, // Mask used by EMIT_PP, also includes '0x9B'. + kPP_00 = 0x00u << kPP_Shift, + kPP_66 = 0x01u << kPP_Shift, + kPP_F3 = 0x02u << kPP_Shift, + kPP_F2 = 0x03u << kPP_Shift, + + kPP_9B = 0x07u << kPP_Shift, // AsmJit specific to emit FPU's '9B' byte. + + // REX|VEX|EVEX B|X|R|W Bits + // ------------------------- + // + // NOTE: REX.[B|X|R] are never stored within the opcode itself, they are + // reserved by AsmJit and are added dynamically to the opcode to represent + // [REX|VEX|EVEX].[B|X|R] bits. REX.W can be stored in DB as it's sometimes + // part of the opcode itself. + + // These must be binary compatible with instruction options. + kREX_Shift = 24, + kREX_Mask = 0x0Fu << kREX_Shift, + kB = 0x01u << kREX_Shift, // Never stored in DB, used by encoder. + kX = 0x02u << kREX_Shift, // Never stored in DB, used by encoder. + kR = 0x04u << kREX_Shift, // Never stored in DB, used by encoder. + kW = 0x08u << kREX_Shift, + kW_Shift = kREX_Shift + 3, + + kW__ = 0u << kW_Shift, // REX.W/VEX.W is unspecified. 
+ kW_x = 0u << kW_Shift, // REX.W/VEX.W is based on instruction operands. + kW_I = 0u << kW_Shift, // REX.W/VEX.W is ignored (WIG). + kW_0 = 0u << kW_Shift, // REX.W/VEX.W is 0 (W0). + kW_1 = 1u << kW_Shift, // REX.W/VEX.W is 1 (W1). + + // EVEX.W Field + // ------------ + // + // `W` field used by EVEX instruction encoding. + + kEvex_W_Shift = 28, + kEvex_W_Mask = 1u << kEvex_W_Shift, + + kEvex_W__ = 0u << kEvex_W_Shift, // EVEX.W is unspecified (not EVEX instruction). + kEvex_W_x = 0u << kEvex_W_Shift, // EVEX.W is based on instruction operands. + kEvex_W_I = 0u << kEvex_W_Shift, // EVEX.W is ignored (WIG). + kEvex_W_0 = 0u << kEvex_W_Shift, // EVEX.W is 0 (W0). + kEvex_W_1 = 1u << kEvex_W_Shift, // EVEX.W is 1 (W1). + + // `L` or `LL` field in AVX/XOP/AVX-512 + // ------------------------------------ + // + // VEX/XOP prefix can only use the first bit `L.128` or `L.256`. EVEX + // prefix makes it possible to use also `L.512`. + // + // If the instruction set manual describes an instruction by `LIG` it means + // that the `L` field is ignored and AsmJit defaults to `0` in such case. + kLL_Shift = 29, + kLL_Mask = 0x3u << kLL_Shift, + + kLL__ = 0x0u << kLL_Shift, // LL is unspecified. + kLL_x = 0x0u << kLL_Shift, // LL is based on instruction operands. + kLL_I = 0x0u << kLL_Shift, // LL is ignored (LIG). + kLL_0 = 0x0u << kLL_Shift, // LL is 0 (L.128). + kLL_1 = 0x1u << kLL_Shift, // LL is 1 (L.256). + kLL_2 = 0x2u << kLL_Shift, // LL is 2 (L.512). + + // Opcode Combinations + // ------------------- + + k0 = 0, // '__' (no prefix, used internally). + k000000 = kPP_00 | kMM_00, // '__' (no prefix, to be the same width as others). + k000F00 = kPP_00 | kMM_0F, // '0F' + k000F01 = kPP_00 | kMM_0F01, // '0F01' + k000F0F = kPP_00 | kMM_0F, // '0F0F' - 3DNOW, equal to 0x0F, must have special encoding to take effect. 
+ k000F38 = kPP_00 | kMM_0F38, // '0F38' + k000F3A = kPP_00 | kMM_0F3A, // '0F3A' + k660000 = kPP_66 | kMM_00, // '66' + k660F00 = kPP_66 | kMM_0F, // '660F' + k660F01 = kPP_66 | kMM_0F01, // '660F01' + k660F38 = kPP_66 | kMM_0F38, // '660F38' + k660F3A = kPP_66 | kMM_0F3A, // '660F3A' + kF20000 = kPP_F2 | kMM_00, // 'F2' + kF20F00 = kPP_F2 | kMM_0F, // 'F20F' + kF20F01 = kPP_F2 | kMM_0F01, // 'F20F01' + kF20F38 = kPP_F2 | kMM_0F38, // 'F20F38' + kF20F3A = kPP_F2 | kMM_0F3A, // 'F20F3A' + kF30000 = kPP_F3 | kMM_00, // 'F3' + kF30F00 = kPP_F3 | kMM_0F, // 'F30F' + kF30F01 = kPP_F3 | kMM_0F01, // 'F30F01' + kF30F38 = kPP_F3 | kMM_0F38, // 'F30F38' + kF30F3A = kPP_F3 | kMM_0F3A, // 'F30F3A' + kFPU_00 = kPP_00 | kMM_00, // '__' (FPU) + kFPU_9B = kPP_9B | kMM_00, // '9B' (FPU) + kXOP_M8 = kPP_00 | kMM_XOP08, // 'M8' (XOP) + kXOP_M9 = kPP_00 | kMM_XOP09, // 'M9' (XOP) + kXOP_MA = kPP_00 | kMM_XOP0A // 'MA' (XOP) + }; + + // -------------------------------------------------------------------------- + // [Opcode Builder] + // -------------------------------------------------------------------------- + + ASMJIT_INLINE uint32_t get() const noexcept { return v; } // Raw 32-bit value. + + ASMJIT_INLINE bool hasW() const noexcept { return (v & kW) != 0; } // True if the `kW` bit is set. + ASMJIT_INLINE bool has66h() const noexcept { return (v & kPP_66) != 0; } // NOTE(review): tests only the low PP bit, so it is also true for kPP_F2 (0x03) and kPP_9B (0x07). + + ASMJIT_INLINE Opcode& add(uint32_t x) noexcept { return operator+=(x); } // Arithmetic `+=`, not OR (unlike add66h/addW below). + + ASMJIT_INLINE Opcode& add66h() noexcept { return operator|=(kPP_66); } // ORs in kPP_66. + template<typename T> + ASMJIT_INLINE Opcode& add66hIf(T exp) noexcept { return operator|=(uint32_t(exp) << kPP_Shift); } // `exp` is shifted into PP as-is: 1 yields kPP_66, 0 is a no-op. + template<typename T> + ASMJIT_INLINE Opcode& add66hBySize(T size) noexcept { return add66hIf(size == 2); } // 66H only when size == 2. 
+ + ASMJIT_INLINE Opcode& addW() noexcept { return operator|=(kW); } // ORs in kW. + template<typename T> + ASMJIT_INLINE Opcode& addWIf(T exp) noexcept { return operator|=(uint32_t(exp) << kW_Shift); } // `exp` of 1 yields kW, 0 is a no-op. + template<typename T> + ASMJIT_INLINE Opcode& addWBySize(T size) noexcept { return addWIf(size == 8); } // W only when size == 8. + + 
template<typename T> + ASMJIT_INLINE Opcode& addPrefixBySize(T size) noexcept { + static const uint32_t mask[16] = { + 0, // #0 + 0, // #1 -> nothing (already handled or not possible) + kPP_66, // #2 -> 66H + 0, // #3 + 0, // #4 -> nothing + 0, // #5 + 0, // #6 + 0, // #7 + kW // #8 -> REX.W (entries #9..#15 are left zero-initialized) + }; + return operator|=(mask[size & 0xF]); // Index is masked to 0..15, so the lookup stays inside the table. + } + + template<typename T> + ASMJIT_INLINE Opcode& addArithBySize(T size) noexcept { + static const uint32_t mask[16] = { + 0, // #0 + 0, // #1 -> nothing + 1 | kPP_66, // #2 -> NOT_BYTE_OP(1) and 66H + 0, // #3 + 1, // #4 -> NOT_BYTE_OP(1) + 0, // #5 + 0, // #6 + 0, // #7 + 1 | kW // #8 -> NOT_BYTE_OP(1) and REX.W (entries #9..#15 are left zero-initialized) + }; + return operator|=(mask[size & 0xF]); // The `1` entries set bit 0 of the opcode byte itself. + } + + //! Extract `O` field (R) from the opcode (specified as /0..7 in instruction manuals). + ASMJIT_INLINE uint32_t extractModO() const noexcept { + return (v >> kModO_Shift) & 0x07; + } + + //! Extract `RM` field (RM) from the opcode (usually specified as another opcode value). + ASMJIT_INLINE uint32_t extractModRM() const noexcept { + return (v >> kModRM_Shift) & 0x07; + } + + //! Extract `REX` prefix from opcode combined with `options`. + ASMJIT_INLINE uint32_t extractRex(uint32_t options) const noexcept { + // kREX was designed in a way that when shifted there will be no bits + // set except REX.[B|X|R|W]. The returned value forms a real REX prefix byte. + // This case should be unit-tested as well. NOTE(review): no mask is applied, so bits 28..31 of `v | options` (kEvex_W, kLL) also reach the result - presumably masked by the caller, confirm. 
+ return (v | options) >> kREX_Shift; + } + + ASMJIT_INLINE uint32_t extractLLMM(uint32_t options) const noexcept { + uint32_t x = v & (kLL_Mask | kMM_Mask); + uint32_t y = options & (Inst::kOptionVex3 | Inst::kOptionEvex); + return (x | y) >> kMM_Shift; + } + + ASMJIT_INLINE Opcode& operator=(uint32_t x) noexcept { v = x; return *this; } + ASMJIT_INLINE Opcode& operator+=(uint32_t x) noexcept { v += x; return *this; } + ASMJIT_INLINE Opcode& operator-=(uint32_t x) noexcept { v -= x; return *this; } + ASMJIT_INLINE Opcode& operator&=(uint32_t x) noexcept { v &= x; return *this; } + ASMJIT_INLINE Opcode& operator|=(uint32_t x) noexcept { v |= x; return *this; } + ASMJIT_INLINE Opcode& operator^=(uint32_t x) noexcept { v ^= x; return *this; } + + ASMJIT_INLINE uint32_t operator&(uint32_t x) const noexcept { return v & x; } + ASMJIT_INLINE uint32_t operator|(uint32_t x) const noexcept { return v | x; } + ASMJIT_INLINE uint32_t operator^(uint32_t x) const noexcept { return v ^ x; } + ASMJIT_INLINE uint32_t operator<<(uint32_t x) const noexcept { return v << x; } + ASMJIT_INLINE uint32_t operator>>(uint32_t x) const noexcept { return v >> x; } +}; + +//! \} +//! \endcond + +ASMJIT_END_SUB_NAMESPACE + +#endif // ASMJIT_X86_X86OPCODE_P_H_INCLUDED |