path: root/client/asmjit/x86/x86internal.cpp
author    auth12 <[email protected]>  2020-07-19 11:57:04 -0700
committer GitHub <[email protected]>  2020-07-19 11:57:04 -0700
commit    1bae439a35a3aadca6772716aaeea8c8a0991114 (patch)
tree      f8eab7a7bae237ad697feecfae26b17bab91b16e /client/asmjit/x86/x86internal.cpp
parent    More placeholders and general plan. (diff)
parent    Merge branch 'master' into windows (diff)
Merge pull request #1 from auth12/windows
Windows
Diffstat (limited to 'client/asmjit/x86/x86internal.cpp')
-rw-r--r--  client/asmjit/x86/x86internal.cpp  |  1733
1 file changed, 1733 insertions, 0 deletions
diff --git a/client/asmjit/x86/x86internal.cpp b/client/asmjit/x86/x86internal.cpp
new file mode 100644
index 0000000..062525f
--- /dev/null
+++ b/client/asmjit/x86/x86internal.cpp
@@ -0,0 +1,1733 @@
+// AsmJit - Machine code generation for C++
+//
+// * Official AsmJit Home Page: https://asmjit.com
+// * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#include "../core/api-build_p.h"
+#ifdef ASMJIT_BUILD_X86
+
+#include "../core/formatter.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../x86/x86internal_p.h"
+
+// Can be used for debugging...
+// #define ASMJIT_DUMP_ARGS_ASSIGNMENT
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// ============================================================================
+// [asmjit::X86Internal - Helpers]
+// ============================================================================
+
+static ASMJIT_INLINE uint32_t x86GetXmmMovInst(const FuncFrame& frame) {
+ bool avx = frame.isAvxEnabled();
+ bool aligned = frame.hasAlignedVecSR();
+
+ return aligned ? (avx ? Inst::kIdVmovaps : Inst::kIdMovaps)
+ : (avx ? Inst::kIdVmovups : Inst::kIdMovups);
+}
+
+static ASMJIT_INLINE uint32_t x86VecTypeIdToRegType(uint32_t typeId) noexcept {
+ return typeId <= Type::_kIdVec128End ? Reg::kTypeXmm :
+ typeId <= Type::_kIdVec256End ? Reg::kTypeYmm : Reg::kTypeZmm;
+}
+
+//! Converts `size` to a 'kmov?' instruction.
+static inline uint32_t x86KmovFromSize(uint32_t size) noexcept {
+ switch (size) {
+ case 1: return Inst::kIdKmovb;
+ case 2: return Inst::kIdKmovw;
+ case 4: return Inst::kIdKmovd;
+ case 8: return Inst::kIdKmovq;
+ default: return Inst::kIdNone;
+ }
+}
+
+// ============================================================================
+// [asmjit::X86Internal - FuncDetail]
+// ============================================================================
+
+ASMJIT_FAVOR_SIZE Error X86Internal::initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept {
+ const CallConv& cc = func.callConv();
+ uint32_t arch = cc.arch();
+ uint32_t stackOffset = cc._spillZoneSize;
+
+ uint32_t i;
+ uint32_t argCount = func.argCount();
+
+ if (func.retCount() != 0) {
+ uint32_t typeId = func._rets[0].typeId();
+ switch (typeId) {
+ case Type::kIdI64:
+ case Type::kIdU64: {
+ if (Environment::is32Bit(arch)) {
+ // Convert a 64-bit return value to two 32-bit return values.
+ func._retCount = 2;
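+          // (Relies on integer Type ids being consecutive, so that
+          // kIdI64 - 2 == kIdI32 and kIdU64 - 2 == kIdU32.)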
+ typeId -= 2;
+
+ // 64-bit value is returned in EDX:EAX on X86.
+ func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, typeId);
+ func._rets[1].initReg(Reg::kTypeGpd, Gp::kIdDx, typeId);
+ break;
+ }
+ else {
+ func._rets[0].initReg(Reg::kTypeGpq, Gp::kIdAx, typeId);
+ }
+ break;
+ }
+
+ case Type::kIdI8:
+ case Type::kIdI16:
+ case Type::kIdI32: {
+ func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, Type::kIdI32);
+ break;
+ }
+
+ case Type::kIdU8:
+ case Type::kIdU16:
+ case Type::kIdU32: {
+ func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, Type::kIdU32);
+ break;
+ }
+
+ case Type::kIdF32:
+ case Type::kIdF64: {
+ uint32_t regType = Environment::is32Bit(arch) ? Reg::kTypeSt : Reg::kTypeXmm;
+ func._rets[0].initReg(regType, 0, typeId);
+ break;
+ }
+
+ case Type::kIdF80: {
+ // 80-bit floats are always returned by FP0.
+ func._rets[0].initReg(Reg::kTypeSt, 0, typeId);
+ break;
+ }
+
+ case Type::kIdMmx32:
+ case Type::kIdMmx64: {
+ // MM registers are returned through XMM (SystemV) or GPQ (Win64).
+ uint32_t regType = Reg::kTypeMm;
+ if (Environment::is64Bit(arch))
+ regType = cc.strategy() == CallConv::kStrategyDefault ? Reg::kTypeXmm : Reg::kTypeGpq;
+
+ func._rets[0].initReg(regType, 0, typeId);
+ break;
+ }
+
+ default: {
+ func._rets[0].initReg(x86VecTypeIdToRegType(typeId), 0, typeId);
+ break;
+ }
+ }
+ }
+
+ switch (cc.strategy()) {
+ case CallConv::kStrategyDefault: {
+ uint32_t gpzPos = 0;
+ uint32_t vecPos = 0;
+
+ for (i = 0; i < argCount; i++) {
+ FuncValue& arg = func._args[i];
+ uint32_t typeId = arg.typeId();
+
+ if (Type::isInt(typeId)) {
+ uint32_t regId = BaseReg::kIdBad;
+
+ if (gpzPos < CallConv::kMaxRegArgsPerGroup)
+ regId = cc._passedOrder[Reg::kGroupGp].id[gpzPos];
+
+ if (regId != BaseReg::kIdBad) {
+ uint32_t regType = (typeId <= Type::kIdU32) ? Reg::kTypeGpd : Reg::kTypeGpq;
+ arg.assignRegData(regType, regId);
+ func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId));
+ gpzPos++;
+ }
+ else {
+ uint32_t size = Support::max<uint32_t>(Type::sizeOf(typeId), registerSize);
+ arg.assignStackOffset(int32_t(stackOffset));
+ stackOffset += size;
+ }
+ continue;
+ }
+
+ if (Type::isFloat(typeId) || Type::isVec(typeId)) {
+ uint32_t regId = BaseReg::kIdBad;
+
+ if (vecPos < CallConv::kMaxRegArgsPerGroup)
+ regId = cc._passedOrder[Reg::kGroupVec].id[vecPos];
+
+ if (Type::isFloat(typeId)) {
+            // If this is a float and `kFlagPassFloatsByVec` is not set, the
+            // argument has to be passed via stack instead. This should only
+            // apply to 32-bit calling conventions.
+ if (!cc.hasFlag(CallConv::kFlagPassFloatsByVec))
+ regId = BaseReg::kIdBad;
+ }
+ else {
+            // Pass vector arguments via stack if this is a variable-argument
+            // function. This should only apply to 32-bit calling conventions.
+ if (signature.hasVarArgs() && cc.hasFlag(CallConv::kFlagPassVecByStackIfVA))
+ regId = BaseReg::kIdBad;
+ }
+
+ if (regId != BaseReg::kIdBad) {
+ arg.initTypeId(typeId);
+ arg.assignRegData(x86VecTypeIdToRegType(typeId), regId);
+ func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId));
+ vecPos++;
+ }
+ else {
+ uint32_t size = Type::sizeOf(typeId);
+ arg.assignStackOffset(int32_t(stackOffset));
+ stackOffset += size;
+ }
+ continue;
+ }
+ }
+ break;
+ }
+
+ case CallConv::kStrategyX64Windows:
+ case CallConv::kStrategyX64VectorCall: {
+ // Both X64 and VectorCall behave similarly - arguments are indexed
+ // from left to right. The position of the argument determines in
+ // which register the argument is allocated, so it's either GP or
+ // one of XMM/YMM/ZMM registers.
+ //
+ // [ X64 ] [VecCall]
+ // Index: #0 #1 #2 #3 #4 #5
+ //
+ // GP : RCX RDX R8 R9
+ // VEC : XMM0 XMM1 XMM2 XMM3 XMM4 XMM5
+ //
+ // For example function `f(int a, double b, int c, double d)` will be:
+ //
+ // (a) (b) (c) (d)
+ // RCX XMM1 R8 XMM3
+ //
+      // Unused vector registers are used by HVA (Homogeneous Vector Aggregates).
+
+ bool isVectorCall = (cc.strategy() == CallConv::kStrategyX64VectorCall);
+
+ for (i = 0; i < argCount; i++) {
+ FuncValue& arg = func._args[i];
+
+ uint32_t typeId = arg.typeId();
+ uint32_t size = Type::sizeOf(typeId);
+
+ if (Type::isInt(typeId) || Type::isMmx(typeId)) {
+ uint32_t regId = BaseReg::kIdBad;
+
+ if (i < CallConv::kMaxRegArgsPerGroup)
+ regId = cc._passedOrder[Reg::kGroupGp].id[i];
+
+ if (regId != BaseReg::kIdBad) {
+ uint32_t regType = (size <= 4 && !Type::isMmx(typeId)) ? Reg::kTypeGpd : Reg::kTypeGpq;
+ arg.assignRegData(regType, regId);
+ func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId));
+ }
+ else {
+ arg.assignStackOffset(int32_t(stackOffset));
+ stackOffset += 8;
+ }
+ continue;
+ }
+
+ if (Type::isFloat(typeId) || Type::isVec(typeId)) {
+ uint32_t regId = BaseReg::kIdBad;
+
+ if (i < CallConv::kMaxRegArgsPerGroup)
+ regId = cc._passedOrder[Reg::kGroupVec].id[i];
+
+ if (regId != BaseReg::kIdBad) {
+ // X64-ABI doesn't allow vector types (XMM|YMM|ZMM) to be passed
+ // via registers, however, VectorCall was designed for that purpose.
+ if (Type::isFloat(typeId) || isVectorCall) {
+ uint32_t regType = x86VecTypeIdToRegType(typeId);
+ arg.assignRegData(regType, regId);
+ func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId));
+ continue;
+ }
+ }
+
+          // Passed via stack if the argument is a float/double or is passed
+          // indirectly. The catch is that an indirectly passed argument can
+          // still have its address passed via register if the argument's
+          // index maps to a GP register.
+ if (Type::isFloat(typeId)) {
+ arg.assignStackOffset(int32_t(stackOffset));
+ }
+ else {
+ uint32_t gpRegId = cc._passedOrder[Reg::kGroupGp].id[i];
+ if (gpRegId != BaseReg::kIdBad)
+ arg.assignRegData(Reg::kTypeGpq, gpRegId);
+ else
+ arg.assignStackOffset(int32_t(stackOffset));
+ arg.addFlags(FuncValue::kFlagIsIndirect);
+ }
+
+ // Always 8 bytes (float/double/pointer).
+ stackOffset += 8;
+ continue;
+ }
+ }
+ break;
+ }
+ }
+
+ func._argStackSize = stackOffset;
+ return kErrorOk;
+}
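+
+// Worked example (a sketch, not part of the library): for a Win64 signature
+// `int f(int a, double b)` the kStrategyX64Windows branch above yields:
+//
+//   a   -> ECX  (integer, size <= 4, argument index #0 -> first GP slot)
+//   b   -> XMM1 (float, argument index #1 -> second VEC slot)
+//   ret -> EAX  (a 32-bit integer return is placed in GP register AX)
+//
+// No argument is passed on the stack, so `stackOffset` never grows past the
+// initial spill-zone size (the 32-byte Win64 shadow space).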
+
+// ============================================================================
+// [asmjit::X86FuncArgsContext]
+// ============================================================================
+
+static RegInfo x86GetRegForMemToMemMove(uint32_t arch, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
+ uint32_t dstSize = Type::sizeOf(dstTypeId);
+ uint32_t srcSize = Type::sizeOf(srcTypeId);
+ uint32_t maxSize = Support::max<uint32_t>(dstSize, srcSize);
+ uint32_t regSize = Environment::registerSizeFromArch(arch);
+
+ uint32_t signature = 0;
+ if (maxSize <= regSize || (Type::isInt(dstTypeId) && Type::isInt(srcTypeId)))
+ signature = maxSize <= 4 ? Gpd::kSignature : Gpq::kSignature;
+ else if (maxSize <= 16)
+ signature = Xmm::kSignature;
+ else if (maxSize <= 32)
+ signature = Ymm::kSignature;
+ else if (maxSize <= 64)
+ signature = Zmm::kSignature;
+
+ return RegInfo { signature };
+}
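+
+// For example (derived from the sizing rules above): an int32 <-> int32 move
+// picks a GPD; an int64 <-> int64 move on a 64-bit target picks a GPQ; an
+// f64 <-> f64 move on a 32-bit target picks an XMM, since 8 bytes don't fit
+// the 4-byte GP register; and a 32-byte vector move picks a YMM.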
+
+// Used by both `argsToFuncFrame()` and `emitArgsAssignment()`.
+class X86FuncArgsContext {
+public:
+ enum VarId : uint32_t {
+ kVarIdNone = 0xFF
+ };
+
+ //! Contains information about a single argument or SA register that may need shuffling.
+ struct Var {
+ inline void init(const FuncValue& cur_, const FuncValue& out_) noexcept {
+ cur = cur_;
+ out = out_;
+ }
+
+ //! Reset the value to its unassigned state.
+ inline void reset() noexcept {
+ cur.reset();
+ out.reset();
+ }
+
+ inline bool isDone() const noexcept { return cur.isDone(); }
+ inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }
+
+ FuncValue cur;
+ FuncValue out;
+ };
+
+ struct WorkData {
+ inline void reset() noexcept {
+ _archRegs = 0;
+ _workRegs = 0;
+ _usedRegs = 0;
+ _assignedRegs = 0;
+ _dstRegs = 0;
+ _dstShuf = 0;
+ _numSwaps = 0;
+ _numStackArgs = 0;
+ memset(_reserved, 0, sizeof(_reserved));
+ memset(_physToVarId, kVarIdNone, 32);
+ }
+
+ inline bool isAssigned(uint32_t regId) const noexcept {
+ ASMJIT_ASSERT(regId < 32);
+ return Support::bitTest(_assignedRegs, regId);
+ }
+
+ inline void assign(uint32_t varId, uint32_t regId) noexcept {
+ ASMJIT_ASSERT(!isAssigned(regId));
+ ASMJIT_ASSERT(_physToVarId[regId] == kVarIdNone);
+
+ _physToVarId[regId] = uint8_t(varId);
+ _assignedRegs ^= Support::bitMask(regId);
+ }
+
+ inline void reassign(uint32_t varId, uint32_t newId, uint32_t oldId) noexcept {
+ ASMJIT_ASSERT( isAssigned(oldId));
+ ASMJIT_ASSERT(!isAssigned(newId));
+ ASMJIT_ASSERT(_physToVarId[oldId] == varId);
+ ASMJIT_ASSERT(_physToVarId[newId] == kVarIdNone);
+
+ _physToVarId[oldId] = uint8_t(kVarIdNone);
+ _physToVarId[newId] = uint8_t(varId);
+ _assignedRegs ^= Support::bitMask(newId) ^ Support::bitMask(oldId);
+ }
+
+ inline void swap(uint32_t aVarId, uint32_t aRegId, uint32_t bVarId, uint32_t bRegId) noexcept {
+ ASMJIT_ASSERT(isAssigned(aRegId));
+ ASMJIT_ASSERT(isAssigned(bRegId));
+ ASMJIT_ASSERT(_physToVarId[aRegId] == aVarId);
+ ASMJIT_ASSERT(_physToVarId[bRegId] == bVarId);
+
+ _physToVarId[aRegId] = uint8_t(bVarId);
+ _physToVarId[bRegId] = uint8_t(aVarId);
+ }
+
+ inline void unassign(uint32_t varId, uint32_t regId) noexcept {
+ ASMJIT_ASSERT(isAssigned(regId));
+ ASMJIT_ASSERT(_physToVarId[regId] == varId);
+
+ DebugUtils::unused(varId);
+ _physToVarId[regId] = uint8_t(kVarIdNone);
+ _assignedRegs ^= Support::bitMask(regId);
+ }
+
+ inline uint32_t archRegs() const noexcept { return _archRegs; }
+ inline uint32_t workRegs() const noexcept { return _workRegs; }
+ inline uint32_t usedRegs() const noexcept { return _usedRegs; }
+ inline uint32_t assignedRegs() const noexcept { return _assignedRegs; }
+ inline uint32_t dstRegs() const noexcept { return _dstRegs; }
+ inline uint32_t availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }
+
+ uint32_t _archRegs; //!< All allocable registers provided by the architecture.
+ uint32_t _workRegs; //!< All registers that can be used by the shuffler.
+ uint32_t _usedRegs; //!< Registers used by the shuffler (all).
+ uint32_t _assignedRegs; //!< Assigned registers.
+ uint32_t _dstRegs; //!< Destination registers assigned to arguments or SA.
+ uint32_t _dstShuf; //!< Destination registers that require shuffling.
+ uint8_t _numSwaps; //!< Number of register swaps.
+ uint8_t _numStackArgs; //!< Number of stack loads.
+ uint8_t _reserved[6]; //!< Reserved (only used as padding).
+ uint8_t _physToVarId[32]; //!< Physical ID to variable ID mapping.
+ };
+
+ uint8_t _arch;
+ bool _hasStackSrc; //!< Has arguments passed via stack (SRC).
+ bool _hasPreservedFP; //!< Has preserved frame-pointer (FP).
+ uint8_t _stackDstMask; //!< Has arguments assigned to stack (DST).
+ uint8_t _regSwapsMask; //!< Register swap groups (bit-mask).
+ uint8_t _saVarId;
+ uint32_t _varCount;
+ WorkData _workData[BaseReg::kGroupVirt];
+ Var _vars[kFuncArgCountLoHi + 1];
+
+ X86FuncArgsContext() noexcept;
+
+ inline uint32_t arch() const noexcept { return _arch; }
+ inline uint32_t varCount() const noexcept { return _varCount; }
+
+ inline Var& var(size_t varId) noexcept { return _vars[varId]; }
+ inline const Var& var(size_t varId) const noexcept { return _vars[varId]; }
+ inline size_t indexOf(const Var* var) const noexcept { return (size_t)(var - _vars); }
+
+ Error initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept;
+ Error markScratchRegs(FuncFrame& frame) noexcept;
+ Error markDstRegsDirty(FuncFrame& frame) noexcept;
+ Error markStackArgsReg(FuncFrame& frame) noexcept;
+};
+
+X86FuncArgsContext::X86FuncArgsContext() noexcept {
+ _arch = Environment::kArchUnknown;
+ _varCount = 0;
+ _hasStackSrc = false;
+ _hasPreservedFP = false;
+ _stackDstMask = 0;
+ _regSwapsMask = 0;
+ _saVarId = kVarIdNone;
+
+ for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
+ _workData[group].reset();
+}
+
+ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept {
+ // The code has to be updated if this changes.
+ ASMJIT_ASSERT(BaseReg::kGroupVirt == 4);
+
+ uint32_t i;
+ const FuncDetail& func = *args.funcDetail();
+
+ // Initialize Architecture.
+ uint32_t arch = func.callConv().arch();
+ uint32_t archRegCount = Environment::is32Bit(arch) ? 8 : 16;
+
+ _arch = uint8_t(arch);
+
+ // Initialize `_archRegs`.
+ _workData[Reg::kGroupGp ]._archRegs = Support::lsbMask<uint32_t>(archRegCount) & ~Support::bitMask(Gp::kIdSp);
+ _workData[Reg::kGroupVec ]._archRegs = Support::lsbMask<uint32_t>(archRegCount);
+ _workData[Reg::kGroupMm ]._archRegs = Support::lsbMask<uint32_t>(8);
+ _workData[Reg::kGroupKReg]._archRegs = Support::lsbMask<uint32_t>(8);
+
+ if (frame.hasPreservedFP())
+ _workData[Reg::kGroupGp]._archRegs &= ~Support::bitMask(Gp::kIdBp);
+
+ // Extract information from all function arguments/assignments and build Var[] array.
+ uint32_t varId = 0;
+ for (i = 0; i < kFuncArgCountLoHi; i++) {
+ const FuncValue& dst_ = args.arg(i);
+ if (!dst_.isAssigned())
+ continue;
+
+ const FuncValue& src_ = func.arg(i);
+ if (ASMJIT_UNLIKELY(!src_.isAssigned()))
+ return DebugUtils::errored(kErrorInvalidState);
+
+ Var& var = _vars[varId];
+ var.init(src_, dst_);
+
+ FuncValue& src = var.cur;
+ FuncValue& dst = var.out;
+
+ uint32_t dstGroup = 0xFFFFFFFFu;
+ uint32_t dstId = BaseReg::kIdBad;
+ WorkData* dstWd = nullptr;
+
+ // Not supported.
+ if (src.isIndirect())
+ return DebugUtils::errored(kErrorInvalidAssignment);
+
+ if (dst.isReg()) {
+ uint32_t dstType = dst.regType();
+ if (ASMJIT_UNLIKELY(dstType >= Reg::kTypeCount))
+ return DebugUtils::errored(kErrorInvalidRegType);
+
+ // Copy TypeId from source if the destination doesn't have it. The RA
+ // used by BaseCompiler would never leave TypeId undefined, but users
+ // of FuncAPI can just assign phys regs without specifying the type.
+ if (!dst.hasTypeId())
+ dst.setTypeId(Reg::typeIdOf(dst.regType()));
+
+ dstGroup = Reg::groupOf(dstType);
+ if (ASMJIT_UNLIKELY(dstGroup >= BaseReg::kGroupVirt))
+ return DebugUtils::errored(kErrorInvalidRegGroup);
+
+ dstWd = &_workData[dstGroup];
+ dstId = dst.regId();
+ if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId)))
+ return DebugUtils::errored(kErrorInvalidPhysId);
+
+ if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId)))
+ return DebugUtils::errored(kErrorOverlappedRegs);
+
+ dstWd->_dstRegs |= Support::bitMask(dstId);
+ dstWd->_dstShuf |= Support::bitMask(dstId);
+ dstWd->_usedRegs |= Support::bitMask(dstId);
+ }
+ else {
+ if (!dst.hasTypeId())
+ dst.setTypeId(src.typeId());
+
+ RegInfo regInfo = x86GetRegForMemToMemMove(arch, dst.typeId(), src.typeId());
+ if (ASMJIT_UNLIKELY(!regInfo.isValid()))
+ return DebugUtils::errored(kErrorInvalidState);
+ _stackDstMask = uint8_t(_stackDstMask | Support::bitMask(regInfo.group()));
+ }
+
+ if (src.isReg()) {
+ uint32_t srcId = src.regId();
+ uint32_t srcGroup = Reg::groupOf(src.regType());
+
+ if (dstGroup == srcGroup) {
+ dstWd->assign(varId, srcId);
+
+ // The best case, register is allocated where it is expected to be.
+ if (dstId == srcId)
+ var.markDone();
+ }
+ else {
+ if (ASMJIT_UNLIKELY(srcGroup >= BaseReg::kGroupVirt))
+ return DebugUtils::errored(kErrorInvalidState);
+
+ WorkData& srcData = _workData[srcGroup];
+ srcData.assign(varId, srcId);
+ }
+ }
+ else {
+ if (dstWd)
+ dstWd->_numStackArgs++;
+ _hasStackSrc = true;
+ }
+
+ varId++;
+ }
+
+ // Initialize WorkData::workRegs.
+ for (i = 0; i < BaseReg::kGroupVirt; i++)
+ _workData[i]._workRegs = (_workData[i].archRegs() & (frame.dirtyRegs(i) | ~frame.preservedRegs(i))) | _workData[i].dstRegs() | _workData[i].assignedRegs();
+
+ // Create a variable that represents `SARegId` if necessary.
+ bool saRegRequired = _hasStackSrc && frame.hasDynamicAlignment() && !frame.hasPreservedFP();
+
+ WorkData& gpRegs = _workData[BaseReg::kGroupGp];
+ uint32_t saCurRegId = frame.saRegId();
+ uint32_t saOutRegId = args.saRegId();
+
+ if (saCurRegId != BaseReg::kIdBad) {
+    // Check that the provided `SARegId` doesn't collide with input registers.
+ if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId)))
+ return DebugUtils::errored(kErrorOverlappedRegs);
+ }
+
+ if (saOutRegId != BaseReg::kIdBad) {
+    // Check that the provided `SARegId` doesn't collide with argument assignments.
+ if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId)))
+ return DebugUtils::errored(kErrorOverlappedRegs);
+ saRegRequired = true;
+ }
+
+ if (saRegRequired) {
+ uint32_t ptrTypeId = Environment::is32Bit(arch) ? Type::kIdU32 : Type::kIdU64;
+ uint32_t ptrRegType = Environment::is32Bit(arch) ? BaseReg::kTypeGp32 : BaseReg::kTypeGp64;
+
+ _saVarId = uint8_t(varId);
+ _hasPreservedFP = frame.hasPreservedFP();
+
+ Var& var = _vars[varId];
+ var.reset();
+
+ if (saCurRegId == BaseReg::kIdBad) {
+ if (saOutRegId != BaseReg::kIdBad && !gpRegs.isAssigned(saOutRegId)) {
+ saCurRegId = saOutRegId;
+ }
+ else {
+ uint32_t availableRegs = gpRegs.availableRegs();
+ if (!availableRegs)
+ availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs();
+
+ if (ASMJIT_UNLIKELY(!availableRegs))
+ return DebugUtils::errored(kErrorNoMorePhysRegs);
+
+ saCurRegId = Support::ctz(availableRegs);
+ }
+ }
+
+ var.cur.initReg(ptrRegType, saCurRegId, ptrTypeId);
+ gpRegs.assign(varId, saCurRegId);
+ gpRegs._workRegs |= Support::bitMask(saCurRegId);
+
+ if (saOutRegId != BaseReg::kIdBad) {
+ var.out.initReg(ptrRegType, saOutRegId, ptrTypeId);
+ gpRegs._dstRegs |= Support::bitMask(saOutRegId);
+ gpRegs._workRegs |= Support::bitMask(saOutRegId);
+ }
+ else {
+ var.markDone();
+ }
+
+ varId++;
+ }
+
+ _varCount = varId;
+
+ // Detect register swaps.
+ for (varId = 0; varId < _varCount; varId++) {
+ Var& var = _vars[varId];
+ if (var.cur.isReg() && var.out.isReg()) {
+ uint32_t srcId = var.cur.regId();
+ uint32_t dstId = var.out.regId();
+
+ uint32_t group = Reg::groupOf(var.cur.regType());
+ if (group != Reg::groupOf(var.out.regType()))
+ continue;
+
+ WorkData& wd = _workData[group];
+ if (wd.isAssigned(dstId)) {
+ Var& other = _vars[wd._physToVarId[dstId]];
+ if (Reg::groupOf(other.out.regType()) == group && other.out.regId() == srcId) {
+ wd._numSwaps++;
+ _regSwapsMask = uint8_t(_regSwapsMask | Support::bitMask(group));
+ }
+ }
+ }
+ }
+
+ return kErrorOk;
+}
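+
+// Swap-detection example (illustrative only): if arg0 arrives in RCX but must
+// end up in RDX, while arg1 arrives in RDX and must end up in RCX, the loop
+// above records a swap in the GP group and sets the group's bit in
+// `_regSwapsMask`. Later, GP swaps are resolved with 'xchg', while non-GP
+// swaps need a scratch register (see markScratchRegs()).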
+
+ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markDstRegsDirty(FuncFrame& frame) noexcept {
+ for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++) {
+ WorkData& wd = _workData[i];
+ uint32_t regs = wd.usedRegs() | wd._dstShuf;
+
+ wd._workRegs |= regs;
+ frame.addDirtyRegs(i, regs);
+ }
+
+ return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markScratchRegs(FuncFrame& frame) noexcept {
+ uint32_t groupMask = 0;
+
+ // Handle stack to stack moves.
+ groupMask |= _stackDstMask;
+
+ // Handle register swaps.
+ groupMask |= _regSwapsMask & ~Support::bitMask(BaseReg::kGroupGp);
+
+ if (!groupMask)
+ return kErrorOk;
+
+  // Select one dirty register per affected group that can be used as a scratch register.
+ for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
+ if (Support::bitTest(groupMask, group)) {
+ WorkData& wd = _workData[group];
+
+ // Initially, pick some clobbered or dirty register.
+ uint32_t workRegs = wd.workRegs();
+ uint32_t regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);
+
+      // If that didn't work out, pick some register which is not in 'used'.
+ if (!regs)
+ regs = workRegs & ~wd.usedRegs();
+
+      // If that didn't work out, pick any other register that is allocable.
+      // This last-resort case will, however, result in marking one more
+      // register dirty.
+ if (!regs)
+ regs = wd.archRegs() & ~workRegs;
+
+      // If that didn't work out, we will have to use XORs instead of MOVs.
+ if (!regs)
+ continue;
+
+ uint32_t regMask = Support::blsi(regs);
+ wd._workRegs |= regMask;
+ frame.addDirtyRegs(group, regMask);
+ }
+ }
+
+ return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markStackArgsReg(FuncFrame& frame) noexcept {
+ if (_saVarId != kVarIdNone) {
+ const Var& var = _vars[_saVarId];
+ frame.setSARegId(var.cur.regId());
+ }
+ else if (frame.hasPreservedFP()) {
+ // Always EBP|RBP if the frame-pointer isn't omitted.
+ frame.setSARegId(Gp::kIdBp);
+ }
+
+ return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::X86Internal - FrameLayout]
+// ============================================================================
+
+ASMJIT_FAVOR_SIZE Error X86Internal::initFuncFrame(FuncFrame& frame, const FuncDetail& func) noexcept {
+ uint32_t arch = func.callConv().arch();
+
+ // Initializing FuncFrame means making a copy of some properties of `func`.
+ // Properties like `_localStackSize` will be set by the user before the frame
+ // is finalized.
+ frame.reset();
+
+ frame._arch = uint8_t(arch);
+ frame._spRegId = Gp::kIdSp;
+ frame._saRegId = Gp::kIdBad;
+
+ uint32_t naturalStackAlignment = func.callConv().naturalStackAlignment();
+ uint32_t minDynamicAlignment = Support::max<uint32_t>(naturalStackAlignment, 16);
+
+ if (minDynamicAlignment == naturalStackAlignment)
+ minDynamicAlignment <<= 1;
+
+ frame._naturalStackAlignment = uint8_t(naturalStackAlignment);
+ frame._minDynamicAlignment = uint8_t(minDynamicAlignment);
+ frame._redZoneSize = uint8_t(func.redZoneSize());
+ frame._spillZoneSize = uint8_t(func.spillZoneSize());
+ frame._finalStackAlignment = uint8_t(frame._naturalStackAlignment);
+
+ if (func.hasFlag(CallConv::kFlagCalleePopsStack)) {
+ frame._calleeStackCleanup = uint16_t(func.argStackSize());
+ }
+
+ // Initial masks of dirty and preserved registers.
+ for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
+ frame._dirtyRegs[group] = func.usedRegs(group);
+ frame._preservedRegs[group] = func.preservedRegs(group);
+ }
+
+ // Exclude ESP/RSP - this register is never included in saved GP regs.
+ frame._preservedRegs[BaseReg::kGroupGp] &= ~Support::bitMask(Gp::kIdSp);
+
+ return kErrorOk;
+}
+
+ASMJIT_FAVOR_SIZE Error X86Internal::finalizeFuncFrame(FuncFrame& frame) noexcept {
+ uint32_t registerSize = Environment::registerSizeFromArch(frame.arch());
+
+ // The final stack alignment must be updated accordingly to call and local stack alignments.
+ uint32_t stackAlignment = frame._finalStackAlignment;
+ ASMJIT_ASSERT(stackAlignment == Support::max(frame._naturalStackAlignment,
+ frame._callStackAlignment,
+ frame._localStackAlignment));
+
+ // TODO: Must be configurable.
+ uint32_t vecSize = 16;
+
+ bool hasFP = frame.hasPreservedFP();
+ bool hasDA = frame.hasDynamicAlignment();
+
+ // Include EBP|RBP if the function preserves the frame-pointer.
+ if (hasFP)
+ frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(Gp::kIdBp);
+
+ // These two are identical if the function doesn't align its stack dynamically.
+ uint32_t saRegId = frame.saRegId();
+ if (saRegId == BaseReg::kIdBad)
+ saRegId = Gp::kIdSp;
+
+ // Fix stack arguments base-register from ESP|RSP to EBP|RBP in case it was
+ // not picked before and the function performs dynamic stack alignment.
+ if (hasDA && saRegId == Gp::kIdSp)
+ saRegId = Gp::kIdBp;
+
+ // Mark as dirty any register but ESP|RSP if used as SA pointer.
+ if (saRegId != Gp::kIdSp)
+ frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(saRegId);
+
+ frame._spRegId = uint8_t(Gp::kIdSp);
+ frame._saRegId = uint8_t(saRegId);
+
+ // Setup stack size used to save preserved registers.
+ frame._gpSaveSize = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupGp )) * registerSize);
+ frame._nonGpSaveSize = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupVec )) * vecSize +
+ Support::popcnt(frame.savedRegs(Reg::kGroupMm )) * 8 +
+ Support::popcnt(frame.savedRegs(Reg::kGroupKReg)) * 8);
+
+ uint32_t v = 0; // The beginning of the stack frame relative to SP after prolog.
+ v += frame.callStackSize(); // Count 'callStackSize' <- This is used to call functions.
+ v = Support::alignUp(v, stackAlignment); // Align to function's stack alignment.
+
+ frame._localStackOffset = v; // Store 'localStackOffset' <- Function's local stack starts here.
+ v += frame.localStackSize(); // Count 'localStackSize' <- Function's local stack ends here.
+
+ // If the function's stack must be aligned, calculate the alignment necessary
+ // to store vector registers, and set `FuncFrame::kAttrAlignedVecSR` to inform
+ // PEI that it can use instructions that perform aligned stores/loads.
+ if (stackAlignment >= vecSize && frame._nonGpSaveSize) {
+ frame.addAttributes(FuncFrame::kAttrAlignedVecSR);
+ v = Support::alignUp(v, vecSize); // Align '_nonGpSaveOffset'.
+ }
+
+ frame._nonGpSaveOffset = v; // Store '_nonGpSaveOffset' <- Non-GP Save/Restore starts here.
+ v += frame._nonGpSaveSize; // Count '_nonGpSaveSize' <- Non-GP Save/Restore ends here.
+
+ // Calculate if dynamic alignment (DA) slot (stored as offset relative to SP) is required and its offset.
+ if (hasDA && !hasFP) {
+ frame._daOffset = v; // Store 'daOffset' <- DA pointer would be stored here.
+ v += registerSize; // Count 'daOffset'.
+ }
+ else {
+ frame._daOffset = FuncFrame::kTagInvalidOffset;
+ }
+
+ // The return address should be stored after GP save/restore regs. It has
+ // the same size as `registerSize` (basically the native register/pointer
+ // size). We don't adjust it now as `v` now contains the exact size that the
+ // function requires to adjust (call frame + stack frame, vec stack size).
+ // The stack (if we consider this size) is misaligned now, as it's always
+ // aligned before the function call - when `call()` is executed it pushes
+ // the current EIP|RIP onto the stack, and misaligns it by 12 or 8 bytes
+ // (depending on the architecture). So count number of bytes needed to align
+ // it up to the function's CallFrame (the beginning).
+ if (v || frame.hasFuncCalls())
+ v += Support::alignUpDiff(v + frame.gpSaveSize() + registerSize, stackAlignment);
+
+ frame._gpSaveOffset = v; // Store 'gpSaveOffset' <- Function's GP Save/Restore starts here.
+ frame._stackAdjustment = v; // Store 'stackAdjustment' <- SA used by 'add zsp, SA' and 'sub zsp, SA'.
+
+ v += frame._gpSaveSize; // Count 'gpSaveSize' <- Function's GP Save/Restore ends here.
+ v += registerSize; // Count 'ReturnAddress' <- As CALL pushes onto stack.
+
+ // If the function performs dynamic stack alignment then the stack-adjustment must be aligned.
+ if (hasDA)
+ frame._stackAdjustment = Support::alignUp(frame._stackAdjustment, stackAlignment);
+
+ uint32_t saInvOff = FuncFrame::kTagInvalidOffset;
+ uint32_t saTmpOff = registerSize + frame._gpSaveSize;
+
+ // Calculate where the function arguments start relative to SP.
+ frame._saOffsetFromSP = hasDA ? saInvOff : v;
+
+ // Calculate where the function arguments start relative to FP or user-provided register.
+ frame._saOffsetFromSA = hasFP ? registerSize * 2 // Return address + frame pointer.
+ : saTmpOff; // Return address + all saved GP regs.
+
+ return kErrorOk;
+}
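+
+// Worked example (a sketch; the numbers just follow the computation above):
+// an x64 frame with callStackSize=32, localStackSize=24, one saved GP
+// register, no saved vector registers, 16-byte natural alignment, and no
+// dynamic alignment:
+//
+//   v = 32                        -> _localStackOffset = 32
+//   v = 32 + 24 = 56              -> _nonGpSaveOffset  = 56 (nothing to save)
+//   v += alignUpDiff(56 + 8 + 8, 16) = 56 + 8 = 64
+//                                 -> _gpSaveOffset = _stackAdjustment = 64
+//   v = 64 + 8 + 8 = 80           -> _saOffsetFromSP = 80
+//
+// i.e. the prolog pushes one GP register and then executes 'sub rsp, 64'.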
+
+// ============================================================================
+// [asmjit::X86Internal - ArgsToFrameInfo]
+// ============================================================================
+
+ASMJIT_FAVOR_SIZE Error X86Internal::argsToFuncFrame(const FuncArgsAssignment& args, FuncFrame& frame) noexcept {
+ X86FuncArgsContext ctx;
+ ASMJIT_PROPAGATE(ctx.initWorkData(frame, args));
+ ASMJIT_PROPAGATE(ctx.markDstRegsDirty(frame));
+ ASMJIT_PROPAGATE(ctx.markScratchRegs(frame));
+ ASMJIT_PROPAGATE(ctx.markStackArgsReg(frame));
+ return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::X86Internal - Emit Helpers]
+// ============================================================================
+
+ASMJIT_FAVOR_SIZE Error X86Internal::emitRegMove(Emitter* emitter,
+ const Operand_& dst_,
+ const Operand_& src_, uint32_t typeId, bool avxEnabled, const char* comment) {
+
+ // Invalid or abstract TypeIds are not allowed.
+ ASMJIT_ASSERT(Type::isValid(typeId) && !Type::isAbstract(typeId));
+
+ Operand dst(dst_);
+ Operand src(src_);
+
+ uint32_t instId = Inst::kIdNone;
+ uint32_t memFlags = 0;
+ uint32_t overrideMemSize = 0;
+
+ enum MemFlags : uint32_t {
+ kDstMem = 0x1,
+ kSrcMem = 0x2
+ };
+
+ // Detect memory operands and patch them to have the same size as the register.
+  // BaseCompiler always sets the memory size of allocs and spills, so this
+  // shouldn't really be necessary. However, since this function was separated
+  // from Compiler, it's better to make sure the size is always specified, as
+  // we can use 'movzx' and 'movsx', which rely on it.
+ if (dst.isMem()) { memFlags |= kDstMem; dst.as<Mem>().setSize(src.size()); }
+ if (src.isMem()) { memFlags |= kSrcMem; src.as<Mem>().setSize(dst.size()); }
+
+ switch (typeId) {
+ case Type::kIdI8:
+ case Type::kIdU8:
+ case Type::kIdI16:
+ case Type::kIdU16:
+ // Special case - 'movzx' load.
+ if (memFlags & kSrcMem) {
+ instId = Inst::kIdMovzx;
+ dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ }
+ else if (!memFlags) {
+ // Change both destination and source registers to GPD (safer, no dependencies).
+ dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ }
+ ASMJIT_FALLTHROUGH;
+
+ case Type::kIdI32:
+ case Type::kIdU32:
+ case Type::kIdI64:
+ case Type::kIdU64:
+ instId = Inst::kIdMov;
+ break;
+
+ case Type::kIdMmx32:
+ instId = Inst::kIdMovd;
+ if (memFlags) break;
+ ASMJIT_FALLTHROUGH;
+
+ case Type::kIdMmx64 : instId = Inst::kIdMovq ; break;
+ case Type::kIdMask8 : instId = Inst::kIdKmovb; break;
+ case Type::kIdMask16: instId = Inst::kIdKmovw; break;
+ case Type::kIdMask32: instId = Inst::kIdKmovd; break;
+ case Type::kIdMask64: instId = Inst::kIdKmovq; break;
+
+ default: {
+ uint32_t elementTypeId = Type::baseOf(typeId);
+ if (Type::isVec32(typeId) && memFlags) {
+ overrideMemSize = 4;
+ if (elementTypeId == Type::kIdF32)
+ instId = avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss;
+ else
+ instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
+ break;
+ }
+
+ if (Type::isVec64(typeId) && memFlags) {
+ overrideMemSize = 8;
+ if (elementTypeId == Type::kIdF64)
+ instId = avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd;
+ else
+ instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
+ break;
+ }
+
+ if (elementTypeId == Type::kIdF32)
+ instId = avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
+ else if (elementTypeId == Type::kIdF64)
+ instId = avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd;
+ else if (typeId <= Type::_kIdVec256End)
+ instId = avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa;
+ else if (elementTypeId <= Type::kIdU32)
+ instId = Inst::kIdVmovdqa32;
+ else
+ instId = Inst::kIdVmovdqa64;
+ break;
+ }
+ }
+
+ if (!instId)
+ return DebugUtils::errored(kErrorInvalidState);
+
+ if (overrideMemSize) {
+ if (dst.isMem()) dst.as<Mem>().setSize(overrideMemSize);
+ if (src.isMem()) src.as<Mem>().setSize(overrideMemSize);
+ }
+
+ emitter->setInlineComment(comment);
+ return emitter->emit(instId, dst, src);
+}
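+
+// Illustrative call (hypothetical operands, not from the original sources):
+// emitRegMove(emitter, ax, spillSlot, Type::kIdU16, false) takes the 'movzx'
+// path above and emits 'movzx eax, word ptr [spillSlot]', widening the
+// destination to GPD to avoid a partial-register write.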
+
+ASMJIT_FAVOR_SIZE Error X86Internal::emitArgMove(Emitter* emitter,
+ const Reg& dst_, uint32_t dstTypeId,
+ const Operand_& src_, uint32_t srcTypeId, bool avxEnabled, const char* comment) {
+
+ // Deduce optional `dstTypeId`, which may be `Type::kIdVoid` in some cases.
+ if (!dstTypeId)
+ dstTypeId = opData.archRegs.regTypeToTypeId[dst_.type()];
+
+ // Invalid or abstract TypeIds are not allowed.
+ ASMJIT_ASSERT(Type::isValid(dstTypeId) && !Type::isAbstract(dstTypeId));
+ ASMJIT_ASSERT(Type::isValid(srcTypeId) && !Type::isAbstract(srcTypeId));
+
+ Reg dst(dst_);
+ Operand src(src_);
+
+ uint32_t dstSize = Type::sizeOf(dstTypeId);
+ uint32_t srcSize = Type::sizeOf(srcTypeId);
+
+ uint32_t instId = Inst::kIdNone;
+
+  // Not a real loop; 'break' is just nicer than 'goto' here.
+ for (;;) {
+ if (Type::isInt(dstTypeId)) {
+ if (Type::isInt(srcTypeId)) {
+ instId = Inst::kIdMovsx;
+ uint32_t typeOp = (dstTypeId << 8) | srcTypeId;
+
+ // Sign extend by using 'movsx'.
+ if (typeOp == ((Type::kIdI16 << 8) | Type::kIdI8 ) ||
+ typeOp == ((Type::kIdI32 << 8) | Type::kIdI8 ) ||
+ typeOp == ((Type::kIdI32 << 8) | Type::kIdI16) ||
+ typeOp == ((Type::kIdI64 << 8) | Type::kIdI8 ) ||
+ typeOp == ((Type::kIdI64 << 8) | Type::kIdI16))
+ break;
+
+ // Sign extend by using 'movsxd'.
+ instId = Inst::kIdMovsxd;
+ if (typeOp == ((Type::kIdI64 << 8) | Type::kIdI32))
+ break;
+ }
+
+ if (Type::isInt(srcTypeId) || src_.isMem()) {
+ // Zero extend by using 'movzx' or 'mov'.
+ if (dstSize <= 4 && srcSize < 4) {
+ instId = Inst::kIdMovzx;
+ dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ }
+ else {
+ // We should have caught all possibilities where `srcSize` is less
+ // than 4, so we don't have to worry about 'movzx' anymore. Minimum
+ // size is enough to determine if we want 32-bit or 64-bit move.
+ instId = Inst::kIdMov;
+ srcSize = Support::min(srcSize, dstSize);
+
+ dst.setSignature(srcSize == 4 ? Reg::signatureOfT<Reg::kTypeGpd>()
+ : Reg::signatureOfT<Reg::kTypeGpq>());
+ if (src.isReg())
+ src.setSignature(dst.signature());
+ }
+ break;
+ }
+
+      // NOTE: The previous branch caught all memory sources; from here on it's
+      // always a register-to-register conversion, so handle the remaining cases.
+ srcSize = Support::min(srcSize, dstSize);
+
+ if (Type::isMmx(srcTypeId)) {
+ // 64-bit move.
+ instId = Inst::kIdMovq;
+ if (srcSize == 8)
+ break;
+
+ // 32-bit move.
+ instId = Inst::kIdMovd;
+ dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ break;
+ }
+
+ if (Type::isMask(srcTypeId)) {
+ instId = x86KmovFromSize(srcSize);
+ dst.setSignature(srcSize <= 4 ? Reg::signatureOfT<Reg::kTypeGpd>()
+ : Reg::signatureOfT<Reg::kTypeGpq>());
+ break;
+ }
+
+ if (Type::isVec(srcTypeId)) {
+ // 64-bit move.
+ instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
+ if (srcSize == 8)
+ break;
+
+ // 32-bit move.
+ instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
+ dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ break;
+ }
+ }
+
+ if (Type::isMmx(dstTypeId)) {
+ instId = Inst::kIdMovq;
+ srcSize = Support::min(srcSize, dstSize);
+
+ if (Type::isInt(srcTypeId) || src.isMem()) {
+ // 64-bit move.
+ if (srcSize == 8)
+ break;
+
+ // 32-bit move.
+ instId = Inst::kIdMovd;
+ if (src.isReg())
+ src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ break;
+ }
+
+ if (Type::isMmx(srcTypeId))
+ break;
+
+      // MOVDQ2Q is a legacy MMX/SSE instruction with no VEX form, so using
+      // it will hurt if `avxEnabled`.
+      instId = Inst::kIdMovdq2q;
+      if (Type::isVec(srcTypeId))
+        break;
+ }
+
+ if (Type::isMask(dstTypeId)) {
+ srcSize = Support::min(srcSize, dstSize);
+
+ if (Type::isInt(srcTypeId) || Type::isMask(srcTypeId) || src.isMem()) {
+ instId = x86KmovFromSize(srcSize);
+ if (Reg::isGp(src) && srcSize <= 4)
+ src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ break;
+ }
+ }
+
+ if (Type::isVec(dstTypeId)) {
+ // By default set destination to XMM, will be set to YMM|ZMM if needed.
+ dst.setSignature(Reg::signatureOfT<Reg::kTypeXmm>());
+
+      // MOVQ2DQ is a legacy MMX/SSE instruction with no VEX form, so using
+      // it will hurt if `avxEnabled`.
+ if (Reg::isMm(src)) {
+ // 64-bit move.
+ instId = Inst::kIdMovq2dq;
+ break;
+ }
+
+ // Argument conversion.
+ uint32_t dstElement = Type::baseOf(dstTypeId);
+ uint32_t srcElement = Type::baseOf(srcTypeId);
+
+ if (dstElement == Type::kIdF32 && srcElement == Type::kIdF64) {
+ srcSize = Support::min(dstSize * 2, srcSize);
+ dstSize = srcSize / 2;
+
+ if (srcSize <= 8)
+ instId = avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd;
+ else
+ instId = avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd;
+
+ if (dstSize == 32)
+ dst.setSignature(Reg::signatureOfT<Reg::kTypeYmm>());
+ if (src.isReg())
+ src.setSignature(Reg::signatureOfVecBySize(srcSize));
+ break;
+ }
+
+ if (dstElement == Type::kIdF64 && srcElement == Type::kIdF32) {
+ srcSize = Support::min(dstSize, srcSize * 2) / 2;
+ dstSize = srcSize * 2;
+
+ if (srcSize <= 4)
+ instId = avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss;
+ else
+ instId = avxEnabled ? Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps;
+
+ dst.setSignature(Reg::signatureOfVecBySize(dstSize));
+ if (src.isReg() && srcSize >= 32)
+ src.setSignature(Reg::signatureOfT<Reg::kTypeYmm>());
+ break;
+ }
+
+ srcSize = Support::min(srcSize, dstSize);
+ if (Reg::isGp(src) || src.isMem()) {
+ // 32-bit move.
+ if (srcSize <= 4) {
+ instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
+ if (src.isReg())
+ src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+ break;
+ }
+
+ // 64-bit move.
+ if (srcSize == 8) {
+ instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
+ break;
+ }
+ }
+
+ if (Reg::isVec(src) || src.isMem()) {
+ instId = avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
+
+ if (src.isMem() && srcSize < emitter->environment().stackAlignment())
+ instId = avxEnabled ? Inst::kIdVmovups : Inst::kIdMovups;
+
+ uint32_t signature = Reg::signatureOfVecBySize(srcSize);
+ dst.setSignature(signature);
+ if (src.isReg())
+ src.setSignature(signature);
+ break;
+ }
+ }
+
+ return DebugUtils::errored(kErrorInvalidState);
+ }
+
+ if (src.isMem())
+ src.as<Mem>().setSize(srcSize);
+
+ emitter->setInlineComment(comment);
+ return emitter->emit(instId, dst, src);
+}
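+
+// Illustrative call (hypothetical operands, not from the original sources):
+// emitArgMove(emitter, rcx, Type::kIdI64, ptr(rsp, 8), Type::kIdI8, false)
+// matches the (kIdI64 << 8) | kIdI8 entry in the sign-extension table above
+// and emits 'movsx rcx, byte ptr [rsp+8]'.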
+
+// ============================================================================
+// [asmjit::X86Internal - Emit Prolog & Epilog]
+// ============================================================================
+
+static ASMJIT_INLINE void X86Internal_setupSaveRestoreInfo(uint32_t group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept {
+ switch (group) {
+ case Reg::kGroupVec:
+ xReg = xmm(0);
+ xInst = x86GetXmmMovInst(frame);
+ xSize = xReg.size();
+ break;
+ case Reg::kGroupMm:
+ xReg = mm(0);
+ xInst = Inst::kIdMovq;
+ xSize = xReg.size();
+ break;
+ case Reg::kGroupKReg:
+ xReg = k(0);
+ xInst = Inst::kIdKmovq;
+ xSize = xReg.size();
+ break;
+ }
+}
+
+ASMJIT_FAVOR_SIZE Error X86Internal::emitProlog(Emitter* emitter, const FuncFrame& frame) {
+ uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp);
+
+ Gp zsp = emitter->zsp(); // ESP|RSP register.
+ Gp zbp = emitter->zbp(); // EBP|RBP register.
+ Gp gpReg = zsp; // General purpose register (temporary).
+ Gp saReg = zsp; // Stack-arguments base pointer.
+
+ // Emit: 'push zbp'
+ // 'mov zbp, zsp'.
+ if (frame.hasPreservedFP()) {
+ gpSaved &= ~Support::bitMask(Gp::kIdBp);
+ ASMJIT_PROPAGATE(emitter->push(zbp));
+ ASMJIT_PROPAGATE(emitter->mov(zbp, zsp));
+ }
+
+ // Emit: 'push gp' sequence.
+ {
+ Support::BitWordIterator<uint32_t> it(gpSaved);
+ while (it.hasNext()) {
+ gpReg.setId(it.next());
+ ASMJIT_PROPAGATE(emitter->push(gpReg));
+ }
+ }
+
+ // Emit: 'mov saReg, zsp'.
+ uint32_t saRegId = frame.saRegId();
+ if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) {
+ saReg.setId(saRegId);
+ if (frame.hasPreservedFP()) {
+ if (saRegId != Gp::kIdBp)
+ ASMJIT_PROPAGATE(emitter->mov(saReg, zbp));
+ }
+ else {
+ ASMJIT_PROPAGATE(emitter->mov(saReg, zsp));
+ }
+ }
+
+ // Emit: 'and zsp, StackAlignment'.
+ if (frame.hasDynamicAlignment()) {
+ ASMJIT_PROPAGATE(emitter->and_(zsp, -int32_t(frame.finalStackAlignment())));
+ }
+
+ // Emit: 'sub zsp, StackAdjustment'.
+ if (frame.hasStackAdjustment()) {
+ ASMJIT_PROPAGATE(emitter->sub(zsp, frame.stackAdjustment()));
+ }
+
+ // Emit: 'mov [zsp + DAOffset], saReg'.
+ if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
+ Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
+ ASMJIT_PROPAGATE(emitter->mov(saMem, saReg));
+ }
+
+ // Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'.
+ {
+ Reg xReg;
+ Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset()));
+
+ uint32_t xInst;
+ uint32_t xSize;
+
+ for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) {
+ Support::BitWordIterator<uint32_t> it(frame.savedRegs(group));
+ if (it.hasNext()) {
+ X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
+ do {
+ xReg.setId(it.next());
+ ASMJIT_PROPAGATE(emitter->emit(xInst, xBase, xReg));
+ xBase.addOffsetLo32(int32_t(xSize));
+ } while (it.hasNext());
+ }
+ }
+ }
+
+ return kErrorOk;
+}
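+
+// For the hypothetical x64 frame sketched after finalizeFuncFrame() above
+// (one saved GP register, stackAdjustment == 64, no preserved FP, no dynamic
+// alignment, no non-GP saves), the emitted prolog would be roughly:
+//
+//   push rbx        ; 'push gp' sequence (rbx is just an example register)
+//   sub  rsp, 64    ; 'sub zsp, StackAdjustment'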
+
+ASMJIT_FAVOR_SIZE Error X86Internal::emitEpilog(Emitter* emitter, const FuncFrame& frame) {
+ uint32_t i;
+ uint32_t regId;
+
+ uint32_t registerSize = emitter->registerSize();
+ uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp);
+
+ Gp zsp = emitter->zsp(); // ESP|RSP register.
+ Gp zbp = emitter->zbp(); // EBP|RBP register.
+ Gp gpReg = emitter->zsp(); // General purpose register (temporary).
+
+ // Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
+ if (frame.hasPreservedFP())
+ gpSaved &= ~Support::bitMask(Gp::kIdBp);
+
+ // Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'.
+ {
+ Reg xReg;
+ Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset()));
+
+ uint32_t xInst;
+ uint32_t xSize;
+
+ for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) {
+ Support::BitWordIterator<uint32_t> it(frame.savedRegs(group));
+ if (it.hasNext()) {
+ X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
+ do {
+ xReg.setId(it.next());
+ ASMJIT_PROPAGATE(emitter->emit(xInst, xReg, xBase));
+ xBase.addOffsetLo32(int32_t(xSize));
+ } while (it.hasNext());
+ }
+ }
+ }
+
+ // Emit 'emms' and/or 'vzeroupper'.
+ if (frame.hasMmxCleanup()) ASMJIT_PROPAGATE(emitter->emms());
+ if (frame.hasAvxCleanup()) ASMJIT_PROPAGATE(emitter->vzeroupper());
+
+ if (frame.hasPreservedFP()) {
+ // Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]'
+ int32_t count = int32_t(frame.gpSaveSize() - registerSize);
+ if (!count)
+ ASMJIT_PROPAGATE(emitter->mov(zsp, zbp));
+ else
+ ASMJIT_PROPAGATE(emitter->lea(zsp, ptr(zbp, -count)));
+ }
+ else {
+ if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
+ // Emit 'mov zsp, [zsp + DsaSlot]'.
+ Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
+ ASMJIT_PROPAGATE(emitter->mov(zsp, saMem));
+ }
+ else if (frame.hasStackAdjustment()) {
+ // Emit 'add zsp, StackAdjustment'.
+ ASMJIT_PROPAGATE(emitter->add(zsp, int32_t(frame.stackAdjustment())));
+ }
+ }
+
+ // Emit 'pop gp' sequence.
+ if (gpSaved) {
+ i = gpSaved;
+ regId = 16;
+
+ do {
+ regId--;
+ if (i & 0x8000) {
+ gpReg.setId(regId);
+ ASMJIT_PROPAGATE(emitter->pop(gpReg));
+ }
+ i <<= 1;
+ } while (regId != 0);
+ }
+
+ // Emit 'pop zbp'.
+ if (frame.hasPreservedFP())
+ ASMJIT_PROPAGATE(emitter->pop(zbp));
+
+ // Emit 'ret' or 'ret x'.
+ if (frame.hasCalleeStackCleanup())
+ ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup())));
+ else
+ ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet));
+
+ return kErrorOk;
+}
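+
+// ...and the matching epilog for the same hypothetical frame:
+//
+//   add rsp, 64     ; 'add zsp, StackAdjustment'
+//   pop rbx         ; 'pop gp' sequence
+//   ret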
+
+// ============================================================================
+// [asmjit::X86Internal - Emit Arguments Assignment]
+// ============================================================================
+
+#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
+static void dumpFuncValue(String& sb, uint32_t arch, const FuncValue& value) noexcept {
+ Formatter::formatTypeId(sb, value.typeId());
+ sb.append('@');
+
+ if (value.isIndirect())
+ sb.append('[');
+
+ if (value.isReg())
+ Formatter::formatRegister(sb, 0, nullptr, arch, value.regType(), value.regId());
+ else if (value.isStack())
+ sb.appendFormat("[%d]", value.stackOffset());
+ else
+ sb.append("<none>");
+
+ if (value.isIndirect())
+ sb.append(']');
+}
+
+static void dumpAssignment(String& sb, const X86FuncArgsContext& ctx) noexcept {
+ typedef X86FuncArgsContext::Var Var;
+
+ uint32_t arch = ctx.arch();
+ uint32_t varCount = ctx.varCount();
+
+ for (uint32_t i = 0; i < varCount; i++) {
+ const Var& var = ctx.var(i);
+ const FuncValue& dst = var.out;
+ const FuncValue& cur = var.cur;
+
+ sb.appendFormat("Var%u: ", i);
+ dumpFuncValue(sb, arch, dst);
+ sb.append(" <- ");
+ dumpFuncValue(sb, arch, cur);
+
+ if (var.isDone())
+ sb.append(" {Done}");
+
+ sb.append('\n');
+ }
+}
+#endif
+
+ASMJIT_FAVOR_SIZE Error X86Internal::emitArgsAssignment(Emitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
+ typedef X86FuncArgsContext::Var Var;
+ typedef X86FuncArgsContext::WorkData WorkData;
+
+ enum WorkFlags : uint32_t {
+ kWorkNone = 0x00,
+ kWorkDidSome = 0x01,
+ kWorkPending = 0x02,
+ kWorkPostponed = 0x04
+ };
+
+ X86FuncArgsContext ctx;
+ ASMJIT_PROPAGATE(ctx.initWorkData(frame, args));
+
+#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
+ {
+ String sb;
+ dumpAssignment(sb, ctx);
+ printf("%s\n", sb.data());
+ }
+#endif
+
+ uint32_t arch = ctx.arch();
+ uint32_t varCount = ctx._varCount;
+ WorkData* workData = ctx._workData;
+
+ // Use AVX if it's enabled.
+ bool avxEnabled = frame.isAvxEnabled();
+
+ uint32_t saVarId = ctx._saVarId;
+ uint32_t saRegId = Gp::kIdSp;
+
+ if (frame.hasDynamicAlignment()) {
+ if (frame.hasPreservedFP())
+ saRegId = Gp::kIdBp;
+ else
+ saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId();
+ }
+
+ RegInfo gpRegInfo = emitter->_gpRegInfo;
+
+ // --------------------------------------------------------------------------
+  // Register-to-stack and stack-to-stack moves must be done first, as this is
+  // when we have the greatest chance of having as many registers unassigned
+  // as possible.
+ // --------------------------------------------------------------------------
+
+ if (ctx._stackDstMask) {
+ // Base address of all arguments passed by stack.
+ Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId)));
+ Mem baseStackPtr = ptr(emitter->gpz(Gp::kIdSp), int32_t(0));
+
+ for (uint32_t varId = 0; varId < varCount; varId++) {
+ Var& var = ctx._vars[varId];
+
+ if (!var.out.isStack())
+ continue;
+
+ FuncValue& cur = var.cur;
+ FuncValue& out = var.out;
+
+ ASMJIT_ASSERT(cur.isReg() || cur.isStack());
+ Reg reg;
+
+ Mem dstStackPtr = baseStackPtr.cloneAdjusted(out.stackOffset());
+ Mem srcStackPtr = baseArgPtr.cloneAdjusted(cur.stackOffset());
+
+ if (cur.isIndirect()) {
+ if (cur.isStack()) {
+ // TODO: Indirect stack.
+ return DebugUtils::errored(kErrorInvalidAssignment);
+ }
+ else {
+ srcStackPtr = ptr(Gp(gpRegInfo.signature(), cur.regId()));
+ }
+ }
+
+ if (cur.isReg() && !cur.isIndirect()) {
+ WorkData& wd = workData[Reg::groupOf(cur.regType())];
+ uint32_t rId = cur.regId();
+
+ reg.setSignatureAndId(Reg::signatureOf(cur.regType()), rId);
+ wd.unassign(varId, rId);
+ }
+ else {
+        // Stack-to-stack move (through a temporary register) - since the data
+        // only passes through the register we can decide which one to use. In
+        // general, int-to-int moves use GP regs (with the possibility to sign
+        // or zero extend), and all other moves use either GP or VEC regs
+        // depending on the size of the move.
+ RegInfo rInfo = x86GetRegForMemToMemMove(arch, out.typeId(), cur.typeId());
+ if (ASMJIT_UNLIKELY(!rInfo.isValid()))
+ return DebugUtils::errored(kErrorInvalidState);
+
+ WorkData& wd = workData[rInfo.group()];
+ uint32_t availableRegs = wd.availableRegs();
+ if (ASMJIT_UNLIKELY(!availableRegs))
+ return DebugUtils::errored(kErrorInvalidState);
+
+ uint32_t rId = Support::ctz(availableRegs);
+ reg.setSignatureAndId(rInfo.signature(), rId);
+
+ ASMJIT_PROPAGATE(emitArgMove(emitter, reg, out.typeId(), srcStackPtr, cur.typeId(), avxEnabled));
+ }
+
+ if (cur.isIndirect() && cur.isReg())
+ workData[BaseReg::kGroupGp].unassign(varId, cur.regId());
+
+ // Register to stack move.
+ ASMJIT_PROPAGATE(emitRegMove(emitter, dstStackPtr, reg, cur.typeId(), avxEnabled));
+ var.markDone();
+ }
+ }
+
+ // --------------------------------------------------------------------------
+  // Shuffle all registers that are currently assigned according to the target
+  // assignment.
+ // --------------------------------------------------------------------------
+
+ uint32_t workFlags = kWorkNone;
+ for (;;) {
+ for (uint32_t varId = 0; varId < varCount; varId++) {
+ Var& var = ctx._vars[varId];
+ if (var.isDone() || !var.cur.isReg())
+ continue;
+
+ FuncValue& cur = var.cur;
+ FuncValue& out = var.out;
+
+ uint32_t curGroup = Reg::groupOf(cur.regType());
+ uint32_t outGroup = Reg::groupOf(out.regType());
+
+ uint32_t curId = cur.regId();
+ uint32_t outId = out.regId();
+
+ if (curGroup != outGroup) {
+ // TODO: Conversion is not supported.
+ return DebugUtils::errored(kErrorInvalidAssignment);
+ }
+ else {
+ WorkData& wd = workData[outGroup];
+ if (!wd.isAssigned(outId)) {
+EmitMove:
+ ASMJIT_PROPAGATE(
+ emitArgMove(emitter,
+ Reg::fromTypeAndId(out.regType(), outId), out.typeId(),
+ Reg::fromTypeAndId(cur.regType(), curId), cur.typeId(), avxEnabled));
+
+ wd.reassign(varId, outId, curId);
+ cur.initReg(out.regType(), outId, out.typeId());
+
+ if (outId == out.regId())
+ var.markDone();
+ workFlags |= kWorkDidSome | kWorkPending;
+ }
+ else {
+ uint32_t altId = wd._physToVarId[outId];
+ Var& altVar = ctx._vars[altId];
+
+ if (!altVar.out.isInitialized() || (altVar.out.isReg() && altVar.out.regId() == curId)) {
+ // Swap operation is possible only between two GP registers.
+ if (curGroup == Reg::kGroupGp) {
+ uint32_t highestType = Support::max(cur.regType(), altVar.cur.regType());
+ uint32_t signature = highestType == Reg::kTypeGpq ? Reg::signatureOfT<Reg::kTypeGpq>()
+ : Reg::signatureOfT<Reg::kTypeGpd>();
+
+ ASMJIT_PROPAGATE(emitter->emit(Inst::kIdXchg, Reg(signature, outId), Reg(signature, curId)));
+ wd.swap(varId, curId, altId, outId);
+ cur.setRegId(outId);
+ var.markDone();
+ altVar.cur.setRegId(curId);
+
+ if (altVar.out.isInitialized())
+ altVar.markDone();
+ workFlags |= kWorkDidSome;
+ }
+ else {
+ // If there is a scratch register it can be used to perform the swap.
+ uint32_t availableRegs = wd.availableRegs();
+ if (availableRegs) {
+ uint32_t inOutRegs = wd.dstRegs();
+ if (availableRegs & ~inOutRegs)
+ availableRegs &= ~inOutRegs;
+ outId = Support::ctz(availableRegs);
+ goto EmitMove;
+ }
+ else {
+ workFlags |= kWorkPending;
+ }
+ }
+ }
+ else {
+ workFlags |= kWorkPending;
+ }
+ }
+ }
+ }
+
+ if (!(workFlags & kWorkPending))
+ break;
+
+ // If we did nothing twice it means that something is really broken.
+ if ((workFlags & (kWorkDidSome | kWorkPostponed)) == kWorkPostponed)
+ return DebugUtils::errored(kErrorInvalidState);
+
+ workFlags = (workFlags & kWorkDidSome) ? kWorkNone : kWorkPostponed;
+ }
+
+ // --------------------------------------------------------------------------
+  // Load arguments passed by stack into registers. This is fairly simple;
+  // unlike the previous phase it needs at most one extra pass, which happens
+  // only when the stack-arguments base register is itself a destination.
+ // --------------------------------------------------------------------------
+
+ if (ctx._hasStackSrc) {
+ uint32_t iterCount = 1;
+ if (frame.hasDynamicAlignment() && !frame.hasPreservedFP())
+ saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId();
+
+ // Base address of all arguments passed by stack.
+ Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId)));
+
+ for (uint32_t iter = 0; iter < iterCount; iter++) {
+ for (uint32_t varId = 0; varId < varCount; varId++) {
+ Var& var = ctx._vars[varId];
+ if (var.isDone())
+ continue;
+
+ if (var.cur.isStack()) {
+ ASMJIT_ASSERT(var.out.isReg());
+
+ uint32_t outId = var.out.regId();
+ uint32_t outType = var.out.regType();
+
+ uint32_t group = Reg::groupOf(outType);
+ WorkData& wd = ctx._workData[group];
+
+ if (outId == saRegId && group == BaseReg::kGroupGp) {
+ // This register will be processed last as we still need `saRegId`.
+ if (iterCount == 1) {
+ iterCount++;
+ continue;
+ }
+ wd.unassign(wd._physToVarId[outId], outId);
+ }
+
+ Reg dstReg = Reg::fromTypeAndId(outType, outId);
+ Mem srcMem = baseArgPtr.cloneAdjusted(var.cur.stackOffset());
+
+ ASMJIT_PROPAGATE(
+ emitArgMove(emitter,
+ dstReg, var.out.typeId(),
+ srcMem, var.cur.typeId(), avxEnabled));
+
+ wd.assign(varId, outId);
+ var.cur.initReg(outType, outId, var.cur.typeId(), FuncValue::kFlagIsDone);
+ }
+ }
+ }
+ }
+
+ return kErrorOk;
+}
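+
+// End-to-end sketch (illustrative; the FuncAPI names follow the asmjit
+// version vendored here and exact signatures may differ elsewhere). A typical
+// call order that leads into the routines in this file:
+//
+//   FuncDetail func;                          // -> initFuncDetail()
+//   func.init(signature);
+//
+//   FuncFrame frame;                          // -> initFuncFrame()
+//   frame.init(func);
+//   frame.addDirtyRegs(...);                  // Registers the generated code clobbers.
+//
+//   FuncArgsAssignment args(&func);           // -> argsToFuncFrame()
+//   args.assignAll(...);
+//   args.updateFuncFrame(frame);
+//   frame.finalize();                         // -> finalizeFuncFrame()
+//
+//   emitter->emitProlog(frame);               // -> emitProlog()
+//   emitter->emitArgsAssignment(frame, args); // -> emitArgsAssignment()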
+
+ASMJIT_END_SUB_NAMESPACE
+
+#endif // ASMJIT_BUILD_X86