Diffstat (limited to 'client/asmjit/core/ralocal.cpp')
-rw-r--r--   client/asmjit/core/ralocal.cpp   1039
1 files changed, 1039 insertions, 0 deletions
diff --git a/client/asmjit/core/ralocal.cpp b/client/asmjit/core/ralocal.cpp
new file mode 100644
index 0000000..e3a8a97
--- /dev/null
+++ b/client/asmjit/core/ralocal.cpp
@@ -0,0 +1,1039 @@
+// AsmJit - Machine code generation for C++
+//
+// * Official AsmJit Home Page: https://asmjit.com
+// * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#include "../core/api-build_p.h"
+#ifndef ASMJIT_NO_COMPILER
+
+#include "../core/ralocal_p.h"
+#include "../core/support.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+// ============================================================================
+// [asmjit::RALocalAllocator - Utilities]
+// ============================================================================
+
+static ASMJIT_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
+ for (size_t i = 0; i < count; i++)
+ if (tiedRegs[i].workId() == workId)
+ return &tiedRegs[i];
+ return nullptr;
+}
+
+// ============================================================================
+// [asmjit::RALocalAllocator - Init / Reset]
+// ============================================================================
+
+Error RALocalAllocator::init() noexcept {
+ PhysToWorkMap* physToWorkMap;
+ WorkToPhysMap* workToPhysMap;
+
+ physToWorkMap = _pass->newPhysToWorkMap();
+ workToPhysMap = _pass->newWorkToPhysMap();
+ if (!physToWorkMap || !workToPhysMap)
+ return DebugUtils::errored(kErrorOutOfMemory);
+
+ _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
+ _curAssignment.initMaps(physToWorkMap, workToPhysMap);
+
+ physToWorkMap = _pass->newPhysToWorkMap();
+ workToPhysMap = _pass->newWorkToPhysMap();
+ if (!physToWorkMap || !workToPhysMap)
+ return DebugUtils::errored(kErrorOutOfMemory);
+
+ _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
+ _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);
+
+ return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::RALocalAllocator - Assignment]
+// ============================================================================
+
+Error RALocalAllocator::makeInitialAssignment() noexcept {
+ FuncNode* func = _pass->func();
+ RABlock* entry = _pass->entryBlock();
+
+ ZoneBitVector& liveIn = entry->liveIn();
+ uint32_t argCount = func->argCount();
+ uint32_t numIter = 1;
+
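+  // Two passes at most: the first pass assigns arguments whose home register is
+  // still free; if any argument remains unassigned, `numIter` is bumped to 2 and
+  // the second pass picks any free register or falls back to a stack slot.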
+ for (uint32_t iter = 0; iter < numIter; iter++) {
+ for (uint32_t i = 0; i < argCount; i++) {
+ // Unassigned argument.
+ VirtReg* virtReg = func->arg(i);
+ if (!virtReg) continue;
+
+ // Unreferenced argument.
+ RAWorkReg* workReg = virtReg->workReg();
+ if (!workReg) continue;
+
+ // Overwritten argument.
+ uint32_t workId = workReg->workId();
+ if (!liveIn.bitAt(workId))
+ continue;
+
+ uint32_t group = workReg->group();
+ if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
+ continue;
+
+ uint32_t allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
+ if (iter == 0) {
+ // First iteration: Try to allocate to home RegId.
+ if (workReg->hasHomeRegId()) {
+ uint32_t physId = workReg->homeRegId();
+ if (Support::bitTest(allocableRegs, physId)) {
+ _curAssignment.assign(group, workId, physId, true);
+ _pass->_argsAssignment.assignReg(i, workReg->info().type(), physId, workReg->typeId());
+ continue;
+ }
+ }
+
+ numIter = 2;
+ }
+ else {
+        // Second iteration: Pick any unassigned register if one is available, or assign to stack.
+ if (allocableRegs) {
+ uint32_t physId = Support::ctz(allocableRegs);
+ _curAssignment.assign(group, workId, physId, true);
+ _pass->_argsAssignment.assignReg(i, workReg->info().type(), physId, workReg->typeId());
+ }
+ else {
+          // This register will definitely need a stack slot, so create it now and also assign
+          // `argIndex` to it. We will patch `_argsAssignment` later, after RAStackAllocator finishes.
+ RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
+ if (ASMJIT_UNLIKELY(!slot))
+ return DebugUtils::errored(kErrorOutOfMemory);
+
+ // This means STACK_ARG may be moved to STACK.
+ workReg->addFlags(RAWorkReg::kFlagStackArgToStack);
+ _pass->_numStackArgsToStackSlots++;
+ }
+ }
+ }
+ }
+
+ return kErrorOk;
+}
+
+Error RALocalAllocator::replaceAssignment(
+ const PhysToWorkMap* physToWorkMap,
+ const WorkToPhysMap* workToPhysMap) noexcept {
+
+ _curAssignment.copyFrom(physToWorkMap, workToPhysMap);
+ return kErrorOk;
+}
+
+Error RALocalAllocator::switchToAssignment(
+ PhysToWorkMap* dstPhysToWorkMap,
+ WorkToPhysMap* dstWorkToPhysMap,
+ const ZoneBitVector& liveIn,
+ bool dstReadOnly,
+ bool tryMode) noexcept {
+
+ RAAssignment dst;
+ RAAssignment& cur = _curAssignment;
+
+ dst.initLayout(_pass->_physRegCount, _pass->workRegs());
+ dst.initMaps(dstPhysToWorkMap, dstWorkToPhysMap);
+
+ if (tryMode)
+ return kErrorOk;
+
+ for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
+ // ------------------------------------------------------------------------
+ // STEP 1:
+ // - KILL all registers that are not live at `dst`,
+ // - SPILL all registers that are not assigned at `dst`.
+ // ------------------------------------------------------------------------
+
+ if (!tryMode) {
+ Support::BitWordIterator<uint32_t> it(cur.assigned(group));
+ while (it.hasNext()) {
+ uint32_t physId = it.next();
+ uint32_t workId = cur.physToWorkId(group, physId);
+
+ // Must be true as we iterate over assigned registers.
+ ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);
+
+ // KILL if it's not live on entry.
+ if (!liveIn.bitAt(workId)) {
+ onKillReg(group, workId, physId);
+ continue;
+ }
+
+ // SPILL if it's not assigned on entry.
+ uint32_t altId = dst.workToPhysId(group, workId);
+ if (altId == RAAssignment::kPhysNone) {
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+ }
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 2:
+ // - MOVE and SWAP registers from their current assignments into their
+ // DST assignments.
+ // - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.
+ // ------------------------------------------------------------------------
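+    // For illustration (hypothetical registers): if CUR holds {r0=A, r1=B} and
+    // DST expects {r0=B, r1=A}, neither register can be MOVEd directly because
+    // both destinations are occupied. The loop below breaks such cycles either by
+    // SWAPping the two registers (when the architecture supports it) or by
+    // SPILLing/MOVEing one of them, after which the remaining moves succeed in a
+    // later round.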
+
+ // Current run-id (1 means more aggressive decisions).
+ int32_t runId = -1;
+ // Remaining registers scheduled for `onLoadReg()`.
+ uint32_t willLoadRegs = 0;
+ // Remaining registers to be allocated in this loop.
+ uint32_t affectedRegs = dst.assigned(group);
+
+ while (affectedRegs) {
+ if (++runId == 2) {
+ if (!tryMode)
+ return DebugUtils::errored(kErrorInvalidState);
+
+        // Stop in `tryMode` if we haven't done anything in the past two rounds.
+ break;
+ }
+
+ Support::BitWordIterator<uint32_t> it(affectedRegs);
+ while (it.hasNext()) {
+ uint32_t physId = it.next();
+ uint32_t physMask = Support::bitMask(physId);
+
+ uint32_t curWorkId = cur.physToWorkId(group, physId);
+ uint32_t dstWorkId = dst.physToWorkId(group, physId);
+
+ // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
+ ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);
+
+ if (curWorkId != RAAssignment::kWorkNone) {
+ // Both assigned.
+ if (curWorkId != dstWorkId) {
+            // Wait a bit if this is the first run; we may avoid this move if `curWorkId` moves out on its own.
+ if (runId <= 0)
+ continue;
+
+ uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
+ if (altPhysId == RAAssignment::kPhysNone)
+ continue;
+
+ // Reset as we will do some changes to the current assignment.
+ runId = -1;
+
+ if (_archTraits.hasSwap(group)) {
+ ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
+ }
+ else {
+              // KILL the reg if it's not dirty (no save needed), otherwise try to MOVE, and SPILL as a last resort.
+ if (!cur.isPhysDirty(group, physId)) {
+ ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
+ }
+ else {
+ uint32_t allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);
+
+ // If possible don't conflict with assigned regs at DST.
+ if (allocableRegs & ~dst.assigned(group))
+ allocableRegs &= ~dst.assigned(group);
+
+ if (allocableRegs) {
+ // MOVE is possible, thus preferred.
+ uint32_t tmpPhysId = Support::ctz(allocableRegs);
+
+ ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
+ _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
+ }
+ else {
+ // MOVE is impossible, must SPILL.
+ ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
+ }
+ }
+
+ goto Cleared;
+ }
+ }
+ }
+ else {
+Cleared:
+ // DST assigned, CUR unassigned.
+ uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
+ if (altPhysId == RAAssignment::kPhysNone) {
+ if (liveIn.bitAt(dstWorkId))
+ willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
+ affectedRegs &= ~physMask; // Unaffected from now.
+ continue;
+ }
+ ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
+ }
+
+ // Both DST and CUR assigned to the same reg or CUR just moved to DST.
+ if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
+ if ((dst.dirty(group) & physMask) == 0) {
+ // CUR dirty, DST not dirty (the assert is just to visualize the condition).
+ ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));
+
+            // If `dstReadOnly` is true it means that the block was already
+            // processed and we cannot change from CLEAN to DIRTY. In that case
+            // the register has to be saved, as it cannot enter the block DIRTY.
+ if (dstReadOnly)
+ ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
+ else
+ dst.makeDirty(group, dstWorkId, physId);
+ }
+ else {
+ // DST dirty, CUR not dirty (the assert is just to visualize the condition).
+ ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));
+
+ cur.makeDirty(group, dstWorkId, physId);
+ }
+ }
+
+ // Must match now...
+ ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
+ ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
+
+ runId = -1;
+ affectedRegs &= ~physMask;
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 3:
+ // - Load registers specified by `willLoadRegs`.
+ // ------------------------------------------------------------------------
+
+ {
+ Support::BitWordIterator<uint32_t> it(willLoadRegs);
+ while (it.hasNext()) {
+ uint32_t physId = it.next();
+
+ if (!cur.isPhysAssigned(group, physId)) {
+ uint32_t workId = dst.physToWorkId(group, physId);
+
+ // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
+ ASMJIT_ASSERT(liveIn.bitAt(workId) == true);
+
+ ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
+ if (dst.isPhysDirty(group, physId))
+ cur.makeDirty(group, workId, physId);
+ ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
+ }
+ else {
+ // Not possible otherwise.
+ ASMJIT_ASSERT(tryMode == true);
+ }
+ }
+ }
+ }
+
+ if (!tryMode) {
+    // Here is code that dumps the conflicting part if something fails here:
+ // if (!dst.equals(cur)) {
+ // uint32_t physTotal = dst._layout.physTotal;
+ // uint32_t workCount = dst._layout.workCount;
+ //
+ // for (uint32_t physId = 0; physId < physTotal; physId++) {
+ // uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
+ // uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
+ // if (dstWorkId != curWorkId)
+ // fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
+ // }
+ //
+ // for (uint32_t workId = 0; workId < workCount; workId++) {
+ // uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
+ // uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
+ // if (dstPhysId != curPhysId)
+ // fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
+ // }
+ // }
+ ASMJIT_ASSERT(dst.equals(cur));
+ }
+
+ return kErrorOk;
+}
+
+Error RALocalAllocator::spillScratchGpRegsBeforeEntry(uint32_t scratchRegs) noexcept {
+ uint32_t group = BaseReg::kGroupGp;
+ Support::BitWordIterator<uint32_t> it(scratchRegs);
+
+ while (it.hasNext()) {
+ uint32_t physId = it.next();
+ if (_curAssignment.isPhysAssigned(group, physId)) {
+ uint32_t workId = _curAssignment.physToWorkId(group, physId);
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+ }
+ }
+
+ return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::RALocalAllocator - Allocation]
+// ============================================================================
+
+Error RALocalAllocator::allocInst(InstNode* node) noexcept {
+ RAInst* raInst = node->passData<RAInst>();
+
+ RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
+ RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
+
+ // The cursor must point to the previous instruction for a possible instruction insertion.
+ _cc->_setCursor(node->prev());
+
+ _node = node;
+ _raInst = raInst;
+ _tiedTotal = raInst->_tiedTotal;
+ _tiedCount = raInst->_tiedCount;
+
+ // Whether we already replaced register operand with memory operand.
+ bool rmAllocated = false;
+
+ for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
+ uint32_t i, count = this->tiedCount(group);
+ RATiedReg* tiedRegs = this->tiedRegs(group);
+
+ uint32_t willUse = _raInst->_usedRegs[group];
+ uint32_t willOut = _raInst->_clobberedRegs[group];
+ uint32_t willFree = 0;
+ uint32_t usePending = count;
+
+ uint32_t outTiedCount = 0;
+ uint32_t dupTiedCount = 0;
+
+ // ------------------------------------------------------------------------
+ // STEP 1:
+ //
+ // Calculate `willUse` and `willFree` masks based on tied registers we have.
+ //
+ // We don't do any assignment decisions at this stage as we just need to
+ // collect some information first. Then, after we populate all masks needed
+ // we can finally make some decisions in the second loop. The main reason
+ // for this is that we really need `willFree` to make assignment decisions
+ // for `willUse`, because if we mark some registers that will be freed, we
+ // can consider them in decision making afterwards.
+ // ------------------------------------------------------------------------
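+    // For illustration (hypothetical ids): if a tied register is fixed to r2
+    // (useId == 2) but r2 currently holds an unrelated work register, bit 2 ends
+    // up in both `willUse` and `willFree`, so the current occupant is evicted in
+    // STEP 3 before the USE registers are allocated in STEP 4.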
+
+ for (i = 0; i < count; i++) {
+ RATiedReg* tiedReg = &tiedRegs[i];
+
+      // Add OUT and KILL to `outTiedRegs` for CLOBBERing and/or OUT assignment.
+ if (tiedReg->isOutOrKill())
+ outTiedRegs[outTiedCount++] = tiedReg;
+
+ if (tiedReg->isDuplicate())
+ dupTiedRegs[dupTiedCount++] = tiedReg;
+
+ if (!tiedReg->isUse()) {
+ tiedReg->markUseDone();
+ usePending--;
+ continue;
+ }
+
+ uint32_t workId = tiedReg->workId();
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+
+ if (tiedReg->hasUseId()) {
+ // If the register has `useId` it means it can only be allocated in that register.
+ uint32_t useMask = Support::bitMask(tiedReg->useId());
+
+ // RAInstBuilder must have collected `usedRegs` on-the-fly.
+ ASMJIT_ASSERT((willUse & useMask) != 0);
+
+ if (assignedId == tiedReg->useId()) {
+ // If the register is already allocated in this one, mark it done and continue.
+ tiedReg->markUseDone();
+ if (tiedReg->isWrite())
+ _curAssignment.makeDirty(group, workId, assignedId);
+ usePending--;
+ willUse |= useMask;
+ }
+ else {
+ willFree |= useMask & _curAssignment.assigned(group);
+ }
+ }
+ else {
+        // Check if the register must be moved into one of `allocableRegs`.
+ uint32_t allocableRegs = tiedReg->allocableRegs();
+ if (assignedId != RAAssignment::kPhysNone) {
+ uint32_t assignedMask = Support::bitMask(assignedId);
+ if ((allocableRegs & ~willUse) & assignedMask) {
+ tiedReg->setUseId(assignedId);
+ tiedReg->markUseDone();
+ if (tiedReg->isWrite())
+ _curAssignment.makeDirty(group, workId, assignedId);
+ usePending--;
+ willUse |= assignedMask;
+ }
+ else {
+ willFree |= assignedMask;
+ }
+ }
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 2:
+ //
+ // Do some decision making to find the best candidates of registers that
+ // need to be assigned, moved, and/or spilled. Only USE registers are
+ // considered here, OUT will be decided later after all CLOBBERed and OUT
+ // registers are unassigned.
+ // ------------------------------------------------------------------------
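+    // Note on the REG/MEM case below: if a USE register that also accepts a
+    // memory operand is currently unassigned, the register operand is rewritten
+    // to a memory operand so the value is consumed directly from memory instead
+    // of being loaded into a physical register first.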
+
+ if (usePending) {
+      // TODO: Not sure `liveRegs` should be used; maybe `willUse` and `willFree` would be enough and much clearer.
+
+ // All registers that are currently alive without registers that will be freed.
+ uint32_t liveRegs = _curAssignment.assigned(group) & ~willFree;
+
+ for (i = 0; i < count; i++) {
+ RATiedReg* tiedReg = &tiedRegs[i];
+ if (tiedReg->isUseDone()) continue;
+
+ uint32_t workId = tiedReg->workId();
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+
+ // REG/MEM: Patch register operand to memory operand if not allocated.
+ if (!rmAllocated && tiedReg->hasUseRM()) {
+ if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
+ RAWorkReg* workReg = workRegById(tiedReg->workId());
+ uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
+ uint32_t rmSize = tiedReg->rmSize();
+
+ if (rmSize <= workReg->virtReg()->virtSize()) {
+ Operand& op = node->operands()[opIndex];
+ op = _pass->workRegAsMem(workReg);
+ op.as<BaseMem>().setSize(rmSize);
+ tiedReg->_useRewriteMask = 0;
+
+ tiedReg->markUseDone();
+ usePending--;
+
+ rmAllocated = true;
+ continue;
+ }
+ }
+ }
+
+ if (!tiedReg->hasUseId()) {
+ uint32_t allocableRegs = tiedReg->allocableRegs() & ~(willFree | willUse);
+
+ // DECIDE where to assign the USE register.
+ uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);
+ uint32_t useMask = Support::bitMask(useId);
+
+ willUse |= useMask;
+ willFree |= useMask & liveRegs;
+ tiedReg->setUseId(useId);
+
+ if (assignedId != RAAssignment::kPhysNone) {
+ uint32_t assignedMask = Support::bitMask(assignedId);
+
+ willFree |= assignedMask;
+ liveRegs &= ~assignedMask;
+
+ // OPTIMIZATION: Assign the USE register here if it's possible.
+ if (!(liveRegs & useMask)) {
+ ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
+ tiedReg->markUseDone();
+ if (tiedReg->isWrite())
+ _curAssignment.makeDirty(group, workId, useId);
+ usePending--;
+ }
+ }
+ else {
+ // OPTIMIZATION: Assign the USE register here if it's possible.
+ if (!(liveRegs & useMask)) {
+ ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
+ tiedReg->markUseDone();
+ if (tiedReg->isWrite())
+ _curAssignment.makeDirty(group, workId, useId);
+ usePending--;
+ }
+ }
+
+ liveRegs |= useMask;
+ }
+ }
+ }
+
+ // Initially all used regs will be marked clobbered.
+ uint32_t clobberedByInst = willUse | willOut;
+
+ // ------------------------------------------------------------------------
+ // STEP 3:
+ //
+ // Free all registers that we marked as `willFree`. Only registers that are not
+ // USEd by the instruction are considered as we don't want to free regs we need.
+ // ------------------------------------------------------------------------
+
+ if (willFree) {
+ uint32_t allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
+ Support::BitWordIterator<uint32_t> it(willFree);
+
+ do {
+ uint32_t assignedId = it.next();
+ if (_curAssignment.isPhysAssigned(group, assignedId)) {
+ uint32_t workId = _curAssignment.physToWorkId(group, assignedId);
+
+ // DECIDE whether to MOVE or SPILL.
+ if (allocableRegs) {
+ uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
+ if (reassignedId != RAAssignment::kPhysNone) {
+ ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
+ allocableRegs ^= Support::bitMask(reassignedId);
+ continue;
+ }
+ }
+
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
+ }
+ } while (it.hasNext());
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 4:
+ //
+ // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't
+ // allocated yet. This is a bit complicated as the allocation is iterative.
+    // In some cases we have to wait before allocating a particular physical
+    // register as it's still occupied by some other one, which we need to move
+    // before we can use it. In that case we skip it and allocate some other
+    // register instead (making it free for another iteration).
+    //
+    // NOTE: Iterations are mostly important for complicated allocations like
+    // function calls, where there can be up to N registers used at once. Asm
+    // instructions won't run the loop more than once in 99.9% of cases as they
+    // use 2..3 registers on average.
+ // ------------------------------------------------------------------------
+
+ if (usePending) {
+ bool mustSwap = false;
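+      // `mustSwap` is set when a whole round makes no progress; it enables the
+      // SPILL fallback below so the loop is guaranteed to terminate.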
+ do {
+ uint32_t oldPending = usePending;
+
+ for (i = 0; i < count; i++) {
+ RATiedReg* thisTiedReg = &tiedRegs[i];
+ if (thisTiedReg->isUseDone()) continue;
+
+ uint32_t thisWorkId = thisTiedReg->workId();
+ uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);
+
+          // This would be a bug, a fatal one!
+ uint32_t targetPhysId = thisTiedReg->useId();
+ ASMJIT_ASSERT(targetPhysId != thisPhysId);
+
+ uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
+ if (targetWorkId != RAAssignment::kWorkNone) {
+ RAWorkReg* targetWorkReg = workRegById(targetWorkId);
+
+            // Swapping two registers can solve two allocation tasks by emitting
+            // just a single instruction. However, swap is only available on a few
+            // architectures and it's definitely not available for every register
+            // group. Calling `onSwapReg()` without checking these would be fatal.
+ if (_archTraits.hasSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
+ ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));
+
+ thisTiedReg->markUseDone();
+ if (thisTiedReg->isWrite())
+ _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
+ usePending--;
+
+              // Double-hit: the swap may have satisfied the target's tied register as well.
+ RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
+ if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
+ targetTiedReg->markUseDone();
+ if (targetTiedReg->isWrite())
+ _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
+ usePending--;
+ }
+ continue;
+ }
+
+ if (!mustSwap)
+ continue;
+
+            // We only get here if the previous iteration did nothing. This is
+            // essentially a SWAP operation without having a dedicated instruction
+            // for that purpose (vector registers, etc). The simplest way to
+            // handle such a case is to SPILL the target register.
+ ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
+ }
+
+ if (thisPhysId != RAAssignment::kPhysNone) {
+ ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));
+
+ thisTiedReg->markUseDone();
+ if (thisTiedReg->isWrite())
+ _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
+ usePending--;
+ }
+ else {
+ ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));
+
+ thisTiedReg->markUseDone();
+ if (thisTiedReg->isWrite())
+ _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
+ usePending--;
+ }
+ }
+
+ mustSwap = (oldPending == usePending);
+ } while (usePending);
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 5:
+ //
+ // KILL registers marked as KILL/OUT.
+ // ------------------------------------------------------------------------
+
+ uint32_t outPending = outTiedCount;
+ if (outTiedCount) {
+ for (i = 0; i < outTiedCount; i++) {
+ RATiedReg* tiedReg = outTiedRegs[i];
+
+ uint32_t workId = tiedReg->workId();
+ uint32_t physId = _curAssignment.workToPhysId(group, workId);
+
+ // Must check if it's allocated as KILL can be related to OUT (like KILL
+ // immediately after OUT, which could mean the register is not assigned).
+ if (physId != RAAssignment::kPhysNone) {
+ ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
+ willOut &= ~Support::bitMask(physId);
+ }
+
+        // We still maintain the number of registers pending for OUT assignment,
+        // so if this is only KILL, not OUT, we can safely decrement it.
+ outPending -= !tiedReg->isOut();
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 6:
+ //
+ // SPILL registers that will be CLOBBERed. Since OUT and KILL were
+ // already processed this is used mostly to handle function CALLs.
+ // ------------------------------------------------------------------------
+
+ if (willOut) {
+ Support::BitWordIterator<uint32_t> it(willOut);
+ do {
+ uint32_t physId = it.next();
+ uint32_t workId = _curAssignment.physToWorkId(group, physId);
+
+ if (workId == RAAssignment::kWorkNone)
+ continue;
+
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
+ } while (it.hasNext());
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 7:
+ //
+    // Duplication - copy the USE register into every other register it must also occupy.
+ // ------------------------------------------------------------------------
+
+ for (i = 0; i < dupTiedCount; i++) {
+ RATiedReg* tiedReg = dupTiedRegs[i];
+ uint32_t workId = tiedReg->workId();
+ uint32_t srcId = tiedReg->useId();
+
+ Support::BitWordIterator<uint32_t> it(tiedReg->_allocableRegs);
+ while (it.hasNext()) {
+ uint32_t dstId = it.next();
+ if (dstId == srcId)
+ continue;
+ _pass->onEmitMove(workId, dstId, srcId);
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ // STEP 8:
+ //
+ // Assign OUT registers.
+ // ------------------------------------------------------------------------
+
+ if (outPending) {
+      // Live registers. We need a separate variable (outside of `_curAssignment`)
+      // to hold these because of KILLed registers: if we KILL a register here it
+      // is removed from `_curAssignment`, but we still cannot assign to it here.
+ uint32_t liveRegs = _curAssignment.assigned(group);
+
+      // Must be avoided as they have already been OUTed (added during the loop).
+ uint32_t outRegs = 0;
+
+ // Must avoid as they collide with already allocated ones.
+ uint32_t avoidRegs = willUse & ~clobberedByInst;
+
+ for (i = 0; i < outTiedCount; i++) {
+ RATiedReg* tiedReg = outTiedRegs[i];
+ if (!tiedReg->isOut()) continue;
+
+ uint32_t workId = tiedReg->workId();
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+
+ if (assignedId != RAAssignment::kPhysNone)
+ ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));
+
+ uint32_t physId = tiedReg->outId();
+ if (physId == RAAssignment::kPhysNone) {
+ uint32_t allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);
+
+ if (!(allocableRegs & ~liveRegs)) {
+ // There are no more registers, decide which one to spill.
+ uint32_t spillWorkId;
+ physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
+ ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
+ }
+ else {
+ physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
+ }
+ }
+
+ // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
+ ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));
+
+ if (!tiedReg->isKill())
+ ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));
+
+ tiedReg->setOutId(physId);
+ tiedReg->markOutDone();
+
+ outRegs |= Support::bitMask(physId);
+ liveRegs &= ~Support::bitMask(physId);
+ outPending--;
+ }
+
+ clobberedByInst |= outRegs;
+ ASMJIT_ASSERT(outPending == 0);
+ }
+
+ _clobberedRegs[group] |= clobberedByInst;
+ }
+
+ return kErrorOk;
+}
+
+Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
+  // This is an experimental feature that spills registers that don't have a
+  // home-id and whose last use is in this basic block. This prevents saving
+  // these regs in other basic blocks and then restoring them (mostly relevant
+  // for loops).
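+  // For example, a temporary whose last use is inside a loop body and that has
+  // no home register would otherwise have to be re-synchronized at every block
+  // boundary of the loop; spilling it right after its last use avoids that traffic.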
+ RAInst* raInst = node->passData<RAInst>();
+ uint32_t count = raInst->tiedCount();
+
+ for (uint32_t i = 0; i < count; i++) {
+ RATiedReg* tiedReg = raInst->tiedAt(i);
+ if (tiedReg->isLast()) {
+ uint32_t workId = tiedReg->workId();
+ RAWorkReg* workReg = workRegById(workId);
+ if (!workReg->hasHomeRegId()) {
+ uint32_t group = workReg->group();
+ uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
+ if (assignedId != RAAssignment::kPhysNone) {
+ _cc->_setCursor(node);
+ ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
+ }
+ }
+ }
+ }
+
+ return kErrorOk;
+}
+
+Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
+ // TODO: This should be used to make the branch allocation better.
+ DebugUtils::unused(cont);
+
+ // The cursor must point to the previous instruction for a possible instruction insertion.
+ _cc->_setCursor(node->prev());
+
+ // Use TryMode of `switchToAssignment()` if possible.
+ if (target->hasEntryAssignment()) {
+ ASMJIT_PROPAGATE(switchToAssignment(
+ target->entryPhysToWorkMap(),
+ target->entryWorkToPhysMap(),
+ target->liveIn(),
+ target->isAllocated(),
+ true));
+ }
+
+ ASMJIT_PROPAGATE(allocInst(node));
+ ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));
+
+ if (target->hasEntryAssignment()) {
+ BaseNode* injectionPoint = _pass->extraBlock()->prev();
+ BaseNode* prevCursor = _cc->setCursor(injectionPoint);
+
+ _tmpAssignment.copyFrom(_curAssignment);
+ ASMJIT_PROPAGATE(switchToAssignment(
+ target->entryPhysToWorkMap(),
+ target->entryWorkToPhysMap(),
+ target->liveIn(),
+ target->isAllocated(),
+ false));
+
+ BaseNode* curCursor = _cc->cursor();
+ if (curCursor != injectionPoint) {
+        // Additional instructions were emitted to switch from the current state
+        // to the `target` state. This means that we have to move these instructions
+        // into an independent code block and patch the jump location.
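+        // Conceptually (hypothetical labels):
+        //   jmp L_target              ; original branch
+        // becomes:
+        //   jmp L_trampoline          ; patched branch
+        //   ...
+        //   L_trampoline:
+        //     <state-switching code emitted by switchToAssignment()>
+        //     jmp L_target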
+ Operand& targetOp = node->op(node->opCount() - 1);
+ if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
+ return DebugUtils::errored(kErrorInvalidState);
+
+ Label trampoline = _cc->newLabel();
+ Label savedTarget = targetOp.as<Label>();
+
+ // Patch `target` to point to the `trampoline` we just created.
+ targetOp = trampoline;
+
+        // Clear a possible SHORT form as we no longer know whether the SHORT form
+        // would be encodable after patching the target to `trampoline` (X86 specific).
+ node->clearInstOptions(BaseInst::kOptionShortForm);
+
+ // Finalize the switch assignment sequence.
+ ASMJIT_PROPAGATE(_pass->onEmitJump(savedTarget));
+ _cc->_setCursor(injectionPoint);
+ _cc->bind(trampoline);
+ }
+
+ _cc->_setCursor(prevCursor);
+ _curAssignment.swap(_tmpAssignment);
+ }
+ else {
+ ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment));
+ }
+
+ return kErrorOk;
+}
+
+Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept {
+ if (targets.empty())
+ return DebugUtils::errored(kErrorInvalidState);
+
+ // The cursor must point to the previous instruction for a possible instruction insertion.
+ _cc->_setCursor(node->prev());
+
+  // All `targets` should have the same sharedAssignmentId, so we just read the first.
+ RABlock* anyTarget = targets[0];
+ if (!anyTarget->hasSharedAssignmentId())
+ return DebugUtils::errored(kErrorInvalidState);
+
+ RASharedAssignment& sharedAssignment = _pass->_sharedAssignments[anyTarget->sharedAssignmentId()];
+
+ ASMJIT_PROPAGATE(allocInst(node));
+
+ if (!sharedAssignment.empty()) {
+ ASMJIT_PROPAGATE(switchToAssignment(
+ sharedAssignment.physToWorkMap(),
+ sharedAssignment.workToPhysMap(),
+ sharedAssignment.liveIn(),
+ true, // Read-only.
+ false // Try-mode.
+ ));
+ }
+
+ ASMJIT_PROPAGATE(spillRegsBeforeEntry(anyTarget));
+
+ if (sharedAssignment.empty()) {
+ ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(anyTarget, block(), _curAssignment));
+ }
+
+ return kErrorOk;
+}
+
+// ============================================================================
+// [asmjit::RALocalAllocator - Decision Making]
+// ============================================================================
+
+uint32_t RALocalAllocator::decideOnAssignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
+ ASMJIT_ASSERT(allocableRegs != 0);
+ DebugUtils::unused(group, physId);
+
+ RAWorkReg* workReg = workRegById(workId);
+
+ // Prefer home register id, if possible.
+ if (workReg->hasHomeRegId()) {
+ uint32_t homeId = workReg->homeRegId();
+ if (Support::bitTest(allocableRegs, homeId))
+ return homeId;
+ }
+
+ // Prefer registers used upon block entries.
+ uint32_t previouslyAssignedRegs = workReg->allocatedMask();
+ if (allocableRegs & previouslyAssignedRegs)
+ allocableRegs &= previouslyAssignedRegs;
+
+ return Support::ctz(allocableRegs);
+}
+
+uint32_t RALocalAllocator::decideOnReassignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
+ ASMJIT_ASSERT(allocableRegs != 0);
+ DebugUtils::unused(group, physId);
+
+ RAWorkReg* workReg = workRegById(workId);
+
+ // Prefer allocating back to HomeId, if possible.
+ if (workReg->hasHomeRegId()) {
+ if (Support::bitTest(allocableRegs, workReg->homeRegId()))
+ return workReg->homeRegId();
+ }
+
+ // TODO: [Register Allocator] This could be improved.
+
+ // Decided to SPILL.
+ return RAAssignment::kPhysNone;
+}
+
+uint32_t RALocalAllocator::decideOnSpillFor(uint32_t group, uint32_t workId, uint32_t spillableRegs, uint32_t* spillWorkId) const noexcept {
+ // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
+ DebugUtils::unused(workId);
+ ASMJIT_ASSERT(spillableRegs != 0);
+
+ Support::BitWordIterator<uint32_t> it(spillableRegs);
+ uint32_t bestPhysId = it.next();
+ uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);
+
+ // Avoid calculating the cost model if there is only one spillable register.
+ if (it.hasNext()) {
+ uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
+ do {
+ uint32_t localPhysId = it.next();
+ uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
+ uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);
+
+ if (localCost < bestCost) {
+ bestCost = localCost;
+ bestPhysId = localPhysId;
+ bestWorkId = localWorkId;
+ }
+ } while (it.hasNext());
+ }
+
+ *spillWorkId = bestWorkId;
+ return bestPhysId;
+}
+
+ASMJIT_END_NAMESPACE
+
+#endif // !ASMJIT_NO_COMPILER