// This file is part of AsmJit project
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib

#include "../core/api-build_p.h"
#ifndef ASMJIT_NO_COMPILER

#include "../core/ralocal_p.h"
#include "../core/support.h"

ASMJIT_BEGIN_NAMESPACE

// RALocalAllocator - Utilities
// ============================

static ASMJIT_FORCE_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
  for (size_t i = 0; i < count; i++)
    if (tiedRegs[i].workId() == workId)
      return &tiedRegs[i];
  return nullptr;
}

// RALocalAllocator - Init & Reset
// ===============================

Error RALocalAllocator::init() noexcept {
  PhysToWorkMap* physToWorkMap;
  WorkToPhysMap* workToPhysMap;

  physToWorkMap = _pass->newPhysToWorkMap();
  workToPhysMap = _pass->newWorkToPhysMap();
  if (!physToWorkMap || !workToPhysMap)
    return DebugUtils::errored(kErrorOutOfMemory);

  _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
  _curAssignment.initMaps(physToWorkMap, workToPhysMap);

  physToWorkMap = _pass->newPhysToWorkMap();
  workToPhysMap = _pass->newWorkToPhysMap();
  _tmpWorkToPhysMap = _pass->newWorkToPhysMap();

  if (!physToWorkMap || !workToPhysMap || !_tmpWorkToPhysMap)
    return DebugUtils::errored(kErrorOutOfMemory);

  _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
  _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);

  return kErrorOk;
}

// RALocalAllocator - Assignment
// =============================

Error RALocalAllocator::makeInitialAssignment() noexcept {
  FuncNode* func = _pass->func();
  RABlock* entry = _pass->entryBlock();

  ZoneBitVector& liveIn = entry->liveIn();
  uint32_t argCount = func->argCount();
  uint32_t numIter = 1;

  for (uint32_t iter = 0; iter < numIter; iter++) {
    for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
      for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
        // Unassigned argument.
        const RegOnly& regArg = func->argPack(argIndex)[valueIndex];
        if (!regArg.isReg() || !_cc->isVirtIdValid(regArg.id()))
          continue;

        VirtReg* virtReg = _cc->virtRegById(regArg.id());

        // Unreferenced argument.
        RAWorkReg* workReg = virtReg->workReg();
        if (!workReg)
          continue;

        // Overwritten argument.
        uint32_t workId = workReg->workId();
        if (!liveIn.bitAt(workId))
          continue;

        RegGroup group = workReg->group();
        if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
          continue;

        RegMask allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
        if (iter == 0) {
          // First iteration: Try to allocate to the home RegId.
          if (workReg->hasHomeRegId()) {
            uint32_t physId = workReg->homeRegId();
            if (Support::bitTest(allocableRegs, physId)) {
              _curAssignment.assign(group, workId, physId, true);
              _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
              continue;
            }
          }

          numIter = 2;
        }
        else {
          // Second iteration: Pick any other register if there is an unassigned one, or assign to stack.
          if (allocableRegs) {
            uint32_t physId = Support::ctz(allocableRegs);
            _curAssignment.assign(group, workId, physId, true);
            _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
          }
          else {
            // This register will definitely need stack, create the slot now and assign also `argIndex`
            // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes.
            RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
            if (ASMJIT_UNLIKELY(!slot))
              return DebugUtils::errored(kErrorOutOfMemory);

            // This means STACK_ARG may be moved to STACK.
            workReg->addFlags(RAWorkRegFlags::kStackArgToStack);
            _pass->_numStackArgsToStackSlots++;
          }
        }
      }
    }
  }

  return kErrorOk;
}

Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept {
  _curAssignment.copyFrom(physToWorkMap);
  return kErrorOk;
}

Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept {
  RAAssignment dst;
  RAAssignment& cur = _curAssignment;

  dst.initLayout(_pass->_physRegCount, _pass->workRegs());
  dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
  dst.assignWorkIdsFromPhysIds();

  if (tryMode)
    return kErrorOk;

  for (RegGroup group : RegGroupVirtValues{}) {
    // STEP 1
    // ------
    //
    //   - KILL all registers that are not live at `dst`,
    //   - SPILL all registers that are not assigned at `dst`.

    if (!tryMode) {
      Support::BitWordIterator<RegMask> it(cur.assigned(group));
      while (it.hasNext()) {
        uint32_t physId = it.next();
        uint32_t workId = cur.physToWorkId(group, physId);

        // Must be true as we iterate over assigned registers.
        ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);

        // KILL if it's not live on entry.
        if (!liveIn.bitAt(workId)) {
          onKillReg(group, workId, physId);
          continue;
        }

        // SPILL if it's not assigned on entry.
        uint32_t altId = dst.workToPhysId(group, workId);
        if (altId == RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
        }
      }
    }

    // STEP 2
    // ------
    //
    //   - MOVE and SWAP registers from their current assignments into their DST assignments.
    //   - Build a `willLoadRegs` mask of registers scheduled for `onLoadReg()`.

    // Current run-id (1 means more aggressive decisions).
    int32_t runId = -1;
    // Remaining registers scheduled for `onLoadReg()`.
    RegMask willLoadRegs = 0;
    // Remaining registers to be allocated in this loop.
    RegMask affectedRegs = dst.assigned(group);

    while (affectedRegs) {
      if (++runId == 2) {
        if (!tryMode)
          return DebugUtils::errored(kErrorInvalidState);

        // Stop in `tryMode` if we haven't done anything in the past two rounds.
        break;
      }

      Support::BitWordIterator<RegMask> it(affectedRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();
        RegMask physMask = Support::bitMask(physId);

        uint32_t curWorkId = cur.physToWorkId(group, physId);
        uint32_t dstWorkId = dst.physToWorkId(group, physId);

        // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
        ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);

        if (curWorkId != RAAssignment::kWorkNone) {
          // Both assigned.
          if (curWorkId != dstWorkId) {
            // Wait a bit if this is the first run; we may avoid this if `curWorkId` moves out.
            if (runId <= 0)
              continue;

            uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
            if (altPhysId == RAAssignment::kPhysNone)
              continue;

            // Reset as we will do some changes to the current assignment.
            runId = -1;

            if (_archTraits->hasInstRegSwap(group)) {
              ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
            }
            else {
              // SPILL the reg if it's not dirty in DST, otherwise try to MOVE.
              if (!cur.isPhysDirty(group, physId)) {
                ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
              }
              else {
                RegMask allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);

                // If possible, don't conflict with assigned regs at DST.
                if (allocableRegs & ~dst.assigned(group))
                  allocableRegs &= ~dst.assigned(group);

                if (allocableRegs) {
                  // MOVE is possible, thus preferred.
                  uint32_t tmpPhysId = Support::ctz(allocableRegs);

                  ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
                  _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
                }
                else {
                  // MOVE is impossible, must SPILL.
                  ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
                }
              }

              goto Cleared;
            }
          }
        }
        else {
Cleared:
          // DST assigned, CUR unassigned.
          uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
          if (altPhysId == RAAssignment::kPhysNone) {
            if (liveIn.bitAt(dstWorkId))
              willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
            affectedRegs &= ~physMask;  // Unaffected from now on.
            continue;
          }

          ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
        }

        // Both DST and CUR assigned to the same reg or CUR just moved to DST.
        if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
          if ((dst.dirty(group) & physMask) == 0) {
            // CUR dirty, DST not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));

            // If `dstReadOnly` is true it means that the block was already processed and we cannot change from
            // CLEAN to DIRTY. In that case the register has to be saved as it cannot enter the block DIRTY.
            if (dstReadOnly)
              ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
            else
              dst.makeDirty(group, dstWorkId, physId);
          }
          else {
            // DST dirty, CUR not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));

            cur.makeDirty(group, dstWorkId, physId);
          }
        }

        // Must match now...
        ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
        ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));

        runId = -1;
        affectedRegs &= ~physMask;
      }
    }

    // STEP 3
    // ------
    //
    //   - Load registers specified by `willLoadRegs`.

    {
      Support::BitWordIterator<RegMask> it(willLoadRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();

        if (!cur.isPhysAssigned(group, physId)) {
          uint32_t workId = dst.physToWorkId(group, physId);

          // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
          ASMJIT_ASSERT(liveIn.bitAt(workId) == true);

          ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
          if (dst.isPhysDirty(group, physId))
            cur.makeDirty(group, workId, physId);
          ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
        }
        else {
          // Not possible otherwise.
          ASMJIT_ASSERT(tryMode == true);
        }
      }
    }
  }

  if (!tryMode) {
    // Here is code that dumps the conflicting part if something fails here:
    // if (!dst.equals(cur)) {
    //   uint32_t physTotal = dst._layout.physTotal;
    //   uint32_t workCount = dst._layout.workCount;
    //
    //   fprintf(stderr, "Dirty    DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp));
    //   fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp));
    //
    //   for (uint32_t physId = 0; physId < physTotal; physId++) {
    //     uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
    //     uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
    //     if (dstWorkId != curWorkId)
    //       fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
    //   }
    //
    //   for (uint32_t workId = 0; workId < workCount; workId++) {
    //     uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
    //     uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
    //     if (dstPhysId != curPhysId)
    //       fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
    //   }
    // }
    ASMJIT_ASSERT(dst.equals(cur));
  }

  return kErrorOk;
}

Error RALocalAllocator::spillScratchGpRegsBeforeEntry(RegMask scratchRegs) noexcept {
  RegGroup group = RegGroup::kGp;
  Support::BitWordIterator<RegMask> it(scratchRegs);

  while (it.hasNext()) {
    uint32_t physId = it.next();
    if (_curAssignment.isPhysAssigned(group, physId)) {
      uint32_t workId = _curAssignment.physToWorkId(group, physId);
      ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
    }
  }

  return kErrorOk;
}

// RALocalAllocator - Allocation
// =============================

Error RALocalAllocator::allocInst(InstNode* node) noexcept {
  RAInst* raInst = node->passData<RAInst>();

  RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* consecutiveRegs[kMaxConsecutiveRegs];

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  _node = node;
  _raInst = raInst;
  _tiedTotal = raInst->_tiedTotal;
  _tiedCount = raInst->_tiedCount;

  // Whether we already replaced register operand with memory operand.
  bool rmAllocated = false;

  for (RegGroup group : RegGroupVirtValues{}) {
    uint32_t i, count = this->tiedCount(group);
    RATiedReg* tiedRegs = this->tiedRegs(group);

    RegMask willUse = _raInst->_usedRegs[group];
    RegMask willOut = _raInst->_clobberedRegs[group];
    RegMask willFree = 0;

    uint32_t usePending = count;
    uint32_t outTiedCount = 0;
    uint32_t dupTiedCount = 0;
    uint32_t consecutiveMask = 0;

    // STEP 1
    // ------
    //
    // Calculate the `willUse` and `willFree` masks based on the tied registers we have. In addition, aggregate
    // information regarding consecutive registers used by this instruction. We need that to make USE/OUT
    // assignments.
    //
    // We don't make any assignment decisions at this stage as we just need to collect some information first.
    // Then, after we have populated all the masks needed, we can finally make some decisions in the second loop.
    // The main reason for this is that we really need `willFree` to make assignment decisions for `willUse`,
    // because if we mark some registers that will be freed, we can consider them in decision making afterwards.

    for (i = 0; i < count; i++) {
      RATiedReg* tiedReg = &tiedRegs[i];

      if (tiedReg->hasAnyConsecutiveFlag()) {
        uint32_t consecutiveOffset = tiedReg->isLeadConsecutive() ?
          uint32_t(0) : tiedReg->consecutiveData();

        if (ASMJIT_UNLIKELY(Support::bitTest(consecutiveMask, consecutiveOffset)))
          return DebugUtils::errored(kErrorInvalidState);

        consecutiveMask |= Support::bitMask(consecutiveOffset);
        consecutiveRegs[consecutiveOffset] = tiedReg;
      }

      // Add OUT and KILL to `outTiedRegs` for CLOBBERing and/or OUT assignment.
      if (tiedReg->isOutOrKill())
        outTiedRegs[outTiedCount++] = tiedReg;

      if (tiedReg->isDuplicate())
        dupTiedRegs[dupTiedCount++] = tiedReg;

      if (!tiedReg->isUse()) {
        tiedReg->markUseDone();
        usePending--;
        continue;
      }

      // Don't assign anything here if this is a consecutive USE - we will handle this in STEP 2 instead.
      if (tiedReg->isUseConsecutive())
        continue;

      uint32_t workId = tiedReg->workId();
      uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

      if (tiedReg->hasUseId()) {
        // If the register has a `useId` it means it can only be allocated in that register.
        RegMask useMask = Support::bitMask(tiedReg->useId());

        // RAInstBuilder must have collected `usedRegs` on-the-fly.
        ASMJIT_ASSERT((willUse & useMask) != 0);

        if (assignedId == tiedReg->useId()) {
          // If the register is already allocated in this one, mark it done and continue.
          tiedReg->markUseDone();
          if (tiedReg->isWrite())
            _curAssignment.makeDirty(group, workId, assignedId);
          usePending--;
          willUse |= useMask;
        }
        else {
          willFree |= useMask & _curAssignment.assigned(group);
        }
      }
      else {
        // Check if the register must be moved to `allocableRegs`.
        RegMask allocableRegs = tiedReg->useRegMask();
        if (assignedId != RAAssignment::kPhysNone) {
          RegMask assignedMask = Support::bitMask(assignedId);
          if ((allocableRegs & ~willUse) & assignedMask) {
            tiedReg->setUseId(assignedId);
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= assignedMask;
          }
          else {
            willFree |= assignedMask;
          }
        }
      }
    }

    // STEP 2
    // ------
    //
    // Verify that all the consecutive registers are really consecutive. Terminate if there is a gap. In addition,
    // decide which USE ids will be used in case this consecutive sequence is a USE (OUT registers are allocated
    // in a different step).

    uint32_t consecutiveCount = 0;

    if (consecutiveMask) {
      if ((consecutiveMask & (consecutiveMask + 1u)) != 0)
        return DebugUtils::errored(kErrorInvalidState);

      // The count of trailing ones is the count of consecutive registers. There cannot be a gap.
      consecutiveCount = Support::ctz(~consecutiveMask);

      // Prioritize the allocation that would result in the least moves, even when moving registers away from
      // their homes.
      RATiedReg* lead = consecutiveRegs[0];

      // Assign the best possible USE Ids to all consecutives.
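      // The scoring below examines each candidate lead register permitted by `lead->useRegMask()`: a candidate
      // starts at 15, gains +1 for every position that matches a work register's home id, +2 for every position
      // that already holds the expected assignment, and drops to 0 if any position is not allocatable. The best
      // scoring candidate becomes `bestLeadReg`.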
      if (lead->isUseConsecutive()) {
        uint32_t bestScore = 0;
        uint32_t bestLeadReg = 0xFFFFFFFF;
        RegMask allocableRegs = (_availableRegs[group] | willFree) & ~willUse;

        uint32_t assignments[kMaxConsecutiveRegs];

        for (i = 0; i < consecutiveCount; i++)
          assignments[i] = _curAssignment.workToPhysId(group, consecutiveRegs[i]->workId());

        Support::BitWordIterator<RegMask> it(lead->useRegMask());
        while (it.hasNext()) {
          uint32_t regIndex = it.next();
          if (Support::bitTest(lead->useRegMask(), regIndex)) {
            uint32_t score = 15;

            for (i = 0; i < consecutiveCount; i++) {
              uint32_t consecutiveIndex = regIndex + i;
              if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
                score = 0;
                break;
              }

              RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
              score += uint32_t(workReg->homeRegId() == consecutiveIndex);
              score += uint32_t(assignments[i] == consecutiveIndex) * 2;
            }

            if (score > bestScore) {
              bestScore = score;
              bestLeadReg = regIndex;
            }
          }
        }

        if (bestLeadReg == 0xFFFFFFFF)
          return DebugUtils::errored(kErrorConsecutiveRegsAllocation);

        for (i = 0; i < consecutiveCount; i++) {
          uint32_t consecutiveIndex = bestLeadReg + i;

          RATiedReg* tiedReg = consecutiveRegs[i];
          RegMask useMask = Support::bitMask(consecutiveIndex);

          uint32_t workId = tiedReg->workId();
          uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

          tiedReg->setUseId(consecutiveIndex);

          if (assignedId == consecutiveIndex) {
            // If the register is already allocated in this one, mark it done and continue.
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= useMask;
          }
          else {
            willUse |= useMask;
            willFree |= useMask & _curAssignment.assigned(group);
          }
        }
      }
    }

    // STEP 3
    // ------
    //
    // Do some decision making to find the best candidates of registers that need to be assigned, moved, and/or
    // spilled. Only USE registers are considered here, OUT will be decided later after all CLOBBERed and OUT
    // registers are unassigned.

    if (usePending) {
      // TODO: Not sure `liveRegs` should be used, maybe `willUse` and `willFree` would be enough and much clearer.

      // All registers that are currently alive without registers that will be freed.
      RegMask liveRegs = _curAssignment.assigned(group) & ~willFree;

      for (i = 0; i < count; i++) {
        RATiedReg* tiedReg = &tiedRegs[i];
        if (tiedReg->isUseDone())
          continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        // REG/MEM: Patch register operand to memory operand if not allocated.
        if (!rmAllocated && tiedReg->hasUseRM()) {
          if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
            RAWorkReg* workReg = workRegById(tiedReg->workId());
            uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
            uint32_t rmSize = tiedReg->rmSize();

            if (rmSize <= workReg->virtReg()->virtSize()) {
              Operand& op = node->operands()[opIndex];
              op = _pass->workRegAsMem(workReg);
              op.as<BaseMem>().setSize(rmSize);
              tiedReg->_useRewriteMask = 0;

              tiedReg->markUseDone();
              usePending--;

              rmAllocated = true;
              continue;
            }
          }
        }

        if (!tiedReg->hasUseId()) {
          // DECIDE where to assign the USE register.
          RegMask allocableRegs = tiedReg->useRegMask() & ~(willFree | willUse);
          uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);

          RegMask useMask = Support::bitMask(useId);
          willUse |= useMask;
          willFree |= useMask & liveRegs;
          tiedReg->setUseId(useId);

          if (assignedId != RAAssignment::kPhysNone) {
            RegMask assignedMask = Support::bitMask(assignedId);

            willFree |= assignedMask;
            liveRegs &= ~assignedMask;

            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }
          else {
            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }

          liveRegs |= useMask;
        }
      }
    }

    // Initially all used regs will be marked as clobbered.
    RegMask clobberedByInst = willUse | willOut;

    // STEP 4
    // ------
    //
    // Free all registers that we marked as `willFree`. Only registers that are not USEd by the instruction are
    // considered as we don't want to free regs we need.

    if (willFree) {
      RegMask allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
      Support::BitWordIterator<RegMask> it(willFree);

      do {
        uint32_t assignedId = it.next();
        if (_curAssignment.isPhysAssigned(group, assignedId)) {
          uint32_t workId = _curAssignment.physToWorkId(group, assignedId);

          // DECIDE whether to MOVE or SPILL.
          if (allocableRegs) {
            uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
            if (reassignedId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
              allocableRegs ^= Support::bitMask(reassignedId);
              continue;
            }
          }

          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      } while (it.hasNext());
    }

    // STEP 5
    // ------
    //
    // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and that weren't allocated yet. This is a bit
    // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular
    // physical register as it's still occupied by some other one, which we need to move before we can use it.
    // In this case we skip it and allocate some other register instead (making it free for another iteration).
    //
    // NOTE: Iterations are mostly important for complicated allocations like function calls, where there can
    // be up to N registers used at once. Asm instructions won't run the loop more than once in 99.9% of cases
    // as they use 2..3 registers on average.

    if (usePending) {
      bool mustSwap = false;
      do {
        uint32_t oldPending = usePending;

        for (i = 0; i < count; i++) {
          RATiedReg* thisTiedReg = &tiedRegs[i];
          if (thisTiedReg->isUseDone())
            continue;

          uint32_t thisWorkId = thisTiedReg->workId();
          uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);

          // This would be a bug, fatal one!
          uint32_t targetPhysId = thisTiedReg->useId();
          ASMJIT_ASSERT(targetPhysId != thisPhysId);

          uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
          if (targetWorkId != RAAssignment::kWorkNone) {
            RAWorkReg* targetWorkReg = workRegById(targetWorkId);

            // Swapping two registers can solve two allocation tasks by emitting just a single instruction.
            // However, swap is only available on a few architectures and it's definitely not available for
            // each register group.
            // Calling `onSwapReg()` before checking these would be fatal.
            if (_archTraits->hasInstRegSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));

              thisTiedReg->markUseDone();
              if (thisTiedReg->isWrite())
                _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
              usePending--;

              // Double-hit.
              RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
              if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
                targetTiedReg->markUseDone();
                if (targetTiedReg->isWrite())
                  _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
                usePending--;
              }
              continue;
            }

            if (!mustSwap)
              continue;

            // We only get here if the previous iteration did nothing. This is essentially a SWAP operation without
            // having a dedicated instruction for that purpose (vector registers, etc). The simplest way to handle
            // such a case is to SPILL the target register.
            ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
          }

          if (thisPhysId != RAAssignment::kPhysNone) {
            ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
          else {
            ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
        }

        mustSwap = (oldPending == usePending);
      } while (usePending);
    }

    // STEP 6
    // ------
    //
    // KILL registers marked as KILL/OUT.

    uint32_t outPending = outTiedCount;
    if (outTiedCount) {
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];

        uint32_t workId = tiedReg->workId();
        uint32_t physId = _curAssignment.workToPhysId(group, workId);

        // Must check if it's allocated as KILL can be related to OUT (like KILL immediately after OUT, which could
        // mean the register is not assigned).
        if (physId != RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
          willOut &= ~Support::bitMask(physId);
        }

        // We still maintain the number of pending registers for OUT assignment. So, if this is only KILL, not OUT,
        // we can safely decrement it.
        outPending -= !tiedReg->isOut();
      }
    }

    // STEP 7
    // ------
    //
    // SPILL registers that will be CLOBBERed. Since OUT and KILL were already processed this is used mostly to
    // handle function CALLs.

    if (willOut) {
      Support::BitWordIterator<RegMask> it(willOut);
      do {
        uint32_t physId = it.next();
        uint32_t workId = _curAssignment.physToWorkId(group, physId);

        if (workId == RAAssignment::kWorkNone)
          continue;

        ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
      } while (it.hasNext());
    }

    // STEP 8
    // ------
    //
    // Duplication.

    for (i = 0; i < dupTiedCount; i++) {
      RATiedReg* tiedReg = dupTiedRegs[i];
      uint32_t workId = tiedReg->workId();
      uint32_t srcId = tiedReg->useId();

      Support::BitWordIterator<RegMask> it(tiedReg->useRegMask());
      while (it.hasNext()) {
        uint32_t dstId = it.next();
        if (dstId == srcId)
          continue;
        _pass->emitMove(workId, dstId, srcId);
      }
    }

    // STEP 9
    // ------
    //
    // Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary.
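    // A callee only guarantees to preserve `saveRestoreRegSize(group)` bytes of each preserved vector register,
    // so any assigned register whose virtual size exceeds that limit has to be spilled before the call.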
    if (node->isInvoke() && group == RegGroup::kVec) {
      const InvokeNode* invokeNode = node->as<InvokeNode>();
      RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);

      if (maybeClobberedRegs) {
        uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
        Support::BitWordIterator<RegMask> it(maybeClobberedRegs);

        do {
          uint32_t physId = it.next();
          uint32_t workId = _curAssignment.physToWorkId(group, physId);

          RAWorkReg* workReg = workRegById(workId);
          uint32_t virtSize = workReg->virtReg()->virtSize();

          if (virtSize > saveRestoreVecSize) {
            ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
          }
        } while (it.hasNext());
      }
    }

    // STEP 10
    // -------
    //
    // Assign OUT registers.

    if (outPending) {
      // Live registers - we need a separate mask (outside of `_curAssignment`) to hold these because of KILLed
      // registers. If we KILL a register here it will go out from `_curAssignment`, but we still cannot assign
      // to it here.
      RegMask liveRegs = _curAssignment.assigned(group);

      // Must avoid these as they have already been OUTed (added during the loop).
      RegMask outRegs = 0;

      // Must avoid these as they collide with already allocated ones.
      RegMask avoidRegs = willUse & ~clobberedByInst;

      // Assign the best possible OUT ids to all consecutives.
      if (consecutiveCount) {
        RATiedReg* lead = consecutiveRegs[0];
        if (lead->isOutConsecutive()) {
          uint32_t bestScore = 0;
          uint32_t bestLeadReg = 0xFFFFFFFF;
          RegMask allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);

          Support::BitWordIterator<RegMask> it(lead->outRegMask());
          while (it.hasNext()) {
            uint32_t regIndex = it.next();
            if (Support::bitTest(lead->outRegMask(), regIndex)) {
              uint32_t score = 15;

              for (i = 0; i < consecutiveCount; i++) {
                uint32_t consecutiveIndex = regIndex + i;
                if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
                  score = 0;
                  break;
                }

                RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
                score += uint32_t(workReg->homeRegId() == consecutiveIndex);
              }

              if (score > bestScore) {
                bestScore = score;
                bestLeadReg = regIndex;
              }
            }
          }

          if (bestLeadReg == 0xFFFFFFFF)
            return DebugUtils::errored(kErrorConsecutiveRegsAllocation);

          for (i = 0; i < consecutiveCount; i++) {
            uint32_t consecutiveIndex = bestLeadReg + i;
            RATiedReg* tiedReg = consecutiveRegs[i];
            tiedReg->setOutId(consecutiveIndex);
          }
        }
      }

      // Allocate OUT registers.
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];
        if (!tiedReg->isOut())
          continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        if (assignedId != RAAssignment::kPhysNone)
          ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));

        uint32_t physId = tiedReg->outId();
        if (physId == RAAssignment::kPhysNone) {
          RegMask allocableRegs = tiedReg->outRegMask() & ~(outRegs | avoidRegs);

          if (!(allocableRegs & ~liveRegs)) {
            // There are no more registers, decide which one to spill.
            uint32_t spillWorkId;
            physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
            ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
          }
          else {
            physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
          }
        }

        // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
        ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));

        if (!tiedReg->isKill())
          ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));

        tiedReg->setOutId(physId);
        tiedReg->markOutDone();

        outRegs |= Support::bitMask(physId);
        liveRegs &= ~Support::bitMask(physId);
        outPending--;
      }

      clobberedByInst |= outRegs;
      ASMJIT_ASSERT(outPending == 0);
    }

    _clobberedRegs[group] |= clobberedByInst;
  }

  return kErrorOk;
}

Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
  // This is an experimental feature that spills registers that don't have a home id and are last in this basic
  // block. This prevents saving these regs in other basic blocks and then restoring them (mostly relevant for
  // loops).
  RAInst* raInst = node->passData<RAInst>();
  uint32_t count = raInst->tiedCount();

  for (uint32_t i = 0; i < count; i++) {
    RATiedReg* tiedReg = raInst->tiedAt(i);
    if (tiedReg->isLast()) {
      uint32_t workId = tiedReg->workId();
      RAWorkReg* workReg = workRegById(workId);

      if (!workReg->hasHomeRegId()) {
        RegGroup group = workReg->group();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
        if (assignedId != RAAssignment::kPhysNone) {
          _cc->_setCursor(node);
          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      }
    }
  }

  return kErrorOk;
}

Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
  // TODO: This should be used to make the branch allocation better.
  DebugUtils::unused(cont);

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  // Use TryMode of `switchToAssignment()` if possible.
  if (target->hasEntryAssignment()) {
    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true));
  }

  ASMJIT_PROPAGATE(allocInst(node));
  ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));

  if (target->hasEntryAssignment()) {
    BaseNode* injectionPoint = _pass->extraBlock()->prev();
    BaseNode* prevCursor = _cc->setCursor(injectionPoint);

    _tmpAssignment.copyFrom(_curAssignment);
    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false));

    BaseNode* curCursor = _cc->cursor();
    if (curCursor != injectionPoint) {
      // Additional instructions emitted to switch from the current state to the `target` state. This means
      // that we have to move these instructions into an independent code block and patch the jump location.
      Operand& targetOp = node->op(node->opCount() - 1);
      if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
        return DebugUtils::errored(kErrorInvalidState);

      Label trampoline = _cc->newLabel();
      Label savedTarget = targetOp.as