// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib

#include "../core/api-build_p.h"
#ifndef ASMJIT_NO_COMPILER

#include "../core/ralocal_p.h"
#include "../core/support.h"

ASMJIT_BEGIN_NAMESPACE

// RALocalAllocator - Utilities
// ============================

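// Returns the tied register that references `workId` in the given `tiedRegs` array, or nullptr if there is none.
// A simple linear scan is enough here as the array only holds registers tied to a single instruction.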
static ASMJIT_FORCE_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
  for (size_t i = 0; i < count; i++)
    if (tiedRegs[i].workId() == workId)
      return &tiedRegs[i];
  return nullptr;
}

// RALocalAllocator - Init & Reset
// ===============================

Error RALocalAllocator::init() noexcept {
  PhysToWorkMap* physToWorkMap;
  WorkToPhysMap* workToPhysMap;

  physToWorkMap = _pass->newPhysToWorkMap();
  workToPhysMap = _pass->newWorkToPhysMap();
  if (!physToWorkMap || !workToPhysMap)
    return DebugUtils::errored(kErrorOutOfMemory);

  _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
  _curAssignment.initMaps(physToWorkMap, workToPhysMap);

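  // The second pair of maps is owned by `_tmpAssignment`, which serves as a scratch assignment when the allocator
  // temporarily switches assignments (see `allocBranch()`).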
  physToWorkMap = _pass->newPhysToWorkMap();
  workToPhysMap = _pass->newWorkToPhysMap();
  _tmpWorkToPhysMap = _pass->newWorkToPhysMap();

  if (!physToWorkMap || !workToPhysMap || !_tmpWorkToPhysMap)
    return DebugUtils::errored(kErrorOutOfMemory);

  _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
  _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);

  return kErrorOk;
}

// RALocalAllocator - Assignment
// =============================

Error RALocalAllocator::makeInitialAssignment() noexcept {
  FuncNode* func = _pass->func();
  RABlock* entry = _pass->entryBlock();

  ZoneBitVector& liveIn = entry->liveIn();
  uint32_t argCount = func->argCount();
  uint32_t numIter = 1;

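  // Two iterations at most: the first one tries to assign each argument to its home register; if that is not
  // possible for some argument, a second iteration assigns it to any other free register or to a stack slot.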
  for (uint32_t iter = 0; iter < numIter; iter++) {
    for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
      for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
        // Unassigned argument.
        const RegOnly& regArg = func->argPack(argIndex)[valueIndex];
        if (!regArg.isReg() || !_cc->isVirtIdValid(regArg.id()))
          continue;

        VirtReg* virtReg = _cc->virtRegById(regArg.id());

        // Unreferenced argument.
        RAWorkReg* workReg = virtReg->workReg();
        if (!workReg)
          continue;

        // Overwritten argument.
        uint32_t workId = workReg->workId();
        if (!liveIn.bitAt(workId))
          continue;

        RegGroup group = workReg->group();
        if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
          continue;

        RegMask allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
        if (iter == 0) {
          // First iteration: Try to allocate to home RegId.
          if (workReg->hasHomeRegId()) {
            uint32_t physId = workReg->homeRegId();
            if (Support::bitTest(allocableRegs, physId)) {
              _curAssignment.assign(group, workId, physId, true);
              _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
              continue;
            }
          }

          numIter = 2;
        }
        else {
          // Second iteration: Pick any other unassigned register or assign the argument to stack.
          if (allocableRegs) {
            uint32_t physId = Support::ctz(allocableRegs);
            _curAssignment.assign(group, workId, physId, true);
            _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
          }
          else {
            // This register will definitely need stack, create the slot now and also assign `argIndex`
            // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes.
            RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
            if (ASMJIT_UNLIKELY(!slot))
              return DebugUtils::errored(kErrorOutOfMemory);

            // This means STACK_ARG may be moved to STACK.
            workReg->addFlags(RAWorkRegFlags::kStackArgToStack);
            _pass->_numStackArgsToStackSlots++;
          }
        }
      }
    }
  }

  return kErrorOk;
}

Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept {
  _curAssignment.copyFrom(physToWorkMap);
  return kErrorOk;
}

Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept {
  RAAssignment dst;
  RAAssignment& cur = _curAssignment;

  dst.initLayout(_pass->_physRegCount, _pass->workRegs());
  dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
  dst.assignWorkIdsFromPhysIds();

  if (tryMode)
    return kErrorOk;

  for (RegGroup group : RegGroupVirtValues{}) {
    // STEP 1
    // ------
    //
    // - KILL all registers that are not live at `dst`,
    // - SPILL all registers that are not assigned at `dst`.

    if (!tryMode) {
      Support::BitWordIterator<RegMask> it(cur.assigned(group));
      while (it.hasNext()) {
        uint32_t physId = it.next();
        uint32_t workId = cur.physToWorkId(group, physId);

        // Must be true as we iterate over assigned registers.
        ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);

        // KILL if it's not live on entry.
        if (!liveIn.bitAt(workId)) {
          onKillReg(group, workId, physId);
          continue;
        }

        // SPILL if it's not assigned on entry.
        uint32_t altId = dst.workToPhysId(group, workId);
        if (altId == RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
        }
      }
    }

    // STEP 2
    // ------
    //
    // - MOVE and SWAP registers from their current assignments into their DST assignments.
    // - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.

    // Current run-id (1 means more aggressive decisions).
    int32_t runId = -1;
    // Remaining registers scheduled for `onLoadReg()`.
    RegMask willLoadRegs = 0;
    // Remaining registers to be allocated in this loop.
    RegMask affectedRegs = dst.assigned(group);

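    // The loop below repeats as long as it makes progress: `runId` is reset to -1 whenever an assignment changes,
    // and two consecutive rounds without any change either stop the try-mode or report an invalid state.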
    while (affectedRegs) {
      if (++runId == 2) {
        if (!tryMode)
          return DebugUtils::errored(kErrorInvalidState);

        // Stop in `tryMode` if we haven't done anything in past two rounds.
        break;
      }

      Support::BitWordIterator<RegMask> it(affectedRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();
        RegMask physMask = Support::bitMask<RegMask>(physId);

        uint32_t curWorkId = cur.physToWorkId(group, physId);
        uint32_t dstWorkId = dst.physToWorkId(group, physId);

        // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
        ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);

        if (curWorkId != RAAssignment::kWorkNone) {
          // Both assigned.
          if (curWorkId != dstWorkId) {
            // Wait a bit if this is the first run, we may avoid this if `curWorkId` moves out.
            if (runId <= 0)
              continue;

            uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
            if (altPhysId == RAAssignment::kPhysNone)
              continue;

            // Reset as we will do some changes to the current assignment.
            runId = -1;

            if (_archTraits->hasInstRegSwap(group)) {
              ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
            }
            else {
              // SPILL the reg if it's not dirty in DST, otherwise try to MOVE.
              if (!cur.isPhysDirty(group, physId)) {
                ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
              }
              else {
                RegMask allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);

                // If possible don't conflict with assigned regs at DST.
                if (allocableRegs & ~dst.assigned(group))
                  allocableRegs &= ~dst.assigned(group);

                if (allocableRegs) {
                  // MOVE is possible, thus preferred.
                  uint32_t tmpPhysId = Support::ctz(allocableRegs);

                  ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
                  _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
                }
                else {
                  // MOVE is impossible, must SPILL.
                  ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
                }
              }

              goto Cleared;
            }
          }
        }
        else {
Cleared:
          // DST assigned, CUR unassigned.
          uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
          if (altPhysId == RAAssignment::kPhysNone) {
            if (liveIn.bitAt(dstWorkId))
              willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
            affectedRegs &= ~physMask; // Unaffected from now.
            continue;
          }
          ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
        }

        // Both DST and CUR assigned to the same reg or CUR just moved to DST.
        if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
          if ((dst.dirty(group) & physMask) == 0) {
            // CUR dirty, DST not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));

            // If `dstReadOnly` is true it means that that block was already processed and we cannot change from
            // CLEAN to DIRTY. In that case the register has to be saved as it cannot enter the block DIRTY.
            if (dstReadOnly)
              ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
            else
              dst.makeDirty(group, dstWorkId, physId);
          }
          else {
            // DST dirty, CUR not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));

            cur.makeDirty(group, dstWorkId, physId);
          }
        }

        // Must match now...
        ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
        ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));

        runId = -1;
        affectedRegs &= ~physMask;
      }
    }

    // STEP 3
    // ------
    //
    // - Load registers specified by `willLoadRegs`.

    {
      Support::BitWordIterator<RegMask> it(willLoadRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();

        if (!cur.isPhysAssigned(group, physId)) {
          uint32_t workId = dst.physToWorkId(group, physId);

          // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
          ASMJIT_ASSERT(liveIn.bitAt(workId) == true);

          ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
          if (dst.isPhysDirty(group, physId))
            cur.makeDirty(group, workId, physId);
          ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
        }
        else {
          // Not possible otherwise.
          ASMJIT_ASSERT(tryMode == true);
        }
      }
    }
  }

  if (!tryMode) {
    // Here is code that dumps the conflicting part if something fails here:
    // if (!dst.equals(cur)) {
    //   uint32_t physTotal = dst._layout.physTotal;
    //   uint32_t workCount = dst._layout.workCount;
    //
    //   fprintf(stderr, "Dirty DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp));
    //   fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp));
    //
    //   for (uint32_t physId = 0; physId < physTotal; physId++) {
    //     uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
    //     uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
    //     if (dstWorkId != curWorkId)
    //       fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
    //   }
    //
    //   for (uint32_t workId = 0; workId < workCount; workId++) {
    //     uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
    //     uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
    //     if (dstPhysId != curPhysId)
    //       fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
    //   }
    // }
    ASMJIT_ASSERT(dst.equals(cur));
  }

  return kErrorOk;
}

Error RALocalAllocator::spillScratchGpRegsBeforeEntry(RegMask scratchRegs) noexcept {
  RegGroup group = RegGroup::kGp;
  Support::BitWordIterator<RegMask> it(scratchRegs);

  while (it.hasNext()) {
    uint32_t physId = it.next();
    if (_curAssignment.isPhysAssigned(group, physId)) {
      uint32_t workId = _curAssignment.physToWorkId(group, physId);
      ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
    }
  }

  return kErrorOk;
}

// RALocalAllocator - Allocation
// =============================

Error RALocalAllocator::allocInst(InstNode* node) noexcept {
  RAInst* raInst = node->passData<RAInst>();

  RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* consecutiveRegs[kMaxConsecutiveRegs];

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  _node = node;
  _raInst = raInst;
  _tiedTotal = raInst->_tiedTotal;
  _tiedCount = raInst->_tiedCount;

  // Whether we already replaced register operand with memory operand.
  bool rmAllocated = false;

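  // Each register group is processed independently; within a group the allocation proceeds in the steps below.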
  for (RegGroup group : RegGroupVirtValues{}) {
    uint32_t i, count = this->tiedCount(group);
    RATiedReg* tiedRegs = this->tiedRegs(group);

    RegMask willUse = _raInst->_usedRegs[group];
    RegMask willOut = _raInst->_clobberedRegs[group];
    RegMask willFree = 0;

    uint32_t usePending = count;
    uint32_t outTiedCount = 0;
    uint32_t dupTiedCount = 0;
    uint32_t consecutiveMask = 0;

    // STEP 1
    // ------
    //
    // Calculate `willUse` and `willFree` masks based on tied registers we have. In addition, aggregate information
    // regarding consecutive registers used by this instruction. We need that to make USE/OUT assignments.
    //
    // We don't do any assignment decisions at this stage as we just need to collect some information first. Then,
    // after we populate all masks needed we can finally make some decisions in the second loop. The main reason
    // for this is that we really need `willFree` to make assignment decisions for `willUse`, because if we mark
    // some registers that will be freed, we can consider them in decision making afterwards.

    for (i = 0; i < count; i++) {
      RATiedReg* tiedReg = &tiedRegs[i];

      if (tiedReg->hasAnyConsecutiveFlag()) {
        uint32_t consecutiveOffset = tiedReg->isLeadConsecutive() ? uint32_t(0) : tiedReg->consecutiveData();

        if (ASMJIT_UNLIKELY(Support::bitTest(consecutiveMask, consecutiveOffset)))
          return DebugUtils::errored(kErrorInvalidState);

        consecutiveMask |= Support::bitMask(consecutiveOffset);
        consecutiveRegs[consecutiveOffset] = tiedReg;
      }

      // Add OUT and KILL to `outPending` for CLOBBERing and/or OUT assignment.
      if (tiedReg->isOutOrKill())
        outTiedRegs[outTiedCount++] = tiedReg;

      if (tiedReg->isDuplicate())
        dupTiedRegs[dupTiedCount++] = tiedReg;

      if (!tiedReg->isUse()) {
        tiedReg->markUseDone();
        usePending--;
        continue;
      }

      // Don't assign anything here if this is a consecutive USE - we will handle this in STEP 2 instead.
      if (tiedReg->isUseConsecutive())
        continue;

      uint32_t workId = tiedReg->workId();
      uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

      if (tiedReg->hasUseId()) {
        // If the register has `useId` it means it can only be allocated in that register.
        RegMask useMask = Support::bitMask(tiedReg->useId());

        // RAInstBuilder must have collected `usedRegs` on-the-fly.
        ASMJIT_ASSERT((willUse & useMask) != 0);

        if (assignedId == tiedReg->useId()) {
          // If the register is already allocated in this one, mark it done and continue.
          tiedReg->markUseDone();
          if (tiedReg->isWrite())
            _curAssignment.makeDirty(group, workId, assignedId);
          usePending--;
          willUse |= useMask;
        }
        else {
          willFree |= useMask & _curAssignment.assigned(group);
        }
      }
      else {
        // Check if the register must be moved to `allocableRegs`.
        RegMask allocableRegs = tiedReg->useRegMask();
        if (assignedId != RAAssignment::kPhysNone) {
          RegMask assignedMask = Support::bitMask(assignedId);
          if ((allocableRegs & ~willUse) & assignedMask) {
            tiedReg->setUseId(assignedId);
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= assignedMask;
          }
          else {
            willFree |= assignedMask;
          }
        }
      }
    }

    // STEP 2
    // ------
    //
    // Verify that all the consecutive registers are really consecutive. Terminate if there is a gap. In addition,
    // decide which USE ids will be used in case that this consecutive sequence is USE (OUT registers are allocated
    // in a different step).
    uint32_t consecutiveCount = 0;

    if (consecutiveMask) {
      if ((consecutiveMask & (consecutiveMask + 1u)) != 0)
        return DebugUtils::errored(kErrorInvalidState);

      // Count of trailing ones is the count of consecutive registers. There cannot be a gap.
      consecutiveCount = Support::ctz(~consecutiveMask);

      // Prioritize allocation that would result in least moves even when moving registers away from their homes.
      RATiedReg* lead = consecutiveRegs[0];

      // Assign the best possible USE Ids to all consecutives.
      if (lead->isUseConsecutive()) {
        uint32_t bestScore = 0;
        uint32_t bestLeadReg = 0xFFFFFFFF;
        RegMask allocableRegs = (_availableRegs[group] | willFree) & ~willUse;

        uint32_t assignments[kMaxConsecutiveRegs];

        for (i = 0; i < consecutiveCount; i++)
          assignments[i] = _curAssignment.workToPhysId(group, consecutiveRegs[i]->workId());

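        // Score each candidate lead register: the score starts at a non-zero base, each position that matches the
        // work register's home id adds 1, and each position that is already assigned to the right register adds 2.
        // A zero score means the whole consecutive sequence would not fit into allocable registers.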
        Support::BitWordIterator<uint32_t> it(lead->useRegMask());
        while (it.hasNext()) {
          uint32_t regIndex = it.next();
          if (Support::bitTest(lead->useRegMask(), regIndex)) {
            uint32_t score = 15;

            for (i = 0; i < consecutiveCount; i++) {
              uint32_t consecutiveIndex = regIndex + i;
              if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
                score = 0;
                break;
              }

              RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
              score += uint32_t(workReg->homeRegId() == consecutiveIndex);
              score += uint32_t(assignments[i] == consecutiveIndex) * 2;
            }

            if (score > bestScore) {
              bestScore = score;
              bestLeadReg = regIndex;
            }
          }
        }

        if (bestLeadReg == 0xFFFFFFFF)
          return DebugUtils::errored(kErrorConsecutiveRegsAllocation);

        for (i = 0; i < consecutiveCount; i++) {
          uint32_t consecutiveIndex = bestLeadReg + i;

          RATiedReg* tiedReg = consecutiveRegs[i];
          RegMask useMask = Support::bitMask(consecutiveIndex);

          uint32_t workId = tiedReg->workId();
          uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

          tiedReg->setUseId(consecutiveIndex);

          if (assignedId == consecutiveIndex) {
            // If the register is already allocated in this one, mark it done and continue.
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= useMask;
          }
          else {
            willUse |= useMask;
            willFree |= useMask & _curAssignment.assigned(group);
          }
        }
      }
    }

    // STEP 3
    // ------
    //
    // Do some decision making to find the best candidates of registers that need to be assigned, moved, and/or
    // spilled. Only USE registers are considered here, OUT will be decided later after all CLOBBERed and OUT
    // registers are unassigned.

    if (usePending) {
      // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.

      // All registers that are currently alive without registers that will be freed.
      RegMask liveRegs = _curAssignment.assigned(group) & ~willFree;

      for (i = 0; i < count; i++) {
        RATiedReg* tiedReg = &tiedRegs[i];
        if (tiedReg->isUseDone())
          continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        // REG/MEM: Patch register operand to memory operand if not allocated.
        if (!rmAllocated && tiedReg->hasUseRM()) {
          if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
            RAWorkReg* workReg = workRegById(tiedReg->workId());
            uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
            uint32_t rmSize = tiedReg->rmSize();

            if (rmSize <= workReg->virtReg()->virtSize()) {
              Operand& op = node->operands()[opIndex];
              op = _pass->workRegAsMem(workReg);
              op.as<BaseMem>().setSize(rmSize);
              tiedReg->_useRewriteMask = 0;

              tiedReg->markUseDone();
              usePending--;

              rmAllocated = true;
              continue;
            }
          }
        }

        if (!tiedReg->hasUseId()) {
          // DECIDE where to assign the USE register.
          RegMask allocableRegs = tiedReg->useRegMask() & ~(willFree | willUse);
          uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);

          RegMask useMask = Support::bitMask(useId);
          willUse |= useMask;
          willFree |= useMask & liveRegs;
          tiedReg->setUseId(useId);

          if (assignedId != RAAssignment::kPhysNone) {
            RegMask assignedMask = Support::bitMask(assignedId);

            willFree |= assignedMask;
            liveRegs &= ~assignedMask;

            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }
          else {
            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }

          liveRegs |= useMask;
        }
      }
    }

    // Initially all used regs will be marked as clobbered.
    RegMask clobberedByInst = willUse | willOut;

    // STEP 4
    // ------
    //
    // Free all registers that we marked as `willFree`. Only registers that are not USEd by the instruction are
    // considered as we don't want to free regs we need.

    if (willFree) {
      RegMask allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
      Support::BitWordIterator<RegMask> it(willFree);

      do {
        uint32_t assignedId = it.next();
        if (_curAssignment.isPhysAssigned(group, assignedId)) {
          uint32_t workId = _curAssignment.physToWorkId(group, assignedId);

          // DECIDE whether to MOVE or SPILL.
          if (allocableRegs) {
            uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
            if (reassignedId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
              allocableRegs ^= Support::bitMask(reassignedId);
              continue;
            }
          }

          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      } while (it.hasNext());
    }

    // STEP 5
    // ------
    //
    // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit
    // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular
    // physical register as it's still occupied by some other one, which we need to move before we can use it.
    // In this case we skip it and allocate some other register instead (making it free for another iteration).
    //
    // NOTE: Iterations are mostly important for complicated allocations like function calls, where there can
    // be up to N registers used at once. Asm instructions won't run the loop more than once in 99.9% of cases
    // as they use 2..3 registers on average.

    if (usePending) {
      bool mustSwap = false;
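      // `mustSwap` becomes true only when a whole pass over the tied registers makes no progress. In that case
      // the next pass is allowed to spill the register that occupies a required physical id to break the cycle.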
      do {
        uint32_t oldPending = usePending;

        for (i = 0; i < count; i++) {
          RATiedReg* thisTiedReg = &tiedRegs[i];
          if (thisTiedReg->isUseDone())
            continue;

          uint32_t thisWorkId = thisTiedReg->workId();
          uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);

          // This would be a bug, fatal one!
          uint32_t targetPhysId = thisTiedReg->useId();
          ASMJIT_ASSERT(targetPhysId != thisPhysId);

          uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
          if (targetWorkId != RAAssignment::kWorkNone) {
            RAWorkReg* targetWorkReg = workRegById(targetWorkId);

            // Swapping two registers can solve two allocation tasks by emitting just a single instruction. However,
            // swap is only available on few architectures and it's definitely not available for each register group.
            // Calling `onSwapReg()` before checking these would be fatal.
            if (_archTraits->hasInstRegSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));

              thisTiedReg->markUseDone();
              if (thisTiedReg->isWrite())
                _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
              usePending--;

              // Double-hit.
              RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
              if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
                targetTiedReg->markUseDone();
                if (targetTiedReg->isWrite())
                  _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
                usePending--;
              }
              continue;
            }

            if (!mustSwap)
              continue;

            // Only branched here if the previous iteration did nothing. This is essentially a SWAP operation without
            // having a dedicated instruction for that purpose (vector registers, etc). The simplest way to handle
            // such a case is to SPILL the target register.
            ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
          }

          if (thisPhysId != RAAssignment::kPhysNone) {
            ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
          else {
            ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
        }

        mustSwap = (oldPending == usePending);
      } while (usePending);
    }

    // STEP 6
    // ------
    //
    // KILL registers marked as KILL/OUT.

    uint32_t outPending = outTiedCount;
    if (outTiedCount) {
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];

        uint32_t workId = tiedReg->workId();
        uint32_t physId = _curAssignment.workToPhysId(group, workId);

        // Must check if it's allocated as KILL can be related to OUT (like KILL immediately after OUT, which could
        // mean the register is not assigned).
        if (physId != RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
          willOut &= ~Support::bitMask(physId);
        }

        // We still maintain the number of pending registers for OUT assignment. So, if this is only KILL, not OUT,
        // we can safely decrement it.
        outPending -= !tiedReg->isOut();
      }
    }

    // STEP 7
    // ------
    //
    // SPILL registers that will be CLOBBERed. Since OUT and KILL were already processed this is used mostly to
    // handle function CALLs.

    if (willOut) {
      Support::BitWordIterator<RegMask> it(willOut);
      do {
        uint32_t physId = it.next();
        uint32_t workId = _curAssignment.physToWorkId(group, physId);

        if (workId == RAAssignment::kWorkNone)
          continue;

        ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
      } while (it.hasNext());
    }

    // STEP 8
    // ------
    //
    // Duplication.

    for (i = 0; i < dupTiedCount; i++) {
      RATiedReg* tiedReg = dupTiedRegs[i];
      uint32_t workId = tiedReg->workId();
      uint32_t srcId = tiedReg->useId();

      Support::BitWordIterator<RegMask> it(tiedReg->useRegMask());
      while (it.hasNext()) {
        uint32_t dstId = it.next();
        if (dstId == srcId)
          continue;
        _pass->emitMove(workId, dstId, srcId);
      }
    }

    // STEP 9
    // ------
    //
    // Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary.

    if (node->isInvoke() && group == RegGroup::kVec) {
      const InvokeNode* invokeNode = node->as<InvokeNode>();

      RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);
      if (maybeClobberedRegs) {
        uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
        Support::BitWordIterator<RegMask> it(maybeClobberedRegs);

        do {
          uint32_t physId = it.next();
          uint32_t workId = _curAssignment.physToWorkId(group, physId);

          RAWorkReg* workReg = workRegById(workId);
          uint32_t virtSize = workReg->virtReg()->virtSize();

          if (virtSize > saveRestoreVecSize) {
            ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
          }

        } while (it.hasNext());
      }
    }

    // STEP 10
    // -------
    //
    // Assign OUT registers.

    if (outPending) {
      // Live registers - we need a separate mask (outside of `_curAssignment`) to hold these because of KILLed
      // registers. If we KILL a register here it will go out from `_curAssignment`, but we cannot assign to it
      // here.
      RegMask liveRegs = _curAssignment.assigned(group);

      // Must avoid as they have been already OUTed (added during the loop).
      RegMask outRegs = 0;

      // Must avoid as they collide with already allocated ones.
      RegMask avoidRegs = willUse & ~clobberedByInst;

      // Assign the best possible OUT ids of all consecutives.
      if (consecutiveCount) {
        RATiedReg* lead = consecutiveRegs[0];
        if (lead->isOutConsecutive()) {
          uint32_t bestScore = 0;
          uint32_t bestLeadReg = 0xFFFFFFFF;
          RegMask allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);

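          // Same scoring as for consecutive USE registers in STEP 2, except that existing assignments are not
          // considered because OUT registers are always reassigned.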
          Support::BitWordIterator<uint32_t> it(lead->outRegMask());
          while (it.hasNext()) {
            uint32_t regIndex = it.next();
            if (Support::bitTest(lead->outRegMask(), regIndex)) {
              uint32_t score = 15;

              for (i = 0; i < consecutiveCount; i++) {
                uint32_t consecutiveIndex = regIndex + i;
                if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
                  score = 0;
                  break;
                }

                RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
                score += uint32_t(workReg->homeRegId() == consecutiveIndex);
              }

              if (score > bestScore) {
                bestScore = score;
                bestLeadReg = regIndex;
              }
            }
          }

          if (bestLeadReg == 0xFFFFFFFF)
            return DebugUtils::errored(kErrorConsecutiveRegsAllocation);

          for (i = 0; i < consecutiveCount; i++) {
            uint32_t consecutiveIndex = bestLeadReg + i;
            RATiedReg* tiedReg = consecutiveRegs[i];
            tiedReg->setOutId(consecutiveIndex);
          }
        }
      }

      // Allocate OUT registers.
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];
        if (!tiedReg->isOut())
          continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        if (assignedId != RAAssignment::kPhysNone)
          ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));

        uint32_t physId = tiedReg->outId();
        if (physId == RAAssignment::kPhysNone) {
          RegMask allocableRegs = tiedReg->outRegMask() & ~(outRegs | avoidRegs);

          if (!(allocableRegs & ~liveRegs)) {
            // There are no more registers, decide which one to spill.
            uint32_t spillWorkId;
            physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
            ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
          }
          else {
            physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
          }
        }

        // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
        ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));

        if (!tiedReg->isKill())
          ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));

        tiedReg->setOutId(physId);
        tiedReg->markOutDone();

        outRegs |= Support::bitMask(physId);
        liveRegs &= ~Support::bitMask(physId);
        outPending--;
      }

      clobberedByInst |= outRegs;
      ASMJIT_ASSERT(outPending == 0);
    }

    _clobberedRegs[group] |= clobberedByInst;
  }

  return kErrorOk;
}

Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
  // This is an experimental feature that spills registers which don't have a home id and whose last use is in
  // this basic block. This prevents saving these regs in other basic blocks and then restoring them (mostly
  // relevant for loops).
  RAInst* raInst = node->passData<RAInst>();
  uint32_t count = raInst->tiedCount();

  for (uint32_t i = 0; i < count; i++) {
    RATiedReg* tiedReg = raInst->tiedAt(i);
    if (tiedReg->isLast()) {
      uint32_t workId = tiedReg->workId();
      RAWorkReg* workReg = workRegById(workId);
      if (!workReg->hasHomeRegId()) {
        RegGroup group = workReg->group();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
        if (assignedId != RAAssignment::kPhysNone) {
          _cc->_setCursor(node);
          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      }
    }
  }

  return kErrorOk;
}

Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
  // TODO: This should be used to make the branch allocation better.
  DebugUtils::unused(cont);

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  // Use TryMode of `switchToAssignment()` if possible.
  if (target->hasEntryAssignment()) {
    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true));
  }

  ASMJIT_PROPAGATE(allocInst(node));
  ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));

  if (target->hasEntryAssignment()) {
    BaseNode* injectionPoint = _pass->extraBlock()->prev();
    BaseNode* prevCursor = _cc->setCursor(injectionPoint);

    _tmpAssignment.copyFrom(_curAssignment);
    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false));

    BaseNode* curCursor = _cc->cursor();
    if (curCursor != injectionPoint) {
      // Additional instructions emitted to switch from the current state to the `target` state. This means
      // that we have to move these instructions into an independent code block and patch the jump location.
      Operand& targetOp = node->op(node->opCount() - 1);
      if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
        return DebugUtils::errored(kErrorInvalidState);

      Label trampoline = _cc->newLabel();
      Label savedTarget = targetOp.as<Label>();

      // Patch `target` to point to the `trampoline` we just created.
      targetOp = trampoline;

      // Clear a possible SHORT form as we have no clue now if the SHORT form would be encodable after patching
      // the target to `trampoline` (X86 specific).
      node->clearOptions(InstOptions::kShortForm);

      // Finalize the switch assignment sequence.
      ASMJIT_PROPAGATE(_pass->emitJump(savedTarget));
      _cc->_setCursor(injectionPoint);
      _cc->bind(trampoline);
    }

    _cc->_setCursor(prevCursor);
    _curAssignment.swap(_tmpAssignment);
  }
  else {
    ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment));
  }

  return kErrorOk;
}

Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept {
  // TODO: Do we really need to use `cont`?
  DebugUtils::unused(cont);

  if (targets.empty())
    return DebugUtils::errored(kErrorInvalidState);

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  // All `targets` should share the same sharedAssignmentId, so we just read the first one.
  RABlock* anyTarget = targets[0];
  if (!anyTarget->hasSharedAssignmentId())
    return DebugUtils::errored(kErrorInvalidState);

  RASharedAssignment& sharedAssignment = _pass->_sharedAssignments[anyTarget->sharedAssignmentId()];

  ASMJIT_PROPAGATE(allocInst(node));

  if (!sharedAssignment.empty()) {
    ASMJIT_PROPAGATE(switchToAssignment(
      sharedAssignment.physToWorkMap(),
      sharedAssignment.liveIn(),
      true,  // Read-only.
      false  // Try-mode.
    ));
  }

  ASMJIT_PROPAGATE(spillRegsBeforeEntry(anyTarget));

  if (sharedAssignment.empty()) {
    ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(anyTarget, block(), _curAssignment));
  }

  return kErrorOk;
}

// RALocalAllocator - Decision Making
// ==================================

uint32_t RALocalAllocator::decideOnAssignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
  ASMJIT_ASSERT(allocableRegs != 0);
  DebugUtils::unused(group, physId);

  RAWorkReg* workReg = workRegById(workId);

  // Prefer home register id, if possible.
  if (workReg->hasHomeRegId()) {
    uint32_t homeId = workReg->homeRegId();
    if (Support::bitTest(allocableRegs, homeId))
      return homeId;
  }

  // Prefer registers used upon block entries.
  RegMask previouslyAssignedRegs = workReg->allocatedMask();
  if (allocableRegs & previouslyAssignedRegs)
    allocableRegs &= previouslyAssignedRegs;

  return Support::ctz(allocableRegs);
}

uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
  ASMJIT_ASSERT(allocableRegs != 0);
  DebugUtils::unused(group, physId);

  RAWorkReg* workReg = workRegById(workId);

  // Prefer allocating back to HomeId, if possible.
  if (workReg->hasHomeRegId()) {
    if (Support::bitTest(allocableRegs, workReg->homeRegId()))
      return workReg->homeRegId();
  }

  // TODO: [Register Allocator] This could be improved.

  // Decided to SPILL.
  return RAAssignment::kPhysNone;
}

uint32_t RALocalAllocator::decideOnSpillFor(RegGroup group, uint32_t workId, RegMask spillableRegs, uint32_t* spillWorkId) const noexcept {
  // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
  DebugUtils::unused(workId);
  ASMJIT_ASSERT(spillableRegs != 0);

  Support::BitWordIterator<RegMask> it(spillableRegs);
  uint32_t bestPhysId = it.next();
  uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);

  // Avoid calculating the cost model if there is only one spillable register.
  if (it.hasNext()) {
    uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
    do {
      uint32_t localPhysId = it.next();
      uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
      uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);

      if (localCost < bestCost) {
        bestCost = localCost;
        bestPhysId = localPhysId;
        bestWorkId = localWorkId;
      }
    } while (it.hasNext());
  }

  *spillWorkId = bestWorkId;
  return bestPhysId;
}

ASMJIT_END_NAMESPACE

#endif // !ASMJIT_NO_COMPILER