Skip to content

Commit

Permalink
Value num refine phis (#104752)
Browse files Browse the repository at this point in the history
VN doesn't always give PHI defs the best possible values (in particular if there are backedge PHI args). Revise VN to run in a "loop-respecting" RPO where we don't visit any loop successors until all loop blocks have been visited. Once the loop is done, update the header PHI VNs since all PHI arg VNs are now known.

Then look for equivalent PHI defs in copy prop and enable copy prop when two locals have the same values at the head of a loop.

Addresses the regression case noted in #95645 (comment) where cross-block morph's copy prop plus loop bottom testing has created some unnecessary loop-carried values.

Closes #95645.
  • Loading branch information
AndyAyersMS authored Jul 17, 2024
1 parent 4f38f92 commit f455188
Show file tree
Hide file tree
Showing 6 changed files with 449 additions and 136 deletions.
21 changes: 18 additions & 3 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1466,9 +1466,24 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1,
assertion.op1.vn = optConservativeNormalVN(op1);
assertion.op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();

assert((assertion.op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM) ||
(assertion.op1.vn == vnStore->VNConservativeNormalValue(
lvaGetDesc(lclNum)->GetPerSsaData(assertion.op1.lcl.ssaNum)->m_vnPair)));
#ifdef DEBUG

// If we're ssa based, check that the VN is reasonable.
//
if (assertion.op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM)
{
LclSsaVarDsc* const ssaDsc = lvaGetDesc(lclNum)->GetPerSsaData(assertion.op1.lcl.ssaNum);

bool doesVNMatch = (assertion.op1.vn == vnStore->VNConservativeNormalValue(ssaDsc->m_vnPair));

if (!doesVNMatch && ssaDsc->m_updated)
{
doesVNMatch = (assertion.op1.vn == vnStore->VNConservativeNormalValue(ssaDsc->m_origVNPair));
}

assert(doesVNMatch);
}
#endif

ssize_t cnsValue = 0;
GenTreeFlags iconFlags = GTF_EMPTY;
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,13 @@ class LclSsaVarDsc
}

ValueNumPair m_vnPair;

#ifdef DEBUG
// True if this ssa def VN was updated
bool m_updated = false;
// Originally assigned VN
ValueNumPair m_origVNPair;
#endif
};

// This class stores information associated with a memory SSA definition.
Expand Down Expand Up @@ -5759,9 +5766,15 @@ class Compiler

// Utility functions for fgValueNumber.

// Value number a block or blocks in a loop
void fgValueNumberBlocks(BasicBlock* block, BlockSet& visitedBlocks);

// Perform value-numbering for the trees in "blk".
void fgValueNumberBlock(BasicBlock* blk);

// Value number a phi definition
void fgValueNumberPhiDef(GenTreeLclVar* newSsaDef, BasicBlock* block, bool isUpdate = false);

// Requires that "entryBlock" is the header block of "loop" and that "loop" is the
// innermost loop of which "entryBlock" is the entry. Returns the value number that should be
// assumed for the memoryKind at the start "entryBlk".
Expand Down
54 changes: 44 additions & 10 deletions src/coreclr/jit/copyprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,25 @@ bool Compiler::optCopyProp(
assert((tree->gtFlags & GTF_VAR_DEF) == 0);
assert(tree->GetLclNum() == lclNum);

bool madeChanges = false;
LclVarDsc* varDsc = lvaGetDesc(lclNum);
ValueNum lclDefVN = varDsc->GetPerSsaData(tree->GetSsaNum())->m_vnPair.GetConservative();
bool madeChanges = false;
LclVarDsc* const varDsc = lvaGetDesc(lclNum);
LclSsaVarDsc* const varSsaDsc = varDsc->GetPerSsaData(tree->GetSsaNum());
GenTree* const varDefTree = varSsaDsc->GetDefNode();
BasicBlock* const varDefBlock = varSsaDsc->GetBlock();
ValueNum const lclDefVN = varSsaDsc->m_vnPair.GetConservative();
assert(lclDefVN != ValueNumStore::NoVN);

// See if this local is a candidate for phi def equivalence checks
//
bool const varDefTreeIsPhiDef = (varDefTree != nullptr) && varDefTree->IsPhiDefn();
bool varDefTreeIsPhiDefAtCycleEntry = false;

if (varDefTreeIsPhiDef)
{
FlowGraphNaturalLoop* const loop = m_blockToLoop->GetLoop(varDefBlock);
varDefTreeIsPhiDefAtCycleEntry = (loop != nullptr) && (loop->GetHeader() == varDefBlock);
}

for (LclNumToLiveDefsMap::Node* const iter : LclNumToLiveDefsMap::KeyValueIteration(curSsaName))
{
unsigned newLclNum = iter->GetKey();
Expand All @@ -190,7 +204,15 @@ bool Compiler::optCopyProp(

if (newLclDefVN != lclDefVN)
{
continue;
bool arePhiDefsEquivalent =
varDefTreeIsPhiDefAtCycleEntry && vnStore->AreVNsEquivalent(lclDefVN, newLclDefVN);
if (!arePhiDefsEquivalent)
{
continue;
}

JITDUMP("orig [%06u] copy [%06u] VNs proved equivalent\n", dspTreeID(tree),
dspTreeID(newLclDef.GetDefNode()));
}

// It may not be profitable to propagate a 'doNotEnregister' lclVar to an existing use of an
Expand Down Expand Up @@ -259,6 +281,24 @@ bool Compiler::optCopyProp(

tree->AsLclVarCommon()->SetLclNum(newLclNum);
tree->AsLclVarCommon()->SetSsaNum(newSsaNum);

// Update VN to match, and propagate up through any enclosing commas.
// (we could in principle try updating through other parents, but
// we lack VN's context for memory, so can't get them all).
//
if (newLclDefVN != lclDefVN)
{
tree->SetVNs(newLclSsaDef->m_vnPair);
GenTree* parent = tree->gtGetParent(nullptr);

while ((parent != nullptr) && parent->OperIs(GT_COMMA))
{
JITDUMP(" Updating COMMA parent VN [%06u]\n", dspTreeID(parent));
ValueNumPair op1Xvnp = vnStore->VNPExceptionSet(parent->AsOp()->gtOp1->gtVNPair);
parent->SetVNs(vnStore->VNPWithExc(parent->AsOp()->gtOp2->gtVNPair, op1Xvnp));
parent = tree->gtGetParent(nullptr);
}
}
gtUpdateSideEffects(stmt, tree);
newLclSsaDef->AddUse(block);

Expand Down Expand Up @@ -334,12 +374,6 @@ void Compiler::optCopyPropPushDef(GenTree* defNode, GenTreeLclVarCommon* lclNode
else if (lclNode->HasSsaName())
{
unsigned ssaNum = lclNode->GetSsaNum();
if ((defNode != nullptr) && defNode->IsPhiDefn())
{
// TODO-CQ: design better heuristics for propagation and remove this.
ssaNum = SsaConfig::RESERVED_SSA_NUM;
}

pushDef(lclNum, ssaNum);
}
}
Expand Down
59 changes: 39 additions & 20 deletions src/coreclr/jit/redundantbranchopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1628,6 +1628,31 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti)
//
JITDUMP("Optimizing via jump threading\n");

bool setNoCseIn = false;

// If this is a phi-based threading, and the block we're bypassing has
// a memory phi, mark the successor blocks with BBF_NO_CSE_IN so we can
// block unsound CSE propagation.
//
if (jti.m_isPhiBased)
{
for (MemoryKind memoryKind : allMemoryKinds())
{
if ((memoryKind == ByrefExposed) && byrefStatesMatchGcHeapStates)
{
continue;
}

if (jti.m_block->bbMemorySsaPhiFunc[memoryKind] != nullptr)
{
JITDUMP(FMT_BB " has %s memory phi; will be marking blocks with BBF_NO_CSE_IN\n", jti.m_block->bbNum,
memoryKindNames[memoryKind]);
setNoCseIn = true;
break;
}
}
}

// Now reroute the flow from the predecessors.
// If this pred is in the set that will reuse block, do nothing.
// Else revise pred to branch directly to the appropriate successor of block.
Expand All @@ -1638,6 +1663,11 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti)
//
if (BlockSetOps::IsMember(this, jti.m_ambiguousPreds, predBlock->bbNum))
{
if (setNoCseIn && !jti.m_block->HasFlag(BBF_NO_CSE_IN))
{
JITDUMP(FMT_BB " => BBF_NO_CSE_IN\n", jti.m_block->bbNum);
jti.m_block->SetFlags(BBF_NO_CSE_IN);
}
continue;
}

Expand All @@ -1652,6 +1682,12 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti)
predBlock->bbNum, jti.m_block->bbNum, predBlock->bbNum, jti.m_trueTarget->bbNum);

fgReplaceJumpTarget(predBlock, jti.m_block, jti.m_trueTarget);

if (setNoCseIn && !jti.m_trueTarget->HasFlag(BBF_NO_CSE_IN))
{
JITDUMP(FMT_BB " => BBF_NO_CSE_IN\n", jti.m_trueTarget->bbNum);
jti.m_trueTarget->SetFlags(BBF_NO_CSE_IN);
}
}
else
{
Expand All @@ -1660,28 +1696,11 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti)
predBlock->bbNum, jti.m_block->bbNum, predBlock->bbNum, jti.m_falseTarget->bbNum);

fgReplaceJumpTarget(predBlock, jti.m_block, jti.m_falseTarget);
}
}

// If this is a phi-based threading, and the block we're bypassing has
// a memory phi, mark the block with BBF_NO_CSE_IN so we can block CSE propagation
// into the block.
//
if (jti.m_isPhiBased)
{
for (MemoryKind memoryKind : allMemoryKinds())
{
if ((memoryKind == ByrefExposed) && byrefStatesMatchGcHeapStates)
{
continue;
}

if (jti.m_block->bbMemorySsaPhiFunc[memoryKind] != nullptr)
if (setNoCseIn && !jti.m_falseTarget->HasFlag(BBF_NO_CSE_IN))
{
JITDUMP(FMT_BB " has %s memory phi; marking as BBF_NO_CSE_IN\n", jti.m_block->bbNum,
memoryKindNames[memoryKind]);
jti.m_block->SetFlags(BBF_NO_CSE_IN);
break;
JITDUMP(FMT_BB " => BBF_NO_CSE_IN\n", jti.m_falseTarget->bbNum);
jti.m_falseTarget->SetFlags(BBF_NO_CSE_IN);
}
}
}
Expand Down
Loading

0 comments on commit f455188

Please sign in to comment.