From 2d1c41c331a4be1491d8150b3528c22917687ef7 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 4 Apr 2023 11:41:46 -0700 Subject: [PATCH] JIT: improve profile update for loop inversion If the loop test block has multiple predecessors we will not do proper profile updates. This can lead to downstream problems with block layout (say leaving a cold block in a loop). Fix by changing how we compute the amount of profile that should remain in the test block. Fixes #84319. --- src/coreclr/jit/optimizer.cpp | 109 ++++++++++++++++++++++------------ 1 file changed, 72 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 58f5c549a8b505..e154aba816f469 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4875,22 +4875,28 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } // Get hold of the jump target - BasicBlock* bTest = block->bbJumpDest; + BasicBlock* const bTest = block->bbJumpDest; - // Does the block consist of 'jtrue(cond) block' ? + // Does the bTest consist of 'jtrue(cond) block' ? if (bTest->bbJumpKind != BBJ_COND) { return false; } // bTest must be a backwards jump to block->bbNext - if (bTest->bbJumpDest != block->bbNext) + // This will be the top of the loop. + // + BasicBlock* const bTop = bTest->bbJumpDest; + + if (bTop != block->bbNext) { return false; } - // Since test is a BBJ_COND it will have a bbNext - noway_assert(bTest->bbNext != nullptr); + // Since bTest is a BBJ_COND it will have a bbNext + // + BasicBlock* const bJoin = bTest->bbNext; + noway_assert(bJoin != nullptr); // 'block' must be in the same try region as the condition, since we're going to insert a duplicated condition // in a new block after 'block', and the condition might include exception throwing code. @@ -4903,8 +4909,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // The duplicated condition block will branch to bTest->bbNext, so that also better be in the // same try region (or no try region) to avoid generating illegal flow. - BasicBlock* bTestNext = bTest->bbNext; - if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext)) + if (bJoin->hasTryIndex() && !BasicBlock::sameTryRegion(block, bJoin)) { return false; } @@ -4919,7 +4924,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } // Find the loop termination test at the bottom of the loop. - Statement* condStmt = bTest->lastStmt(); + Statement* const condStmt = bTest->lastStmt(); // Verify the test block ends with a conditional that we can manipulate. GenTree* const condTree = condStmt->GetRootNode(); @@ -4929,6 +4934,9 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) return false; } + JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n", + block->bbNum, bTop->bbNum, bTest->bbNum); + // Estimate the cost of cloning the entire test block. // // Note: it would help throughput to compute the maximum cost @@ -4956,7 +4964,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) bool allProfileWeightsAreValid = false; weight_t const weightBlock = block->bbWeight; weight_t const weightTest = bTest->bbWeight; - weight_t const weightNext = block->bbNext->bbWeight; + weight_t const weightTop = bTop->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -4964,35 +4972,45 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) { // Only rely upon the profile weight when all three of these blocks // have good profile weights - if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight()) + if (block->hasProfileWeight() && bTest->hasProfileWeight() && bTop->hasProfileWeight()) { // If this while loop never iterates then don't bother transforming // - if (weightNext == BB_ZERO_WEIGHT) + if (weightTop == BB_ZERO_WEIGHT) { return true; } - // We generally expect weightTest == weightNext + weightBlock. + // We generally expect weightTest > weightTop // // Tolerate small inconsistencies... // - if (!fgProfileWeightsConsistent(weightBlock + weightNext, weightTest)) + if (!fgProfileWeightsConsistent(weightBlock + weightTop, weightTest)) { JITDUMP("Profile weights locally inconsistent: block " FMT_WT ", next " FMT_WT ", test " FMT_WT "\n", - weightBlock, weightNext, weightTest); + weightBlock, weightTop, weightTest); } else { allProfileWeightsAreValid = true; - // Determine iteration count + // Determine average iteration count // - // weightNext is the number of time this loop iterates - // weightBlock is the number of times that we enter the while loop + // weightTop is the number of time this loop executes + // weightTest is the number of times that we consider entering or remaining in the loop // loopIterations is the average number of times that this loop iterates // - loopIterations = weightNext / weightBlock; + weight_t loopEntries = weightTest - weightTop; + + // If profile is inaccurate, try and use other data to provide a credible estimate. + // The value should at least be >= weightBlock. + // + if (loopEntries < weightBlock) + { + loopEntries = weightBlock; + } + + loopIterations = weightTop / loopEntries; } } else @@ -5132,16 +5150,33 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // Flag the block that received the copy as potentially having various constructs. bNewCond->bbFlags |= bTest->bbFlags & BBF_COPY_PROPAGATE; - bNewCond->bbJumpDest = bTest->bbNext; + // Fix flow and profile + // + bNewCond->bbJumpDest = bJoin; bNewCond->inheritWeight(block); - // Update bbRefs and bbPreds for 'bNewCond', 'bNewCond->bbNext' 'bTest' and 'bTest->bbNext'. + if (allProfileWeightsAreValid) + { + weight_t const delta = weightTest - weightTop; - fgAddRefPred(bNewCond, block); - fgAddRefPred(bNewCond->bbNext, bNewCond); + // If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight. + // But this might not be the case if profile data is inconsistent. + // + // And if bTest has multiple outside edges we want to account for the weight of them all. + // + if (delta > block->bbWeight) + { + bNewCond->setBBProfileWeight(delta); + } + } + // Update pred info + // + fgAddRefPred(bJoin, bNewCond); + fgAddRefPred(bTop, bNewCond); + + fgAddRefPred(bNewCond, block); fgRemoveRefPred(bTest, block); - fgAddRefPred(bTest->bbNext, bNewCond); // Move all predecessor edges that look like loop entry edges to point to the new cloned condition // block, not the existing condition block. The idea is that if we only move `block` to point to @@ -5151,15 +5186,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // as the proxy for predecessors that are "in" versus "out" of the potential loop. Note that correctness // is maintained no matter which condition block we point to, but we'll lose optimization potential // (and create spaghetti code) if we get it wrong. - + // BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt)); bool blockMapInitialized = false; - unsigned loopFirstNum = bNewCond->bbNext->bbNum; - unsigned loopBottomNum = bTest->bbNum; + unsigned const loopFirstNum = bTop->bbNum; + unsigned const loopBottomNum = bTest->bbNum; for (BasicBlock* const predBlock : bTest->PredBlocks()) { - unsigned bNum = predBlock->bbNum; + unsigned const bNum = predBlock->bbNum; if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum)) { // Looks like the predecessor is from within the potential loop; skip it. @@ -5189,8 +5224,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // cases of stress modes with inconsistent weights. // JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", bTest->bbNum, weightTest, - weightNext); - bTest->inheritWeight(block->bbNext); + weightTop); + bTest->inheritWeight(bTop); // Determine the new edge weights. // @@ -5200,23 +5235,23 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // Note "next" is the loop top block, not bTest's bbNext, // we'll call this latter block "after". // - weight_t const testToNextLikelihood = min(1.0, weightNext / weightTest); + weight_t const testToNextLikelihood = min(1.0, weightTop / weightTest); weight_t const testToAfterLikelihood = 1.0 - testToNextLikelihood; - // Adjust edges out of bTest (which now has weight weightNext) + // Adjust edges out of bTest (which now has weight weightTop) // - weight_t const testToNextWeight = weightNext * testToNextLikelihood; - weight_t const testToAfterWeight = weightNext * testToAfterLikelihood; + weight_t const testToNextWeight = weightTop * testToNextLikelihood; + weight_t const testToAfterWeight = weightTop * testToAfterLikelihood; - FlowEdge* const edgeTestToNext = fgGetPredForBlock(bTest->bbJumpDest, bTest); + FlowEdge* const edgeTestToNext = fgGetPredForBlock(bTop, bTest); FlowEdge* const edgeTestToAfter = fgGetPredForBlock(bTest->bbNext, bTest); - JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n", bTest->bbNum, - bTest->bbJumpDest->bbNum, testToNextWeight); + JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n", bTest->bbNum, bTop->bbNum, + testToNextWeight); JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (exit loop)\n", bTest->bbNum, bTest->bbNext->bbNum, testToAfterWeight); - edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTest->bbJumpDest); + edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTop); edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight, bTest->bbNext); // Adjust edges out of block, using the same distribution.