From 18cfe65e13d1da0857dea1720e0d042c5aae086f Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 24 Oct 2023 18:23:04 -0700 Subject: [PATCH 1/5] JIT: move return merging earlier Instead of merging returns to the common return block in morph, do all the merging in `fgAddInternal` (where we already did some merging). This removes a case where morph would add a control flow edge in a way that might disrupt an ongoing RPO. Earlier merging also opens up the possibility of tail merging some of the copies into the canonical return local, and possibly even some of the computations that feed the copies. Modify the flow alterations done by morph. Previously if a tail call was expressed via a call to a `CORINFO_TAILCALL_HELPER`, morph would change the block kind to `BBJ_RETURN` and then merge the return, changing the block kind to `BBJ_ALWAYS`. Since merging now happens before moprh, morph needs to leave the block kind alone. Generalize the post-tail-call sanity check in morph to recognize one new case that can come up. Contributes to #93246. --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/flowgraph.cpp | 142 +++++++++++++++++- src/coreclr/jit/morph.cpp | 271 +++++++++++----------------------- 3 files changed, 224 insertions(+), 191 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 12d8476d0aac29..2b9151b8f149cd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4805,7 +4805,7 @@ class Compiler void fgMorphStmts(BasicBlock* block); void fgMorphBlocks(); - void fgMergeBlockReturn(BasicBlock* block); + bool fgMergeBlockReturn(BasicBlock* block); bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg)); void fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index a4e7670ca8ff5a..58e9192b2027a0 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2367,8 +2367,13 @@ class MergedReturns assert(searchLimit < maxReturns); mergedReturnBlock = CreateReturnBB(searchLimit); comp->genReturnBB = mergedReturnBlock; - // Downstream code expects the `genReturnBB` to always remain - // once created, so that it can redirect flow edges to it. + + // In principle we should be able to remove the proctection + // we add here, and remove the genReturnBB if it becomes unreachable, + // since we now introduce all the flow to it during this phase. + // + // But we'll keep this as is for now. + // mergedReturnBlock->bbFlags |= BBF_DONT_REMOVE; } } @@ -2471,10 +2476,11 @@ class MergedReturns // Notes: // * rewrites shared generic catches in to filters // * adds code to handle modifiable this -// * determines number of epilogs and merges returns +// * determines number of epilogs and merges "same constant" returns // * does special setup for pinvoke/reverse pinvoke methods // * adds callouts and EH for synchronized methods // * adds just my code callback +// * merges returns to common return block if necessary // // Returns: // Suitable phase status. @@ -2752,6 +2758,20 @@ PhaseStatus Compiler::fgAddInternal() madeChanges = true; } + if (genReturnBB != nullptr) + { + for (BasicBlock* const block : Blocks()) + { + // We can see BBF_HAS_JMP from CEE_JMP importation. Exclude such here + // + if ((block != genReturnBB) && block->KindIs(BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)) + { + fgMergeBlockReturn(block); + madeChanges = true; + } + } + } + #ifdef DEBUG if (verbose) { @@ -2764,6 +2784,122 @@ PhaseStatus Compiler::fgAddInternal() return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } +//------------------------------------------------------------------------ +// fgMergeBlockReturn: assign the block return value (if any) into the single return temp +// and branch to the single return block. +// +// Arguments: +// block - the block to process. +// +// Returns: +// true if block was modified to branch to the single return block +// +// Notes: +// A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN. +// For example a method returning void could have an empty block with jump kind BBJ_RETURN. +// Such blocks do materialize as part of in-lining. +// +// A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN. +// It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC. +// For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal +// is BAD_VAR_NUM. +// +bool Compiler::fgMergeBlockReturn(BasicBlock* block) +{ + assert(block->KindIs(BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)); + assert((genReturnBB != nullptr) && (genReturnBB != block)); + + // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN. + + Statement* lastStmt = block->lastStmt(); + GenTree* ret = (lastStmt != nullptr) ? lastStmt->GetRootNode() : nullptr; + + if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0)) + { + // This return was generated during epilog merging, so leave it alone + return false; + } + +#if !defined(TARGET_X86) + if (info.compFlags & CORINFO_FLG_SYNCH) + { + fgConvertSyncReturnToLeave(block); + return true; + } +#endif // !TARGET_X86 + + // We'll jump to the genReturnBB. + // + block->SetJumpKindAndTarget(BBJ_ALWAYS, genReturnBB DEBUG_ARG(this)); + fgAddRefPred(genReturnBB, block); + fgReturnCount--; + + if (genReturnLocal != BAD_VAR_NUM) + { + // replace the GT_RETURN node to be a STORE_LCL_VAR that stores the return value into genReturnLocal. + + // Method must be returning a value other than TYP_VOID. + noway_assert(compMethodHasRetVal()); + + // This block must be ending with a GT_RETURN + noway_assert(lastStmt != nullptr); + noway_assert(lastStmt->GetNextStmt() == nullptr); + noway_assert(ret != nullptr); + + // GT_RETURN must have non-null operand as the method is returning the value assigned to + // genReturnLocal + noway_assert(ret->OperGet() == GT_RETURN); + noway_assert(ret->gtGetOp1() != nullptr); + + Statement* pAfterStatement = lastStmt; + const DebugInfo& di = lastStmt->GetDebugInfo(); + GenTree* tree = gtNewTempStore(genReturnLocal, ret->gtGetOp1(), CHECK_SPILL_NONE, &pAfterStatement, di, block); + + if (pAfterStatement == lastStmt) + { + lastStmt->SetRootNode(tree); + } + else + { + // gtNewTempStore inserted additional statements after last + fgRemoveStmt(block, lastStmt); + Statement* newStmt = gtNewStmt(tree, di); + fgInsertStmtAfter(block, pAfterStatement, newStmt); + lastStmt = newStmt; + } + } + else if (ret != nullptr && ret->OperGet() == GT_RETURN) + { + // This block ends with a GT_RETURN + noway_assert(lastStmt != nullptr); + noway_assert(lastStmt->GetNextStmt() == nullptr); + + // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn + // block + noway_assert(ret->TypeGet() == TYP_VOID); + noway_assert(ret->gtGetOp1() == nullptr); + + fgRemoveStmt(block, lastStmt); + } + + JITDUMP("\nUpdate " FMT_BB " to jump to common return block.\n", block->bbNum); + DISPBLOCK(block); + + if (block->hasProfileWeight()) + { + weight_t const oldWeight = genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : BB_ZERO_WEIGHT; + weight_t const newWeight = oldWeight + block->bbWeight; + + JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, + block->bbNum, genReturnBB->bbNum); + + genReturnBB->setBBProfileWeight(newWeight); + DISPBLOCK(genReturnBB); + } + + return true; +} + /*****************************************************************************/ /*****************************************************************************/ diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 21f03b2d45f7ad..08363493c6015e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -6089,78 +6089,90 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // If this block has a flow successor, make suitable updates. // - BasicBlock* nextBlock = compCurBB->GetUniqueSucc(); + BasicBlock* const succBlock = compCurBB->GetUniqueSucc(); - if (nextBlock == nullptr) + if (succBlock == nullptr) { // No unique successor. compCurBB should be a return. // assert(compCurBB->KindIs(BBJ_RETURN)); } - else - { - // Flow no longer reaches nextBlock from here. - // - fgRemoveRefPred(nextBlock, compCurBB); - // Adjust profile weights of the successor blocks. +#if !FEATURE_TAILCALL_OPT_SHARED_RETURN + // We enable shared-ret tail call optimization for recursive calls even if + // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. + if (gtIsRecursiveCall(call)) +#endif + { + // Many tailcalls will have call and ret in the same block, and thus be + // BBJ_RETURN, but if the call falls through to a ret, and we are doing a + // tailcall, change it here. + // (compCurBB may have a jump target, so use SetJumpKind() to avoid nulling it) // - // Note if this is a tail call to loop, further updates - // are needed once we install the loop edge. + // When we use CORINFO_TAILCALL_HELPERs then the call will return. // - BasicBlock* curBlock = compCurBB; - if (curBlock->hasProfileWeight()) + if (canFastTailCall || tailCallViaJitHelper) { - weight_t weightLoss = curBlock->bbWeight; - - while (nextBlock->hasProfileWeight()) + // The call won't return. + // + if (succBlock != nullptr) { - // Since we have linear flow we can update the next block weight. - // - weight_t const nextWeight = nextBlock->bbWeight; - weight_t const newNextWeight = nextWeight - weightLoss; + JITDUMP("Flow no longer reaches from " FMT_BB " -> " FMT_BB ", removing edge\n", compCurBB->bbNum, + succBlock->bbNum); - // If the math would result in a negative weight then there's - // no local repair we can do; just leave things inconsistent. + // Trim the profile data from the linear chain of successor blocks // - if (newNextWeight >= 0) + if (compCurBB->hasProfileWeight()) { - // Note if we'd already morphed the IR in nextblock we might - // have done something profile sensitive that we should arguably reconsider. - // - JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum, - nextWeight, newNextWeight); + weight_t weightLoss = compCurBB->bbWeight; + BasicBlock* nextBlock = succBlock; - nextBlock->setBBProfileWeight(newNextWeight); - } - else - { - JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT - " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", - nextBlock->bbNum, nextWeight, compCurBB->bbNum, weightLoss); - } + while (nextBlock->hasProfileWeight()) + { + // Since we have linear flow we can update the next block weight. + // + weight_t const nextWeight = nextBlock->bbWeight; + weight_t const newNextWeight = nextWeight - weightLoss; - curBlock = nextBlock; - nextBlock = curBlock->GetUniqueSucc(); - if (nextBlock == nullptr) - { - break; + // If the math would result in a negative weight then there's + // no local repair we can do; just leave things inconsistent. + // + if (newNextWeight >= 0) + { + // Note if we'd already morphed the IR in nextblock we might + // have done something profile sensitive that we should arguably reconsider. + // + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", + nextBlock->bbNum, nextWeight, newNextWeight); + + nextBlock->setBBProfileWeight(newNextWeight); + } + else + { + JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT + " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", + nextBlock->bbNum, nextWeight, compCurBB->bbNum, weightLoss); + } + + nextBlock = nextBlock->GetUniqueSucc(); + + if (nextBlock == nullptr) + { + break; + } + } } + + // Remove the flow edge + // + fgRemoveRefPred(succBlock, compCurBB); } - } - } -#if !FEATURE_TAILCALL_OPT_SHARED_RETURN - // We enable shared-ret tail call optimization for recursive calls even if - // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. - if (gtIsRecursiveCall(call)) -#endif - { - // Many tailcalls will have call and ret in the same block, and thus be - // BBJ_RETURN, but if the call falls through to a ret, and we are doing a - // tailcall, change it here. - // (compCurBB may have a jump target, so use SetJumpKind() to avoid nulling it) - compCurBB->SetJumpKind(BBJ_RETURN); + // Modify the block kind + // + JITDUMP("Setting " FMT_BB " to BBJ_RETURN\n", compCurBB->bbNum); + compCurBB->SetJumpKind(BBJ_RETURN); + } } GenTree* stmtExpr = fgMorphStmt->GetRootNode(); @@ -7470,12 +7482,20 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa // Finish hooking things up. fgAddRefPred(block->GetJumpDest(), block); block->bbFlags &= ~BBF_HAS_JMP; + + // If block was the genReturnBB, it no longer is. + // + if (block == genReturnBB) + { + JITDUMP("Forgetting genReturnBB " FMT_BB " and genReturnLcl V%02u\n", genReturnBB->bbNum, genReturnLocal); + genReturnBB = nullptr; + genReturnLocal = BAD_VAR_NUM; + } } //------------------------------------------------------------------------------ // fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter. // -// // Arguments: // arg - argument to assign // late - whether to use early or late arg @@ -13902,15 +13922,6 @@ void Compiler::fgMorphBlocks() // Process all statement trees in the basic block. fgMorphStmts(block); - // Do we need to merge the result of this block into a single return block? - if (block->KindIs(BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)) - { - if ((genReturnBB != nullptr) && (genReturnBB != block)) - { - fgMergeBlockReturn(block); - } - } - block = block->Next(); } while (block != nullptr); @@ -13940,128 +13951,6 @@ void Compiler::fgMorphBlocks() #endif } -//------------------------------------------------------------------------ -// fgMergeBlockReturn: assign the block return value (if any) into the single return temp -// and branch to the single return block. -// -// Arguments: -// block - the block to process. -// -// Notes: -// A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN. -// For example a method returning void could have an empty block with jump kind BBJ_RETURN. -// Such blocks do materialize as part of in-lining. -// -// A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN. -// It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC. -// For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal -// is BAD_VAR_NUM. -// -void Compiler::fgMergeBlockReturn(BasicBlock* block) -{ - assert(block->KindIs(BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)); - assert((genReturnBB != nullptr) && (genReturnBB != block)); - - // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN. - - Statement* lastStmt = block->lastStmt(); - GenTree* ret = (lastStmt != nullptr) ? lastStmt->GetRootNode() : nullptr; - - if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0)) - { - // This return was generated during epilog merging, so leave it alone - } - else - { - // We'll jump to the genReturnBB. - CLANG_FORMAT_COMMENT_ANCHOR; - -#if !defined(TARGET_X86) - if (info.compFlags & CORINFO_FLG_SYNCH) - { - fgConvertSyncReturnToLeave(block); - } - else -#endif // !TARGET_X86 - { - block->SetJumpKindAndTarget(BBJ_ALWAYS, genReturnBB DEBUG_ARG(this)); - fgAddRefPred(genReturnBB, block); - fgReturnCount--; - } - if (genReturnLocal != BAD_VAR_NUM) - { - // replace the GT_RETURN node to be a STORE_LCL_VAR that stores the return value into genReturnLocal. - - // Method must be returning a value other than TYP_VOID. - noway_assert(compMethodHasRetVal()); - - // This block must be ending with a GT_RETURN - noway_assert(lastStmt != nullptr); - noway_assert(lastStmt->GetNextStmt() == nullptr); - noway_assert(ret != nullptr); - - // GT_RETURN must have non-null operand as the method is returning the value assigned to - // genReturnLocal - noway_assert(ret->OperGet() == GT_RETURN); - noway_assert(ret->gtGetOp1() != nullptr); - - Statement* pAfterStatement = lastStmt; - const DebugInfo& di = lastStmt->GetDebugInfo(); - GenTree* tree = - gtNewTempStore(genReturnLocal, ret->gtGetOp1(), CHECK_SPILL_NONE, &pAfterStatement, di, block); - if (tree->OperIsCopyBlkOp()) - { - tree = fgMorphCopyBlock(tree); - } - else if (tree->OperIsInitBlkOp()) - { - tree = fgMorphInitBlock(tree); - } - - if (pAfterStatement == lastStmt) - { - lastStmt->SetRootNode(tree); - } - else - { - // gtNewTempStore inserted additional statements after last - fgRemoveStmt(block, lastStmt); - Statement* newStmt = gtNewStmt(tree, di); - fgInsertStmtAfter(block, pAfterStatement, newStmt); - lastStmt = newStmt; - } - } - else if (ret != nullptr && ret->OperGet() == GT_RETURN) - { - // This block ends with a GT_RETURN - noway_assert(lastStmt != nullptr); - noway_assert(lastStmt->GetNextStmt() == nullptr); - - // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn - // block - noway_assert(ret->TypeGet() == TYP_VOID); - noway_assert(ret->gtGetOp1() == nullptr); - - fgRemoveStmt(block, lastStmt); - } - - JITDUMP("\nUpdate " FMT_BB " to jump to common return block.\n", block->bbNum); - DISPBLOCK(block); - - if (block->hasProfileWeight()) - { - weight_t const oldWeight = genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : BB_ZERO_WEIGHT; - weight_t const newWeight = oldWeight + block->bbWeight; - - JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, - block->bbNum, genReturnBB->bbNum); - - genReturnBB->setBBProfileWeight(newWeight); - DISPBLOCK(genReturnBB); - } - } -} - /***************************************************************************** * * Make some decisions about the kind of code to generate. @@ -15399,12 +15288,20 @@ bool Compiler::fgCheckStmtAfterTailCall() GenTree* callExpr = callStmt->GetRootNode(); if (!callExpr->OperIs(GT_STORE_LCL_VAR)) { - // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar), + // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar) + // or GT_STORE_LCL_VAR(genReturnLocal, ...) // where lclVar was return buffer in the call for structs or simd. Statement* retStmt = nextMorphStmt; GenTree* retExpr = retStmt->GetRootNode(); - noway_assert(retExpr->gtOper == GT_RETURN); + if (retExpr->OperIs(GT_STORE_LCL_VAR)) + { + assert(retExpr->AsLclVarCommon()->GetLclNum() == genReturnLocal); + } + else + { + noway_assert(retExpr->gtOper == GT_RETURN); + } nextMorphStmt = retStmt->GetNextStmt(); } else From 309a025128a94b037b8d1bc969a381e2476fb3bd Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 25 Oct 2023 16:20:18 -0700 Subject: [PATCH 2/5] fix synch method merging --- src/coreclr/jit/flowgraph.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 58e9192b2027a0..e2f2aa73e14726 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2824,15 +2824,16 @@ bool Compiler::fgMergeBlockReturn(BasicBlock* block) if (info.compFlags & CORINFO_FLG_SYNCH) { fgConvertSyncReturnToLeave(block); - return true; } + else #endif // !TARGET_X86 - - // We'll jump to the genReturnBB. - // - block->SetJumpKindAndTarget(BBJ_ALWAYS, genReturnBB DEBUG_ARG(this)); - fgAddRefPred(genReturnBB, block); - fgReturnCount--; + { + // We'll jump to the genReturnBB. + // + block->SetJumpKindAndTarget(BBJ_ALWAYS, genReturnBB DEBUG_ARG(this)); + fgAddRefPred(genReturnBB, block); + fgReturnCount--; + } if (genReturnLocal != BAD_VAR_NUM) { From ee288f808b61eb7d89f2eb437f6b74acddea9f2a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 26 Oct 2023 10:04:20 -0700 Subject: [PATCH 3/5] fix x86: keep track of blocks that were formerly BBJ_RETURNs --- src/coreclr/jit/block.h | 8 +++++--- src/coreclr/jit/flowgraph.cpp | 1 + src/coreclr/jit/morph.cpp | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index f3936955300ac9..6850e5606f6c2e 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -440,6 +440,7 @@ enum BasicBlockFlags : unsigned __int64 BBF_TAILCALL_SUCCESSOR = MAKE_BBFLAG(40), // BB has pred that has potential tail call BBF_RECURSIVE_TAILCALL = MAKE_BBFLAG(41), // Block has recursive tailcall that may turn into a loop BBF_NO_CSE_IN = MAKE_BBFLAG(42), // Block should kill off any incoming CSE + BBF_MERGED_RETURN = MAKE_BBFLAG(43), // Block was originally BBJ_RETURN, now branches to a common return // The following are sets of flags. @@ -450,7 +451,7 @@ enum BasicBlockFlags : unsigned __int64 // Flags to update when two blocks are compacted BBF_COMPACT_UPD = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_BACKWARD_JUMP | \ - BBF_HAS_NEWOBJ | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_LOOP_PREHEADER, + BBF_HAS_NEWOBJ | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_LOOP_PREHEADER | BBF_MERGED_RETURN, // Flags a block should not have had before it is split. @@ -461,7 +462,7 @@ enum BasicBlockFlags : unsigned __int64 // For example, the top block might or might not have BBF_GC_SAFE_POINT, // but we assume it does not have BBF_GC_SAFE_POINT any more. - BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_RECURSIVE_TAILCALL, + BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_RECURSIVE_TAILCALL | BBF_MERGED_RETURN, // Flags gained by the bottom block when a block is split. // Note, this is a conservative guess. @@ -469,7 +470,8 @@ enum BasicBlockFlags : unsigned __int64 // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ? BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_PROF_WEIGHT | \ - BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL, + BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | \ + BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL | BBF_MERGED_RETURN, // Flags that must be propagated to a new block if code is copied from a block to a new block. These are flags that // limit processing of a block if the code in question doesn't exist. This is conservative; we might not diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index e2f2aa73e14726..c90c404b350605 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2831,6 +2831,7 @@ bool Compiler::fgMergeBlockReturn(BasicBlock* block) // We'll jump to the genReturnBB. // block->SetJumpKindAndTarget(BBJ_ALWAYS, genReturnBB DEBUG_ARG(this)); + block->bbFlags |= BBF_MERGED_RETURN; fgAddRefPred(genReturnBB, block); fgReturnCount--; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 08363493c6015e..5d21471c38e1dd 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -6101,7 +6101,9 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) #if !FEATURE_TAILCALL_OPT_SHARED_RETURN // We enable shared-ret tail call optimization for recursive calls even if // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. - if (gtIsRecursiveCall(call)) + // Also handle blocks that were BBJ_RETURN but were modified during fgAddInternal. + // + if (gtIsRecursiveCall(call) || ((compCurBB->bbFlags & BBF_MERGED_RETURN) != 0)) #endif { // Many tailcalls will have call and ret in the same block, and thus be From d1bef6b7b7ea7a74228ec8082d7748da716eaab5 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 27 Oct 2023 09:46:00 -0700 Subject: [PATCH 4/5] update comments --- src/coreclr/jit/flowgraph.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index c90c404b350605..44e1959b29f38b 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -1850,6 +1850,7 @@ void Compiler::fgConvertSyncReturnToLeave(BasicBlock* block) // Convert the BBJ_RETURN to BBJ_ALWAYS, jumping to genReturnBB. block->SetJumpKindAndTarget(BBJ_ALWAYS, genReturnBB DEBUG_ARG(this)); + block->bbFlags |= BBF_MERGED_RETURN; fgAddRefPred(genReturnBB, block); #ifdef DEBUG @@ -2228,7 +2229,7 @@ class MergedReturns // Add 'return' expression to the return block comp->fgNewStmtAtEnd(newReturnBB, returnExpr); // Flag that this 'return' was generated by return merging so that subsequent - // return block morhping will know to leave it alone. + // return block merging will know to leave it alone. returnExpr->gtFlags |= GTF_RET_MERGED; #ifdef DEBUG @@ -2357,7 +2358,7 @@ class MergedReturns if (mergedReturnBlock == nullptr) { // No constant return block for this return; use the general one. - // We defer flow update and profile update to morph. + // We defer flow and profile updates until later in fgAddInternal. // mergedReturnBlock = comp->genReturnBB; if (mergedReturnBlock == nullptr) From 8426fa88ea002261a6682fca5d573134b2e99dc4 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 27 Oct 2023 11:07:29 -0700 Subject: [PATCH 5/5] indirect call transformer may put a nop after tail call; tolerate --- src/coreclr/jit/morph.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3bed1b7626ce11..a11aa56e8b29e1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15295,6 +15295,10 @@ bool Compiler::fgCheckStmtAfterTailCall() { assert(retExpr->AsLclVarCommon()->GetLclNum() == genReturnLocal); } + else if (retExpr->OperIs(GT_NOP)) + { + // ignore + } else { noway_assert(retExpr->gtOper == GT_RETURN);