From 41bfc514f820e148038785ff615df03724814e23 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 13 May 2026 12:10:57 +0200 Subject: [PATCH 1/9] JIT: Use an indicator variable "did resume?" in runtime async We will need this to enable inlining. --- src/coreclr/jit/async.cpp | 320 ++++++++++++++++++++++++++----- src/coreclr/jit/async.h | 13 +- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 4 + src/coreclr/jit/compiler.hpp | 35 +++- src/coreclr/jit/fgdiagnostic.cpp | 12 +- src/coreclr/jit/fginline.cpp | 2 + src/coreclr/jit/gentree.cpp | 34 ++++ src/coreclr/jit/gentree.h | 2 + src/coreclr/jit/importer.cpp | 2 + src/coreclr/jit/lclmorph.cpp | 61 ++++-- src/coreclr/jit/lclvars.cpp | 4 +- src/coreclr/jit/morph.cpp | 4 + 13 files changed, 411 insertions(+), 84 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index cd4bb35514149e..c6badac1ea89ea 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -95,6 +95,9 @@ PhaseStatus Compiler::SaveAsyncContexts() lvaAsyncSynchronizationContextVar = lvaGrabTemp(false DEBUGARG("Async SynchronizationContext")); lvaGetDesc(lvaAsyncSynchronizationContextVar)->lvType = TYP_REF; + lvaResumedIndicator = lvaGrabTemp(false DEBUGARG("Async Resumed")); + lvaGetDesc(lvaResumedIndicator)->lvType = TYP_UBYTE; + if (opts.IsOSR()) { lvaGetDesc(lvaAsyncExecutionContextVar)->lvIsOSRLocal = true; @@ -183,7 +186,17 @@ PhaseStatus Compiler::SaveAsyncContexts() // Insert CaptureContexts call before the try (keep it before so the // try/finally can be removed if there is no exception side effects). // For OSR, we did this in the tier0 method. - if (!opts.IsOSR()) + if (opts.IsOSR()) + { + // In the OSR method we compute the initial value of the resumption indicator based on the continuation arg. 
+ GenTree* continuation = gtNewLclVarNode(lvaAsyncContinuationArg, TYP_REF); + GenTree* null = gtNewNull(); + GenTree* contNeNull = gtNewOperNode(GT_NE, TYP_INT, continuation, null); + GenTree* storeIndicator = gtNewStoreLclVarNode(lvaResumedIndicator, contNeNull); + Statement* storeIndicatorStmt = fgNewStmtFromTree(storeIndicator); + fgInsertStmtAtBeg(fgFirstBB, storeIndicatorStmt); + } + else { GenTreeCall* captureCall = gtNewCallNode(CT_USER_FUNC, asyncInfo->captureContextsMethHnd, TYP_VOID); SetCallEntrypointForR2R(captureCall, this, asyncInfo->captureContextsMethHnd); @@ -203,21 +216,30 @@ PhaseStatus Compiler::SaveAsyncContexts() JITDUMP("Inserted capture\n"); DISPSTMT(captureStmt); + + // Also initialize resumed indicator var if it will not be initialized by the prolog. + BasicBlock* containingBlock = compIsForInlining() ? impInlineInfo->iciBlock : fgFirstBB; + bool inALoop = containingBlock->HasFlag(BBF_BACKWARD_JUMP); + bool isReturn = containingBlock->KindIs(BBJ_RETURN); + + if ((inALoop && !isReturn) || !impInlineRoot()->info.compInitMem) + { + GenTree* storeIndicator = gtNewStoreLclVarNode(lvaResumedIndicator, gtNewIconNode(0)); + Statement* storeIndicatorStmt = fgNewStmtFromTree(storeIndicator); + fgInsertStmtAtBeg(fgFirstBB, storeIndicatorStmt); + + JITDUMP("Inserted resumed indicator initialization\n"); + DISPSTMT(storeIndicatorStmt); + } + else + { + JITDUMP("Skipping zero init of resumed indicator due to compInitMem\n"); + } } // Insert RestoreContexts call in fault (exceptional case) // First argument: resumed = (continuation != null) - GenTree* resumed; - if (compIsForInlining()) - { - resumed = gtNewFalse(); - } - else - { - GenTree* continuation = gtNewLclvNode(lvaAsyncContinuationArg, TYP_REF); - GenTree* null = gtNewNull(); - resumed = gtNewOperNode(GT_NE, TYP_INT, continuation, null); - } + GenTree* resumed = gtNewLclvNode(lvaResumedIndicator, TYP_INT); GenTreeCall* restoreCall = gtNewCallNode(CT_USER_FUNC, asyncInfo->restoreContextsMethHnd, 
TYP_VOID); SetCallEntrypointForR2R(restoreCall, this, asyncInfo->restoreContextsMethHnd); @@ -332,15 +354,23 @@ void Compiler::AddContextArgsToAsyncCalls(BasicBlock* block) return WALK_CONTINUE; } - GenTreeCall* call = tree->AsCall(); - GenTree* execCtx = m_compiler->gtNewLclVarNode(m_compiler->lvaAsyncExecutionContextVar, TYP_REF); + GenTreeCall* call = tree->AsCall(); + GenTree* resumed = m_compiler->gtNewLclVarNode(m_compiler->lvaResumedIndicator, TYP_INT); + GenTree* resumedAddr = m_compiler->gtNewLclAddrNode(m_compiler->lvaResumedIndicator, 0); + GenTree* execCtx = m_compiler->gtNewLclVarNode(m_compiler->lvaAsyncExecutionContextVar, TYP_REF); GenTree* syncCtx = m_compiler->gtNewLclVarNode(m_compiler->lvaAsyncSynchronizationContextVar, TYP_REF); - JITDUMP("Adding exec context [%06u], sync context [%06u] to async call [%06u]\n", dspTreeID(execCtx), - dspTreeID(syncCtx), dspTreeID(call)); + JITDUMP( + "Adding resumed use [%06u], resumed def [%06u] exec context [%06u], sync context [%06u] to async call [%06u]\n", + dspTreeID(resumed), dspTreeID(resumedAddr), dspTreeID(execCtx), dspTreeID(syncCtx), dspTreeID(call)); call->gtArgs.PushFront(m_compiler, NewCallArg::Primitive(syncCtx).WellKnown(WellKnownArg::AsyncSynchronizationContext)); call->gtArgs.PushFront(m_compiler, NewCallArg::Primitive(execCtx).WellKnown(WellKnownArg::AsyncExecutionContext)); + call->gtArgs.PushFront(m_compiler, + NewCallArg::Primitive(resumedAddr).WellKnown(WellKnownArg::AsyncResumedDef)); + call->gtArgs.PushFront(m_compiler, NewCallArg::Primitive(resumed).WellKnown(WellKnownArg::AsyncResumedUse)); + + m_compiler->lvaGetDesc(m_compiler->lvaResumedIndicator)->lvHasLdAddrOp = true; return WALK_CONTINUE; } }; @@ -374,17 +404,7 @@ BasicBlock* Compiler::CreateReturnBB(unsigned* mergedReturnLcl) // Insert "restore" call CORINFO_ASYNC_INFO* asyncInfo = eeGetAsyncInfo(); - GenTree* resumed; - if (compIsForInlining()) - { - resumed = gtNewFalse(); - } - else - { - GenTree* continuation = 
gtNewLclvNode(lvaAsyncContinuationArg, TYP_REF); - GenTree* null = gtNewNull(); - resumed = gtNewOperNode(GT_NE, TYP_INT, continuation, null); - } + GenTree* resumed = gtNewLclvNode(lvaResumedIndicator, TYP_INT); GenTreeCall* restoreCall = gtNewCallNode(CT_USER_FUNC, asyncInfo->restoreContextsMethHnd, TYP_VOID); SetCallEntrypointForR2R(restoreCall, this, asyncInfo->restoreContextsMethHnd); @@ -688,11 +708,13 @@ PhaseStatus AsyncTransformation::Run() } } + GenTreeLclVarCommon* commonAsyncResumedDef = FindAndRemoveCommonAsyncResumedDef(); + CreateResumptionsAndSuspensions(); // After transforming all async calls we have created resumption blocks; // create the resumption switch. - CreateResumptionSwitch(); + CreateResumptionSwitch(commonAsyncResumedDef); m_compiler->fgInvalidateDfsTree(); @@ -2213,8 +2235,10 @@ void AsyncTransformation::FinishContextHandlingAndSuspensionWithHelper(BasicBloc ? m_sharedFinishContextHandlingWithContinuationContextBB : m_sharedFinishContextHandlingWithoutContinuationContextBB; + CallArg* resumedArg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedUse); CallArg* execContextArg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncExecutionContext); CallArg* syncContextArg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncSynchronizationContext); + assert((resumedArg != nullptr) && (execContextArg != nullptr)); assert((execContextArg != nullptr) && (syncContextArg != nullptr)); // Get the contexts from the call node: @@ -2222,6 +2246,17 @@ void AsyncTransformation::FinishContextHandlingAndSuspensionWithHelper(BasicBloc // 2. For non-shared finish, just make sure it is a GT_LCL_VAR since we need to create // a use in a different block. // Also remove the nodes from the original block and the call args. + GenTree* resumed = resumedArg->GetNode(); + if (!resumed->IsInvariant() && !resumed->OperIs(GT_LCL_VAR)) + { + // We are moving resumed into a different BB so create a temp for it. 
+ LIR::Use use(LIR::AsRange(callBlock), &resumedArg->NodeRef(), call); + use.ReplaceWithLclVar(m_compiler); + resumed = use.Def(); + } + LIR::AsRange(callBlock).Remove(resumed); + call->gtArgs.RemoveUnsafe(resumedArg); + GenTree* execContext = execContextArg->GetNode(); if (!execContext->OperIs(GT_LCL_VAR)) { @@ -2246,7 +2281,13 @@ void AsyncTransformation::FinishContextHandlingAndSuspensionWithHelper(BasicBloc if (sharedFinish != nullptr) { - // Store the contexts to the shared locals that the shared finish block will take them from. + // Store the vars to the shared locals that the shared finish block will take them from. + if (m_sharedFinishContextHandlingResumedVar != BAD_VAR_NUM) + { + GenTree* storeResumed = m_compiler->gtNewStoreLclVarNode(m_sharedFinishContextHandlingResumedVar, resumed); + LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_compiler, storeResumed)); + } + if (m_sharedFinishContextHandlingExecContextVar != BAD_VAR_NUM) { GenTree* storeExecContext = @@ -2267,7 +2308,7 @@ void AsyncTransformation::FinishContextHandlingAndSuspensionWithHelper(BasicBloc else { // Otherwise insert a new call - InsertFinishContextHandlingCall(suspendBB, layout, helper, execContext, syncContext); + InsertFinishContextHandlingCall(suspendBB, layout, helper, resumed, execContext, syncContext); // And return either via a new GT_RETURN_SUSPEND or via the shared return BB. 
if (m_sharedReturnBB != nullptr) @@ -2294,10 +2335,12 @@ void AsyncTransformation::FinishContextHandlingAndSuspensionWithHelper(BasicBloc // void AsyncTransformation::RestoreContexts(BasicBlock* block, GenTreeCall* call, BasicBlock* suspendBB) { + CallArg* resumedArg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedUse); CallArg* execContextArg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncExecutionContext); CallArg* syncContextArg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncSynchronizationContext); + assert((resumedArg != nullptr) == (execContextArg != nullptr)); assert((execContextArg != nullptr) == (syncContextArg != nullptr)); - if (execContextArg == nullptr) + if (resumedArg == nullptr) { JITDUMP(" Call [%06u] does not have async contexts; skipping restore on suspension\n", Compiler::dspTreeID(call)); @@ -2325,19 +2368,27 @@ void AsyncTransformation::RestoreContexts(BasicBlock* block, GenTreeCall* call, LIR::AsRange(suspendBB).InsertAtEnd(LIR::SeqTree(m_compiler, restoreCall)); - // Replace resumedPlaceholder with actual "continuationParameter != null" arg + // Replace resumedPlaceholder with actual resumed arg + GenTree* resumed = resumedArg->GetNode(); + if (!resumed->IsInvariant() && !resumed->OperIs(GT_LCL_VAR)) + { + // We are moving resumed into a different BB so create a temp for it. 
+ LIR::Use use(LIR::AsRange(block), &resumedArg->NodeRef(), call); + use.ReplaceWithLclVar(m_compiler); + resumed = use.Def(); + } + LIR::Use use; bool gotUse = LIR::AsRange(suspendBB).TryGetUse(resumedPlaceholder, &use); assert(gotUse); - GenTree* continuation = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF); - GenTree* null = m_compiler->gtNewNull(); - GenTree* resumed = m_compiler->gtNewOperNode(GT_NE, TYP_INT, continuation, null); - - LIR::AsRange(suspendBB).InsertBefore(resumedPlaceholder, LIR::SeqTree(m_compiler, resumed)); + LIR::AsRange(block).Remove(resumed); + LIR::AsRange(suspendBB).InsertBefore(resumedPlaceholder, resumed); use.ReplaceWith(resumed); LIR::AsRange(suspendBB).Remove(resumedPlaceholder); + call->gtArgs.RemoveUnsafe(resumedArg); + // Replace execContextPlaceholder with actual value GenTree* execContext = execContextArg->GetNode(); if (!execContext->OperIs(GT_LCL_VAR)) @@ -2502,6 +2553,8 @@ void AsyncTransformation::CreateResumption(BasicBlock* call RestoreFromDataOnResumption(layout, subLayout, resumeBB); } + StoreResumedDef(callBlock, call, resumeBB); + BasicBlock* storeResultBB = resumeBB; if (subLayout.NeedsException()) @@ -2580,6 +2633,70 @@ void AsyncTransformation::RestoreFromDataOnResumption(const ContinuationLayout& } } +//------------------------------------------------------------------------ +// AsyncTransformation::StoreResumedDef: +// Assign the resumed def to 1 from the resumption path. 
+// +// Parameters: +// callBlock - The basic block containing the async call +// call - The async call node +// resumeBB - The basic block to append IR to +// +void AsyncTransformation::StoreResumedDef(BasicBlock* callBlock, GenTreeCall* call, BasicBlock* resumeBB) +{ + CallArg* resumedDefArg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedDef); + + if (resumedDefArg == nullptr) + { + return; + } + + GenTreeLclVarCommon* resumedDef = m_compiler->gtCallGetDefinedAsyncResumedLclAddr(call); + assert((resumedDef != nullptr) && (resumedDefArg->GetNode() == resumedDef)); + + StoreResumedDef(resumedDef, resumeBB); + + LIR::AsRange(callBlock).Remove(resumedDef); + call->gtArgs.RemoveUnsafe(resumedDefArg); +} + +//------------------------------------------------------------------------ +// AsyncTransformation::StoreResumedDef: +// Assign the resumed def to 1 in the specified block. +// +// Parameters: +// resumedDef - The local variable representing the resumed def +// block - The basic block to append IR to +// +void AsyncTransformation::StoreResumedDef(GenTreeLclVarCommon* resumedDef, BasicBlock* block) +{ + JITDUMP(" Have resume def [%06u] to store to\n", Compiler::dspTreeID(resumedDef)); + + LclVarDsc* varDsc = m_compiler->lvaGetDesc(resumedDef); + GenTree* store; + if ((resumedDef->GetLclOffs() == 0) && varDsc->TypeIs(TYP_UBYTE)) + { + store = m_compiler->gtNewStoreLclVarNode(resumedDef->GetLclNum(), m_compiler->gtNewIconNode(1)); + } + else + { + store = m_compiler->gtNewStoreLclFldNode(resumedDef->GetLclNum(), TYP_UBYTE, resumedDef->GetLclOffs(), + m_compiler->gtNewIconNode(1)); + m_compiler->lvaSetVarDoNotEnregister(resumedDef->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + + if (block->HasTerminator()) + { + LIR::AsRange(block).InsertBefore(block->lastNode(), LIR::SeqTree(m_compiler, store)); + } + else + { + LIR::AsRange(block).InsertAtEnd(LIR::SeqTree(m_compiler, store)); + } + + JITDUMP(" Created store [%06u] to set resumed def to 
1\n", Compiler::dspTreeID(store)); +} + //------------------------------------------------------------------------ // AsyncTransformation::RethrowExceptionOnResumption: // Create IR that checks for an exception and rethrows it at the original @@ -2980,6 +3097,7 @@ void AsyncTransformation::CreateSharedReturnBB() // Parameters: // helper - The type of helper to call // layout - The continuation layout +// invariantResumed - Tree node to clone and use for "resumed" computation // execContextMayVary - If true, callers may use different execution // contexts, and thus we need a local to allow it to vary. // syncContextMayVary - If true, callers may use different synchronization @@ -2990,6 +3108,7 @@ void AsyncTransformation::CreateSharedReturnBB() // BasicBlock* AsyncTransformation::CreateSharedFinishContextHandlingBB(SuspensionContextHelper helper, const ContinuationLayout& layout, + GenTree* invariantResumed, bool execContextMayVary, bool syncContextMayVary) { @@ -3007,6 +3126,23 @@ BasicBlock* AsyncTransformation::CreateSharedFinishContextHandlingBB(SuspensionC block->SetFlags(BBF_PROF_WEIGHT); } + GenTree* resumed; + if (invariantResumed == nullptr) + { + if (m_sharedFinishContextHandlingResumedVar == BAD_VAR_NUM) + { + m_sharedFinishContextHandlingResumedVar = + m_compiler->lvaGrabTemp(false DEBUGARG("'resumed' for shared finish context handling")); + m_compiler->lvaGetDesc(m_sharedFinishContextHandlingResumedVar)->lvType = TYP_INT; + } + + resumed = m_compiler->gtNewLclVarNode(m_sharedFinishContextHandlingResumedVar, TYP_INT); + } + else + { + resumed = m_compiler->gtCloneExpr(invariantResumed); + } + unsigned execContextLclNum; if (execContextMayVary) { @@ -3041,7 +3177,8 @@ BasicBlock* AsyncTransformation::CreateSharedFinishContextHandlingBB(SuspensionC syncContextLclNum = m_compiler->lvaAsyncSynchronizationContextVar; } - InsertFinishContextHandlingCall(block, layout, helper, m_compiler->gtNewLclvNode(execContextLclNum, TYP_REF), + 
InsertFinishContextHandlingCall(block, layout, helper, resumed, + m_compiler->gtNewLclvNode(execContextLclNum, TYP_REF), m_compiler->gtNewLclvNode(syncContextLclNum, TYP_REF)); return block; @@ -3055,12 +3192,14 @@ BasicBlock* AsyncTransformation::CreateSharedFinishContextHandlingBB(SuspensionC // block - Block that should contain the call (inserted at the end) // layout - The continuation layout // helper - The type of helper +// resumed - The resumed tree to pass to the helper // execContext - The execution context tree to pass to the helper // syncContext - The synchronization context tree to pass to the helper // void AsyncTransformation::InsertFinishContextHandlingCall(BasicBlock* block, const ContinuationLayout& layout, SuspensionContextHelper helper, + GenTree* resumed, GenTree* execContext, GenTree* syncContext) { @@ -3157,11 +3296,7 @@ void AsyncTransformation::InsertFinishContextHandlingCall(BasicBlock* gotUse = LIR::AsRange(block).TryGetUse(resumedPlaceholder, &use); assert(gotUse); - GenTree* continuation = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF); - GenTree* null = m_compiler->gtNewNull(); - GenTree* resumed = m_compiler->gtNewOperNode(GT_NE, TYP_INT, continuation, null); - - LIR::AsRange(block).InsertBefore(resumedPlaceholder, LIR::SeqTree(m_compiler, resumed)); + LIR::AsRange(block).InsertBefore(resumedPlaceholder, resumed); use.ReplaceWith(resumed); LIR::AsRange(block).Remove(resumedPlaceholder); @@ -3185,6 +3320,65 @@ void AsyncTransformation::InsertFinishContextHandlingCall(BasicBlock* DISPTREERANGE(LIR::AsRange(block), finishCall); } +//------------------------------------------------------------------------ +// AsyncTransformation::FindAndRemoveCommonAsyncResumedDef: +// If all async calls define the same async resumption indicator variable, +// then remove the def from all calls and return it. +// +// Returns: +// The common def, or null if there is no common def. 
+// +GenTreeLclVarCommon* AsyncTransformation::FindAndRemoveCommonAsyncResumedDef() +{ + if (m_states.size() <= 1) + { + return nullptr; + } + + bool hasCommonDef = true; + GenTreeLclVarCommon* commonDef = nullptr; + unsigned numWithCommonDef = 0; + + for (const AsyncState& state : m_states) + { + GenTreeLclVarCommon* def = m_compiler->gtCallGetDefinedAsyncResumedLclAddr(state.Call); + if (def == nullptr) + { + continue; + } + + if ((commonDef == nullptr) || GenTree::Compare(def, commonDef)) + { + commonDef = def; + numWithCommonDef++; + } + else + { + hasCommonDef = false; + } + } + + if (!hasCommonDef || (numWithCommonDef <= 1)) + { + return nullptr; + } + + JITDUMP(" Found common async resumed def node:\n"); + DISPTREE(commonDef); + + for (const AsyncState& state : m_states) + { + CallArg* arg = state.Call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedDef); + if (arg != nullptr) + { + LIR::AsRange(state.CallBlock).Remove(arg->GetNode()); + state.Call->gtArgs.RemoveUnsafe(arg); + } + } + + return commonDef; +} + //------------------------------------------------------------------------ // AsyncTransformation::CreateResumptionsAndSuspensions: // Walk all recorded async states and create the suspension and resumption @@ -3205,8 +3399,10 @@ void AsyncTransformation::CreateResumptionsAndSuspensions() unsigned numSharedSuspensionsWithContinuationContext = 0; unsigned numSharedSuspensionsWithoutContinuationContext = 0; - bool execContextMayVary = false; - bool syncContextMayVary = false; + bool resumedMayVary = false; + bool execContextMayVary = false; + bool syncContextMayVary = false; + GenTree* invariantResumed = nullptr; for (const AsyncState& state : m_states) { @@ -3228,6 +3424,26 @@ void AsyncTransformation::CreateResumptionsAndSuspensions() // unnecessary additional register moves. This is a common case. 
if (helper != SuspensionContextHelper::None) { + CallArg* resumedArg = state.Call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedUse); + assert(resumedArg != nullptr); + GenTree* resumed = resumedArg->GetNode(); + + if (resumed->IsInvariant() || resumed->OperIs(GT_LCL_VAR)) + { + if ((invariantResumed == nullptr) || GenTree::Compare(invariantResumed, resumed)) + { + invariantResumed = resumed; + } + else + { + resumedMayVary = true; + } + } + else + { + resumedMayVary = true; + } + CallArg* execContextArg = state.Call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncExecutionContext); CallArg* syncContextArg = state.Call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncSynchronizationContext); @@ -3248,7 +3464,8 @@ void AsyncTransformation::CreateResumptionsAndSuspensions() numSharedSuspensionsWithContinuationContext); m_sharedFinishContextHandlingWithContinuationContextBB = CreateSharedFinishContextHandlingBB(SuspensionContextHelper::WithContinuationContext, *sharedLayout, - execContextMayVary, syncContextMayVary); + resumedMayVary ? nullptr : invariantResumed, execContextMayVary, + syncContextMayVary); } if (numSharedSuspensionsWithoutContinuationContext > 1) @@ -3258,7 +3475,8 @@ void AsyncTransformation::CreateResumptionsAndSuspensions() numSharedSuspensionsWithoutContinuationContext); m_sharedFinishContextHandlingWithoutContinuationContextBB = CreateSharedFinishContextHandlingBB(SuspensionContextHelper::WithoutContinuationContext, *sharedLayout, - execContextMayVary, syncContextMayVary); + resumedMayVary ? nullptr : invariantResumed, execContextMayVary, + syncContextMayVary); } } @@ -3363,7 +3581,7 @@ ContinuationLayoutBuilder* ContinuationLayoutBuilder::CreateSharedLayout(Compile // Create the IR for the entry of the function that checks the continuation // and dispatches on its state number. 
// -void AsyncTransformation::CreateResumptionSwitch() +void AsyncTransformation::CreateResumptionSwitch(GenTreeLclVarCommon* commonAsyncResumedDef) { m_compiler->fgCreateNewInitBB(); BasicBlock* newEntryBB = m_compiler->fgFirstBB; @@ -3461,6 +3679,12 @@ void AsyncTransformation::CreateResumptionSwitch() resumingEdge->setLikelihood(0); newEntryBB->GetFalseEdge()->setLikelihood(1); + if (commonAsyncResumedDef != nullptr) + { + // If we have a common async resumption def, then we do a manual head merge to move it into the switch block + StoreResumedDef(commonAsyncResumedDef, resumingEdge->getDestinationBlock()); + } + if (m_compiler->doesMethodHavePatchpoints()) { JITDUMP(" Method has patch points...\n"); diff --git a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h index ce25cd991362cf..cc360e2bfedddb 100644 --- a/src/coreclr/jit/async.h +++ b/src/coreclr/jit/async.h @@ -366,7 +366,8 @@ class AsyncTransformation // saves/restores and then suspend. BasicBlock* m_sharedFinishContextHandlingWithContinuationContextBB = nullptr; BasicBlock* m_sharedFinishContextHandlingWithoutContinuationContextBB = nullptr; - // Variables that shared suspension finishing BBs take the exec/sync contexts in + // Variables that shared suspension finishing BBs take the resumed/exec/sync contexts in + unsigned m_sharedFinishContextHandlingResumedVar = BAD_VAR_NUM; unsigned m_sharedFinishContextHandlingExecContextVar = BAD_VAR_NUM; unsigned m_sharedFinishContextHandlingSyncContextVar = BAD_VAR_NUM; @@ -454,6 +455,8 @@ class AsyncTransformation void RestoreFromDataOnResumption(const ContinuationLayout& layout, const ContinuationLayoutBuilder& subLayout, BasicBlock* resumeBB); + void StoreResumedDef(BasicBlock* callBlock, GenTreeCall* call, BasicBlock* resumeBB); + void StoreResumedDef(GenTreeLclVarCommon* resumedDef, BasicBlock* block); BasicBlock* RethrowExceptionOnResumption(BasicBlock* block, const ContinuationLayout& layout, BasicBlock* resumeBB); void 
CopyReturnValueOnResumption(GenTreeCall* call, const CallDefinitionInfo& callDefInfo, @@ -479,16 +482,20 @@ class AsyncTransformation void CreateSharedReturnBB(); BasicBlock* CreateSharedFinishContextHandlingBB(SuspensionContextHelper helper, const ContinuationLayout& layout, + GenTree* invariantResumed, bool execContextMayVary, bool syncContextMayVary); void InsertFinishContextHandlingCall(BasicBlock* block, const ContinuationLayout& layout, SuspensionContextHelper helper, + GenTree* resumed, GenTree* execContext, GenTree* syncContext); bool ReuseContinuations(); - void CreateResumptionsAndSuspensions(); - void CreateResumptionSwitch(); + + GenTreeLclVarCommon* FindAndRemoveCommonAsyncResumedDef(); + void CreateResumptionsAndSuspensions(); + void CreateResumptionSwitch(GenTreeLclVarCommon* commonAsyncResumedDef); public: AsyncTransformation(Compiler* comp) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 24b980ccc8f755..308e6d465f7a6b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -10432,7 +10432,7 @@ bool Compiler::lvaIsOSRLocal(unsigned varNum) { // Sanity check for promoted fields of OSR locals. 
// - if ((varNum >= info.compLocalsCount) && (varNum != lvaMonAcquired) && + if ((varNum >= info.compLocalsCount) && (varNum != lvaMonAcquired) && (varNum != lvaResumedIndicator) && (varNum != lvaAsyncExecutionContextVar) && (varNum != lvaAsyncSynchronizationContextVar)) { assert(varDsc->lvIsStructField); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6d83b3ff0030e5..05ea91e7fed5f0 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3987,6 +3987,7 @@ class Compiler bool gtIsTypeof(GenTree* tree, CORINFO_CLASS_HANDLE* handle = nullptr); GenTreeLclVarCommon* gtCallGetDefinedRetBufLclAddr(GenTreeCall* call); + GenTreeLclVarCommon* gtCallGetDefinedAsyncResumedLclAddr(GenTreeCall* call); //------------------------------------------------------------------------- // Functions to display the trees @@ -4259,6 +4260,9 @@ class Compiler // Variable representing async continuation argument passed. unsigned lvaAsyncContinuationArg = BAD_VAR_NUM; + // Variable representing "have we resumed?" for async methods + unsigned lvaResumedIndicator = BAD_VAR_NUM; + #if defined(DEBUG) && defined(TARGET_XARCH) unsigned lvaReturnSpCheck = BAD_VAR_NUM; // Stores SP to confirm it is not corrupted on return. 
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 03b5b33273ebaa..3fcd3901445be5 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4568,15 +4568,25 @@ GenTree::VisitResult GenTree::VisitLocalDefs(Compiler* comp, TVisitor visitor) } if (OperIs(GT_CALL)) { - GenTreeCall* call = AsCall(); - GenTreeLclVarCommon* lclAddr = comp->gtCallGetDefinedRetBufLclAddr(call); - if (lclAddr != nullptr) + GenTreeCall* call = AsCall(); + + GenTreeLclVarCommon* asyncResumedLclAddr = comp->gtCallGetDefinedAsyncResumedLclAddr(call); + if (asyncResumedLclAddr != nullptr) + { + bool isEntire = comp->lvaLclExactSize(asyncResumedLclAddr->GetLclNum()) == 1; + + RETURN_IF_ABORT( + visitor(LocalDef(asyncResumedLclAddr, isEntire, asyncResumedLclAddr->GetLclOffs(), ValueSize(1)))); + } + + GenTreeLclVarCommon* retBufLclAddr = comp->gtCallGetDefinedRetBufLclAddr(call); + if (retBufLclAddr != nullptr) { unsigned storeSize = comp->typGetObjLayout(AsCall()->gtRetClsHnd)->GetSize(); - bool isEntire = storeSize == comp->lvaLclExactSize(lclAddr->GetLclNum()); + bool isEntire = storeSize == comp->lvaLclExactSize(retBufLclAddr->GetLclNum()); - return visitor(LocalDef(lclAddr, isEntire, lclAddr->GetLclOffs(), ValueSize(storeSize))); + return visitor(LocalDef(retBufLclAddr, isEntire, retBufLclAddr->GetLclOffs(), ValueSize(storeSize))); } } @@ -4611,11 +4621,18 @@ GenTree::VisitResult GenTree::VisitLocalDefNodes(Compiler* comp, TVisitor visito } if (OperIs(GT_CALL)) { - GenTreeCall* call = AsCall(); - GenTreeLclVarCommon* lclAddr = comp->gtCallGetDefinedRetBufLclAddr(call); - if (lclAddr != nullptr) + GenTreeCall* call = AsCall(); + + GenTreeLclVarCommon* asyncResumedLclAddr = comp->gtCallGetDefinedAsyncResumedLclAddr(call); + if (asyncResumedLclAddr != nullptr) + { + RETURN_IF_ABORT(visitor(asyncResumedLclAddr)); + } + + GenTreeLclVarCommon* retBufLclAddr = comp->gtCallGetDefinedRetBufLclAddr(call); + if (retBufLclAddr != nullptr) { - return 
visitor(lclAddr); + return visitor(retBufLclAddr); } } diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 172b1ccc3b1609..86be4639ad8771 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3815,7 +3815,8 @@ void Compiler::fgDebugCheckLinkedLocals() if (ShouldLink(node)) { if ((user != nullptr) && user->IsCall() && - (node == m_compiler->gtCallGetDefinedRetBufLclAddr(user->AsCall()))) + ((node == m_compiler->gtCallGetDefinedRetBufLclAddr(user->AsCall())) || + (node == m_compiler->gtCallGetDefinedAsyncResumedLclAddr(user->AsCall())))) { } else @@ -3826,7 +3827,14 @@ void Compiler::fgDebugCheckLinkedLocals() if (node->IsCall()) { - GenTree* defined = m_compiler->gtCallGetDefinedRetBufLclAddr(node->AsCall()); + GenTree* defined = m_compiler->gtCallGetDefinedAsyncResumedLclAddr(node->AsCall()); + if (defined != nullptr) + { + assert(ShouldLink(defined)); + m_locals.Push(defined); + } + + defined = m_compiler->gtCallGetDefinedRetBufLclAddr(node->AsCall()); if (defined != nullptr) { assert(ShouldLink(defined)); diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index dda8a3582d7a2c..56d35f7575e026 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -2320,6 +2320,8 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) case WellKnownArg::AsyncContinuation: case WellKnownArg::AsyncExecutionContext: case WellKnownArg::AsyncSynchronizationContext: + case WellKnownArg::AsyncResumedUse: + case WellKnownArg::AsyncResumedDef: continue; case WellKnownArg::InstParam: argInfo = inlineInfo->inlInstParamArgInfo; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index aeaf0182c43130..267720afba11ff 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1621,6 +1621,8 @@ bool CallArgs::GetCustomRegister(Compiler* comp, CorInfoCallConvExtension cc, We case WellKnownArg::StackArrayLocal: case 
WellKnownArg::AsyncExecutionContext: case WellKnownArg::AsyncSynchronizationContext: + case WellKnownArg::AsyncResumedUse: + case WellKnownArg::AsyncResumedDef: // These are pseudo-args; they are not actual arguments, but we // reuse the argument mechanism to represent them as arbitrary uses // that are later expanded out. @@ -14521,6 +14523,10 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "exec ctx"; case WellKnownArg::AsyncSynchronizationContext: return "sync ctx"; + case WellKnownArg::AsyncResumedUse: + return "resumed"; + case WellKnownArg::AsyncResumedDef: + return "resumed def"; case WellKnownArg::WasmShadowStackPointer: return "wasm sp"; case WellKnownArg::WasmPortableEntryPoint: @@ -20941,6 +20947,34 @@ GenTreeLclVarCommon* Compiler::gtCallGetDefinedRetBufLclAddr(GenTreeCall* call) return node->AsLclVarCommon(); } +//------------------------------------------------------------------------ +// gtCallGetDefinedAsyncResumedLclAddr: +// Get the tree corresponding to the address of the async resumed indicator that this call defines. +// +// Parameters: +// call - The call node +// +// Returns: +// A tree representing the address of a local. +// +GenTreeLclVarCommon* Compiler::gtCallGetDefinedAsyncResumedLclAddr(GenTreeCall* call) +{ + if (!call->IsAsync()) + { + return nullptr; + } + + CallArg* arg = call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedDef); + if (arg == nullptr) + { + return nullptr; + } + + GenTree* node = arg->GetNode(); + assert(node->OperIs(GT_LCL_ADDR) && lvaGetDesc(node->AsLclVarCommon())->IsDefinedViaAddress()); + return node->AsLclVarCommon(); +} + //------------------------------------------------------------------------ // ParseArrayAddress: Rehydrate the array and index expression from ARR_ADDR. 
// diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 1897b43014f17d..5bb371e3817eda 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4779,6 +4779,8 @@ enum class WellKnownArg : unsigned RuntimeMethodHandle, AsyncExecutionContext, AsyncSynchronizationContext, + AsyncResumedUse, + AsyncResumedDef, WasmShadowStackPointer, WasmPortableEntryPoint }; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a83b16e1591cb9..d53e3eb4859276 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -13474,6 +13474,8 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) case WellKnownArg::AsyncContinuation: case WellKnownArg::AsyncExecutionContext: case WellKnownArg::AsyncSynchronizationContext: + case WellKnownArg::AsyncResumedUse: + case WellKnownArg::AsyncResumedDef: // These do not appear in the table of inline arg info; do not include them continue; case WellKnownArg::InstParam: diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 8f86759cdd284d..bc6a319802f660 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -113,6 +113,15 @@ class LocalSequencer final : public GenTreeVisitor // void SequenceCall(GenTreeCall* call) { + if (call->IsAsync()) + { + GenTreeLclVarCommon* asyncResumedDef = m_compiler->gtCallGetDefinedAsyncResumedLclAddr(call); + if (asyncResumedDef != nullptr) + { + MoveNodeToEnd(asyncResumedDef); + } + } + if (call->IsOptimizingRetBufAsLocal()) { // Correct the point at which the definition of the retbuf local appears. @@ -1503,31 +1512,47 @@ class LocalAddressVisitor final : public GenTreeVisitor GenTreeFlags defFlag = GTF_EMPTY; GenTreeCall* callUser = (user != nullptr) && user->IsCall() ? 
user->AsCall() : nullptr; bool escapeAddr = true; - if (m_compiler->opts.compJitOptimizeStructHiddenBuffer && (callUser != nullptr) && - m_compiler->IsValidLclAddr(lclNum, val.Offset())) - { - // We will only attempt this optimization for locals that do not - // later turn into indirections. - bool isSuitableLocal = - varTypeIsStruct(varDsc) && !m_compiler->lvaIsImplicitByRefLocal(lclNum) && - (!varDsc->lvIsStructField || !m_compiler->lvaIsImplicitByRefLocal(varDsc->lvParentLcl)); + if ((callUser != nullptr) && m_compiler->IsValidLclAddr(lclNum, val.Offset())) + { + unsigned defSize = UINT_MAX; + if (callUser->gtArgs.HasRetBuffer() && (val.Node() == callUser->gtArgs.GetRetBufferArg()->GetNode())) + { + // We will only attempt this optimization for locals that do not + // later turn into indirections. + bool isSuitableLocal = + m_compiler->opts.compJitOptimizeStructHiddenBuffer && varTypeIsStruct(varDsc) && + !m_compiler->lvaIsImplicitByRefLocal(lclNum) && + (!varDsc->lvIsStructField || !m_compiler->lvaIsImplicitByRefLocal(varDsc->lvParentLcl)); #ifdef TARGET_X86 - if (m_compiler->lvaIsArgAccessedViaVarArgsCookie(lclNum)) + if (m_compiler->lvaIsArgAccessedViaVarArgsCookie(lclNum)) + { + isSuitableLocal = false; + } +#endif // TARGET_X86 + + if (isSuitableLocal) + { + m_compiler->lvaSetHiddenBufferStructArg(lclNum); + callUser->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG_LCLOPT; + defSize = m_compiler->typGetObjLayout(callUser->gtRetClsHnd)->GetSize(); + } + } + else if (callUser->IsAsync()) { - isSuitableLocal = false; + CallArg* asyncResumedDef = callUser->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedDef); + if ((asyncResumedDef != nullptr) && (val.Node() == asyncResumedDef->GetNode())) + { + defSize = 1; + } } -#endif // TARGET_X86 - if (isSuitableLocal && callUser->gtArgs.HasRetBuffer() && - (val.Node() == callUser->gtArgs.GetRetBufferArg()->GetNode())) + if (defSize != UINT_MAX) { - m_compiler->lvaSetHiddenBufferStructArg(lclNum); + 
INDEBUG(varDsc->SetDefinedViaAddress(true)); escapeAddr = false; - callUser->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG_LCLOPT; - defFlag = GTF_VAR_DEF; + defFlag = GTF_VAR_DEF; - if ((val.Offset() != 0) || - (varDsc->lvExactSize() != m_compiler->typGetObjLayout(callUser->gtRetClsHnd)->GetSize())) + if ((val.Offset() != 0) || (varDsc->lvExactSize() != defSize)) { defFlag |= GTF_VAR_USEASG; } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index adf573deda4674..443e756e4ad879 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -2243,9 +2243,7 @@ void Compiler::lvaSetHiddenBufferStructArg(unsigned varNum) { LclVarDsc* varDsc = lvaGetDesc(varNum); -#ifdef DEBUG - varDsc->SetDefinedViaAddress(true); -#endif + INDEBUG(varDsc->SetDefinedViaAddress(true)); if (varDsc->lvPromoted) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 771732a23624b0..fab998a1790e14 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -633,6 +633,10 @@ const char* getWellKnownArgName(WellKnownArg arg) return "AsyncExecutionContext"; case WellKnownArg::AsyncSynchronizationContext: return "AsyncSynchronizationContext"; + case WellKnownArg::AsyncResumedUse: + return "AsyncResumedUse"; + case WellKnownArg::AsyncResumedDef: + return "AsyncResumedDef"; case WellKnownArg::WasmShadowStackPointer: return "WasmShadowStackPointer"; case WellKnownArg::WasmPortableEntryPoint: From 80c8b105d5e99bc314e028bcd36ad6195917516d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 13 May 2026 18:31:03 +0200 Subject: [PATCH 2/9] Fix --- src/coreclr/jit/async.cpp | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index c6badac1ea89ea..b63b17054189cf 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -3335,32 +3335,22 @@ GenTreeLclVarCommon* AsyncTransformation::FindAndRemoveCommonAsyncResumedDef() 
return nullptr; } - bool hasCommonDef = true; - GenTreeLclVarCommon* commonDef = nullptr; - unsigned numWithCommonDef = 0; + GenTreeLclVarCommon* commonDef = nullptr; for (const AsyncState& state : m_states) { GenTreeLclVarCommon* def = m_compiler->gtCallGetDefinedAsyncResumedLclAddr(state.Call); if (def == nullptr) { - continue; + return nullptr; } - if ((commonDef == nullptr) || GenTree::Compare(def, commonDef)) + if ((commonDef != nullptr) && !GenTree::Compare(def, commonDef)) { - commonDef = def; - numWithCommonDef++; + return nullptr; } - else - { - hasCommonDef = false; - } - } - if (!hasCommonDef || (numWithCommonDef <= 1)) - { - return nullptr; + commonDef = def; } JITDUMP(" Found common async resumed def node:\n"); @@ -3369,11 +3359,8 @@ GenTreeLclVarCommon* AsyncTransformation::FindAndRemoveCommonAsyncResumedDef() for (const AsyncState& state : m_states) { CallArg* arg = state.Call->gtArgs.FindWellKnownArg(WellKnownArg::AsyncResumedDef); - if (arg != nullptr) - { - LIR::AsRange(state.CallBlock).Remove(arg->GetNode()); - state.Call->gtArgs.RemoveUnsafe(arg); - } + LIR::AsRange(state.CallBlock).Remove(arg->GetNode()); + state.Call->gtArgs.RemoveUnsafe(arg); } return commonDef; From ca5ef77e0f00b63640037b521452d13b340190b9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 13 May 2026 18:31:55 +0200 Subject: [PATCH 3/9] Comment --- src/coreclr/jit/async.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index b63b17054189cf..10421c8370c08e 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -3668,7 +3668,9 @@ void AsyncTransformation::CreateResumptionSwitch(GenTreeLclVarCommon* commonAsyn if (commonAsyncResumedDef != nullptr) { - // If we have a common async resumption def, then we do a manual head merge to move it into the switch block + // If we have a common async resumption def, then we do a + // manual head merge to move it into the
switch block to avoid storing + // it in every resumption. StoreResumedDef(commonAsyncResumedDef, resumingEdge->getDestinationBlock()); } From cca3fc554030efb93100ac21c51a5b848c13cc09 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 13 May 2026 18:37:21 +0200 Subject: [PATCH 4/9] Copilot feedback --- src/coreclr/jit/async.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 10421c8370c08e..30e1a6495aa6ac 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -3133,10 +3133,10 @@ BasicBlock* AsyncTransformation::CreateSharedFinishContextHandlingBB(SuspensionC { m_sharedFinishContextHandlingResumedVar = m_compiler->lvaGrabTemp(false DEBUGARG("'resumed' for shared finish context handling")); - m_compiler->lvaGetDesc(m_sharedFinishContextHandlingResumedVar)->lvType = TYP_REF; + m_compiler->lvaGetDesc(m_sharedFinishContextHandlingResumedVar)->lvType = TYP_UBYTE; } - resumed = m_compiler->gtNewLclVarNode(m_sharedFinishContextHandlingResumedVar, TYP_INT); + resumed = m_compiler->gtNewLclVarNode(m_sharedFinishContextHandlingResumedVar, TYP_UBYTE); } else { From 6564c125bc9235e4220ee6889de1aec7853445e1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 15 May 2026 13:32:11 +0200 Subject: [PATCH 5/9] Generalize --- src/coreclr/jit/fgdiagnostic.cpp | 33 ++++++++++++--------------- src/coreclr/jit/lclmorph.cpp | 24 +++++++------------ src/coreclr/jit/liveness.cpp | 26 ++++++++++++--------- src/coreclr/jit/morph.cpp | 8 +++---- src/coreclr/jit/promotionliveness.cpp | 23 +++++++++++++++---- 5 files changed, 60 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 86be4639ad8771..112720bbe14406 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3814,12 +3814,7 @@ void Compiler::fgDebugCheckLinkedLocals() GenTree* node = *use; if (ShouldLink(node)) { - if 
((user != nullptr) && user->IsCall() && - ((node == m_compiler->gtCallGetDefinedRetBufLclAddr(user->AsCall())) || - (node == m_compiler->gtCallGetDefinedAsyncResumedLclAddr(user->AsCall())))) - { - } - else + if ((user == nullptr) || !user->IsCall() || !IsDefinedByCall(user->AsCall(), node)) { m_locals.Push(node); } @@ -3827,23 +3822,25 @@ void Compiler::fgDebugCheckLinkedLocals() if (node->IsCall()) { - GenTree* defined = m_compiler->gtCallGetDefinedAsyncResumedLclAddr(node->AsCall()); - if (defined != nullptr) - { - assert(ShouldLink(defined)); - m_locals.Push(defined); - } + auto linkDefs = [&](GenTree* def) { + assert(ShouldLink(def)); + m_locals.Push(def); + return GenTree::VisitResult::Continue; + }; - defined = m_compiler->gtCallGetDefinedRetBufLclAddr(node->AsCall()); - if (defined != nullptr) - { - assert(ShouldLink(defined)); - m_locals.Push(defined); - } + node->VisitLocalDefNodes(m_compiler, linkDefs); } return WALK_CONTINUE; } + + bool IsDefinedByCall(GenTreeCall* call, GenTree* node) + { + auto defIsNode = [=](GenTree* def) { + return node == def ? GenTree::VisitResult::Abort : GenTree::VisitResult::Continue; + }; + return call->VisitLocalDefNodes(m_compiler, defIsNode) == GenTree::VisitResult::Abort; + } }; DebugLocalSequencer seq(this); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index bc6a319802f660..be403a634b8100 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -102,31 +102,23 @@ class LocalSequencer final : public GenTreeVisitor } //------------------------------------------------------------------- - // SequenceCall: Post-process a call that may define a local. + // SequenceCall: Post-process a call that may define locals. // // Arguments: // call - the call // // Remarks: - // calls may also define a local that we would like to see - // after all other operands of the call have been evaluated. 
+ // calls may also define locals that we would like to see after all + // other operands of the call have been evaluated. // void SequenceCall(GenTreeCall* call) { - if (call->IsAsync()) - { - GenTreeLclVarCommon* asyncResumedDef = m_compiler->gtCallGetDefinedAsyncResumedLclAddr(call); - if (asyncResumedDef != nullptr) - { - MoveNodeToEnd(asyncResumedDef); - } - } + auto moveToEnd = [&](GenTree* def) { + MoveNodeToEnd(def); + return GenTree::VisitResult::Continue; + }; - if (call->IsOptimizingRetBufAsLocal()) - { - // Correct the point at which the definition of the retbuf local appears. - MoveNodeToEnd(m_compiler->gtCallGetDefinedRetBufLclAddr(call)); - } + call->VisitLocalDefNodes(m_compiler, moveToEnd); } //------------------------------------------------------------------- diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 8449490c068feb..a1d89821ec3944 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -107,7 +107,7 @@ class Liveness bool* pStoreRemoved DEBUGARG(bool* treeModf)); void ComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALARG_TP keepAliveVars); - bool IsTrackedRetBufferAddress(LIR::Range& range, GenTree* node); + bool IsTrackedCallDefinition(LIR::Range& range, GenTree* node); bool TryRemoveDeadStoreLIR(GenTree* store, GenTreeLclVarCommon* lclNode, BasicBlock* block); bool TryRemoveNonLocalLIR(GenTree* node, LIR::Range* blockRange); bool CanUncontainOrRemoveOperands(GenTree* node); @@ -2432,12 +2432,11 @@ void Liveness::ComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VAR } else { - // For LCL_ADDRs that are defined by being passed as a - // retbuf we will handle them when we get to the call. We - // cannot consider them to be defined at the point of the - // LCL_ADDR since there may be uses between the LCL_ADDR - // and call. - if (IsTrackedRetBufferAddress(blockRange, node)) + // For LCL_ADDRs that are definitions for the call we will + // handle them when we get to the call. 
We cannot consider + // them to be defined at the point of the LCL_ADDR since + // there may be uses between the LCL_ADDR and call. + if (IsTrackedCallDefinition(blockRange, node)) { break; } @@ -2639,15 +2638,15 @@ void Liveness::ComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VAR } //--------------------------------------------------------------------- -// IsTrackedRetBufferAddress - given a LCL_ADDR node, check if it is the -// return buffer definition of a call. +// IsTrackedCallDefinition - given a LCL_ADDR node, check if it is an +// extra definition of a call. // // Arguments // range - the block range containing the LCL_ADDR // node - the LCL_ADDR // template -bool Liveness::IsTrackedRetBufferAddress(LIR::Range& range, GenTree* node) +bool Liveness::IsTrackedCallDefinition(LIR::Range& range, GenTree* node) { assert(node->OperIs(GT_LCL_ADDR)); if ((node->gtFlags & GTF_VAR_DEF) == 0) @@ -2674,7 +2673,12 @@ bool Liveness::IsTrackedRetBufferAddress(LIR::Range& range, GenTree* if (curNode->IsCall()) { - return m_compiler->gtCallGetDefinedRetBufLclAddr(curNode->AsCall()) == node; + auto visit = [=](GenTree* callDef) { + return node == callDef ? GenTree::VisitResult::Abort : GenTree::VisitResult::Continue; + }; + + return + curNode->VisitLocalDefNodes(m_compiler, visit) == GenTree::VisitResult::Abort; } } while (curNode->OperIs(GT_FIELD_LIST) || curNode->OperIsPutArg()); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index fab998a1790e14..94d61531877b9e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1494,10 +1494,10 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) arg.SetEarlyNode(setupArg); call->gtFlags |= setupArg->gtFlags & GTF_SIDE_EFFECT; - // Make sure we do not break recognition of retbuf-as-local - // optimization here. 
If this is hit it indicates that we are - // unnecessarily creating temps for some ret buf addresses, and - // gtCallGetDefinedRetBufLclAddr relies on this not to happen. + // Make sure we do not break recognition of call-defined locals here. + // If this is hit it indicates that we are unnecessarily creating + // temps for some ret buf addresses, and recognition of call defs + // relies on this not happening. noway_assert((arg.GetWellKnownArg() != WellKnownArg::RetBuffer) || !call->IsOptimizingRetBufAsLocal()); } diff --git a/src/coreclr/jit/promotionliveness.cpp b/src/coreclr/jit/promotionliveness.cpp index e137caab827500..5eae2295fb8d75 100644 --- a/src/coreclr/jit/promotionliveness.cpp +++ b/src/coreclr/jit/promotionliveness.cpp @@ -257,12 +257,25 @@ unsigned PromotionLiveness::GetSizeOfStructLocal(Statement* stmt, GenTreeLclVarC { if (lcl->OperIs(GT_LCL_ADDR)) { - // Retbuf definition. Find the definition size from the - // containing call. + // LCL_ADDR definition. Currently we only have calls that define via + // LCL_ADDRs. Find the definition size from the containing call.
Compiler::FindLinkData data = m_compiler->gtFindLink(stmt, lcl); - assert((data.parent != nullptr) && data.parent->IsCall() && - (m_compiler->gtCallGetDefinedRetBufLclAddr(data.parent->AsCall()) == lcl)); - return m_compiler->typGetObjLayout(data.parent->AsCall()->gtRetClsHnd)->GetSize(); + assert((data.parent != nullptr) && data.parent->IsCall()); + //(m_compiler->gtCallGetDefinedRetBufLclAddr(data.parent->AsCall()) == lcl)); + unsigned defSize = UINT_MAX; + auto findDef = [&](const LocalDef& def) { + if (def.Def == lcl) + { + defSize = def.Size.GetExact(); + return GenTree::VisitResult::Abort; + } + + return GenTree::VisitResult::Continue; + }; + + GenTree::VisitResult result = data.parent->VisitLocalDefs(m_compiler, findDef); + assert(result == GenTree::VisitResult::Abort); + return defSize; } return lcl->GetLayout(m_compiler)->GetSize(); From 73187afd5f01730a52a64e5d10932e7c19d4dbce Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 15 May 2026 13:41:32 +0200 Subject: [PATCH 6/9] Fixes --- src/coreclr/jit/lclmorph.cpp | 2 +- src/coreclr/jit/liveness.cpp | 6 +++--- src/coreclr/jit/promotionliveness.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index be403a634b8100..a66cc246ae7859 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -113,7 +113,7 @@ class LocalSequencer final : public GenTreeVisitor // void SequenceCall(GenTreeCall* call) { - auto moveToEnd = [&](GenTree* def) { + auto moveToEnd = [&](GenTreeLclVarCommon* def) { MoveNodeToEnd(def); return GenTree::VisitResult::Continue; }; diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index a1d89821ec3944..db8f215c3acd1b 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -845,9 +845,9 @@ void Liveness::PerNodeLocalVarLiveness(GenTree* tree) case GT_LCL_ADDR: if (TLiveness::IsLIR) { - // If this is a definition of a retbuf then we 
process it as - // part of the GT_CALL node. - if (IsTrackedRetBufferAddress(LIR::AsRange(m_compiler->compCurBB), tree)) + // If this is a call definition then we process it as part of + // the GT_CALL node. + if (IsTrackedCallDefinition(LIR::AsRange(m_compiler->compCurBB), tree)) { break; } diff --git a/src/coreclr/jit/promotionliveness.cpp b/src/coreclr/jit/promotionliveness.cpp index 5eae2295fb8d75..1b5e695e09915a 100644 --- a/src/coreclr/jit/promotionliveness.cpp +++ b/src/coreclr/jit/promotionliveness.cpp @@ -261,7 +261,7 @@ unsigned PromotionLiveness::GetSizeOfStructLocal(Statement* stmt, GenTreeLclVarC // LCL_ADDRs. Find the definition size from the containing call. Compiler::FindLinkData data = m_compiler->gtFindLink(stmt, lcl); assert((data.parent != nullptr) && data.parent->IsCall()); - //(m_compiler->gtCallGetDefinedRetBufLclAddr(data.parent->AsCall()) == lcl)); + unsigned defSize = UINT_MAX; auto findDef = [&](const LocalDef& def) { if (def.Def == lcl) From d13f065356d6bf2b1bd9cf3af07c9f42c73218e5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 15 May 2026 13:06:10 +0200 Subject: [PATCH 7/9] JIT: Move invariant nodes and LCL_VARs in LiftLIREdges Invariant nodes and LCL_VARs do not need to be lifted across async calls. --- src/coreclr/jit/async.cpp | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 30e1a6495aa6ac..ab5bd051ed91a4 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -1093,24 +1093,29 @@ void AsyncTransformation::LiftLIREdges(BasicBlock* block, for (GenTree* tree : defs) { - // TODO-CQ: Enable this. It currently breaks our recognition of how the - // call is stored. - // if (tree->OperIs(GT_LCL_VAR)) - //{ - // LclVarDsc* dsc = m_compiler->lvaGetDesc(tree->AsLclVarCommon()); - // if (!dsc->IsAddressExposed()) - // { - // // No interference by IR invariants. 
- // LIR::AsRange(block).Remove(tree); - // LIR::AsRange(block).InsertAfter(beyond, tree); - // continue; - // } - //} - LIR::Use use; bool gotUse = LIR::AsRange(block).TryGetUse(tree, &use); assert(gotUse); // Defs list should not contain unused values. + if (tree->IsInvariant()) + { + LIR::AsRange(block).Remove(tree); + LIR::AsRange(block).InsertBefore(use.User(), tree); + continue; + } + + if (tree->OperIs(GT_LCL_VAR)) + { + LclVarDsc* dsc = m_compiler->lvaGetDesc(tree->AsLclVarCommon()); + if (!dsc->IsAddressExposed()) + { + // No interference by IR invariants + LIR::AsRange(block).Remove(tree); + LIR::AsRange(block).InsertAfter(use.User(), tree); + continue; + } + } + unsigned newLclNum = use.ReplaceWithLclVar(m_compiler); layoutBuilder->AddLocal(newLclNum); GenTree* newUse = use.Def(); From e5b707ec3a91cfac9da76b43ee1ba783ab4474fa Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 15 May 2026 13:33:09 +0200 Subject: [PATCH 8/9] Fix --- src/coreclr/jit/async.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index ab5bd051ed91a4..2081bb85521a85 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -1111,7 +1111,7 @@ void AsyncTransformation::LiftLIREdges(BasicBlock* block, { // No interference by IR invariants LIR::AsRange(block).Remove(tree); - LIR::AsRange(block).InsertAfter(use.User(), tree); + LIR::AsRange(block).InsertBefore(use.User(), tree); continue; } } From 6d2556d669ad466f8f4f8a29bafda96745a4ad27 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 15 May 2026 14:27:05 +0200 Subject: [PATCH 9/9] Run jit-format --- src/coreclr/jit/fgdiagnostic.cpp | 4 ++-- src/coreclr/jit/lclmorph.cpp | 2 +- src/coreclr/jit/liveness.cpp | 5 ++--- src/coreclr/jit/promotionliveness.cpp | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 
112720bbe14406..75c42931cdfef1 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3826,7 +3826,7 @@ void Compiler::fgDebugCheckLinkedLocals() assert(ShouldLink(def)); m_locals.Push(def); return GenTree::VisitResult::Continue; - }; + }; node->VisitLocalDefNodes(m_compiler, linkDefs); } @@ -3838,7 +3838,7 @@ void Compiler::fgDebugCheckLinkedLocals() { auto defIsNode = [=](GenTree* def) { return node == def ? GenTree::VisitResult::Abort : GenTree::VisitResult::Continue; - }; + }; return call->VisitLocalDefNodes(m_compiler, defIsNode) == GenTree::VisitResult::Abort; } }; diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index a66cc246ae7859..f681866626b95a 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -116,7 +116,7 @@ class LocalSequencer final : public GenTreeVisitor auto moveToEnd = [&](GenTreeLclVarCommon* def) { MoveNodeToEnd(def); return GenTree::VisitResult::Continue; - }; + }; call->VisitLocalDefNodes(m_compiler, moveToEnd); } diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index db8f215c3acd1b..ea21161b4c92d5 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -2675,10 +2675,9 @@ bool Liveness::IsTrackedCallDefinition(LIR::Range& range, GenTree* no { auto visit = [=](GenTree* callDef) { return node == callDef ? 
GenTree::VisitResult::Abort : GenTree::VisitResult::Continue; - }; + }; - return - curNode->VisitLocalDefNodes(m_compiler, visit) == GenTree::VisitResult::Abort; + return curNode->VisitLocalDefNodes(m_compiler, visit) == GenTree::VisitResult::Abort; } } while (curNode->OperIs(GT_FIELD_LIST) || curNode->OperIsPutArg()); diff --git a/src/coreclr/jit/promotionliveness.cpp b/src/coreclr/jit/promotionliveness.cpp index 1b5e695e09915a..8ba68f7711c5ca 100644 --- a/src/coreclr/jit/promotionliveness.cpp +++ b/src/coreclr/jit/promotionliveness.cpp @@ -263,7 +263,7 @@ unsigned PromotionLiveness::GetSizeOfStructLocal(Statement* stmt, GenTreeLclVarC assert((data.parent != nullptr) && data.parent->IsCall()); unsigned defSize = UINT_MAX; - auto findDef = [&](const LocalDef& def) { + auto findDef = [&](const LocalDef& def) { if (def.Def == lcl) { defSize = def.Size.GetExact(); @@ -271,7 +271,7 @@ unsigned PromotionLiveness::GetSizeOfStructLocal(Statement* stmt, GenTreeLclVarC } return GenTree::VisitResult::Continue; - }; + }; GenTree::VisitResult result = data.parent->VisitLocalDefs(m_compiler, findDef); assert(result == GenTree::VisitResult::Abort);