From 6e40556786caa3b6e7afd1b9ecc9a388be3d1753 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 9 Oct 2023 13:59:34 +0200 Subject: [PATCH 1/4] JIT: Fix LowerHWIntrinsicCreate LIR ordering issues LowerHWIntrinsicCreate was assuming in many places that operand nodes come in order, which is fundamentally not an assumption that can be made for LIR. Fix #92766 --- src/coreclr/jit/lowerarmarch.cpp | 10 ++++----- src/coreclr/jit/lowerxarch.cpp | 37 ++++++++++++++------------------ 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index ceefc04ed068ae..8c10d5b64625e0 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1609,12 +1609,10 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) for (N = 1; N < argCnt - 1; N++) { - opN = node->Op(N + 1); - idx = comp->gtNewIconNode(N); - BlockRange().InsertBefore(opN, idx); - + opN = node->Op(N + 1); + idx = comp->gtNewIconNode(N); tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(opN, tmp1); + BlockRange().InsertBefore(node, idx, tmp1); LowerNode(tmp1); } @@ -1623,7 +1621,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // For the last insert, we will reuse the existing node and so handle it here, outside the loop. opN = node->Op(argCnt); idx = comp->gtNewIconNode(N); - BlockRange().InsertBefore(opN, idx); + BlockRange().InsertBefore(node, idx); node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8372fd0959be10..1a9fbc0cd73d31 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3212,11 +3212,11 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(), halfArgCnt, NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertAfter(node->Op(halfArgCnt), lo); + BlockRange().InsertBefore(node, lo); GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(halfArgCnt), halfArgCnt, NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertAfter(node->Op(argCnt), hi); + BlockRange().InsertBefore(node, hi); assert(argCnt >= 7); node->ResetHWIntrinsicId(NI_Vector512_WithUpper, comp, lo, hi); @@ -3265,11 +3265,11 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertAfter(node->Op(halfArgCnt), lo); + BlockRange().InsertBefore(node, lo); GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertAfter(node->Op(argCnt), hi); + BlockRange().InsertBefore(node, hi); assert(argCnt >= 3); node->ResetHWIntrinsicId(NI_Vector256_WithUpper, comp, lo, hi); @@ -3335,12 +3335,10 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) opN = node->Op(N + 1); - idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertAfter(opN, idx); - + idx = comp->gtNewIconNode(N, TYP_INT); tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, opN, idx, insIntrinsic, simdBaseJitType, simdSize); - BlockRange().InsertAfter(idx, tmp1); + BlockRange().InsertBefore(node, idx, tmp1); LowerNode(tmp1); } @@ -3361,7 +3359,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) opN = node->Op(argCnt); idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertAfter(opN, idx); + BlockRange().InsertBefore(node, idx); node->ResetHWIntrinsicId(insIntrinsic, comp, tmp1, opN, idx); break; @@ -3419,17 +3417,16 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[N], op[O], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(op[O], tmp1); + BlockRange().InsertBefore(node, tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[P], op[Q], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(op[Q], tmp2); + BlockRange().InsertBefore(node, tmp2); LowerNode(tmp2); tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_USHORT, simdSize); - BlockRange().InsertAfter(tmp2, tmp3); LowerNode(tmp3); // This caches the result in index 0 through 3, depending on which @@ -3468,12 +3465,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); - BlockRange().InsertAfter(op[1], tmp1); + BlockRange().InsertBefore(node, tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); - BlockRange().InsertAfter(op[3], tmp2); + BlockRange().InsertBefore(node, tmp2); LowerNode(tmp2); node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); @@ -3564,12 +3561,10 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, opN, simdBaseJitType, 16); LowerNode(tmp2); - idx = comp->gtNewIconNode(N << 4, TYP_INT); - BlockRange().InsertAfter(tmp2, idx); - + idx = comp->gtNewIconNode(N << 4, TYP_INT); tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(idx, tmp3); + BlockRange().InsertBefore(node, idx, tmp3); insertedNodes[N] = tmp3; tmp1 = tmp3; @@ -3597,7 +3592,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(tmp2); idx = comp->gtNewIconNode((argCnt - 1) << 4, TYP_INT); - BlockRange().InsertAfter(tmp2, idx); + BlockRange().InsertBefore(node, idx); node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, tmp2, idx); @@ -3655,11 +3650,11 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE_UnpackLow, simdBaseJitType, simdSize); - BlockRange().InsertAfter(op[1], tmp1); + BlockRange().InsertBefore(node, tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE_UnpackLow, simdBaseJitType, simdSize); - BlockRange().InsertAfter(op[3], tmp2); + BlockRange().InsertBefore(node, tmp2); LowerNode(tmp2); node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); From e05ecd397d903c5befc274da6ab3b656e0f823f3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 9 Oct 2023 15:06:47 +0200 Subject: [PATCH 2/4] Optimize --- src/coreclr/jit/lir.cpp | 64 ++++++++++++++++++++++++++++++++ src/coreclr/jit/lir.h | 3 ++ src/coreclr/jit/lowerarmarch.cpp | 9 +++-- src/coreclr/jit/lowerxarch.cpp | 42 ++++++++++++++------- 4 files changed, 101 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp index 44e810592a006a..41d4b5caecc660 100644 --- a/src/coreclr/jit/lir.cpp +++ b/src/coreclr/jit/lir.cpp @@ -1794,6 +1794,70 @@ void LIR::InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range) blockRange.InsertBefore(insertionPoint, std::move(range)); } +//------------------------------------------------------------------------ +// LIR::LastNode: +// Given two nodes in the same block range, find which node appears last. +// +// Arguments: +// node1 - The first node +// node2 - The second node +// +// Returns: +// Node that appears last. +// +GenTree* LIR::LastNode(GenTree* node1, GenTree* node2) +{ + assert(node1 != nullptr); + assert(node2 != nullptr); + + if (node1 == node2) + { + return node1; + } + + GenTree* cursor1 = node1->gtNext; + GenTree* cursor2 = node2->gtNext; + + while (true) + { + if ((cursor1 == node2) || (cursor2 == nullptr)) + { + return node2; + } + + if ((cursor2 == node1) || (cursor1 == nullptr)) + { + return node1; + } + + cursor1 = cursor1->gtNext; + cursor2 = cursor2->gtNext; + } +} + +//------------------------------------------------------------------------ +// LIR::LastNode: +// Given an array of nodes in a block range, find the last one. +// +// Arguments: +// nodes - Pointer to nodes +// numNodes - Number of nodes +// +// Returns: +// Node that appears last. +// +GenTree* LIR::LastNode(GenTree** nodes, size_t numNodes) +{ + assert(numNodes > 0); + GenTree* lastNode = nodes[0]; + for (size_t i = 1; i < numNodes; i++) + { + lastNode = LastNode(lastNode, nodes[i]); + } + + return lastNode; +} + #ifdef DEBUG void GenTree::dumpLIRFlags() { diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index 5cdead7e9383bf..9b4f940bc0ae38 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -310,6 +310,9 @@ class LIR final static Range SeqTree(Compiler* compiler, GenTree* tree); static void InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range); + + static GenTree* LastNode(GenTree* node1, GenTree* node2); + static GenTree* LastNode(GenTree** nodes, size_t numNodes); }; inline void GenTree::SetUnusedValue() diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 8c10d5b64625e0..4088338d7faea7 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1609,10 +1609,13 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) for (N = 1; N < argCnt - 1; N++) { - opN = node->Op(N + 1); - idx = comp->gtNewIconNode(N); + opN = node->Op(N + 1); + + // Place the insert as early as possible to avoid creating a lot of long lifetimes. + GenTree* insertionPoint = LIR::LastNode(tmp1, opN); + idx = comp->gtNewIconNode(N); tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, idx, tmp1); + BlockRange().InsertAfter(insertionPoint, idx, tmp1); LowerNode(tmp1); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1a9fbc0cd73d31..29a1ba9a15fd81 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3210,13 +3210,16 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) size_t halfArgCnt = argCnt / 2; assert((halfArgCnt * 2) == argCnt); + GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt); + GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(), halfArgCnt, NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertBefore(node, lo); + BlockRange().InsertAfter(loInsertionPoint, lo); + GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt); GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(halfArgCnt), halfArgCnt, NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertBefore(node, hi); + BlockRange().InsertAfter(hiInsertionPoint, hi); assert(argCnt >= 7); node->ResetHWIntrinsicId(NI_Vector512_WithUpper, comp, lo, hi); @@ -3263,13 +3266,17 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) size_t halfArgCnt = argCnt / 2; assert((halfArgCnt * 2) == argCnt); + GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt); + GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertBefore(node, lo); + BlockRange().InsertAfter(loInsertionPoint, lo); + + GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt); GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertBefore(node, hi); + BlockRange().InsertAfter(hiInsertionPoint, hi); assert(argCnt >= 3); node->ResetHWIntrinsicId(NI_Vector256_WithUpper, comp, lo, hi); @@ -3335,10 +3342,13 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) opN = node->Op(N + 1); - idx = comp->gtNewIconNode(N, TYP_INT); + idx = comp->gtNewIconNode(N, TYP_INT); + // Place the insert as early as possible to avoid creating a lot of long lifetimes. + GenTree* insertionPoint = LIR::LastNode(tmp1, opN); + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, opN, idx, insIntrinsic, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, idx, tmp1); + BlockRange().InsertAfter(insertionPoint, idx, tmp1); LowerNode(tmp1); } @@ -3417,12 +3427,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[N], op[O], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertBefore(node, tmp1); + BlockRange().InsertAfter(LIR::LastNode(op[N], op[O]), tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[P], op[Q], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertBefore(node, tmp2); + BlockRange().InsertAfter(LIR::LastNode(op[P], op[Q]), tmp2); LowerNode(tmp2); tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_USHORT, @@ -3465,12 +3475,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); - BlockRange().InsertBefore(node, tmp1); + BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); - BlockRange().InsertBefore(node, tmp2); + BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); @@ -3561,10 +3571,14 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, opN, simdBaseJitType, 16); LowerNode(tmp2); - idx = comp->gtNewIconNode(N << 4, TYP_INT); + idx = comp->gtNewIconNode(N << 4, TYP_INT); + + // Place the insert as early as possible to avoid creating a lot of long lifetimes. + GenTree* insertionPoint = LIR::LastNode(tmp1, tmp2); + tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, idx, tmp3); + BlockRange().InsertAfter(insertionPoint, idx, tmp3); insertedNodes[N] = tmp3; tmp1 = tmp3; @@ -3650,11 +3664,11 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE_UnpackLow, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, tmp1); + BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE_UnpackLow, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, tmp2); + BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); From 35eb0d4ee0e7fd6385bf636442ce1874bb9010f9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 9 Oct 2023 15:10:41 +0200 Subject: [PATCH 3/4] Revert unnecessary changes --- src/coreclr/jit/lowerarmarch.cpp | 2 +- src/coreclr/jit/lowerxarch.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 4088338d7faea7..f2b443719c8f14 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1624,7 +1624,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // For the last insert, we will reuse the existing node and so handle it here, outside the loop. opN = node->Op(argCnt); idx = comp->gtNewIconNode(N); - BlockRange().InsertBefore(node, idx); + BlockRange().InsertAfter(opN, idx); node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 29a1ba9a15fd81..499676498c1cdc 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3369,7 +3369,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) opN = node->Op(argCnt); idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertBefore(node, idx); + BlockRange().InsertAfter(opN, idx); node->ResetHWIntrinsicId(insIntrinsic, comp, tmp1, opN, idx); break; @@ -3606,7 +3606,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(tmp2); idx = comp->gtNewIconNode((argCnt - 1) << 4, TYP_INT); - BlockRange().InsertBefore(node, idx); + BlockRange().InsertAfter(tmp2, idx); node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, tmp2, idx); From 866393d0654197bb0fa70137107422fbc138dd1e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 9 Oct 2023 17:02:19 +0200 Subject: [PATCH 4/4] Revert a change, fix accidental removal --- src/coreclr/jit/lowerarmarch.cpp | 2 +- src/coreclr/jit/lowerxarch.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index f2b443719c8f14..ddb24eb411c3be 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1624,7 +1624,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // For the last insert, we will reuse the existing node and so handle it here, outside the loop. opN = node->Op(argCnt); idx = comp->gtNewIconNode(N); - BlockRange().InsertAfter(opN, idx); + BlockRange().InsertBefore(opN, idx); node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 499676498c1cdc..e459b17daae6d6 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3437,6 +3437,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_USHORT, simdSize); + BlockRange().InsertAfter(LIR::LastNode(tmp1, tmp2), tmp3); LowerNode(tmp3); // This caches the result in index 0 through 3, depending on which