diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp
index 44e810592a006a..41d4b5caecc660 100644
--- a/src/coreclr/jit/lir.cpp
+++ b/src/coreclr/jit/lir.cpp
@@ -1794,6 +1794,70 @@ void LIR::InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range)
     blockRange.InsertBefore(insertionPoint, std::move(range));
 }
 
+//------------------------------------------------------------------------
+// LIR::LastNode:
+//    Determine which of two nodes from the same block range appears later
+//    in that range. Both nodes are followed through gtNext in lockstep.
+//
+// Arguments:
+//    node1 - One node to compare
+//    node2 - The other node to compare
+//
+// Returns:
+//    The node that appears last; node1 when both arguments are the same node.
+//
+GenTree* LIR::LastNode(GenTree* node1, GenTree* node2)
+{
+    assert(node1 != nullptr);
+    assert(node2 != nullptr);
+
+    if (node1 == node2)
+    {
+        return node1;
+    }
+
+    GenTree* walk1 = node1->gtNext;
+    GenTree* walk2 = node2->gtNext;
+
+    for (;; walk1 = walk1->gtNext, walk2 = walk2->gtNext)
+    {
+        // node2 is last if node1's walk reached it or node2's own walk hit the end first.
+        if ((walk2 == nullptr) || (walk1 == node2))
+        {
+            return node2;
+        }
+
+        // Symmetric check for node1.
+        if ((walk1 == nullptr) || (walk2 == node1))
+        {
+            return node1;
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// LIR::LastNode:
+//    Given an array of nodes in a block range, find the last one.
+//
+// Arguments:
+//    nodes    - Pointer to nodes
+//    numNodes - Number of nodes
+//
+// Returns:
+//    Node that appears last.
+//
+GenTree* LIR::LastNode(GenTree** nodes, size_t numNodes)
+{
+    assert(numNodes > 0);
+    GenTree* latest = nodes[0];
+    size_t   next   = 1; // nodes[0] already seeds the result
+    while (next < numNodes)
+    {
+        latest = LastNode(latest, nodes[next++]);
+    }
+    return latest;
+}
+
 #ifdef DEBUG
 void GenTree::dumpLIRFlags()
 {
diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index 5cdead7e9383bf..9b4f940bc0ae38 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -310,6 +310,9 @@ class LIR final static Range SeqTree(Compiler* compiler, GenTree* tree); static void InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range); + + static GenTree* LastNode(GenTree* node1, GenTree* node2); + static GenTree* LastNode(GenTree** nodes, size_t numNodes); }; inline void GenTree::SetUnusedValue() diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index ceefc04ed068ae..ddb24eb411c3be 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1610,11 +1610,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) for (N = 1; N < argCnt - 1; N++) { opN = node->Op(N + 1); - idx = comp->gtNewIconNode(N); - BlockRange().InsertBefore(opN, idx); + // Place the insert as early as possible to avoid creating a lot of long lifetimes. 
+ GenTree* insertionPoint = LIR::LastNode(tmp1, opN); + idx = comp->gtNewIconNode(N); tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(opN, tmp1); + BlockRange().InsertAfter(insertionPoint, idx, tmp1); LowerNode(tmp1); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8372fd0959be10..e459b17daae6d6 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3210,13 +3210,16 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) size_t halfArgCnt = argCnt / 2; assert((halfArgCnt * 2) == argCnt); + GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt); + GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(), halfArgCnt, NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertAfter(node->Op(halfArgCnt), lo); + BlockRange().InsertAfter(loInsertionPoint, lo); + GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt); GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node->GetOperandArray(halfArgCnt), halfArgCnt, NI_Vector256_Create, simdBaseJitType, 32); - BlockRange().InsertAfter(node->Op(argCnt), hi); + BlockRange().InsertAfter(hiInsertionPoint, hi); assert(argCnt >= 7); node->ResetHWIntrinsicId(NI_Vector512_WithUpper, comp, lo, hi); @@ -3263,13 +3266,17 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) size_t halfArgCnt = argCnt / 2; assert((halfArgCnt * 2) == argCnt); + GenTree* loInsertionPoint = LIR::LastNode(node->GetOperandArray(), halfArgCnt); + GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertAfter(node->Op(halfArgCnt), lo); + BlockRange().InsertAfter(loInsertionPoint, lo); + + GenTree* hiInsertionPoint = LIR::LastNode(node->GetOperandArray(halfArgCnt), halfArgCnt); 
GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertAfter(node->Op(argCnt), hi); + BlockRange().InsertAfter(hiInsertionPoint, hi); assert(argCnt >= 3); node->ResetHWIntrinsicId(NI_Vector256_WithUpper, comp, lo, hi); @@ -3336,11 +3343,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) opN = node->Op(N + 1); idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertAfter(opN, idx); + // Place the insert as early as possible to avoid creating a lot of long lifetimes. + GenTree* insertionPoint = LIR::LastNode(tmp1, opN); tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, opN, idx, insIntrinsic, simdBaseJitType, simdSize); - BlockRange().InsertAfter(idx, tmp1); + BlockRange().InsertAfter(insertionPoint, idx, tmp1); LowerNode(tmp1); } @@ -3419,17 +3427,17 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[N], op[O], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(op[O], tmp1); + BlockRange().InsertAfter(LIR::LastNode(op[N], op[O]), tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[P], op[Q], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(op[Q], tmp2); + BlockRange().InsertAfter(LIR::LastNode(op[P], op[Q]), tmp2); LowerNode(tmp2); tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_USHORT, simdSize); - BlockRange().InsertAfter(tmp2, tmp3); + BlockRange().InsertAfter(LIR::LastNode(tmp1, tmp2), tmp3); LowerNode(tmp3); // This caches the result in index 0 through 3, depending on which @@ -3468,12 +3476,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); - BlockRange().InsertAfter(op[1], tmp1); + 
BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); - BlockRange().InsertAfter(op[3], tmp2); + BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); @@ -3565,11 +3573,13 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(tmp2); idx = comp->gtNewIconNode(N << 4, TYP_INT); - BlockRange().InsertAfter(tmp2, idx); + + // Place the insert as early as possible to avoid creating a lot of long lifetimes. + GenTree* insertionPoint = LIR::LastNode(tmp1, tmp2); tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(idx, tmp3); + BlockRange().InsertAfter(insertionPoint, idx, tmp3); insertedNodes[N] = tmp3; tmp1 = tmp3; @@ -3655,11 +3665,11 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE_UnpackLow, simdBaseJitType, simdSize); - BlockRange().InsertAfter(op[1], tmp1); + BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE_UnpackLow, simdBaseJitType, simdSize); - BlockRange().InsertAfter(op[3], tmp2); + BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2);