From 5420c7f318a1843b5a4bdc8c4b16ff4f11f03e8a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 28 Oct 2024 11:39:11 +0000 Subject: [PATCH 01/66] ARM64-SVE: Allow LCLs to be of type MASK --- src/coreclr/jit/compiler.cpp | 2 + src/coreclr/jit/compiler.h | 4 +- src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/morph.cpp | 209 +++++++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 27ed0df9a54238..cc7ecb9cd64abb 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4797,6 +4797,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals); + DoPhase(this, PHASE_LCL_MASKS, &Compiler::optLCLMasks); + // Do an early pass of liveness for forward sub and morph. This data is // valid until after morph. // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index be988a53b5767f..b69c686a4c6a81 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6870,6 +6870,7 @@ class Compiler AddCodeDsc* fgFindExcptnTarget(SpecialCodeKind kind, BasicBlock* fromBlock); bool fgUseThrowHelperBlocks(); void fgCreateThrowHelperBlockCode(AddCodeDsc* add); + void fgSequenceLocals(Statement* stmt); private: bool fgIsThrowHlpBlk(BasicBlock* block); @@ -6914,7 +6915,6 @@ class Compiler void fgMarkDemotedImplicitByRefArgs(); PhaseStatus fgMarkAddressExposedLocals(); - void fgSequenceLocals(Statement* stmt); bool fgExposeUnpropagatedLocals(bool propagatedAny, class LocalEqualsLocalAddrAssertions* assertions); void fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec); @@ -7304,6 +7304,7 @@ class Compiler public: PhaseStatus optOptimizeValnumCSEs(); + PhaseStatus optLCLMasks(); // some phases (eg hoisting) need to anticipate // what CSE will do @@ -7318,6 +7319,7 @@ class Compiler void optValnumCSE_Availability(); void 
optValnumCSE_Heuristic(CSE_HeuristicCommon* heuristic); GenTree* optExtractSideEffectsForCSE(GenTree* tree); + bool ConvertLCLMasks(Statement* stmt); bool optDoCSE; // True when we have found a duplicate CSE tree bool optValnumCSE_phase = false; // True when we are executing the optOptimizeValnumCSEs() phase diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 1e9b09a69abcd4..2f8396ddc0f1c2 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -45,6 +45,7 @@ CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH, "Update flow graph early pa CompPhaseNameMacro(PHASE_DFS_BLOCKS, "DFS blocks and remove dead code",false, -1, false) CompPhaseNameMacro(PHASE_DFS_BLOCKS2, "DFS blocks and remove dead code 2",false, -1, false) CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", false, -1, false) +CompPhaseNameMacro(PHASE_LCL_MASKS, "Local masks", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_LIVENESS, "Early liveness", false, -1, false) CompPhaseNameMacro(PHASE_PHYSICAL_PROMOTION, "Physical promotion", false, -1, false) CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", false, -1, false) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 6f76f6042ce5cf..4e027cea9f4ed6 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15503,3 +15503,212 @@ PhaseStatus Compiler::fgMorphArrayOps() return changed ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } + +//------------------------------------------------------------------------ +// RemoveLCLUseConvert: tree visitor to remove conversion to masks for uses of LCL +// +class RemoveLCLUseConvertVisitor final : public GenTreeVisitor +{ +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true + }; + + RemoveLCLUseConvertVisitor(Compiler* compiler, unsigned lclNum, Statement* stmt) + : GenTreeVisitor(compiler) + , removedConversion(false) + , lclNum(lclNum) + , stmt(stmt) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + + // RemoveLCLUseConvertVisitor use + // [000031] ----------- * HWINTRINSIC mask ubyte ConvertVectorToMask + // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll + // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 + + // RemoveLCLUseConvertVisitor user + // [000029] ---XG------ * HWINTRINSIC simd16 ubyte LoadVector + // [000031] ----------- +--* HWINTRINSIC mask ubyte + // ConvertVectorToMask [000030] ----------- | +--* HWINTRINSIC mask + // ubyte CreateTrueMaskAll [000026] ----------- | \--* LCL_VAR simd16 + // V06 tmp3 [000028] ----------- \--* LCL_VAR long V02 loc1 + + GenTree* const convertOp = *use; + + // Look for: + // user(ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) + if (convertOp->OperIsConvertVectorToMask()) + { + GenTree* lclOp = convertOp->AsHWIntrinsic()->Op(2); + if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) + { + if (m_compiler->verbose) + { + JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); + m_compiler->gtDispTree(*use); + JITDUMP("\nRemoveLCLUseConvertVisitor user\n"); + m_compiler->gtDispTree(user); + } + + // Find the location of convertOp in the user + int opNum = 1; + for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) + { + if (user->AsHWIntrinsic()->Op(opNum) == convertOp) + { + break; + } + } + assert(opNum <= 
user->AsHWIntrinsic()->GetOperandCount()); + + // Fix up the type of the lcl + lclOp->gtType = convertOp->gtType; + + // Remove the convert convertOp + convertOp->gtBashToNOP(); + user->AsHWIntrinsic()->Op(opNum) = lclOp; + m_compiler->fgSequenceLocals(stmt); + + if (m_compiler->verbose) + { + JITDUMP("\nAfter removal:\n"); + m_compiler->gtDispTree(user); + } + + removedConversion = true; + return fgWalkResult::WALK_ABORT; + } + } + + return fgWalkResult::WALK_CONTINUE; + } + + bool removedConversion; + +private: + unsigned lclNum; + Statement* stmt; +}; + +//------------------------------------------------------------------------ +// ConvertLCLMasks: Allow LCL in a statement to be of MASK type +// +// After import of hwintrinsics, all vector masks are converted to vectors before being +// stored to variables (either local or in memory). For correctness, all stores to memory +// must be converted to a vector as there is no way of knowing how that data will be used +// elsewhere. However, the scope of a LCL is the current method, therefore it free to be +// stored in whatever format is most optimal. +// +// If a local variable is created as a vector mask, then the general case is expected that it +// will be used as a mask throughout the code. This is the case that should be optimised for. +// +// Operation: +// Look for a LCL which where the input is converted from a MASK. Remove the conversion and +// updated the type to MASK. Find all uses of the LCL. For each use, update the type to MASK. +// If it is converted to a MASK, then remove the node. Otherwise, add a conversion from vector +// to mask. 
+// +// Arguments: +// stmt - Statement to check +// +// Returns: +// True if changes were made +// +bool Compiler::ConvertLCLMasks(Statement* stmt) +{ + // Look for: + // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) + + GenTree* tree = stmt->GetRootNode(); + + if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType)) || + (!tree->AsLclVar()->Data()->OperIsConvertMaskToVector())) + { + return false; + } + + JITDUMP("Found Local mask store with conversion\n"); + gtDispTree(tree); + + GenTreeHWIntrinsic* convertOp = tree->AsLclVar()->Data()->AsHWIntrinsic(); + unsigned const lclNum = tree->AsLclVarCommon()->GetLclNum(); + GenTree* maskOp = convertOp->Op(1); + + // Update the type of the STORELCL - including the lclvar. + tree->gtType = maskOp->gtType; + LclVarDsc* varDsc = lvaGetDesc(lclNum); + varDsc->lvType = maskOp->gtType; + + // Remove the convert from the tree. + convertOp->gtBashToNOP(); + tree->AsOp()->gtOp1 = maskOp; + fgSequenceLocals(stmt); + + JITDUMP("\nRemoved conversion\n"); + gtDispTree(tree); + + // Find all uses of the LCL. These could be anywhere in the current method. + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + if (lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == lclNum) && (lcl->gtType != TYP_MASK)) + { + JITDUMP("\nFound a use\n"); + gtDispTree(lcl); + + // Find the parent. If it is a ConvertVectorToMask then remove it. 
+ RemoveLCLUseConvertVisitor ev(this, lcl->GetLclNum(), stmt); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + + // TODO: If a ConvertVectorToMask was not found then insert a ConvertMaskToVector + if (!ev.removedConversion) + { + assert(false); + } + } + } + } + } + + return true; +} + +//------------------------------------------------------------------------ +// optLCLMasks: Allow locls to be of MASK type +// +// Returns: +// Suitable phase status +// +PhaseStatus Compiler::optLCLMasks() +{ + // TODO: Use a different config option +#ifdef DEBUG + if (optConfigDisableCSE()) + { + JITDUMP("Disabled by JitNoCSE\n"); + return PhaseStatus::MODIFIED_NOTHING; + } +#endif + + bool madeChanges = false; + + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + madeChanges |= ConvertLCLMasks(stmt); + } + } + + return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; +} From 8020826926dc1ec8093910061cb0158987ec867a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 29 Oct 2024 14:04:46 +0000 Subject: [PATCH 02/66] Trigger based on OptimizationDisabled --- src/coreclr/jit/morph.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4e027cea9f4ed6..669d0d22f5b792 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15684,21 +15684,18 @@ bool Compiler::ConvertLCLMasks(Statement* stmt) } //------------------------------------------------------------------------ -// optLCLMasks: Allow locls to be of MASK type +// optLCLMasks: Allow locals to be of MASK type // // Returns: // Suitable phase status // PhaseStatus Compiler::optLCLMasks() { - // TODO: Use a different config option -#ifdef DEBUG - if (optConfigDisableCSE()) + if (opts.OptimizationDisabled()) { - JITDUMP("Disabled by JitNoCSE\n"); + JITDUMP("Optimizations Disabled\n"); return PhaseStatus::MODIFIED_NOTHING; } -#endif 
bool madeChanges = false; From 44148b9aaf66f56d4563f4106e397dc935999e5e Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 29 Oct 2024 14:27:21 +0000 Subject: [PATCH 03/66] Add compConvertMaskToVectorUsed check --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/fginline.cpp | 1 + src/coreclr/jit/gentree.cpp | 1 + src/coreclr/jit/morph.cpp | 8 +++++++- 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b69c686a4c6a81..f091fa5176a667 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9961,6 +9961,7 @@ class Compiler bool compSwitchedToOptimized = false; // Codegen initially was Tier0 but jit switched to FullOpts bool compSwitchedToMinOpts = false; // Codegen initially was Tier1/FullOpts but jit switched to MinOpts bool compSuppressedZeroInit = false; // There are vars with lvSuppressedZeroInit set + bool compConvertMaskToVectorUsed = false; // Does the method have Convert Mask To Vector nodes. // NOTE: These values are only reliable after // the importing is completely finished. 
diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index a3858e664ec206..322001cd2829fc 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1627,6 +1627,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) compQmarkUsed |= InlineeCompiler->compQmarkUsed; compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout; compHasBackwardJump |= InlineeCompiler->compHasBackwardJump; + compConvertMaskToVectorUsed |= InlineeCompiler->compConvertMaskToVectorUsed; lvaGenericsContextInUse |= InlineeCompiler->lvaGenericsContextInUse; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 748f31087980d0..d312fcd2b05efd 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21419,6 +21419,7 @@ GenTree* Compiler::gtNewSimdCvtMaskToVectorNode(var_types type, { assert(varTypeIsMask(op1)); assert(varTypeIsSIMD(type)); + compConvertMaskToVectorUsed = true; #if defined(TARGET_XARCH) return gtNewSimdHWIntrinsicNode(type, op1, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 669d0d22f5b792..74bd21b178df0c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15693,7 +15693,13 @@ PhaseStatus Compiler::optLCLMasks() { if (opts.OptimizationDisabled()) { - JITDUMP("Optimizations Disabled\n"); + JITDUMP("Skipping. Optimizations Disabled\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + if (!compConvertMaskToVectorUsed) + { + JITDUMP("Skipping. 
There are no Convert Mask To Vector nodes\n"); return PhaseStatus::MODIFIED_NOTHING; } From 9800906462bd94bf74ce589b52086da64dd851be Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 30 Oct 2024 14:42:37 +0000 Subject: [PATCH 04/66] Initial version with hashtable --- src/coreclr/jit/compiler.h | 15 +- src/coreclr/jit/jithashtable.h | 2 +- src/coreclr/jit/morph.cpp | 506 ++++++++++++++++++++++++++++----- 3 files changed, 447 insertions(+), 76 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f091fa5176a667..373f0785eed08c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7319,7 +7319,20 @@ class Compiler void optValnumCSE_Availability(); void optValnumCSE_Heuristic(CSE_HeuristicCommon* heuristic); GenTree* optExtractSideEffectsForCSE(GenTree* tree); - bool ConvertLCLMasks(Statement* stmt); + + // struct LCLMasksData; + // { + // unsigned usesAsMask; + // unsigned usesAsVector; + // }; + + typedef JitHashTable, signed> LCLMasksTable; + + void findLCLStoreMask(Statement* stmt, LCLMasksTable *masksTable); + void removeLCLStoreMask(Statement* stmt, LCLMasksTable *masksTable); + void checkLCLVarMask(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable *masksTable); + void updateLCLVar(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable *masksTable); + bool optDoCSE; // True when we have found a duplicate CSE tree bool optValnumCSE_phase = false; // True when we are executing the optOptimizeValnumCSEs() phase diff --git a/src/coreclr/jit/jithashtable.h b/src/coreclr/jit/jithashtable.h index 3cf7fdcb98cda5..2c71fc50c324a6 100644 --- a/src/coreclr/jit/jithashtable.h +++ b/src/coreclr/jit/jithashtable.h @@ -309,7 +309,7 @@ class JitHashTable // Arguments: // k - the key // v - the value - // kind - Normal, we are not allowed to overwrite + // kind - None, we are not allowed to overwrite // Overwrite, we are allowed to overwrite // currently only used by CHK/DBG builds in an assert. 
// diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 74bd21b178df0c..8e0f736db985f4 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15504,10 +15504,243 @@ PhaseStatus Compiler::fgMorphArrayOps() return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } +// //------------------------------------------------------------------------ +// // RemoveLCLUseConvert: tree visitor to remove conversion to masks for uses of LCL +// // +// class RemoveLCLUseConvertVisitor final : public GenTreeVisitor +// { +// public: +// enum +// { +// DoPostOrder = true, +// UseExecutionOrder = true +// }; + +// RemoveLCLUseConvertVisitor(Compiler* compiler, unsigned lclNum, Statement* stmt) +// : GenTreeVisitor(compiler) +// , removedConversion(false) +// , lclNum(lclNum) +// , stmt(stmt) +// { +// } + +// Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) +// { + +// // RemoveLCLUseConvertVisitor use +// // [000031] ----------- * HWINTRINSIC mask ubyte +// ConvertVectorToMask +// // [000030] ----------- +--* HWINTRINSIC mask ubyte +// CreateTrueMaskAll +// // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 + +// // RemoveLCLUseConvertVisitor user +// // [000029] ---XG------ * HWINTRINSIC simd16 ubyte LoadVector +// // [000031] ----------- +--* HWINTRINSIC mask ubyte +// // ConvertVectorToMask [000030] ----------- | +--* HWINTRINSIC mask +// // ubyte CreateTrueMaskAll [000026] ----------- | \--* LCL_VAR +// simd16 +// // V06 tmp3 [000028] ----------- \--* LCL_VAR long V02 loc1 + +// GenTree* const convertOp = *use; + +// // Look for: +// // user(ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) +// if (convertOp->OperIsConvertVectorToMask()) +// { +// GenTree* lclOp = convertOp->AsHWIntrinsic()->Op(2); +// if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) +// { +// if (m_compiler->verbose) +// { +// JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); +// 
m_compiler->gtDispTree(*use); +// JITDUMP("\nRemoveLCLUseConvertVisitor user\n"); +// m_compiler->gtDispTree(user); +// } + +// // Find the location of convertOp in the user +// int opNum = 1; +// for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) +// { +// if (user->AsHWIntrinsic()->Op(opNum) == convertOp) +// { +// break; +// } +// } +// assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); + +// // Fix up the type of the lcl +// lclOp->gtType = convertOp->gtType; + +// // Remove the convert convertOp +// convertOp->gtBashToNOP(); +// user->AsHWIntrinsic()->Op(opNum) = lclOp; +// m_compiler->fgSequenceLocals(stmt); + +// if (m_compiler->verbose) +// { +// JITDUMP("\nAfter removal:\n"); +// m_compiler->gtDispTree(user); +// } + +// removedConversion = true; +// return fgWalkResult::WALK_ABORT; +// } +// } + +// return fgWalkResult::WALK_CONTINUE; +// } + +// bool removedConversion; + +// private: +// unsigned lclNum; +// Statement* stmt; +// }; + +//------------------------------------------------------------------------ +// ConvertLCLMasks: Allow LCL in a statement to be of MASK type +// +// After import of hwintrinsics, all vector masks are converted to vectors before being +// stored to variables (either local or in memory). For correctness, all stores to memory +// must be converted to a vector as there is no way of knowing how that data will be used +// elsewhere. However, the scope of a LCL is the current method, therefore it free to be +// stored in whatever format is most optimal. +// +// If a local variable is created as a vector mask, then the general case is expected that it +// will be used as a mask throughout the code. This is the case that should be optimised for. +// +// Operation: +// Look for a LCL which where the input is converted from a MASK. Remove the conversion and +// updated the type to MASK. Find all uses of the LCL. For each use, update the type to MASK. +// If it is converted to a MASK, then remove the node. 
Otherwise, add a conversion from vector +// to mask. +// +// Arguments: +// stmt - Statement to check +// +// Returns: +// True if changes were made +// +// bool Compiler::ConvertLCLMasks(Statement* stmt) +// { +// // Look for: +// // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) + +// GenTree* tree = stmt->GetRootNode(); + +// if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType)) || +// (!tree->AsLclVar()->Data()->OperIsConvertMaskToVector())) +// { +// return false; +// } + +// JITDUMP("Found Local mask store with conversion\n"); +// gtDispTree(tree); + +// GenTreeHWIntrinsic* convertOp = tree->AsLclVar()->Data()->AsHWIntrinsic(); +// unsigned const lclNum = tree->AsLclVarCommon()->GetLclNum(); +// GenTree* maskOp = convertOp->Op(1); + +// // Update the type of the STORELCL - including the lclvar. +// tree->gtType = maskOp->gtType; +// LclVarDsc* varDsc = lvaGetDesc(lclNum); +// varDsc->lvType = maskOp->gtType; + +// // Remove the convert from the tree. +// convertOp->gtBashToNOP(); +// tree->AsOp()->gtOp1 = maskOp; +// fgSequenceLocals(stmt); + +// JITDUMP("\nRemoved conversion\n"); +// gtDispTree(tree); + +// // Find all uses of the LCL. These could be anywhere in the current method. +// for (BasicBlock* block : Blocks()) +// { +// for (Statement* const stmt : block->Statements()) +// { +// for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) +// { +// if (lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == lclNum) && (lcl->gtType != TYP_MASK)) +// { +// JITDUMP("\nFound a use\n"); +// gtDispTree(lcl); + +// // Find the parent. If it is a ConvertVectorToMask then remove it. 
+// RemoveLCLUseConvertVisitor ev(this, lcl->GetLclNum(), stmt); +// GenTree* root = stmt->GetRootNode(); +// ev.WalkTree(&root, nullptr); + +// // TODO: If a ConvertVectorToMask was not found then insert a ConvertMaskToVector +// if (!ev.removedConversion) +// { +// assert(false); +// } +// } +// } +// } +// } + +// return true; +// } + +//------------------------------------------------------------------------ +// RemoveLCLUseConvert: tree visitor to remove conversion to masks for uses of LCL var +// +class CheckLCLUseIsConvertVisitor final : public GenTreeVisitor +{ +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true + }; + + CheckLCLUseIsConvertVisitor(Compiler* compiler, unsigned lclNum) + : GenTreeVisitor(compiler) + , foundConversion(false) + , lclNum(lclNum) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + // CheckLCLUseIsConvertVisitor use + // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 + + // CheckLCLUseIsConvertVisitor user + // [000031] ----------- * HWINTRINSIC mask ubyte ConvertVectorToMask + // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll + // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 + + GenTree* const lclOp = *use; + + if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) + { + if (user->OperIsConvertVectorToMask()) + { + JITDUMP("\nLCL is used by ConvertVectorToMask:\n"); + m_compiler->gtDispTree(user); + foundConversion = true; + } + } + + return fgWalkResult::WALK_CONTINUE; + } + + bool foundConversion; + +private: + unsigned lclNum; + Statement* stmt; +}; + //------------------------------------------------------------------------ // RemoveLCLUseConvert: tree visitor to remove conversion to masks for uses of LCL // -class RemoveLCLUseConvertVisitor final : public GenTreeVisitor +class UpdateLCLUseToMaskVisitor final : public GenTreeVisitor { public: enum @@ -15516,9 +15749,8 @@ class RemoveLCLUseConvertVisitor final : public 
GenTreeVisitor(compiler) - , removedConversion(false) + UpdateLCLUseToMaskVisitor(Compiler* compiler, unsigned lclNum, Statement* stmt) + : GenTreeVisitor(compiler) , lclNum(lclNum) , stmt(stmt) { @@ -15527,27 +15759,29 @@ class RemoveLCLUseConvertVisitor final : public GenTreeVisitorOperIsConvertVectorToMask()) + if ((*use)->OperIsConvertVectorToMask()) { - GenTree* lclOp = convertOp->AsHWIntrinsic()->Op(2); + GenTree* const convertOp = *use; + GenTree* lclOp = convertOp->AsHWIntrinsic()->Op(2); + if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) { + assert(lclOp->gtType != TYP_MASK); + if (m_compiler->verbose) { JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); @@ -15581,46 +15815,39 @@ class RemoveLCLUseConvertVisitor final : public GenTreeVisitorgtDispTree(user); } - removedConversion = true; return fgWalkResult::WALK_ABORT; } } + // Look for: + // user(LCL_VAR(lclNum))) + else if ((*use)->OperIs(GT_LCL_VAR) && ((*use)->gtType != TYP_MASK) && + ((*use)->AsLclVarCommon()->GetLclNum() == lclNum) && !user->OperIsConvertVectorToMask()) + { + if (m_compiler->verbose) + { + JITDUMP("\nRemoveLCLUseConvertVisitor ELSE use\n"); + m_compiler->gtDispTree(*use); + JITDUMP("\nRemoveLCLUseConvertVisitor ELSE user\n"); + m_compiler->gtDispTree(user); + } + + // TODO: Fill this in when I hit it + assert(false); + + return fgWalkResult::WALK_ABORT; + } + return fgWalkResult::WALK_CONTINUE; } - bool removedConversion; private: unsigned lclNum; Statement* stmt; }; -//------------------------------------------------------------------------ -// ConvertLCLMasks: Allow LCL in a statement to be of MASK type -// -// After import of hwintrinsics, all vector masks are converted to vectors before being -// stored to variables (either local or in memory). For correctness, all stores to memory -// must be converted to a vector as there is no way of knowing how that data will be used -// elsewhere. 
However, the scope of a LCL is the current method, therefore it free to be -// stored in whatever format is most optimal. -// -// If a local variable is created as a vector mask, then the general case is expected that it -// will be used as a mask throughout the code. This is the case that should be optimised for. -// -// Operation: -// Look for a LCL which where the input is converted from a MASK. Remove the conversion and -// updated the type to MASK. Find all uses of the LCL. For each use, update the type to MASK. -// If it is converted to a MASK, then remove the node. Otherwise, add a conversion from vector -// to mask. -// -// Arguments: -// stmt - Statement to check -// -// Returns: -// True if changes were made -// -bool Compiler::ConvertLCLMasks(Statement* stmt) +void Compiler::findLCLStoreMask(Statement* stmt, LCLMasksTable* masksTable) { // Look for: // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) @@ -15630,57 +15857,146 @@ bool Compiler::ConvertLCLMasks(Statement* stmt) if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType)) || (!tree->AsLclVar()->Data()->OperIsConvertMaskToVector())) { - return false; + return; } JITDUMP("Found Local mask store with conversion\n"); gtDispTree(tree); - GenTreeHWIntrinsic* convertOp = tree->AsLclVar()->Data()->AsHWIntrinsic(); - unsigned const lclNum = tree->AsLclVarCommon()->GetLclNum(); + // Add to the table. + masksTable->Set(tree->AsLclVar(), 0); +} + +void Compiler::removeLCLStoreMask(Statement* stmt, LCLMasksTable* masksTable) +{ + // Look for: + // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) + + GenTree* tree = stmt->GetRootNode(); + + if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType)) || + (!tree->AsLclVar()->Data()->OperIsConvertMaskToVector())) + { + return; + } + + GenTreeLclVar* lcl = tree->AsLclVar(); + + // Lookup the lcl store in the table and check the weight. 
+ + signed maskedUsesWeight = 0; + bool found = masksTable->Lookup(lcl, &maskedUsesWeight); + assert(found); + + if (maskedUsesWeight <= 0) + { + JITDUMP("\nNot enough uses of V%d as a mask. Weight=%d\n", lcl->GetLclNum(), maskedUsesWeight); + return; + } + + // Remove the ConvertMaskToVector + + GenTreeHWIntrinsic* convertOp = lcl->Data()->AsHWIntrinsic(); + unsigned const lclNum = lcl->GetLclNum(); GenTree* maskOp = convertOp->Op(1); // Update the type of the STORELCL - including the lclvar. - tree->gtType = maskOp->gtType; + lcl->gtType = maskOp->gtType; LclVarDsc* varDsc = lvaGetDesc(lclNum); varDsc->lvType = maskOp->gtType; // Remove the convert from the tree. convertOp->gtBashToNOP(); - tree->AsOp()->gtOp1 = maskOp; + lcl->gtOp1 = maskOp; fgSequenceLocals(stmt); - JITDUMP("\nRemoved conversion\n"); - gtDispTree(tree); + JITDUMP("Updated V%d to store as mask\n", lcl->GetLclNum()); + gtDispTree(lcl); +} - // Find all uses of the LCL. These could be anywhere in the current method. - for (BasicBlock* block : Blocks()) +// For the given lcl var, check if it converted to a mask when used. Find the corresponding lcl store in the +// mask table and update. 
+void Compiler::checkLCLVarMask(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable* masksTable) +{ + if (!lcl->OperIs(GT_LCL_VAR)) { - for (Statement* const stmt : block->Statements()) + return; + } + + // Find the parent of the lcl var + CheckLCLUseIsConvertVisitor ev(this, lcl->GetLclNum()); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + + // Find the corresponding lcl store(s) in masksTable + // (Each lcl may store more than once) + for (LCLMasksTable::Node* const iter : LCLMasksTable::KeyValueIteration(masksTable)) + { + GenTreeLclVar* lclStore = iter->GetKey(); + + if (lcl->GetLclNum() != lclStore->GetLclNum()) { - for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) - { - if (lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == lclNum) && (lcl->gtType != TYP_MASK)) - { - JITDUMP("\nFound a use\n"); - gtDispTree(lcl); + continue; + } - // Find the parent. If it is a ConvertVectorToMask then remove it. - RemoveLCLUseConvertVisitor ev(this, lcl->GetLclNum(), stmt); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); + JITDUMP("\nFound a use of lcl store %d\n", lcl->GetLclNum()); + JITDUMP("\nlcl store:\n"); + gtDispTree(lclStore); + JITDUMP("\nlcl var:\n"); + gtDispTree(lcl); - // TODO: If a ConvertVectorToMask was not found then insert a ConvertMaskToVector - if (!ev.removedConversion) - { - assert(false); - } - } - } + // Update value in masksTable + + signed maskedUsesWeight = 0; + bool foundKey = masksTable->Lookup(lclStore, &maskedUsesWeight); + assert(foundKey); + + if (ev.foundConversion) + { + maskedUsesWeight++; + } + else + { + maskedUsesWeight--; } + + masksTable->Set(lclStore, maskedUsesWeight, LCLMasksTable::Overwrite); + + JITDUMP("\nTable weight for V%d updated to %d\n", lcl->GetLclNum(), maskedUsesWeight); } +} - return true; +void Compiler::updateLCLVar(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable* masksTable) +{ + if (!lcl->OperIs(GT_LCL_VAR)) + { + return; + } + + // Find 
the corresponding lcl store(s) in masksTable + // (Each lcl may store more than once) + for (LCLMasksTable::Node* const iter : LCLMasksTable::KeyValueIteration(masksTable)) + { + GenTreeLclVar* lclStore = iter->GetKey(); + + if (lcl->GetLclNum() != lclStore->GetLclNum()) + { + continue; + } + + signed maskedUsesWeight = iter->GetValue(); + + if (maskedUsesWeight <= 0) + { + JITDUMP("\nNot enough uses of V%d as a mask.\n", lcl->GetLclNum()); + return; + } + + // Remove or add convert.... + UpdateLCLUseToMaskVisitor ev(this, lcl->GetLclNum(), stmt); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + } } //------------------------------------------------------------------------ @@ -15703,13 +16019,55 @@ PhaseStatus Compiler::optLCLMasks() return PhaseStatus::MODIFIED_NOTHING; } + LCLMasksTable masksTable = LCLMasksTable(getAllocator()); + + // Find every local store that is first converted from a mask and add them to masksTable. + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + findLCLStoreMask(stmt, &masksTable); + } + } + + if (masksTable.GetCount() == 0) + { + JITDUMP("Done. No local stores of masks found\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + // Find the uses of every local and check if it is converted to a mask, updating the keys in the masksTable. + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + checkLCLVarMask(lcl, stmt, &masksTable); + } + } + } + bool madeChanges = false; + // For each local store, potentially remove the convert from mask. + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + removeLCLStoreMask(stmt, &masksTable); + } + } + + // Find the uses of every local and potentially convert to mask. 
for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) { - madeChanges |= ConvertLCLMasks(stmt); + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + updateLCLVar(lcl, stmt, &masksTable); + } } } From 96d859032d0a91f1971ebc967353a8d390ec3c7a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 Nov 2024 16:17:31 +0000 Subject: [PATCH 05/66] Use double weighting method --- src/coreclr/jit/compiler.h | 27 +- src/coreclr/jit/morph.cpp | 496 +++++++++++++------------------------ 2 files changed, 188 insertions(+), 335 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 373f0785eed08c..0af08c4fa021af 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7320,19 +7320,26 @@ class Compiler void optValnumCSE_Heuristic(CSE_HeuristicCommon* heuristic); GenTree* optExtractSideEffectsForCSE(GenTree* tree); - // struct LCLMasksData; - // { - // unsigned usesAsMask; - // unsigned usesAsVector; - // }; + struct LCLMasksWeight + { + // For a given var, number of Lcl Stores with conversion from mask minus number of Lcl Stores without conversion from mask. + signed storeWeight; + + // For a given var, number of Lcl var with conversion to mask minus number of Lcl vars without conversion to mask. 
+ signed varWeight; - typedef JitHashTable, signed> LCLMasksTable; + bool MaskConversionsDominate() + { + return ((storeWeight > 0) && (varWeight > 0)); + } + }; - void findLCLStoreMask(Statement* stmt, LCLMasksTable *masksTable); - void removeLCLStoreMask(Statement* stmt, LCLMasksTable *masksTable); - void checkLCLVarMask(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable *masksTable); - void updateLCLVar(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable *masksTable); + typedef JitHashTable, LCLMasksWeight> LCLMasksWeightTable; + bool LCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable *weightsTable); + void LCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); + bool LCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable); + void LCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); bool optDoCSE; // True when we have found a duplicate CSE tree bool optValnumCSE_phase = false; // True when we are executing the optOptimizeValnumCSEs() phase diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8e0f736db985f4..370998eb376b32 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15504,192 +15504,10 @@ PhaseStatus Compiler::fgMorphArrayOps() return changed ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } -// //------------------------------------------------------------------------ -// // RemoveLCLUseConvert: tree visitor to remove conversion to masks for uses of LCL -// // -// class RemoveLCLUseConvertVisitor final : public GenTreeVisitor -// { -// public: -// enum -// { -// DoPostOrder = true, -// UseExecutionOrder = true -// }; - -// RemoveLCLUseConvertVisitor(Compiler* compiler, unsigned lclNum, Statement* stmt) -// : GenTreeVisitor(compiler) -// , removedConversion(false) -// , lclNum(lclNum) -// , stmt(stmt) -// { -// } - -// Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) -// { - -// // RemoveLCLUseConvertVisitor use -// // [000031] ----------- * HWINTRINSIC mask ubyte -// ConvertVectorToMask -// // [000030] ----------- +--* HWINTRINSIC mask ubyte -// CreateTrueMaskAll -// // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 - -// // RemoveLCLUseConvertVisitor user -// // [000029] ---XG------ * HWINTRINSIC simd16 ubyte LoadVector -// // [000031] ----------- +--* HWINTRINSIC mask ubyte -// // ConvertVectorToMask [000030] ----------- | +--* HWINTRINSIC mask -// // ubyte CreateTrueMaskAll [000026] ----------- | \--* LCL_VAR -// simd16 -// // V06 tmp3 [000028] ----------- \--* LCL_VAR long V02 loc1 - -// GenTree* const convertOp = *use; - -// // Look for: -// // user(ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) -// if (convertOp->OperIsConvertVectorToMask()) -// { -// GenTree* lclOp = convertOp->AsHWIntrinsic()->Op(2); -// if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) -// { -// if (m_compiler->verbose) -// { -// JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); -// m_compiler->gtDispTree(*use); -// JITDUMP("\nRemoveLCLUseConvertVisitor user\n"); -// m_compiler->gtDispTree(user); -// } - -// // Find the location of convertOp in the user -// int opNum = 1; -// for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) -// { 
-// if (user->AsHWIntrinsic()->Op(opNum) == convertOp) -// { -// break; -// } -// } -// assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - -// // Fix up the type of the lcl -// lclOp->gtType = convertOp->gtType; - -// // Remove the convert convertOp -// convertOp->gtBashToNOP(); -// user->AsHWIntrinsic()->Op(opNum) = lclOp; -// m_compiler->fgSequenceLocals(stmt); - -// if (m_compiler->verbose) -// { -// JITDUMP("\nAfter removal:\n"); -// m_compiler->gtDispTree(user); -// } - -// removedConversion = true; -// return fgWalkResult::WALK_ABORT; -// } -// } - -// return fgWalkResult::WALK_CONTINUE; -// } - -// bool removedConversion; - -// private: -// unsigned lclNum; -// Statement* stmt; -// }; - //------------------------------------------------------------------------ -// ConvertLCLMasks: Allow LCL in a statement to be of MASK type -// -// After import of hwintrinsics, all vector masks are converted to vectors before being -// stored to variables (either local or in memory). For correctness, all stores to memory -// must be converted to a vector as there is no way of knowing how that data will be used -// elsewhere. However, the scope of a LCL is the current method, therefore it free to be -// stored in whatever format is most optimal. -// -// If a local variable is created as a vector mask, then the general case is expected that it -// will be used as a mask throughout the code. This is the case that should be optimised for. -// -// Operation: -// Look for a LCL which where the input is converted from a MASK. Remove the conversion and -// updated the type to MASK. Find all uses of the LCL. For each use, update the type to MASK. -// If it is converted to a MASK, then remove the node. Otherwise, add a conversion from vector -// to mask. 
-// -// Arguments: -// stmt - Statement to check -// -// Returns: -// True if changes were made -// -// bool Compiler::ConvertLCLMasks(Statement* stmt) -// { -// // Look for: -// // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) - -// GenTree* tree = stmt->GetRootNode(); - -// if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType)) || -// (!tree->AsLclVar()->Data()->OperIsConvertMaskToVector())) -// { -// return false; -// } - -// JITDUMP("Found Local mask store with conversion\n"); -// gtDispTree(tree); - -// GenTreeHWIntrinsic* convertOp = tree->AsLclVar()->Data()->AsHWIntrinsic(); -// unsigned const lclNum = tree->AsLclVarCommon()->GetLclNum(); -// GenTree* maskOp = convertOp->Op(1); - -// // Update the type of the STORELCL - including the lclvar. -// tree->gtType = maskOp->gtType; -// LclVarDsc* varDsc = lvaGetDesc(lclNum); -// varDsc->lvType = maskOp->gtType; - -// // Remove the convert from the tree. -// convertOp->gtBashToNOP(); -// tree->AsOp()->gtOp1 = maskOp; -// fgSequenceLocals(stmt); - -// JITDUMP("\nRemoved conversion\n"); -// gtDispTree(tree); - -// // Find all uses of the LCL. These could be anywhere in the current method. -// for (BasicBlock* block : Blocks()) -// { -// for (Statement* const stmt : block->Statements()) -// { -// for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) -// { -// if (lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == lclNum) && (lcl->gtType != TYP_MASK)) -// { -// JITDUMP("\nFound a use\n"); -// gtDispTree(lcl); - -// // Find the parent. If it is a ConvertVectorToMask then remove it. 
-// RemoveLCLUseConvertVisitor ev(this, lcl->GetLclNum(), stmt); -// GenTree* root = stmt->GetRootNode(); -// ev.WalkTree(&root, nullptr); - -// // TODO: If a ConvertVectorToMask was not found then insert a ConvertMaskToVector -// if (!ev.removedConversion) -// { -// assert(false); -// } -// } -// } -// } -// } - -// return true; -// } +// LCLMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask -//------------------------------------------------------------------------ -// RemoveLCLUseConvert: tree visitor to remove conversion to masks for uses of LCL var -// -class CheckLCLUseIsConvertVisitor final : public GenTreeVisitor +class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitor { public: enum @@ -15698,8 +15516,8 @@ class CheckLCLUseIsConvertVisitor final : public GenTreeVisitor(compiler) + LCLMasksCheckLCLVarVisitor(Compiler* compiler, unsigned lclNum) + : GenTreeVisitor(compiler) , foundConversion(false) , lclNum(lclNum) { @@ -15707,24 +15525,20 @@ class CheckLCLUseIsConvertVisitor final : public GenTreeVisitorOperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) + if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum) && + user->OperIsConvertVectorToMask()) { - if (user->OperIsConvertVectorToMask()) - { - JITDUMP("\nLCL is used by ConvertVectorToMask:\n"); - m_compiler->gtDispTree(user); - foundConversion = true; - } + foundConversion = true; } return fgWalkResult::WALK_CONTINUE; @@ -15733,14 +15547,13 @@ class CheckLCLUseIsConvertVisitor final : public GenTreeVisitor +class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitor { public: enum @@ -15749,8 +15562,8 @@ class UpdateLCLUseToMaskVisitor final : public GenTreeVisitor(compiler) + LCLMasksUpdateLCLVarVisitor(Compiler* compiler, unsigned lclNum, Statement* stmt) + : GenTreeVisitor(compiler) , lclNum(lclNum) , stmt(stmt) { @@ -15759,12 +15572,12 @@ class UpdateLCLUseToMaskVisitor final : public GenTreeVisitorgtType 
!= TYP_MASK); - if (m_compiler->verbose) - { - JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); - m_compiler->gtDispTree(*use); - JITDUMP("\nRemoveLCLUseConvertVisitor user\n"); - m_compiler->gtDispTree(user); - } + // if (m_compiler->verbose) + // { + // JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); + // m_compiler->gtDispTree(*use); + // JITDUMP("\nRemoveLCLUseConvertVisitor user\n"); + // m_compiler->gtDispTree(user); + // } // Find the location of convertOp in the user int opNum = 1; @@ -15809,11 +15622,11 @@ class UpdateLCLUseToMaskVisitor final : public GenTreeVisitorAsHWIntrinsic()->Op(opNum) = lclOp; m_compiler->fgSequenceLocals(stmt); - if (m_compiler->verbose) - { - JITDUMP("\nAfter removal:\n"); - m_compiler->gtDispTree(user); - } + // if (m_compiler->verbose) + // { + // JITDUMP("\nAfter removal:\n"); + // m_compiler->gtDispTree(user); + // } return fgWalkResult::WALK_ABORT; } @@ -15847,156 +15660,180 @@ class UpdateLCLUseToMaskVisitor final : public GenTreeVisitorGetRootNode(); - if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType)) || - (!tree->AsLclVar()->Data()->OperIsConvertMaskToVector())) + if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) { - return; + return false; } - JITDUMP("Found Local mask store with conversion\n"); - gtDispTree(tree); + GenTreeLclVar* lclStore = tree->AsLclVar(); - // Add to the table. 
- masksTable->Set(tree->AsLclVar(), 0); -} - -void Compiler::removeLCLStoreMask(Statement* stmt, LCLMasksTable* masksTable) -{ - // Look for: - // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); - GenTree* tree = stmt->GetRootNode(); - - if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType)) || - (!tree->AsLclVar()->Data()->OperIsConvertMaskToVector())) + // Check if the store is converted from mask + bool isConverted = lclStore->Data()->OperIsConvertMaskToVector(); + if (isConverted) { - return; + weight.storeWeight++; + JITDUMP("Local Store V%02d at [%06u] is converted from mask. Incrementing store weight to %d\n", + lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); + } + else + { + weight.storeWeight--; + JITDUMP("Local Store V%02d at [%06u] has no conversion. Decrementing store weight to %d\n", + lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); } - GenTreeLclVar* lcl = tree->AsLclVar(); - - // Lookup the lcl store in the table and check the weight. + // Update the table. + weightsTable->Set(lclStore->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); - signed maskedUsesWeight = 0; - bool found = masksTable->Lookup(lcl, &maskedUsesWeight); - assert(found); + return isConverted; +} - if (maskedUsesWeight <= 0) +// For the given lcl var, update the var weights +void Compiler::LCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, + Statement* const stmt, + LCLMasksWeightTable* weightsTable) +{ + if (!lclVar->OperIs(GT_LCL_VAR)) { - JITDUMP("\nNot enough uses of V%d as a mask. 
Weight=%d\n", lcl->GetLclNum(), maskedUsesWeight); return; } - // Remove the ConvertMaskToVector + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); - GenTreeHWIntrinsic* convertOp = lcl->Data()->AsHWIntrinsic(); - unsigned const lclNum = lcl->GetLclNum(); - GenTree* maskOp = convertOp->Op(1); + // If there no entry, then the var does not have a local store. + if (!found) + { + JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); + return; + } - // Update the type of the STORELCL - including the lclvar. - lcl->gtType = maskOp->gtType; - LclVarDsc* varDsc = lvaGetDesc(lclNum); - varDsc->lvType = maskOp->gtType; + // Find the parent of the lcl var + LCLMasksCheckLCLVarVisitor ev(this, lclVar->GetLclNum()); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); - // Remove the convert from the tree. - convertOp->gtBashToNOP(); - lcl->gtOp1 = maskOp; - fgSequenceLocals(stmt); + if (ev.foundConversion) + { + weight.varWeight++; + JITDUMP("Local Var V%02d at [%06u] is converted to mask. Incrementing var weight to %d\n", lclVar->GetLclNum(), + dspTreeID(lclVar), weight.varWeight); + } + else + { + weight.varWeight--; + JITDUMP("Local Var V%02d at [%06u] has no conversion. Decrementing var weight to %d\n", lclVar->GetLclNum(), + dspTreeID(lclVar), weight.varWeight); + } - JITDUMP("Updated V%d to store as mask\n", lcl->GetLclNum()); - gtDispTree(lcl); + // Update the table. + weightsTable->Set(lclVar->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); } -// For the given lcl var, check if it converted to a mask when used. Find the corresponding lcl store in the -// mask table and update. 
-void Compiler::checkLCLVarMask(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable* masksTable) +bool Compiler::LCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable) { - if (!lcl->OperIs(GT_LCL_VAR)) - { - return; - } + // Look for: + // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) - // Find the parent of the lcl var - CheckLCLUseIsConvertVisitor ev(this, lcl->GetLclNum()); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); + GenTree* tree = stmt->GetRootNode(); - // Find the corresponding lcl store(s) in masksTable - // (Each lcl may store more than once) - for (LCLMasksTable::Node* const iter : LCLMasksTable::KeyValueIteration(masksTable)) + if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) { - GenTreeLclVar* lclStore = iter->GetKey(); + return false; + } - if (lcl->GetLclNum() != lclStore->GetLclNum()) - { - continue; - } + GenTreeLclVar* lclStore = tree->AsLclVar(); - JITDUMP("\nFound a use of lcl store %d\n", lcl->GetLclNum()); - JITDUMP("\nlcl store:\n"); - gtDispTree(lclStore); - JITDUMP("\nlcl var:\n"); - gtDispTree(lcl); + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); + assert(found); + + if (!weight.MaskConversionsDominate()) + { + JITDUMP("Local Store V%02d at [%06u] will not be converted. Weighting {%d, %d}\n", lclStore->GetLclNum(), + dspTreeID(lclStore), weight.storeWeight, weight.varWeight); + return false; + } - // Update value in masksTable + JITDUMP("Local Store V%02d at [%06u] will be converted. 
Weighting {%d, %d}\n", lclStore->GetLclNum(), + dspTreeID(lclStore), weight.storeWeight, weight.varWeight); - signed maskedUsesWeight = 0; - bool foundKey = masksTable->Lookup(lclStore, &maskedUsesWeight); - assert(foundKey); + if (lclStore->Data()->OperIsConvertMaskToVector()) + { + // Remove the ConvertMaskToVector - if (ev.foundConversion) - { - maskedUsesWeight++; - } - else - { - maskedUsesWeight--; - } + GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); + GenTree* maskOp = convertOp->Op(1); - masksTable->Set(lclStore, maskedUsesWeight, LCLMasksTable::Overwrite); + // Update the type of the STORELCL - including the lclvar. + lclStore->gtType = maskOp->gtType; + LclVarDsc* varDsc = lvaGetDesc(lclStore->GetLclNum()); + varDsc->lvType = maskOp->gtType; - JITDUMP("\nTable weight for V%d updated to %d\n", lcl->GetLclNum(), maskedUsesWeight); + // Remove the convert from the tree. + convertOp->gtBashToNOP(); + lclStore->gtOp1 = maskOp; + fgSequenceLocals(stmt); } + else + { + // TODO: Fill out. Need to add a convertVectorToMask + gtDispTree(lclStore); + assert(false); + } + + JITDUMP("Updated V%02d to store as mask\n", lclStore->GetLclNum()); + gtDispTree(lclStore); + return true; } -void Compiler::updateLCLVar(GenTreeLclVarCommon* lcl, Statement* const stmt, LCLMasksTable* masksTable) +void Compiler::LCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, + Statement* const stmt, + LCLMasksWeightTable* weightsTable) { - if (!lcl->OperIs(GT_LCL_VAR)) + if (!lclVar->OperIs(GT_LCL_VAR)) { return; } - // Find the corresponding lcl store(s) in masksTable - // (Each lcl may store more than once) - for (LCLMasksTable::Node* const iter : LCLMasksTable::KeyValueIteration(masksTable)) + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); + + // If there no entry, then the var does not have a local store. 
+ if (!found) { - GenTreeLclVar* lclStore = iter->GetKey(); + JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); + return; + } - if (lcl->GetLclNum() != lclStore->GetLclNum()) - { - continue; - } + if (!weight.MaskConversionsDominate()) + { + JITDUMP("Local Var V%02d at [%06u] will not be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), + dspTreeID(lclVar), weight.storeWeight, weight.varWeight); + return; + } - signed maskedUsesWeight = iter->GetValue(); + JITDUMP("Local Var V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), dspTreeID(lclVar), + weight.storeWeight, weight.varWeight); - if (maskedUsesWeight <= 0) - { - JITDUMP("\nNot enough uses of V%d as a mask.\n", lcl->GetLclNum()); - return; - } + // Remove or add convert.... + LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); - // Remove or add convert.... - UpdateLCLUseToMaskVisitor ev(this, lcl->GetLclNum(), stmt); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - } + JITDUMP("Updated V%02d to be a mask\n", lclVar->GetLclNum()); + gtDispTree(lclVar); } //------------------------------------------------------------------------ @@ -16019,57 +15856,66 @@ PhaseStatus Compiler::optLCLMasks() return PhaseStatus::MODIFIED_NOTHING; } - LCLMasksTable masksTable = LCLMasksTable(getAllocator()); + LCLMasksWeightTable weightsTable = LCLMasksWeightTable(getAllocator()); // Find every local store that is first converted from a mask and add them to masksTable. + bool foundConvertingStore = false; + JITDUMP("\n"); for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) { - findLCLStoreMask(stmt, &masksTable); + foundConvertingStore |= LCLMasksCheckLCLStore(stmt, &weightsTable); } } - if (masksTable.GetCount() == 0) + if (!foundConvertingStore) { JITDUMP("Done. 
No local stores of masks found\n"); return PhaseStatus::MODIFIED_NOTHING; } // Find the uses of every local and check if it is converted to a mask, updating the keys in the masksTable. + JITDUMP("\n"); for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - checkLCLVarMask(lcl, stmt, &masksTable); + LCLMasksCheckLCLVar(lcl, stmt, &weightsTable); } } } + // For each local store, potentially add/remove a conversion. bool madeChanges = false; - - // For each local store, potentially remove the convert from mask. + JITDUMP("\n"); for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) { - removeLCLStoreMask(stmt, &masksTable); + madeChanges |= LCLMasksUpdateLCLStore(stmt, &weightsTable); } } - // Find the uses of every local and potentially convert to mask. + if (!madeChanges) + { + JITDUMP("Done. No local stores converted\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + // For each Local Var, potentially add/remove a conversion. for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - updateLCLVar(lcl, stmt, &masksTable); + LCLMasksUpdateLCLVar(lcl, stmt, &weightsTable); } } } - return madeChanges ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; + return PhaseStatus::MODIFIED_EVERYTHING; } From 9436497ee4341f66a766347c58210573ad07c045 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 4 Nov 2024 11:39:50 +0000 Subject: [PATCH 06/66] Move to lclmorph --- src/coreclr/jit/compiler.cpp | 4 +- src/coreclr/jit/compiler.h | 47 ++-- src/coreclr/jit/lclmorph.cpp | 436 +++++++++++++++++++++++++++++++++++ src/coreclr/jit/morph.cpp | 416 --------------------------------- 4 files changed, 464 insertions(+), 439 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index cc7ecb9cd64abb..25cea74249e708 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4797,7 +4797,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals); - DoPhase(this, PHASE_LCL_MASKS, &Compiler::optLCLMasks); + // Optimize away conversions to/from masks in local variables. + // + DoPhase(this, PHASE_LCL_MASKS, &Compiler::fgOptimizeLCLMasks); // Do an early pass of liveness for forward sub and morph. This data is // valid until after morph. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 0af08c4fa021af..ee41b7aeca6bb1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6918,6 +6918,31 @@ class Compiler bool fgExposeUnpropagatedLocals(bool propagatedAny, class LocalEqualsLocalAddrAssertions* assertions); void fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec); + PhaseStatus fgOptimizeLCLMasks(); + + struct LCLMasksWeight + { + // For a given var, number of Lcl Stores with conversion from mask minus number of Lcl Stores + // without conversion from mask. + signed storeWeight; + + // For a given var, number of Lcl var with conversion to mask minus number of Lcl vars without + // conversion to mask. 
+ signed varWeight; + + bool MaskConversionsDominate() + { + return ((storeWeight > 0) && (varWeight > 0)); + } + }; + + typedef JitHashTable, LCLMasksWeight> LCLMasksWeightTable; + + bool fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable *weightsTable); + void fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); + bool fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable); + void fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); + PhaseStatus PhysicalPromotion(); PhaseStatus fgForwardSub(); @@ -7304,7 +7329,6 @@ class Compiler public: PhaseStatus optOptimizeValnumCSEs(); - PhaseStatus optLCLMasks(); // some phases (eg hoisting) need to anticipate // what CSE will do @@ -7320,27 +7344,6 @@ class Compiler void optValnumCSE_Heuristic(CSE_HeuristicCommon* heuristic); GenTree* optExtractSideEffectsForCSE(GenTree* tree); - struct LCLMasksWeight - { - // For a given var, number of Lcl Stores with conversion from mask minus number of Lcl Stores without conversion from mask. - signed storeWeight; - - // For a given var, number of Lcl var with conversion to mask minus number of Lcl vars without conversion to mask. 
- signed varWeight; - - bool MaskConversionsDominate() - { - return ((storeWeight > 0) && (varWeight > 0)); - } - }; - - typedef JitHashTable, LCLMasksWeight> LCLMasksWeightTable; - - bool LCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable *weightsTable); - void LCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); - bool LCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable); - void LCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); - bool optDoCSE; // True when we have found a duplicate CSE tree bool optValnumCSE_phase = false; // True when we are executing the optOptimizeValnumCSEs() phase unsigned optCSECandidateCount = 0; // Count of CSE candidates diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 90750945082aea..2c8d4789c6c033 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2426,3 +2426,439 @@ void Compiler::fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec) lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); } } + +//----------------------------------------------------------------------------- +// LCLMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask +// +class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitor +{ +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true + }; + + LCLMasksCheckLCLVarVisitor(Compiler* compiler, unsigned lclNum) + : GenTreeVisitor(compiler) + , foundConversion(false) + , lclNum(lclNum) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + // LCLMasksCheckLCLVarVisitor use + // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 + + // LCLMasksCheckLCLVarVisitor user + // [000031] ----------- * HWINTRINSIC mask ubyte ConvertVectorToMask + // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll + // [000026] 
----------- \--* LCL_VAR simd16 V06 tmp3 + + GenTree* const lclOp = *use; + + if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum) && + user->OperIsConvertVectorToMask()) + { + foundConversion = true; + } + + return fgWalkResult::WALK_CONTINUE; + } + + bool foundConversion; + +private: + unsigned lclNum; +}; + +//----------------------------------------------------------------------------- +// LCLMasksUpdateLCLVarVisitor: tree visitor to remove conversion to masks for uses of LCL +// +class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitor +{ +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true + }; + + LCLMasksUpdateLCLVarVisitor(Compiler* compiler, unsigned lclNum, Statement* stmt) + : GenTreeVisitor(compiler) + , lclNum(lclNum) + , stmt(stmt) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + + // LCLMasksUpdateLCLVarVisitor use + // [000031] ----------- * HWINTRINSIC mask ubyte ConvertVectorToMask + // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll + // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 + + // LCLMasksUpdateLCLVarVisitor user + // [000029] ---XG------ * HWINTRINSIC simd16 ubyte LoadVector + // [000031] ----------- +--* HWINTRINSIC mask ubyte ConvertVectorToMask + // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll + // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 + // [000028] ----------- \--* LCL_VAR long V02 + + // Look for: + // user(ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) + if ((*use)->OperIsConvertVectorToMask()) + { + GenTree* const convertOp = *use; + GenTree* lclOp = convertOp->AsHWIntrinsic()->Op(2); + + if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) + { + assert(lclOp->gtType != TYP_MASK); + + // if (m_compiler->verbose) + // { + // JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); + // m_compiler->gtDispTree(*use); + // JITDUMP("\nRemoveLCLUseConvertVisitor 
user\n"); + // m_compiler->gtDispTree(user); + // } + + // Find the location of convertOp in the user + int opNum = 1; + for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) + { + if (user->AsHWIntrinsic()->Op(opNum) == convertOp) + { + break; + } + } + assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); + + // Fix up the type of the lcl + lclOp->gtType = convertOp->gtType; + + // Remove the convert convertOp + convertOp->gtBashToNOP(); + user->AsHWIntrinsic()->Op(opNum) = lclOp; + m_compiler->fgSequenceLocals(stmt); + + // if (m_compiler->verbose) + // { + // JITDUMP("\nAfter removal:\n"); + // m_compiler->gtDispTree(user); + // } + + return fgWalkResult::WALK_ABORT; + } + } + + // Look for: + // user(LCL_VAR(lclNum))) + else if ((*use)->OperIs(GT_LCL_VAR) && ((*use)->gtType != TYP_MASK) && + ((*use)->AsLclVarCommon()->GetLclNum() == lclNum) && !user->OperIsConvertVectorToMask()) + { + if (m_compiler->verbose) + { + JITDUMP("\nRemoveLCLUseConvertVisitor ELSE use\n"); + m_compiler->gtDispTree(*use); + JITDUMP("\nRemoveLCLUseConvertVisitor ELSE user\n"); + m_compiler->gtDispTree(user); + } + + // TODO: Fill this in when I hit it + assert(false); + + return fgWalkResult::WALK_ABORT; + } + + return fgWalkResult::WALK_CONTINUE; + } + + +private: + unsigned lclNum; + Statement* stmt; +}; + +//----------------------------------------------------------------------------- +// fgLCLMasksCheckLCLStore: For the given statement, if it is a local store, +// then update the store weights in the table. +// +// Arguments: +// stmt - The statement. +// weightsTable - table to update. +// +// Returns: +// True if a converted local store was found. 
+// +bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable) +{ + // Look for: + // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) + + GenTree* tree = stmt->GetRootNode(); + + if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) + { + return false; + } + + GenTreeLclVar* lclStore = tree->AsLclVar(); + + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); + + // Check if the store is converted from mask + bool isConverted = lclStore->Data()->OperIsConvertMaskToVector(); + if (isConverted) + { + weight.storeWeight++; + JITDUMP("Local Store V%02d at [%06u] is converted from mask. Incrementing store weight to %d\n", + lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); + } + else + { + weight.storeWeight--; + JITDUMP("Local Store V%02d at [%06u] has no conversion. Decrementing store weight to %d\n", + lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); + } + + // Update the table. + weightsTable->Set(lclStore->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); + + return isConverted; +} + +//----------------------------------------------------------------------------- +// fgLCLMasksCheckLCLStore: For the given lcl var, update the var weights in +// the table. +// +// Arguments: +// stmt - The statement. +// weightsTable - table to update. +// +// Returns: +// True if a converted local store was found. +// +void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, + Statement* const stmt, + LCLMasksWeightTable* weightsTable) +{ + if (!lclVar->OperIs(GT_LCL_VAR)) + { + return; + } + + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); + + // If there no entry, then the var does not have a local store. 
+ if (!found) + { + JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); + return; + } + + // Find the parent of the lcl var + LCLMasksCheckLCLVarVisitor ev(this, lclVar->GetLclNum()); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + + if (ev.foundConversion) + { + weight.varWeight++; + JITDUMP("Local Var V%02d at [%06u] is converted to mask. Incrementing var weight to %d\n", lclVar->GetLclNum(), + dspTreeID(lclVar), weight.varWeight); + } + else + { + weight.varWeight--; + JITDUMP("Local Var V%02d at [%06u] has no conversion. Decrementing var weight to %d\n", lclVar->GetLclNum(), + dspTreeID(lclVar), weight.varWeight); + } + + // Update the table. + weightsTable->Set(lclVar->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); +} + +bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable) +{ + // Look for: + // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) + + GenTree* tree = stmt->GetRootNode(); + + if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) + { + return false; + } + + GenTreeLclVar* lclStore = tree->AsLclVar(); + + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); + assert(found); + + if (!weight.MaskConversionsDominate()) + { + JITDUMP("Local Store V%02d at [%06u] will not be converted. Weighting {%d, %d}\n", lclStore->GetLclNum(), + dspTreeID(lclStore), weight.storeWeight, weight.varWeight); + return false; + } + + JITDUMP("Local Store V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclStore->GetLclNum(), + dspTreeID(lclStore), weight.storeWeight, weight.varWeight); + + if (lclStore->Data()->OperIsConvertMaskToVector()) + { + // Remove the ConvertMaskToVector + + GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); + GenTree* maskOp = convertOp->Op(1); + + // Update the type of the STORELCL - including the lclvar. 
+ lclStore->gtType = maskOp->gtType; + LclVarDsc* varDsc = lvaGetDesc(lclStore->GetLclNum()); + varDsc->lvType = maskOp->gtType; + + // Remove the convert from the tree. + convertOp->gtBashToNOP(); + lclStore->gtOp1 = maskOp; + fgSequenceLocals(stmt); + } + else + { + // TODO: Fill out. Need to add a convertVectorToMask + gtDispTree(lclStore); + assert(false); + } + + JITDUMP("Updated V%02d to store as mask\n", lclStore->GetLclNum()); + gtDispTree(lclStore); + return true; +} + +void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, + Statement* const stmt, + LCLMasksWeightTable* weightsTable) +{ + if (!lclVar->OperIs(GT_LCL_VAR)) + { + return; + } + + LCLMasksWeight weight = {0, 0}; + bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); + + // If there no entry, then the var does not have a local store. + if (!found) + { + JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); + return; + } + + if (!weight.MaskConversionsDominate()) + { + JITDUMP("Local Var V%02d at [%06u] will not be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), + dspTreeID(lclVar), weight.storeWeight, weight.varWeight); + return; + } + + JITDUMP("Local Var V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), dspTreeID(lclVar), + weight.storeWeight, weight.varWeight); + + // Remove or add convert.... + LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + + JITDUMP("Updated V%02d to be a mask\n", lclVar->GetLclNum()); + gtDispTree(lclVar); +} + +//------------------------------------------------------------------------ +// optLCLMasks: Allow locals to be of MASK type +// +// Returns: +// Suitable phase status +// +PhaseStatus Compiler::fgOptimizeLCLMasks() +{ + if (opts.OptimizationDisabled()) + { + JITDUMP("Skipping. 
Optimizations Disabled\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + if (!compConvertMaskToVectorUsed) + { + JITDUMP("Skipping. There are no Convert Mask To Vector nodes\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + LCLMasksWeightTable weightsTable = LCLMasksWeightTable(getAllocator()); + + // Find every local store that is first converted from a mask and add them to masksTable. + bool foundConvertingStore = false; + JITDUMP("\n"); + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + foundConvertingStore |= fgLCLMasksCheckLCLStore(stmt, &weightsTable); + } + } + + if (!foundConvertingStore) + { + JITDUMP("Done. No local stores of masks found\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + // Find the uses of every local and check if it is converted to a mask, updating the keys in the masksTable. + JITDUMP("\n"); + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + fgLCLMasksCheckLCLVar(lcl, stmt, &weightsTable); + } + } + } + + // For each local store, potentially add/remove a conversion. + bool madeChanges = false; + JITDUMP("\n"); + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + madeChanges |= fgLCLMasksUpdateLCLStore(stmt, &weightsTable); + } + } + + if (!madeChanges) + { + JITDUMP("Done. No local stores converted\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + // For each Local Var, potentially add/remove a conversion. 
+ for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + fgLCLMasksUpdateLCLVar(lcl, stmt, &weightsTable); + } + } + } + + return PhaseStatus::MODIFIED_EVERYTHING; +} diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 370998eb376b32..6f76f6042ce5cf 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -15503,419 +15503,3 @@ PhaseStatus Compiler::fgMorphArrayOps() return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } - -//------------------------------------------------------------------------ -// LCLMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask - -class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitor -{ -public: - enum - { - DoPostOrder = true, - UseExecutionOrder = true - }; - - LCLMasksCheckLCLVarVisitor(Compiler* compiler, unsigned lclNum) - : GenTreeVisitor(compiler) - , foundConversion(false) - , lclNum(lclNum) - { - } - - Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) - { - // LCLMasksCheckLCLVarVisitor use - // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 - - // LCLMasksCheckLCLVarVisitor user - // [000031] ----------- * HWINTRINSIC mask ubyte ConvertVectorToMask - // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll - // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 - - GenTree* const lclOp = *use; - - if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum) && - user->OperIsConvertVectorToMask()) - { - foundConversion = true; - } - - return fgWalkResult::WALK_CONTINUE; - } - - bool foundConversion; - -private: - unsigned lclNum; -}; - -//------------------------------------------------------------------------ -// LCLMasksUpdateLCLVarVisitor: tree visitor to remove conversion to masks for uses of LCL -// -class LCLMasksUpdateLCLVarVisitor final : public 
GenTreeVisitor -{ -public: - enum - { - DoPostOrder = true, - UseExecutionOrder = true - }; - - LCLMasksUpdateLCLVarVisitor(Compiler* compiler, unsigned lclNum, Statement* stmt) - : GenTreeVisitor(compiler) - , lclNum(lclNum) - , stmt(stmt) - { - } - - Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) - { - - // LCLMasksUpdateLCLVarVisitor use - // [000031] ----------- * HWINTRINSIC mask ubyte ConvertVectorToMask - // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll - // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 - - // LCLMasksUpdateLCLVarVisitor user - // [000029] ---XG------ * HWINTRINSIC simd16 ubyte LoadVector - // [000031] ----------- +--* HWINTRINSIC mask ubyte ConvertVectorToMask - // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll - // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 - // [000028] ----------- \--* LCL_VAR long V02 - - // Look for: - // user(ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) - if ((*use)->OperIsConvertVectorToMask()) - { - GenTree* const convertOp = *use; - GenTree* lclOp = convertOp->AsHWIntrinsic()->Op(2); - - if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) - { - assert(lclOp->gtType != TYP_MASK); - - // if (m_compiler->verbose) - // { - // JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); - // m_compiler->gtDispTree(*use); - // JITDUMP("\nRemoveLCLUseConvertVisitor user\n"); - // m_compiler->gtDispTree(user); - // } - - // Find the location of convertOp in the user - int opNum = 1; - for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) - { - if (user->AsHWIntrinsic()->Op(opNum) == convertOp) - { - break; - } - } - assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - - // Fix up the type of the lcl - lclOp->gtType = convertOp->gtType; - - // Remove the convert convertOp - convertOp->gtBashToNOP(); - user->AsHWIntrinsic()->Op(opNum) = lclOp; - m_compiler->fgSequenceLocals(stmt); - - // if 
(m_compiler->verbose) - // { - // JITDUMP("\nAfter removal:\n"); - // m_compiler->gtDispTree(user); - // } - - return fgWalkResult::WALK_ABORT; - } - } - - // Look for: - // user(LCL_VAR(lclNum))) - else if ((*use)->OperIs(GT_LCL_VAR) && ((*use)->gtType != TYP_MASK) && - ((*use)->AsLclVarCommon()->GetLclNum() == lclNum) && !user->OperIsConvertVectorToMask()) - { - if (m_compiler->verbose) - { - JITDUMP("\nRemoveLCLUseConvertVisitor ELSE use\n"); - m_compiler->gtDispTree(*use); - JITDUMP("\nRemoveLCLUseConvertVisitor ELSE user\n"); - m_compiler->gtDispTree(user); - } - - // TODO: Fill this in when I hit it - assert(false); - - return fgWalkResult::WALK_ABORT; - } - - return fgWalkResult::WALK_CONTINUE; - } - - -private: - unsigned lclNum; - Statement* stmt; -}; - -// For the given statement, if it is a local store, then update the store weights -bool Compiler::LCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable) -{ - // Look for: - // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) - - GenTree* tree = stmt->GetRootNode(); - - if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) - { - return false; - } - - GenTreeLclVar* lclStore = tree->AsLclVar(); - - LCLMasksWeight weight = {0, 0}; - bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); - - // Check if the store is converted from mask - bool isConverted = lclStore->Data()->OperIsConvertMaskToVector(); - if (isConverted) - { - weight.storeWeight++; - JITDUMP("Local Store V%02d at [%06u] is converted from mask. Incrementing store weight to %d\n", - lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); - } - else - { - weight.storeWeight--; - JITDUMP("Local Store V%02d at [%06u] has no conversion. Decrementing store weight to %d\n", - lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); - } - - // Update the table. 
- weightsTable->Set(lclStore->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); - - return isConverted; -} - -// For the given lcl var, update the var weights -void Compiler::LCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, - Statement* const stmt, - LCLMasksWeightTable* weightsTable) -{ - if (!lclVar->OperIs(GT_LCL_VAR)) - { - return; - } - - LCLMasksWeight weight = {0, 0}; - bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); - - // If there no entry, then the var does not have a local store. - if (!found) - { - JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); - return; - } - - // Find the parent of the lcl var - LCLMasksCheckLCLVarVisitor ev(this, lclVar->GetLclNum()); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - - if (ev.foundConversion) - { - weight.varWeight++; - JITDUMP("Local Var V%02d at [%06u] is converted to mask. Incrementing var weight to %d\n", lclVar->GetLclNum(), - dspTreeID(lclVar), weight.varWeight); - } - else - { - weight.varWeight--; - JITDUMP("Local Var V%02d at [%06u] has no conversion. Decrementing var weight to %d\n", lclVar->GetLclNum(), - dspTreeID(lclVar), weight.varWeight); - } - - // Update the table. - weightsTable->Set(lclVar->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); -} - -bool Compiler::LCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable) -{ - // Look for: - // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) - - GenTree* tree = stmt->GetRootNode(); - - if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) - { - return false; - } - - GenTreeLclVar* lclStore = tree->AsLclVar(); - - LCLMasksWeight weight = {0, 0}; - bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); - assert(found); - - if (!weight.MaskConversionsDominate()) - { - JITDUMP("Local Store V%02d at [%06u] will not be converted. 
Weighting {%d, %d}\n", lclStore->GetLclNum(), - dspTreeID(lclStore), weight.storeWeight, weight.varWeight); - return false; - } - - JITDUMP("Local Store V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclStore->GetLclNum(), - dspTreeID(lclStore), weight.storeWeight, weight.varWeight); - - if (lclStore->Data()->OperIsConvertMaskToVector()) - { - // Remove the ConvertMaskToVector - - GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); - GenTree* maskOp = convertOp->Op(1); - - // Update the type of the STORELCL - including the lclvar. - lclStore->gtType = maskOp->gtType; - LclVarDsc* varDsc = lvaGetDesc(lclStore->GetLclNum()); - varDsc->lvType = maskOp->gtType; - - // Remove the convert from the tree. - convertOp->gtBashToNOP(); - lclStore->gtOp1 = maskOp; - fgSequenceLocals(stmt); - } - else - { - // TODO: Fill out. Need to add a convertVectorToMask - gtDispTree(lclStore); - assert(false); - } - - JITDUMP("Updated V%02d to store as mask\n", lclStore->GetLclNum()); - gtDispTree(lclStore); - return true; -} - -void Compiler::LCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, - Statement* const stmt, - LCLMasksWeightTable* weightsTable) -{ - if (!lclVar->OperIs(GT_LCL_VAR)) - { - return; - } - - LCLMasksWeight weight = {0, 0}; - bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); - - // If there no entry, then the var does not have a local store. - if (!found) - { - JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); - return; - } - - if (!weight.MaskConversionsDominate()) - { - JITDUMP("Local Var V%02d at [%06u] will not be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), - dspTreeID(lclVar), weight.storeWeight, weight.varWeight); - return; - } - - JITDUMP("Local Var V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), dspTreeID(lclVar), - weight.storeWeight, weight.varWeight); - - // Remove or add convert.... 
- LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - - JITDUMP("Updated V%02d to be a mask\n", lclVar->GetLclNum()); - gtDispTree(lclVar); -} - -//------------------------------------------------------------------------ -// optLCLMasks: Allow locals to be of MASK type -// -// Returns: -// Suitable phase status -// -PhaseStatus Compiler::optLCLMasks() -{ - if (opts.OptimizationDisabled()) - { - JITDUMP("Skipping. Optimizations Disabled\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - if (!compConvertMaskToVectorUsed) - { - JITDUMP("Skipping. There are no Convert Mask To Vector nodes\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - LCLMasksWeightTable weightsTable = LCLMasksWeightTable(getAllocator()); - - // Find every local store that is first converted from a mask and add them to masksTable. - bool foundConvertingStore = false; - JITDUMP("\n"); - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - foundConvertingStore |= LCLMasksCheckLCLStore(stmt, &weightsTable); - } - } - - if (!foundConvertingStore) - { - JITDUMP("Done. No local stores of masks found\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - // Find the uses of every local and check if it is converted to a mask, updating the keys in the masksTable. - JITDUMP("\n"); - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) - { - LCLMasksCheckLCLVar(lcl, stmt, &weightsTable); - } - } - } - - // For each local store, potentially add/remove a conversion. - bool madeChanges = false; - JITDUMP("\n"); - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - madeChanges |= LCLMasksUpdateLCLStore(stmt, &weightsTable); - } - } - - if (!madeChanges) - { - JITDUMP("Done. 
No local stores converted\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - // For each Local Var, potentially add/remove a conversion. - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) - { - LCLMasksUpdateLCLVar(lcl, stmt, &weightsTable); - } - } - } - - return PhaseStatus::MODIFIED_EVERYTHING; -} From 7d21d6c01e607ca7602c003acfa005c7689bd03b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 4 Nov 2024 12:35:19 +0000 Subject: [PATCH 07/66] Better commenting --- src/coreclr/jit/lclmorph.cpp | 70 +++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 2c8d4789c6c033..65364b39035b0b 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2633,16 +2633,14 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* wei } //----------------------------------------------------------------------------- -// fgLCLMasksCheckLCLStore: For the given lcl var, update the var weights in +// fgLCLMasksCheckLCLVar: For the given lcl var, update the var weights in // the table. // // Arguments: -// stmt - The statement. +// lclVar - The local variable. +// stmt - The statement the local vairable is contained in. // weightsTable - table to update. // -// Returns: -// True if a converted local store was found. 
-// void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable* weightsTable) @@ -2684,6 +2682,17 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, weightsTable->Set(lclVar->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); } +//----------------------------------------------------------------------------- +// fgLCLMasksUpdateLCLStore: For the given statement, if it is a local store, +// and mask conversions dominate in the weightings, then update to store as a mask. +// +// Arguments: +// stmt - The statement. +// weightsTable - table to update. +// +// Returns: +// True if a converted local store was found. +// bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable) { // Look for: @@ -2741,6 +2750,15 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we return true; } +//----------------------------------------------------------------------------- +// fgLCLMasksUpdateLCLVar: For the given lcl var, if mask conversions dominate in +// the weightings, then update to use as the source as a mask. +// +// Arguments: +// lclVar - The local variable. +// stmt - The statement the local vairable is contained in. +// weightsTable - table to update. +// void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable* weightsTable) @@ -2770,7 +2788,7 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, JITDUMP("Local Var V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), dspTreeID(lclVar), weight.storeWeight, weight.varWeight); - // Remove or add convert.... 
+ // Remove or add a mask conversion/ LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); @@ -2782,6 +2800,40 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, //------------------------------------------------------------------------ // optLCLMasks: Allow locals to be of MASK type // +// At the C# level, Masks share the same type as a Vector. It's possible for the same +// variable to be used as a mask or vector. Any APIs that return a mask must first convert +// the value to a vector before storing it to a variable. Any uses of a variable as a mask +// must first convert from vector before using it. In many cases this creates unnecessary +// conversions. For variables that live outside the scope of the current method then the +// conversions are required to ensure correctness. However, for local variables where the +// scope is local to the current method, then it is possible to keep the value as a mask, +// by updating all definitions and uses. +// +// In the common case it is expected that uses of masks are consistent - once a variable is +// created as a mask it will continue to be used and updated as a mask. +// +// In the uncommon case, a variable may be created in one type, used as another and/or +// updated to a different type. +// +// For example (the conversion is implicit) +// vector x = _ConvertMaskToVector_(CreateMask()); +// x = Add(x, y); +// +// To account for this, this pass uses a weighting. For each variable, count the count the +// number of definitions with a convert from mask minus the number of definitions without a +// convert. Then do the same for each use. If both totals for a variable are positive, then +// convert every definition and use to use a mask instead of a vector. +// +// This weighting does not account for: +// * Loops. Uses/definitions inside a loop will account for more real weight than any outside +// a loop. 
This is not expected to be an issue as in most cases when using loops, a variable +// will be set once outside a loop then used/defined multiple times inside a loop. +// * Re-definition. A variable may first be created as a mask used as such, then much later in +// the method defined as a vector and used as such from then on. This can be worked around at +// the user level by encouraging users not to reuse variable names. +// +// It is assumed that the simple weighting will be good enough for almost all use cases. +// // Returns: // Suitable phase status // @@ -2801,7 +2853,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() LCLMasksWeightTable weightsTable = LCLMasksWeightTable(getAllocator()); - // Find every local store that is first converted from a mask and add them to masksTable. + // Find every local store and add them to masksTable. bool foundConvertingStore = false; JITDUMP("\n"); for (BasicBlock* block : Blocks()) @@ -2818,7 +2870,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() return PhaseStatus::MODIFIED_NOTHING; } - // Find the uses of every local and check if it is converted to a mask, updating the keys in the masksTable. + // Find the uses of every local and add them to masksTable. JITDUMP("\n"); for (BasicBlock* block : Blocks()) { @@ -2848,7 +2900,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() return PhaseStatus::MODIFIED_NOTHING; } - // For each Local Var, potentially add/remove a conversion. + // For each Local variable, potentially add/remove a conversion. 
for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) From 56623225458f19a2a032f5a4f7669df9c9ae82ac Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 4 Nov 2024 12:41:04 +0000 Subject: [PATCH 08/66] Add TARGET_ARM64 check --- src/coreclr/jit/compiler.h | 2 ++ src/coreclr/jit/lclmorph.cpp | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ee41b7aeca6bb1..bb7ed56765a2df 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6920,6 +6920,7 @@ class Compiler PhaseStatus fgOptimizeLCLMasks(); +#if defined(TARGET_ARM64) struct LCLMasksWeight { // For a given var, number of Lcl Stores with conversion from mask minus number of Lcl Stores @@ -6942,6 +6943,7 @@ class Compiler void fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); bool fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable); void fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); +#endif // TARGET_ARM64 PhaseStatus PhysicalPromotion(); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 65364b39035b0b..9135bf769e2643 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2427,6 +2427,8 @@ void Compiler::fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec) } } +#if defined(TARGET_ARM64) + //----------------------------------------------------------------------------- // LCLMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask // @@ -2797,8 +2799,10 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, gtDispTree(lclVar); } +#endif // TARGET_ARM64 + //------------------------------------------------------------------------ -// optLCLMasks: Allow locals to be of MASK type +// optLCLMasks: Allow locals to be of Mask type // // At 
the C# level, Masks share the same type as a Vector. It's possible for the same // variable to be used as a mask or vector. Any APIs that return a mask must first convert @@ -2839,6 +2843,10 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, // PhaseStatus Compiler::fgOptimizeLCLMasks() { +#if !defined(TARGET_ARM64) + return PhaseStatus::MODIFIED_NOTHING; +#endif // !TARGET_ARM64 + if (opts.OptimizationDisabled()) { JITDUMP("Skipping. Optimizations Disabled\n"); From a8942c8b29ca5e390f065c3d7cd8abb1a3392e74 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 4 Nov 2024 15:40:30 +0000 Subject: [PATCH 09/66] tidy --- src/coreclr/jit/lclmorph.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 9135bf769e2643..5a896fbb08fbcb 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2843,9 +2843,7 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, // PhaseStatus Compiler::fgOptimizeLCLMasks() { -#if !defined(TARGET_ARM64) - return PhaseStatus::MODIFIED_NOTHING; -#endif // !TARGET_ARM64 +#if defined(TARGET_ARM64) if (opts.OptimizationDisabled()) { @@ -2909,6 +2907,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() } // For each Local variable, potentially add/remove a conversion. 
+ JITDUMP("\n"); for (BasicBlock* block : Blocks()) { for (Statement* const stmt : block->Statements()) @@ -2921,4 +2920,8 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() } return PhaseStatus::MODIFIED_EVERYTHING; + +#else + return PhaseStatus::MODIFIED_NOTHING; +#endif // TARGET_ARM64 } From 66f84f774518dbd16ba3634dfb069467a557470f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 4 Nov 2024 16:34:20 +0000 Subject: [PATCH 10/66] Add DEBUG ifdefs --- src/coreclr/jit/lclmorph.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 5a896fbb08fbcb..d8c156b6fcc242 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2562,6 +2562,7 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitorOperIs(GT_LCL_VAR) && ((*use)->gtType != TYP_MASK) && ((*use)->AsLclVarCommon()->GetLclNum() == lclNum) && !user->OperIsConvertVectorToMask()) { +#ifdef DEBUG if (m_compiler->verbose) { JITDUMP("\nRemoveLCLUseConvertVisitor ELSE use\n"); @@ -2569,6 +2570,7 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitorgtDispTree(user); } +#endif // TODO: Fill this in when I hit it assert(false); @@ -2742,13 +2744,24 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we } else { +#ifdef DEBUG // TODO: Fill out. 
Need to add a convertVectorToMask - gtDispTree(lclStore); + if (verbose) + { + gtDispTree(lclStore); + } +#endif assert(false); } +#ifdef DEBUG JITDUMP("Updated V%02d to store as mask\n", lclStore->GetLclNum()); - gtDispTree(lclStore); + if (verbose) + { + gtDispTree(lclStore); + } +#endif + return true; } @@ -2795,8 +2808,13 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); +#ifdef DEBUG JITDUMP("Updated V%02d to be a mask\n", lclVar->GetLclNum()); - gtDispTree(lclVar); + if (verbose) + { + gtDispTree(lclVar); + } +#endif } #endif // TARGET_ARM64 From 4025aa571b149d0a98b6007ee44c9f98f7c14fe3 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 4 Nov 2024 17:28:59 +0000 Subject: [PATCH 11/66] Add mask check in lsrabuild Change-Id: Ic18f575e266d63db38f95601d374441cdbf28b44 --- src/coreclr/jit/lsrabuild.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 19397cad5d1a58..835f876a44e31d 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2766,12 +2766,12 @@ void LinearScan::buildIntervals() { calleeSaveCount = CNT_CALLEE_ENREG; } -#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) +#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(FEATURE_SIMD) else if (varTypeUsesMaskReg(interval->registerType)) { calleeSaveCount = CNT_CALLEE_SAVED_MASK; } -#endif // TARGET_XARCH && FEATURE_SIMD +#endif // (TARGET_XARCH || TARGET_ARM64) && FEATURE_SIMD else { assert(varTypeUsesFloatReg(interval->registerType)); From a94c9af8f2d302c385d2320344a3a2140244cd9e Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 5 Nov 2024 12:04:21 +0000 Subject: [PATCH 12/66] Add conversion for local var uses --- src/coreclr/jit/compiler.h | 4 ++ src/coreclr/jit/lclmorph.cpp | 134 +++++++++++++++++------------------ 2 files changed, 71 insertions(+), 67 deletions(-) diff --git 
a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index bb7ed56765a2df..581fdacd5db62b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6921,6 +6921,7 @@ class Compiler PhaseStatus fgOptimizeLCLMasks(); #if defined(TARGET_ARM64) + struct LCLMasksWeight { // For a given var, number of Lcl Stores with conversion from mask minus number of Lcl Stores @@ -6931,6 +6932,9 @@ class Compiler // conversion to mask. signed varWeight; + CorInfoType simdBaseJitType; + unsigned simdSize; + bool MaskConversionsDominate() { return ((storeWeight > 0) && (varWeight > 0)); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index d8c156b6fcc242..0750008bc3e081 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2487,74 +2487,56 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitor(compiler) , lclNum(lclNum) , stmt(stmt) + , simdBaseJitType(simdBaseJitType) + , simdSize(simdSize) { } Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - - // LCLMasksUpdateLCLVarVisitor use - // [000031] ----------- * HWINTRINSIC mask ubyte ConvertVectorToMask - // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll - // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 - - // LCLMasksUpdateLCLVarVisitor user - // [000029] ---XG------ * HWINTRINSIC simd16 ubyte LoadVector - // [000031] ----------- +--* HWINTRINSIC mask ubyte ConvertVectorToMask - // [000030] ----------- +--* HWINTRINSIC mask ubyte CreateTrueMaskAll - // [000026] ----------- \--* LCL_VAR simd16 V06 tmp3 - // [000028] ----------- \--* LCL_VAR long V02 - // Look for: // user(ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) - if ((*use)->OperIsConvertVectorToMask()) + if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2)->OperIs(GT_LCL_VAR) && + ((*use)->AsHWIntrinsic()->Op(2)->AsLclVar()->GetLclNum() == lclNum)) { GenTree* const convertOp = *use; - GenTree* lclOp = 
convertOp->AsHWIntrinsic()->Op(2); + GenTreeLclVar* lclOp = (*use)->AsHWIntrinsic()->Op(2)->AsLclVar(); + + assert(lclOp->gtType != TYP_MASK); - if (lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum)) + // Find the location of convertOp in the user + int opNum = 1; + for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) { - assert(lclOp->gtType != TYP_MASK); - - // if (m_compiler->verbose) - // { - // JITDUMP("\nRemoveLCLUseConvertVisitor use\n"); - // m_compiler->gtDispTree(*use); - // JITDUMP("\nRemoveLCLUseConvertVisitor user\n"); - // m_compiler->gtDispTree(user); - // } - - // Find the location of convertOp in the user - int opNum = 1; - for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) + if (user->AsHWIntrinsic()->Op(opNum) == convertOp) { - if (user->AsHWIntrinsic()->Op(opNum) == convertOp) - { - break; - } + break; } - assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - - // Fix up the type of the lcl - lclOp->gtType = convertOp->gtType; + } + assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - // Remove the convert convertOp - convertOp->gtBashToNOP(); - user->AsHWIntrinsic()->Op(opNum) = lclOp; - m_compiler->fgSequenceLocals(stmt); + // Fix up the type of the lcl + lclOp->gtType = convertOp->gtType; - // if (m_compiler->verbose) - // { - // JITDUMP("\nAfter removal:\n"); - // m_compiler->gtDispTree(user); - // } + // Remove the convert convertOp + convertOp->gtBashToNOP(); + *use = lclOp; + m_compiler->fgSequenceLocals(stmt); - return fgWalkResult::WALK_ABORT; +#ifdef DEBUG + JITDUMP("Updated V%02d to be a mask (Removed conversion)\n", lclOp->GetLclNum()); + if (m_compiler->verbose) + { + m_compiler->gtDispTree(*use); } +#endif + + return fgWalkResult::WALK_ABORT; } // Look for: @@ -2562,19 +2544,27 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitorOperIs(GT_LCL_VAR) && ((*use)->gtType != TYP_MASK) && ((*use)->AsLclVarCommon()->GetLclNum() == lclNum) && 
!user->OperIsConvertVectorToMask()) { + GenTreeLclVar* lclOp = (*use)->AsLclVar(); + + // Fix up the type of the lcl + var_types vectorType = lclOp->gtType; + lclOp->gtType = TYP_MASK; + + // Create a convert to mask node and insert it infront of the lcl. + // There is not enough information in the lcl to get simd types. Instead we reuse the cached simd + // types from the removed convert nodes. + assert((simdBaseJitType != CORINFO_TYPE_UNDEF) && (simdSize != 0)); + *use = m_compiler->gtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); + ; + #ifdef DEBUG + JITDUMP("Updated V%02d to be a mask (Added conversion)\n", lclOp->GetLclNum()); if (m_compiler->verbose) { - JITDUMP("\nRemoveLCLUseConvertVisitor ELSE use\n"); m_compiler->gtDispTree(*use); - JITDUMP("\nRemoveLCLUseConvertVisitor ELSE user\n"); - m_compiler->gtDispTree(user); } #endif - // TODO: Fill this in when I hit it - assert(false); - return fgWalkResult::WALK_ABORT; } @@ -2583,8 +2573,10 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitorAsLclVar(); - LCLMasksWeight weight = {0, 0}; + LCLMasksWeight weight = {0, 0, CORINFO_TYPE_UNDEF, 0}; bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); // Check if the store is converted from mask @@ -2622,6 +2614,19 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* wei weight.storeWeight++; JITDUMP("Local Store V%02d at [%06u] is converted from mask. Incrementing store weight to %d\n", lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); + + // Cache the simd type data. If it has already been cached then the types should match. 
+ + GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); + CorInfoType simdBaseJitType = convertOp->Op(1)->AsHWIntrinsic()->GetSimdBaseJitType(); + unsigned simdSize = convertOp->Op(1)->AsHWIntrinsic()->GetSimdSize(); + ; + + assert((weight.simdBaseJitType == CORINFO_TYPE_UNDEF) && (weight.simdSize == 0) || + (weight.simdBaseJitType == simdBaseJitType) && (weight.simdSize == simdSize)); + + weight.simdBaseJitType = simdBaseJitType; + weight.simdSize = simdSize; } else { @@ -2741,6 +2746,8 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we convertOp->gtBashToNOP(); lclStore->gtOp1 = maskOp; fgSequenceLocals(stmt); + + JITDUMP("Updated V%02d to store as mask (Removed conversion)\n", lclStore->GetLclNum()); } else { @@ -2752,10 +2759,11 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we } #endif assert(false); + + JITDUMP("Updated V%02d to store as mask (Added conversion)\n", lclStore->GetLclNum()); } #ifdef DEBUG - JITDUMP("Updated V%02d to store as mask\n", lclStore->GetLclNum()); if (verbose) { gtDispTree(lclStore); @@ -2804,17 +2812,9 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, weight.storeWeight, weight.varWeight); // Remove or add a mask conversion/ - LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt); + LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt, weight.simdBaseJitType, weight.simdSize); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); - -#ifdef DEBUG - JITDUMP("Updated V%02d to be a mask\n", lclVar->GetLclNum()); - if (verbose) - { - gtDispTree(lclVar); - } -#endif } #endif // TARGET_ARM64 From 63a8b00e94b4f0cd94b2cb13fdfd717b5485b71d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 5 Nov 2024 13:32:57 +0000 Subject: [PATCH 13/66] Add conversion for local stores --- src/coreclr/jit/lclmorph.cpp | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git 
a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 0750008bc3e081..8856363976e8ec 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2450,14 +2450,6 @@ class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitorOperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum) && @@ -2555,7 +2547,6 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitorgtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); - ; #ifdef DEBUG JITDUMP("Updated V%02d to be a mask (Added conversion)\n", lclOp->GetLclNum()); @@ -2620,7 +2611,6 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* wei GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); CorInfoType simdBaseJitType = convertOp->Op(1)->AsHWIntrinsic()->GetSimdBaseJitType(); unsigned simdSize = convertOp->Op(1)->AsHWIntrinsic()->GetSimdSize(); - ; assert((weight.simdBaseJitType == CORINFO_TYPE_UNDEF) && (weight.simdSize == 0) || (weight.simdBaseJitType == simdBaseJitType) && (weight.simdSize == simdSize)); @@ -2730,6 +2720,11 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we JITDUMP("Local Store V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight, weight.varWeight); + // Update the type of the STORELCL - including the lclvar. + lclStore->gtType = TYP_MASK; + LclVarDsc* varDsc = lvaGetDesc(lclStore->GetLclNum()); + varDsc->lvType = TYP_MASK; + if (lclStore->Data()->OperIsConvertMaskToVector()) { // Remove the ConvertMaskToVector @@ -2737,11 +2732,6 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); GenTree* maskOp = convertOp->Op(1); - // Update the type of the STORELCL - including the lclvar. 
- lclStore->gtType = maskOp->gtType; - LclVarDsc* varDsc = lvaGetDesc(lclStore->GetLclNum()); - varDsc->lvType = maskOp->gtType; - // Remove the convert from the tree. convertOp->gtBashToNOP(); lclStore->gtOp1 = maskOp; @@ -2751,14 +2741,11 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we } else { -#ifdef DEBUG - // TODO: Fill out. Need to add a convertVectorToMask - if (verbose) - { - gtDispTree(lclStore); - } -#endif - assert(false); + // Convert the input of the store to a mask. + assert((weight.simdBaseJitType != CORINFO_TYPE_UNDEF) && (weight.simdSize != 0)); + GenTree* convertOp = + gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclStore->Data(), weight.simdBaseJitType, weight.simdSize); + lclStore->Data() = convertOp; JITDUMP("Updated V%02d to store as mask (Added conversion)\n", lclStore->GetLclNum()); } From a08cf111f0b853d5e7331b4b7f18eb6a02ff17b8 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 5 Nov 2024 13:41:55 +0000 Subject: [PATCH 14/66] Comment for simd types --- src/coreclr/jit/compiler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 581fdacd5db62b..9af3636309a1b8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6932,6 +6932,7 @@ class Compiler // conversion to mask. signed varWeight; + // The simd types of the Lcl Store after conversion to vector. 
CorInfoType simdBaseJitType; unsigned simdSize; From 4cb8999386e3a45f57492695d6a90fc6f048f6be Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 5 Nov 2024 13:51:53 +0000 Subject: [PATCH 15/66] Use weight_t for weighting values Change-Id: I0d39d59a121682e8e583cccd710d13f2dd33bdc5 --- src/coreclr/jit/compiler.h | 10 +++++----- src/coreclr/jit/lclmorph.cpp | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9af3636309a1b8..768b5d4ade723e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6926,19 +6926,19 @@ class Compiler { // For a given var, number of Lcl Stores with conversion from mask minus number of Lcl Stores // without conversion from mask. - signed storeWeight; + weight_t storeWeight = 0.0; // For a given var, number of Lcl var with conversion to mask minus number of Lcl vars without // conversion to mask. - signed varWeight; + weight_t varWeight = 0.0; // The simd types of the Lcl Store after conversion to vector. 
- CorInfoType simdBaseJitType; - unsigned simdSize; + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + unsigned simdSize = 0; bool MaskConversionsDominate() { - return ((storeWeight > 0) && (varWeight > 0)); + return ((storeWeight > 0.0) && (varWeight > 0.0)); } }; diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 8856363976e8ec..a30f34503fa005 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2595,7 +2595,7 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* wei GenTreeLclVar* lclStore = tree->AsLclVar(); - LCLMasksWeight weight = {0, 0, CORINFO_TYPE_UNDEF, 0}; + LCLMasksWeight weight; bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); // Check if the store is converted from mask @@ -2603,7 +2603,7 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* wei if (isConverted) { weight.storeWeight++; - JITDUMP("Local Store V%02d at [%06u] is converted from mask. Incrementing store weight to %d\n", + JITDUMP("Local Store V%02d at [%06u] is converted from mask. Incrementing store weight to %f\n", lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); // Cache the simd type data. If it has already been cached then the types should match. @@ -2621,7 +2621,7 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* wei else { weight.storeWeight--; - JITDUMP("Local Store V%02d at [%06u] has no conversion. Decrementing store weight to %d\n", + JITDUMP("Local Store V%02d at [%06u] has no conversion. Decrementing store weight to %f\n", lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); } @@ -2649,7 +2649,7 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, return; } - LCLMasksWeight weight = {0, 0}; + LCLMasksWeight weight; bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); // If there no entry, then the var does not have a local store. 
@@ -2667,13 +2667,13 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, if (ev.foundConversion) { weight.varWeight++; - JITDUMP("Local Var V%02d at [%06u] is converted to mask. Incrementing var weight to %d\n", lclVar->GetLclNum(), + JITDUMP("Local Var V%02d at [%06u] is converted to mask. Incrementing var weight to %f\n", lclVar->GetLclNum(), dspTreeID(lclVar), weight.varWeight); } else { weight.varWeight--; - JITDUMP("Local Var V%02d at [%06u] has no conversion. Decrementing var weight to %d\n", lclVar->GetLclNum(), + JITDUMP("Local Var V%02d at [%06u] has no conversion. Decrementing var weight to %f\n", lclVar->GetLclNum(), dspTreeID(lclVar), weight.varWeight); } @@ -2706,18 +2706,18 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we GenTreeLclVar* lclStore = tree->AsLclVar(); - LCLMasksWeight weight = {0, 0}; + LCLMasksWeight weight; bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); assert(found); if (!weight.MaskConversionsDominate()) { - JITDUMP("Local Store V%02d at [%06u] will not be converted. Weighting {%d, %d}\n", lclStore->GetLclNum(), + JITDUMP("Local Store V%02d at [%06u] will not be converted. Weighting {%f, %f}\n", lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight, weight.varWeight); return false; } - JITDUMP("Local Store V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclStore->GetLclNum(), + JITDUMP("Local Store V%02d at [%06u] will be converted. Weighting {%f, %f}\n", lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight, weight.varWeight); // Update the type of the STORELCL - including the lclvar. @@ -2778,7 +2778,7 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, return; } - LCLMasksWeight weight = {0, 0}; + LCLMasksWeight weight; bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); // If there no entry, then the var does not have a local store. 
@@ -2790,12 +2790,12 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, if (!weight.MaskConversionsDominate()) { - JITDUMP("Local Var V%02d at [%06u] will not be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), + JITDUMP("Local Var V%02d at [%06u] will not be converted. Weighting {%f, %f}\n", lclVar->GetLclNum(), dspTreeID(lclVar), weight.storeWeight, weight.varWeight); return; } - JITDUMP("Local Var V%02d at [%06u] will be converted. Weighting {%d, %d}\n", lclVar->GetLclNum(), dspTreeID(lclVar), + JITDUMP("Local Var V%02d at [%06u] will be converted. Weighting {%f, %f}\n", lclVar->GetLclNum(), dspTreeID(lclVar), weight.storeWeight, weight.varWeight); // Remove or add a mask conversion/ From a15cbb7ed50e7ad156368c51eed0acd85728caf5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 5 Nov 2024 15:13:51 +0000 Subject: [PATCH 16/66] Account for block weights and number of instructions in weighting. --- src/coreclr/jit/compiler.h | 47 ++++++++++++++-- src/coreclr/jit/lclmorph.cpp | 105 ++++++++++++++++------------------- 2 files changed, 90 insertions(+), 62 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 768b5d4ade723e..3e0ccd71a00a28 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6936,16 +6936,55 @@ class Compiler CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; unsigned simdSize = 0; - bool MaskConversionsDominate() + void UpdateStoreWeight(bool increment, weight_t blockWeight) { - return ((storeWeight > 0.0) && (varWeight > 0.0)); + weight_t incVal = increment ? blockWeight : -blockWeight; + storeWeight += incVal; + JITDUMP("Incrementing store weight by %f. ", incVal); + DumpTotalWeight(); + } + + void UpdateVarWeight(bool increment, weight_t blockWeight) + { + weight_t incVal = increment ? blockWeight : -blockWeight; + varWeight += incVal; + JITDUMP("Incrementing var weight by %f. 
", incVal); + DumpTotalWeight(); + } + + weight_t GetTotalWeight() + { + // Conversion of mask to vector is one instruction. + const weight_t costOfStoreWeight = 1.0; + + // Conversion of vector to mask is two instructions. + const weight_t costOfVarWeight = 2.0; + + return ((costOfStoreWeight * storeWeight) + (costOfVarWeight * varWeight)); + } + + void DumpTotalWeight() + { + JITDUMP("Weighting: %f {%fs, %fv}\n", GetTotalWeight(), storeWeight, varWeight); + } + + void CacheSimdTypes(GenTreeHWIntrinsic* op) + { + CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); + unsigned newSimdSize = op->GetSimdSize(); + + assert((simdBaseJitType == CORINFO_TYPE_UNDEF) && (simdSize == 0) || + (simdBaseJitType == newSimdBaseJitType) && (simdSize == newSimdSize)); + + simdBaseJitType = newSimdBaseJitType; + simdSize = newSimdSize; } }; typedef JitHashTable, LCLMasksWeight> LCLMasksWeightTable; - bool fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable *weightsTable); - void fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); + bool fgLCLMasksCheckLCLStore(Statement* stmt, BasicBlock* const block, LCLMasksWeightTable *weightsTable); + void fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LCLMasksWeightTable *weightsTable); bool fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable); void fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index a30f34503fa005..bba3df16a60932 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2456,12 +2456,14 @@ class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitorOperIsConvertVectorToMask()) { foundConversion = true; + convertOp = user->AsHWIntrinsic(); } return fgWalkResult::WALK_CONTINUE; } - bool 
foundConversion; + bool foundConversion; + GenTreeHWIntrinsic* convertOp = nullptr; private: unsigned lclNum; @@ -2581,7 +2583,7 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitorAsLclVar(); LCLMasksWeight weight; - bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); + bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); - // Check if the store is converted from mask + // Check if the store is converted from mask. bool isConverted = lclStore->Data()->OperIsConvertMaskToVector(); - if (isConverted) - { - weight.storeWeight++; - JITDUMP("Local Store V%02d at [%06u] is converted from mask. Incrementing store weight to %f\n", - lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); - - // Cache the simd type data. If it has already been cached then the types should match. - GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); - CorInfoType simdBaseJitType = convertOp->Op(1)->AsHWIntrinsic()->GetSimdBaseJitType(); - unsigned simdSize = convertOp->Op(1)->AsHWIntrinsic()->GetSimdSize(); + // Update the weights. + JITDUMP("Local Store V%02d at [%06u] has %s conversion. ", lclStore->GetLclNum(), dspTreeID(lclStore), + isConverted ? "mask" : "no"); + weight.UpdateStoreWeight(isConverted, block->getBBWeight(this)); - assert((weight.simdBaseJitType == CORINFO_TYPE_UNDEF) && (weight.simdSize == 0) || - (weight.simdBaseJitType == simdBaseJitType) && (weight.simdSize == simdSize)); - - weight.simdBaseJitType = simdBaseJitType; - weight.simdSize = simdSize; - } - else + if (isConverted) { - weight.storeWeight--; - JITDUMP("Local Store V%02d at [%06u] has no conversion. Decrementing store weight to %f\n", - lclStore->GetLclNum(), dspTreeID(lclStore), weight.storeWeight); + // Cache the simd type data of the convert. + weight.CacheSimdTypes(lclStore->Data()->AsHWIntrinsic()); } // Update the table. 
@@ -2637,11 +2626,13 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, LCLMasksWeightTable* wei // // Arguments: // lclVar - The local variable. -// stmt - The statement the local vairable is contained in. +// stmt - The statement the local variable is contained in. +// block - The block the local variable is contained in. // weightsTable - table to update. // void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, + BasicBlock* const block, LCLMasksWeightTable* weightsTable) { if (!lclVar->OperIs(GT_LCL_VAR)) @@ -2650,7 +2641,7 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, } LCLMasksWeight weight; - bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); + bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); // If there no entry, then the var does not have a local store. if (!found) @@ -2664,17 +2655,16 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); + // Update the weights. + JITDUMP("Local Var V%02d at [%06u] has %s conversion. ", lclVar->GetLclNum(), dspTreeID(lclVar), + ev.foundConversion ? "mask" : "no"); + weight.UpdateVarWeight(ev.foundConversion, block->getBBWeight(this)); + if (ev.foundConversion) { - weight.varWeight++; - JITDUMP("Local Var V%02d at [%06u] is converted to mask. Incrementing var weight to %f\n", lclVar->GetLclNum(), - dspTreeID(lclVar), weight.varWeight); - } - else - { - weight.varWeight--; - JITDUMP("Local Var V%02d at [%06u] has no conversion. Decrementing var weight to %f\n", lclVar->GetLclNum(), - dspTreeID(lclVar), weight.varWeight); + // Cache the simd type data of the convert. + assert(ev.convertOp != nullptr); + weight.CacheSimdTypes(ev.convertOp); } // Update the table.
@@ -2707,18 +2697,18 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we GenTreeLclVar* lclStore = tree->AsLclVar(); LCLMasksWeight weight; - bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); + bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); assert(found); - if (!weight.MaskConversionsDominate()) + if (weight.GetTotalWeight() <= 0.0) { - JITDUMP("Local Store V%02d at [%06u] will not be converted. Weighting {%f, %f}\n", lclStore->GetLclNum(), - dspTreeID(lclStore), weight.storeWeight, weight.varWeight); + JITDUMP("Local Store V%02d at [%06u] will not be converted. ", lclStore->GetLclNum(), dspTreeID(lclStore)); + weight.DumpTotalWeight(); return false; } - JITDUMP("Local Store V%02d at [%06u] will be converted. Weighting {%f, %f}\n", lclStore->GetLclNum(), - dspTreeID(lclStore), weight.storeWeight, weight.varWeight); + JITDUMP("Local Store V%02d at [%06u] will be converted. ", lclStore->GetLclNum(), dspTreeID(lclStore)); + weight.DumpTotalWeight(); // Update the type of the STORELCL - including the lclvar. lclStore->gtType = TYP_MASK; @@ -2779,7 +2769,7 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, } LCLMasksWeight weight; - bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); + bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); // If there no entry, then the var does not have a local store. if (!found) @@ -2788,15 +2778,15 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, return; } - if (!weight.MaskConversionsDominate()) + if (weight.GetTotalWeight() <= 0.0) { - JITDUMP("Local Var V%02d at [%06u] will not be converted. Weighting {%f, %f}\n", lclVar->GetLclNum(), - dspTreeID(lclVar), weight.storeWeight, weight.varWeight); + JITDUMP("Local Var V%02d at [%06u] will not be converted. ", lclVar->GetLclNum(), dspTreeID(lclVar)); + weight.DumpTotalWeight(); return; } - JITDUMP("Local Var V%02d at [%06u] will be converted. 
Weighting {%f, %f}\n", lclVar->GetLclNum(), dspTreeID(lclVar), - weight.storeWeight, weight.varWeight); + JITDUMP("Local Var V%02d at [%06u] will be converted. ", lclVar->GetLclNum(), dspTreeID(lclVar)); + weight.DumpTotalWeight(); // Remove or add a mask conversion/ LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt, weight.simdBaseJitType, weight.simdSize); @@ -2830,16 +2820,15 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, // // To account for this, this pass uses a weighting. For each variable, count the count the // number of definitions with a convert from mask minus the number of definitions without a -// convert. Then do the same for each use. If both totals for a variable are positive, then -// convert every definition and use to use a mask instead of a vector. +// convert. Then do the same for each use. To account for looping, each count is multiplied +// by the weight of its basic block. In addition, each count is multiplied by the number of +// instructions required for the conversion. If the totals for both definitions and uses are +// positive, then convert all definitions and uses. // -// This weighting does not account for: -// * Loops. Uses/definitions inside a loop will account for more real weight than any outside -// a loop. This is not expected to be an issue as in most cases when using loops, a variable -// will be set once outside a loop then used/defined multiple times inside a loop. -// * Re-definition. A variable may first be created as a mask used as such, then much later in -// the method defined as a vector and used as such from then on. This can be worked around at -// the user level by encouraging users not to reuse variable names. +// This weighting does not account for re-definition. A variable may first be created as a +// mask used as such, then much later in the method defined as a vector and used as such from +// then on.
This can be worked around at the user level by encouraging users not to reuse +// variable names. // // It is assumed that the simple weighting will be good enough for almost all use cases. // @@ -2871,7 +2860,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() { for (Statement* const stmt : block->Statements()) { - foundConvertingStore |= fgLCLMasksCheckLCLStore(stmt, &weightsTable); + foundConvertingStore |= fgLCLMasksCheckLCLStore(stmt, block, &weightsTable); } } @@ -2889,7 +2878,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgLCLMasksCheckLCLVar(lcl, stmt, &weightsTable); + fgLCLMasksCheckLCLVar(lcl, stmt, block, &weightsTable); } } } From 968ba8dcaf7b0edd60b937bd85d7ebbb3c837a55 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 5 Nov 2024 16:54:15 +0000 Subject: [PATCH 17/66] Fix asserts --- src/coreclr/jit/compiler.h | 5 +++-- src/coreclr/jit/lclmorph.cpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3e0ccd71a00a28..f70d5e11206629 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6973,8 +6973,9 @@ class Compiler CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); unsigned newSimdSize = op->GetSimdSize(); - assert((simdBaseJitType == CORINFO_TYPE_UNDEF) && (simdSize == 0) || - (simdBaseJitType == newSimdBaseJitType) && (simdSize == newSimdSize)); + assert((newSimdBaseJitType != CORINFO_TYPE_UNDEF)); + assert((simdBaseJitType == CORINFO_TYPE_UNDEF) || + ((simdBaseJitType == newSimdBaseJitType) && (simdSize == newSimdSize))); simdBaseJitType = newSimdBaseJitType; simdSize = newSimdSize; diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index bba3df16a60932..a9c2d5a6f1a1ea 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2547,7 +2547,7 @@ class LCLMasksUpdateLCLVarVisitor final : public 
GenTreeVisitorgtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); #ifdef DEBUG @@ -2732,7 +2732,7 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we else { // Convert the input of the store to a mask. - assert((weight.simdBaseJitType != CORINFO_TYPE_UNDEF) && (weight.simdSize != 0)); + assert(weight.simdBaseJitType != CORINFO_TYPE_UNDEF); GenTree* convertOp = gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclStore->Data(), weight.simdBaseJitType, weight.simdSize); lclStore->Data() = convertOp; From 51619b54ca715138c1fd5abc842472b13f2228bf Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 6 Nov 2024 13:45:30 +0000 Subject: [PATCH 18/66] Split weighting into current and switch --- src/coreclr/jit/compiler.h | 58 ++++++++------------------ src/coreclr/jit/lclmorph.cpp | 79 ++++++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 46 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f70d5e11206629..dc3d808f983ef3 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6924,62 +6924,36 @@ class Compiler struct LCLMasksWeight { - // For a given var, number of Lcl Stores with conversion from mask minus number of Lcl Stores - // without conversion from mask. - weight_t storeWeight = 0.0; + // For the given variable, the cost of storing as vector. + weight_t currentCost = 0.0; - // For a given var, number of Lcl var with conversion to mask minus number of Lcl vars without - // conversion to mask. - weight_t varWeight = 0.0; + // For the given variable, the cost of storing as mask. + weight_t switchCost = 0.0; + + // Conversion of mask to vector is one instruction. + static const weight_t costOfConvertMaskToVector = 1.0; + + // Conversion of vector to mask is two instructions. + static const weight_t costOfConvertVectorToMask = 2.0; // The simd types of the Lcl Store after conversion to vector. 
CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; unsigned simdSize = 0; - void UpdateStoreWeight(bool increment, weight_t blockWeight) - { - weight_t incVal = increment ? blockWeight : -blockWeight; - storeWeight += incVal; - JITDUMP("Incrementing store weight by %f. ", incVal); - DumpTotalWeight(); - } + void UpdateStoreWeight(bool hasConvertFromMask, weight_t blockWeight); + void UpdateVarWeight(bool hasConvertToMask, weight_t blockWeight); - void UpdateVarWeight(bool increment, weight_t blockWeight) + bool ShouldSwitch() { - weight_t incVal = increment ? blockWeight : -blockWeight; - varWeight += incVal; - JITDUMP("Incrementing var weight by %f. ", incVal); - DumpTotalWeight(); - } - - weight_t GetTotalWeight() - { - // Conversion of mask to vector is one instruction. - const weight_t costOfStoreWeight = 1.0; - - // Conversion of vector to mask is two instructions. - const weight_t costOfVarWeight = 2.0; - - return ((costOfStoreWeight * storeWeight) + (costOfVarWeight * varWeight)); + return currentCost > switchCost; } void DumpTotalWeight() { - JITDUMP("Weighting: %f {%fs, %fv}\n", GetTotalWeight(), storeWeight, varWeight); + JITDUMP("Weighting: {%fc %fs}\n", currentCost, switchCost); } - void CacheSimdTypes(GenTreeHWIntrinsic* op) - { - CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); - unsigned newSimdSize = op->GetSimdSize(); - - assert((newSimdBaseJitType != CORINFO_TYPE_UNDEF)); - assert((simdBaseJitType == CORINFO_TYPE_UNDEF) || - ((simdBaseJitType == newSimdBaseJitType) && (simdSize == newSimdSize))); - - simdBaseJitType = newSimdBaseJitType; - simdSize = newSimdSize; - } + void CacheSimdTypes(GenTreeHWIntrinsic* op); }; typedef JitHashTable, LCLMasksWeight> LCLMasksWeightTable; diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index a9c2d5a6f1a1ea..c2452b3256311d 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2429,6 +2429,79 @@ void Compiler::fgExposeLocalsInBitVec(BitVec_ValArg_T 
bitVec) #if defined(TARGET_ARM64) +//----------------------------------------------------------------------------- +// UpdateStoreWeight: Updates the weighting to take account of a local store. +// +// Arguments: +// hasConvertFromMask - Is this the store of a convert from mask +// blockWeight - Weight of the block the store is contained in +// +void Compiler::LCLMasksWeight::UpdateStoreWeight(bool hasConvertFromMask, weight_t blockWeight) +{ + if (hasConvertFromMask) + { + // Count the cost of the existing convert mask to vector. + weight_t incVal = blockWeight * costOfConvertMaskToVector; + JITDUMP("Incrementing currentCost by %f. ", incVal); + currentCost += incVal; + } + else + { + // Switching would require adding a convert vector to mask. + weight_t incVal = blockWeight * costOfConvertVectorToMask; + JITDUMP("Incrementing switchCost by %f. ", incVal); + switchCost += incVal; + } + DumpTotalWeight(); + JITDUMP("bbWeight=%f\n", blockWeight); +} + +//----------------------------------------------------------------------------- +// UpdateVarWeight: Updates the weighting to take account of a local variable use. +// +// Arguments: +// hasConvertToMask - Is this variable converted to a mask when used +// blockWeight - Weight of the block the use is contained in +// +void Compiler::LCLMasksWeight::UpdateVarWeight(bool hasConvertToMask, weight_t blockWeight) +{ + if (hasConvertToMask) + { + // Count the cost of the existing convert vector to mask. + weight_t incVal = blockWeight * costOfConvertVectorToMask; + JITDUMP("Incrementing currentCost by %f. ", incVal); + currentCost += incVal; + } + else + { + // Switching would require adding a convert mask to vector. + weight_t incVal = blockWeight * costOfConvertMaskToVector; + JITDUMP("Incrementing switchCost by %f.
", incVal); + switchCost += incVal; + } + DumpTotalWeight(); + JITDUMP("bbWeight=%f\n", blockWeight); +} + +//----------------------------------------------------------------------------- +// CacheSimdTypes: Cache the simd types of a hwintrinsic +// +// Arguments: +// op - The HW intrinsic to cache +// +void Compiler::LCLMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) +{ + CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); + unsigned newSimdSize = op->GetSimdSize(); + + assert((newSimdBaseJitType != CORINFO_TYPE_UNDEF)); + assert((simdBaseJitType == CORINFO_TYPE_UNDEF) || + ((simdBaseJitType == newSimdBaseJitType) && (simdSize == newSimdSize))); + + simdBaseJitType = newSimdBaseJitType; + simdSize = newSimdSize; +} + //----------------------------------------------------------------------------- // LCLMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask // @@ -2646,7 +2719,6 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, // If there no entry, then the var does not have a local store. if (!found) { - JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); return; } @@ -2700,7 +2772,7 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); assert(found); - if (weight.GetTotalWeight() <= 0.0) + if (!weight.ShouldSwitch()) { JITDUMP("Local Store V%02d at [%06u] will not be converted. ", lclStore->GetLclNum(), dspTreeID(lclStore)); weight.DumpTotalWeight(); @@ -2774,11 +2846,10 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, // If there no entry, then the var does not have a local store. 
if (!found) { - JITDUMP("Local Var V%02d at [%06u] is not stored to local.\n", lclVar->GetLclNum(), dspTreeID(lclVar)); return; } - if (weight.GetTotalWeight() <= 0.0) + if (!weight.ShouldSwitch()) { JITDUMP("Local Var V%02d at [%06u] will not be converted. ", lclVar->GetLclNum(), dspTreeID(lclVar)); weight.DumpTotalWeight(); From ef21a991859d1d34d78d0277e27b709cb252256b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 6 Nov 2024 13:48:44 +0000 Subject: [PATCH 19/66] Add tests --- .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 91 +++++++++++++++++++ .../JIT/opt/LocalMasks/ChangeMatchUse.csproj | 19 ++++ 2 files changed, 110 insertions(+) create mode 100644 src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs create mode 100644 src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs new file mode 100644 index 00000000000000..9287fea57cb11a --- /dev/null +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -0,0 +1,91 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Unit tests for the local masks optimization + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +public class AcrossAndCselToAcross +{ + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Consume(T value) { } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Consume(T value, T2 value2) { } + + + // Create a mask. Use it as a mask. + // Conversion of mask1 will be removed. 
+ [Fact] + [method: MethodImpl(MethodImplOptions.NoInlining)] + public static void UseMaskAsMask() + { + if (Sve.IsSupported) + { + Vector mask1 = Sve.CreateWhileLessThanMask64Bit(2, 9); // Create lcl mask + Vector vec1 = Vector.Create(5); + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Consume(vec2); + } + } + + // Create a mask. Use it as a vector. + // No conversions will be changed: Mask->Vector is optimal. + [Fact] + [method: MethodImpl(MethodImplOptions.NoInlining)] + public static void UseMaskAsVector() + { + if (Sve.IsSupported) + { + Vector mask1 = Sve.CreateFalseMaskInt16(); // Create lcl mask + Vector vec1 = Vector.Create(9); + Vector vec2 = Sve.Add(vec1, mask1); // Use as vector + Consume(vec2); + } + } + + // Create a mask. Use it as a mask, then use as a vector. + // Mask1 conversions will be switched. + [Fact] + [method: MethodImpl(MethodImplOptions.NoInlining)] + public static void UseMaskAsMaskAndVector() + { + if (Sve.IsSupported) + { + Vector mask1 = Sve.CreateWhileLessThanOrEqualMask8Bit(2, 9); // Create lcl mask + Vector vec1 = Vector.Create(3); + Vector vec2 = Vector.Create(4); + Vector vec3 = Sve.ConditionalExtractAfterLastActiveElement(mask1, vec1, vec2); // Use as mask + Vector vec4 = Sve.PopCount(mask1); // Use as vector + Consume(vec3, vec4); + } + } + + [Fact] + // Create a mask. Use it as a mask, then use as a vector inside a loop. + // No conversions will be changed: vector use inside the loop dominates. 
+ [method: MethodImpl(MethodImplOptions.NoInlining)] + public static void UseMaskAsMaskAndVectorInsideLoop() + { + if (Sve.IsSupported) + { + Vector mask1 = Sve.CreateFalseMaskInt16(); // Create lcl mask + Vector vec1 = Vector.Create(3); + Vector vec2 = Vector.Create(4); + Vector vec3 = Sve.Splice(mask1, vec1, vec2); // Use as mask + + for (int i = 0; i < 100; i++) + { + Vector vec4 = Sve.ReverseElement8(mask1); // Use as vector + Consume(vec3, vec4); + } + } + } + +} \ No newline at end of file diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj new file mode 100644 index 00000000000000..ef626609d9fca3 --- /dev/null +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj @@ -0,0 +1,19 @@ + + + + true + + + None + True + $(NoWarn);SYSLIB5003 + + + + true + + + + + + \ No newline at end of file From da2dd8db9d2a0ac96444f37200ad15492cf9e5a0 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 6 Nov 2024 17:38:52 +0000 Subject: [PATCH 20/66] Allow tests to tier --- src/coreclr/jit/lclmorph.cpp | 2 - .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 101 +++++++++++++++++- .../JIT/opt/LocalMasks/ChangeMatchUse.csproj | 17 ++- 3 files changed, 103 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index c2452b3256311d..429cc573579e60 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2453,7 +2453,6 @@ void Compiler::LCLMasksWeight::UpdateStoreWeight(bool hasConvertFromMask, weight switchCost += incVal; } DumpTotalWeight(); - JITDUMP("bbWeight=%f\n", blockWeight); } //----------------------------------------------------------------------------- @@ -2480,7 +2479,6 @@ void Compiler::LCLMasksWeight::UpdateVarWeight(bool hasConvertToMask, weight_t b switchCost += incVal; } DumpTotalWeight(); - JITDUMP("bbWeight=%f\n", blockWeight); } //----------------------------------------------------------------------------- diff --git 
a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index 9287fea57cb11a..7d01aa71c2a36d 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -9,6 +9,7 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Threading; using Xunit; public class AcrossAndCselToAcross @@ -23,8 +24,22 @@ private static void Consume(T value, T2 value2) { } // Create a mask. Use it as a mask. // Conversion of mask1 will be removed. [Fact] - [method: MethodImpl(MethodImplOptions.NoInlining)] public static void UseMaskAsMask() + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + InnerUseMaskAsMask(); + } + + Thread.Sleep(100); + } + InnerUseMaskAsMask(); + } + + [method: MethodImpl(MethodImplOptions.NoInlining)] + private static void InnerUseMaskAsMask() { if (Sve.IsSupported) { @@ -38,8 +53,22 @@ public static void UseMaskAsMask() // Create a mask. Use it as a vector. // No conversions will be changed: Mask->Vector is optimal. [Fact] - [method: MethodImpl(MethodImplOptions.NoInlining)] public static void UseMaskAsVector() + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + InnerUseMaskAsVector(); + } + + Thread.Sleep(100); + } + InnerUseMaskAsVector(); + } + + [method: MethodImpl(MethodImplOptions.NoInlining)] + private static void InnerUseMaskAsVector() { if (Sve.IsSupported) { @@ -53,8 +82,22 @@ public static void UseMaskAsVector() // Create a mask. Use it as a mask, then use as a vector. // Mask1 conversions will be switched. 
[Fact] - [method: MethodImpl(MethodImplOptions.NoInlining)] public static void UseMaskAsMaskAndVector() + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + InnerUseMaskAsMaskAndVector(); + } + + Thread.Sleep(100); + } + InnerUseMaskAsMaskAndVector(); + } + + [method: MethodImpl(MethodImplOptions.NoInlining)] + private static void InnerUseMaskAsMaskAndVector() { if (Sve.IsSupported) { @@ -67,11 +110,25 @@ public static void UseMaskAsMaskAndVector() } } - [Fact] // Create a mask. Use it as a mask, then use as a vector inside a loop. // No conversions will be changed: vector use inside the loop dominates. - [method: MethodImpl(MethodImplOptions.NoInlining)] + [Fact] public static void UseMaskAsMaskAndVectorInsideLoop() + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + InnerUseMaskAsMaskAndVectorInsideLoop(); + } + + Thread.Sleep(100); + } + InnerUseMaskAsMaskAndVectorInsideLoop(); + } + + [method: MethodImpl(MethodImplOptions.NoInlining)] + private static void InnerUseMaskAsMaskAndVectorInsideLoop() { if (Sve.IsSupported) { @@ -88,4 +145,38 @@ public static void UseMaskAsMaskAndVectorInsideLoop() } } + // Create a mask. Use it as a vector, then use as a mask inside a loop. + // No conversions will be changed: vector use inside the loop dominates. 
+ [Fact] + public static void UseMaskAsVectorAndMaskInsideLoop() + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + InnerUseMaskAsVectorAndMaskInsideLoop(); + } + + Thread.Sleep(100); + } + InnerUseMaskAsVectorAndMaskInsideLoop(); + } + + [method: MethodImpl(MethodImplOptions.NoInlining)] + private static void InnerUseMaskAsVectorAndMaskInsideLoop() + { + if (Sve.IsSupported) + { + Vector vec1 = Vector.Create(7); // Create lcl vector + Vector vec2 = Vector.Create(3); + Vector vec3 = Sve.Add(vec1, vec2); // Use as vector + + for (int i = 0; i < 100; i++) + { + Vector vec4 = Sve.Compact(vec1, vec3); // Use as mask + Consume(vec3, vec4); + } + } + } + } \ No newline at end of file diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj index ef626609d9fca3..f3063063f494a9 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj @@ -1,19 +1,16 @@ - - true - - - None True + None $(NoWarn);SYSLIB5003 + true + + true - - true - + - - + + \ No newline at end of file From b35729664e64e425a7367dd6dc57170447dbe324 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 6 Nov 2024 18:11:54 +0000 Subject: [PATCH 21/66] update comments --- src/coreclr/jit/lclmorph.cpp | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 429cc573579e60..4d2ebb3b568190 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2692,7 +2692,7 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, BasicBlock* const block, } //----------------------------------------------------------------------------- -// fgLCLMasksCheckLCLVar: For the given lcl var, update the var weights in +// fgLCLMasksCheckLCLVar: For the given lcl var, update the weights in // the table. 
// // Arguments: @@ -2743,7 +2743,7 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, //----------------------------------------------------------------------------- // fgLCLMasksUpdateLCLStore: For the given statement, if it is a local store, -// and mask conversions dominate in the weightings, then update to store as a mask. +// and the weighting recommends to switch, then update to store as a mask. // // Arguments: // stmt - The statement. @@ -2821,8 +2821,8 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we } //----------------------------------------------------------------------------- -// fgLCLMasksUpdateLCLVar: For the given lcl var, if mask conversions dominate in -// the weightings, then update to use as the source as a mask. +// fgLCLMasksUpdateLCLVar: For the given lcl var, if the weighting recommends to switch, +// then update to use the source as a mask. // // Arguments: // lclVar - The local variable. @@ -2887,20 +2887,17 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, // vector x = _ConvertMaskToVector_(CreateMask()); // x = Add(x, y); // -// To account for this, this pass uses a weighting. For each variable, count the count the -// number of definitions with a convert from mask minus the number of definitions without a -// convert. Then do the same for each use. To account for looping, each count is multiplied -// by the weight of it's basic basic. In addition, each count is multiplied by the number of -// instructions required for the conversion. If the totals for both definitions and uses are -// positive, then convert all definintions and uses. +// To account for this, the pass uses a weighting. For each variable, if it is a local store, +// then count the existing cost of every convert to/from mask. Also count the cost for +// switching the variable to store as mask (this may include adding additional conversions +// as well as removing).
For each counted instance, take into account the number of +// instructions in the conversion and the weight of the block. // // This weighting does not account for re-definition. A variable may first be created as a -// mask used as such, then much later in the method defined as a vector and used as such from +// mask used as such, then later in the method defined as a vector and used as such from // then on. This can be worked around at the user level by encouraging users not to reuse // variable names. // -// It is assumed that the simple weighting will be good enough for almost all use cases. -// // Returns: // Suitable phase status // From a52adb4735f20a737c14f52411476b580942122a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 08:19:38 +0000 Subject: [PATCH 22/66] additional comments --- src/coreclr/jit/lclmorph.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 4d2ebb3b568190..cd81d10f853a44 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2893,6 +2893,10 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, // as well as removing). For each counted instance, take into account the number of // instructions in the conversion and the weight of the block. // +// Local variables that are defined then immediately used just once may not be saved to a +// store. Here a convert to vector will be used by a convert to mask. These instances will +// be caught in the lowering phase. +// // This weighting does not account for re-definition. A variable may first be created as a // mask used as such, then later in the method defined as a vector and used as such from // then on.
This can be worked around at the user level by encouraging users not to reuse From 267971883416aae729074660213d8f8c007cf59c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 09:18:28 +0000 Subject: [PATCH 23/66] abort walking once found --- src/coreclr/jit/lclmorph.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index cd81d10f853a44..a8b410e18e82d0 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2528,6 +2528,7 @@ class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitorAsHWIntrinsic(); + return fgWalkResult::WALK_ABORT; } return fgWalkResult::WALK_CONTINUE; From 6f008de68976c4f4f3b95199a406d44968988216 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 09:24:44 +0000 Subject: [PATCH 24/66] add comment to LCLMasksCheckLCLVarVisitor --- src/coreclr/jit/lclmorph.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index a8b410e18e82d0..542df33e64a736 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2521,6 +2521,9 @@ class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitorOperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum) && From 08efa965f8c35b32711b8e3aca2a24ba421b487d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 09:33:26 +0000 Subject: [PATCH 25/66] LCLMasks -> LclMasks --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 14 +++---- src/coreclr/jit/lclmorph.cpp | 72 ++++++++++++++++++------------------ 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 25cea74249e708..32d1d960bcc3c2 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4799,7 +4799,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Optimize away conversions to/from masks 
in local variables. // - DoPhase(this, PHASE_LCL_MASKS, &Compiler::fgOptimizeLCLMasks); + DoPhase(this, PHASE_LCL_MASKS, &Compiler::fgOptimizeLclMasks); // Do an early pass of liveness for forward sub and morph. This data is // valid until after morph. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index dc3d808f983ef3..da4e5bbc8a8b2b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6918,11 +6918,11 @@ class Compiler bool fgExposeUnpropagatedLocals(bool propagatedAny, class LocalEqualsLocalAddrAssertions* assertions); void fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec); - PhaseStatus fgOptimizeLCLMasks(); + PhaseStatus fgOptimizeLclMasks(); #if defined(TARGET_ARM64) - struct LCLMasksWeight + struct LclMasksWeight { // For the given variable, the cost of storing as vector. weight_t currentCost = 0.0; @@ -6956,12 +6956,12 @@ class Compiler void CacheSimdTypes(GenTreeHWIntrinsic* op); }; - typedef JitHashTable, LCLMasksWeight> LCLMasksWeightTable; + typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; - bool fgLCLMasksCheckLCLStore(Statement* stmt, BasicBlock* const block, LCLMasksWeightTable *weightsTable); - void fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LCLMasksWeightTable *weightsTable); - bool fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable); - void fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LCLMasksWeightTable *weightsTable); + bool fgLclMasksCheckLCLStore(Statement* stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); + void fgLclMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); + bool fgLclMasksUpdateLCLStore(Statement* stmt, LclMasksWeightTable* weightsTable); + void fgLclMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable *weightsTable); #endif // 
TARGET_ARM64 PhaseStatus PhysicalPromotion(); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 542df33e64a736..f8e7d8c160d0d0 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2436,7 +2436,7 @@ void Compiler::fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec) // hasConvertFromMask - Is this the store of a convert from mask // blockWeight - Weight of the block the store is contained in // -void Compiler::LCLMasksWeight::UpdateStoreWeight(bool hasConvertFromMask, weight_t blockWeight) +void Compiler::LclMasksWeight::UpdateStoreWeight(bool hasConvertFromMask, weight_t blockWeight) { if (hasConvertFromMask) { @@ -2462,7 +2462,7 @@ void Compiler::LCLMasksWeight::UpdateStoreWeight(bool hasConvertFromMask, weight // hasConvertFromMask - Is this variable converted to a mask when used // blockWeight - Weight of the block the use is contained in // -void Compiler::LCLMasksWeight::UpdateVarWeight(bool hasConvertToMask, weight_t blockWeight) +void Compiler::LclMasksWeight::UpdateVarWeight(bool hasConvertToMask, weight_t blockWeight) { if (hasConvertToMask) { @@ -2487,7 +2487,7 @@ void Compiler::LCLMasksWeight::UpdateVarWeight(bool hasConvertToMask, weight_t b // Arguments: // op - The HW intrinsic to cache // -void Compiler::LCLMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) +void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) { CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); unsigned newSimdSize = op->GetSimdSize(); @@ -2501,9 +2501,9 @@ void Compiler::LCLMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) } //----------------------------------------------------------------------------- -// LCLMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask +// LclMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask // -class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitor +class LclMasksCheckLCLVarVisitor final : 
public GenTreeVisitor { public: enum @@ -2512,8 +2512,8 @@ class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitor(compiler) + LclMasksCheckLCLVarVisitor(Compiler* compiler, unsigned lclNum) + : GenTreeVisitor(compiler) , foundConversion(false) , lclNum(lclNum) { @@ -2545,9 +2545,9 @@ class LCLMasksCheckLCLVarVisitor final : public GenTreeVisitor +class LclMasksUpdateLCLVarVisitor final : public GenTreeVisitor { public: enum @@ -2556,9 +2556,9 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitor(compiler) + : GenTreeVisitor(compiler) , lclNum(lclNum) , stmt(stmt) , simdBaseJitType(simdBaseJitType) @@ -2648,7 +2648,7 @@ class LCLMasksUpdateLCLVarVisitor final : public GenTreeVisitorAsLclVar(); - LCLMasksWeight weight; + LclMasksWeight weight; bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); // Check if the store is converted from mask. @@ -2690,13 +2690,13 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, BasicBlock* const block, } // Update the table. - weightsTable->Set(lclStore->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); + weightsTable->Set(lclStore->GetLclNum(), weight, LclMasksWeightTable::Overwrite); return isConverted; } //----------------------------------------------------------------------------- -// fgLCLMasksCheckLCLVar: For the given lcl var, update the weights in +// fgLclMasksCheckLCLVar: For the given lcl var, update the weights in // the table. // // Arguments: @@ -2705,17 +2705,17 @@ bool Compiler::fgLCLMasksCheckLCLStore(Statement* stmt, BasicBlock* const block, // stmt - The block the local variable is contained in. // weightsTable - table to update. 
// -void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, +void Compiler::fgLclMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, - LCLMasksWeightTable* weightsTable) + LclMasksWeightTable* weightsTable) { if (!lclVar->OperIs(GT_LCL_VAR)) { return; } - LCLMasksWeight weight; + LclMasksWeight weight; bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); // If there no entry, then the var does not have a local store. @@ -2725,7 +2725,7 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, } // Find the parent of the lcl var - LCLMasksCheckLCLVarVisitor ev(this, lclVar->GetLclNum()); + LclMasksCheckLCLVarVisitor ev(this, lclVar->GetLclNum()); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); @@ -2742,11 +2742,11 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, } // Update the table. - weightsTable->Set(lclVar->GetLclNum(), weight, LCLMasksWeightTable::Overwrite); + weightsTable->Set(lclVar->GetLclNum(), weight, LclMasksWeightTable::Overwrite); } //----------------------------------------------------------------------------- -// fgLCLMasksUpdateLCLStore: For the given statement, if it is a local store, +// fgLclMasksUpdateLCLStore: For the given statement, if it is a local store, // and the weighting recommends to switch, then update to store as a mask. // // Arguments: @@ -2756,7 +2756,7 @@ void Compiler::fgLCLMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, // Returns: // True if a converted local store was found. 
// -bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* weightsTable) +bool Compiler::fgLclMasksUpdateLCLStore(Statement* stmt, LclMasksWeightTable* weightsTable) { // Look for: // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) @@ -2770,7 +2770,7 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we GenTreeLclVar* lclStore = tree->AsLclVar(); - LCLMasksWeight weight; + LclMasksWeight weight; bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); assert(found); @@ -2825,7 +2825,7 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we } //----------------------------------------------------------------------------- -// fgLCLMasksUpdateLCLVar: For the given lcl var, if the weighting recommends to switch, +// fgLclMasksUpdateLCLVar: For the given lcl var, if the weighting recommends to switch, // then update to use the source as a mask. // // Arguments: @@ -2833,16 +2833,16 @@ bool Compiler::fgLCLMasksUpdateLCLStore(Statement* stmt, LCLMasksWeightTable* we // stmt - The statement the local vairable is contained in. // weightsTable - table to update. // -void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, +void Compiler::fgLclMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, - LCLMasksWeightTable* weightsTable) + LclMasksWeightTable* weightsTable) { if (!lclVar->OperIs(GT_LCL_VAR)) { return; } - LCLMasksWeight weight; + LclMasksWeight weight; bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); // If there no entry, then the var does not have a local store. 
@@ -2862,7 +2862,7 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, weight.DumpTotalWeight(); // Remove or add a mask conversion/ - LCLMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt, weight.simdBaseJitType, weight.simdSize); + LclMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt, weight.simdBaseJitType, weight.simdSize); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); } @@ -2870,7 +2870,7 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, #endif // TARGET_ARM64 //------------------------------------------------------------------------ -// optLCLMasks: Allow locals to be of Mask type +// optLclMasks: Allow locals to be of Mask type // // At the C# level, Masks share the same type as a Vector. It's possible for the same // variable to be used as a mask or vector. Any APIs that return a mask must first convert @@ -2909,7 +2909,7 @@ void Compiler::fgLCLMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, // Returns: // Suitable phase status // -PhaseStatus Compiler::fgOptimizeLCLMasks() +PhaseStatus Compiler::fgOptimizeLclMasks() { #if defined(TARGET_ARM64) @@ -2925,7 +2925,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() return PhaseStatus::MODIFIED_NOTHING; } - LCLMasksWeightTable weightsTable = LCLMasksWeightTable(getAllocator()); + LclMasksWeightTable weightsTable = LclMasksWeightTable(getAllocator()); // Find every local store and add them to masksTable. 
bool foundConvertingStore = false; @@ -2934,7 +2934,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() { for (Statement* const stmt : block->Statements()) { - foundConvertingStore |= fgLCLMasksCheckLCLStore(stmt, block, &weightsTable); + foundConvertingStore |= fgLclMasksCheckLCLStore(stmt, block, &weightsTable); } } @@ -2952,7 +2952,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgLCLMasksCheckLCLVar(lcl, stmt, block, &weightsTable); + fgLclMasksCheckLCLVar(lcl, stmt, block, &weightsTable); } } } @@ -2964,7 +2964,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() { for (Statement* const stmt : block->Statements()) { - madeChanges |= fgLCLMasksUpdateLCLStore(stmt, &weightsTable); + madeChanges |= fgLclMasksUpdateLCLStore(stmt, &weightsTable); } } @@ -2982,7 +2982,7 @@ PhaseStatus Compiler::fgOptimizeLCLMasks() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgLCLMasksUpdateLCLVar(lcl, stmt, &weightsTable); + fgLclMasksUpdateLCLVar(lcl, stmt, &weightsTable); } } } From 6c27e4061483bfd267559b03bd10958169ec64f5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 09:40:11 +0000 Subject: [PATCH 26/66] LCLStore -> LclStore --- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/lclmorph.cpp | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index da4e5bbc8a8b2b..d89ad2968e630d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6958,9 +6958,9 @@ class Compiler typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; - bool fgLclMasksCheckLCLStore(Statement* stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); + bool fgLclMasksCheckLclStore(Statement* stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); void fgLclMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); - bool 
fgLclMasksUpdateLCLStore(Statement* stmt, LclMasksWeightTable* weightsTable); + bool fgLclMasksUpdateLclStore(Statement* stmt, LclMasksWeightTable* weightsTable); void fgLclMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable *weightsTable); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index f8e7d8c160d0d0..438ddce48550d8 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2648,7 +2648,7 @@ class LclMasksUpdateLCLVarVisitor final : public GenTreeVisitorStatements()) { - foundConvertingStore |= fgLclMasksCheckLCLStore(stmt, block, &weightsTable); + foundConvertingStore |= fgLclMasksCheckLclStore(stmt, block, &weightsTable); } } @@ -2964,7 +2964,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (Statement* const stmt : block->Statements()) { - madeChanges |= fgLclMasksUpdateLCLStore(stmt, &weightsTable); + madeChanges |= fgLclMasksUpdateLclStore(stmt, &weightsTable); } } From f37a32188b6e11576fd92ffc83970364d50fbc5c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 09:41:16 +0000 Subject: [PATCH 27/66] LCLVar -> LclVar --- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/lclmorph.cpp | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d89ad2968e630d..ceabf245dae751 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6959,9 +6959,9 @@ class Compiler typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; bool fgLclMasksCheckLclStore(Statement* stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); - void fgLclMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); + void fgLclMasksCheckLclVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); bool 
fgLclMasksUpdateLclStore(Statement* stmt, LclMasksWeightTable* weightsTable); - void fgLclMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable *weightsTable); + void fgLclMasksUpdateLclVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable *weightsTable); #endif // TARGET_ARM64 PhaseStatus PhysicalPromotion(); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 438ddce48550d8..b5e79c258c01f8 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2501,9 +2501,9 @@ void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) } //----------------------------------------------------------------------------- -// LclMasksCheckLCLVarVisitor: Find the user of a lcl var and check if it is a convert to mask +// LclMasksCheckLclVarVisitor: Find the user of a lcl var and check if it is a convert to mask // -class LclMasksCheckLCLVarVisitor final : public GenTreeVisitor +class LclMasksCheckLclVarVisitor final : public GenTreeVisitor { public: enum @@ -2512,8 +2512,8 @@ class LclMasksCheckLCLVarVisitor final : public GenTreeVisitor(compiler) + LclMasksCheckLclVarVisitor(Compiler* compiler, unsigned lclNum) + : GenTreeVisitor(compiler) , foundConversion(false) , lclNum(lclNum) { @@ -2545,9 +2545,9 @@ class LclMasksCheckLCLVarVisitor final : public GenTreeVisitor +class LclMasksUpdateLclVarVisitor final : public GenTreeVisitor { public: enum @@ -2556,9 +2556,9 @@ class LclMasksUpdateLCLVarVisitor final : public GenTreeVisitor(compiler) + : GenTreeVisitor(compiler) , lclNum(lclNum) , stmt(stmt) , simdBaseJitType(simdBaseJitType) @@ -2696,7 +2696,7 @@ bool Compiler::fgLclMasksCheckLclStore(Statement* stmt, BasicBlock* const block, } //----------------------------------------------------------------------------- -// fgLclMasksCheckLCLVar: For the given lcl var, update the weights in +// fgLclMasksCheckLclVar: For the given lcl var, update the weights in // the 
table. // // Arguments: @@ -2705,7 +2705,7 @@ bool Compiler::fgLclMasksCheckLclStore(Statement* stmt, BasicBlock* const block, // stmt - The block the local variable is contained in. // weightsTable - table to update. // -void Compiler::fgLclMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, +void Compiler::fgLclMasksCheckLclVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable* weightsTable) @@ -2725,7 +2725,7 @@ void Compiler::fgLclMasksCheckLCLVar(GenTreeLclVarCommon* lclVar, } // Find the parent of the lcl var - LclMasksCheckLCLVarVisitor ev(this, lclVar->GetLclNum()); + LclMasksCheckLclVarVisitor ev(this, lclVar->GetLclNum()); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); @@ -2825,7 +2825,7 @@ bool Compiler::fgLclMasksUpdateLclStore(Statement* stmt, LclMasksWeightTable* we } //----------------------------------------------------------------------------- -// fgLclMasksUpdateLCLVar: For the given lcl var, if the weighting recommends to switch, +// fgLclMasksUpdateLclVar: For the given lcl var, if the weighting recommends to switch, // then update to use the source as a mask. // // Arguments: @@ -2833,7 +2833,7 @@ bool Compiler::fgLclMasksUpdateLclStore(Statement* stmt, LclMasksWeightTable* we // stmt - The statement the local vairable is contained in. // weightsTable - table to update. 
// -void Compiler::fgLclMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, +void Compiler::fgLclMasksUpdateLclVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable* weightsTable) { @@ -2862,7 +2862,7 @@ void Compiler::fgLclMasksUpdateLCLVar(GenTreeLclVarCommon* lclVar, weight.DumpTotalWeight(); // Remove or add a mask conversion/ - LclMasksUpdateLCLVarVisitor ev(this, lclVar->GetLclNum(), stmt, weight.simdBaseJitType, weight.simdSize); + LclMasksUpdateLclVarVisitor ev(this, lclVar->GetLclNum(), stmt, weight.simdBaseJitType, weight.simdSize); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); } @@ -2952,7 +2952,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgLclMasksCheckLCLVar(lcl, stmt, block, &weightsTable); + fgLclMasksCheckLclVar(lcl, stmt, block, &weightsTable); } } } @@ -2982,7 +2982,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgLclMasksUpdateLCLVar(lcl, stmt, &weightsTable); + fgLclMasksUpdateLclVar(lcl, stmt, &weightsTable); } } } From dcf37f9006a75943f72113fa22e9549198cf5f9a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 10:03:03 +0000 Subject: [PATCH 28/66] UpdateVarWeight -> UpdateUseWeight --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/lclmorph.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ceabf245dae751..76d3ced1260cfa 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6941,7 +6941,7 @@ class Compiler unsigned simdSize = 0; void UpdateStoreWeight(bool hasConvertFromMask, weight_t blockWeight); - void UpdateVarWeight(bool hasConvertToMask, weight_t blockWeight); + void UpdateUseWeight(bool hasConvertToMask, weight_t blockWeight); bool ShouldSwitch() { diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 
b5e79c258c01f8..e6c214676de62f 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2456,13 +2456,13 @@ void Compiler::LclMasksWeight::UpdateStoreWeight(bool hasConvertFromMask, weight } //----------------------------------------------------------------------------- -// UpdateVarWeight: Updates the weighting to take account of a local variable use. +// UpdateUseWeight: Updates the weighting to take account of a local variable use. // // Arguments: // hasConvertFromMask - Is this variable converted to a mask when used // blockWeight - Weight of the block the use is contained in // -void Compiler::LclMasksWeight::UpdateVarWeight(bool hasConvertToMask, weight_t blockWeight) +void Compiler::LclMasksWeight::UpdateUseWeight(bool hasConvertToMask, weight_t blockWeight) { if (hasConvertToMask) { @@ -2732,7 +2732,7 @@ void Compiler::fgLclMasksCheckLclVar(GenTreeLclVarCommon* lclVar, // Update the weights. JITDUMP("Local Var V%02d at [%06u] has %s conversion. ", lclVar->GetLclNum(), dspTreeID(lclVar), ev.foundConversion ? 
"mask" : "no"); - weight.UpdateVarWeight(ev.foundConversion, block->getBBWeight(this)); + weight.UpdateUseWeight(ev.foundConversion, block->getBBWeight(this)); if (ev.foundConversion) { From 98d3a7c4742f0663999414ca60724b3f9bdb7689 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 12:05:30 +0000 Subject: [PATCH 29/66] combine checks --- src/coreclr/jit/compiler.h | 9 +- src/coreclr/jit/lclmorph.cpp | 568 ++++++++---------- .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 2 +- 3 files changed, 238 insertions(+), 341 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 76d3ced1260cfa..4876c943ec85e8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6940,8 +6940,7 @@ class Compiler CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; unsigned simdSize = 0; - void UpdateStoreWeight(bool hasConvertFromMask, weight_t blockWeight); - void UpdateUseWeight(bool hasConvertToMask, weight_t blockWeight); + void UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight); bool ShouldSwitch() { @@ -6958,10 +6957,8 @@ class Compiler typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; - bool fgLclMasksCheckLclStore(Statement* stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); - void fgLclMasksCheckLclVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); - bool fgLclMasksUpdateLclStore(Statement* stmt, LclMasksWeightTable* weightsTable); - void fgLclMasksUpdateLclVar(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable *weightsTable); + bool fgLclMasksCheckLcl(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); + void fgLclMasksUpdateLcl(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable *weightsTable); #endif // TARGET_ARM64 PhaseStatus PhysicalPromotion(); diff --git a/src/coreclr/jit/lclmorph.cpp 
b/src/coreclr/jit/lclmorph.cpp index e6c214676de62f..f92fdd97167b39 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2430,53 +2430,32 @@ void Compiler::fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec) #if defined(TARGET_ARM64) //----------------------------------------------------------------------------- -// UpdateStoreWeight: Updates the weighting to take account of a local store. +// UpdateWeight: Updates the weighting to take account of a local. // // Arguments: -// hasConvertFromMask - Is this the store of a convert from mask +// isStore - Is this a lcl store +// hasConvert - Is this local converted // blockWeight - Weight of the block the store is contained in // -void Compiler::LclMasksWeight::UpdateStoreWeight(bool hasConvertFromMask, weight_t blockWeight) +void Compiler::LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight) { - if (hasConvertFromMask) + if (hasConvert) { - // Count the cost of the existing convert mask to vector. - weight_t incVal = blockWeight * costOfConvertMaskToVector; - JITDUMP("Incrementing currentCost by %f. ", incVal); - currentCost += incVal; - } - else - { - // Switching would require adding a convert vector to mask. - weight_t incVal = blockWeight * costOfConvertVectorToMask; - JITDUMP("Incrementing switchCost by %f. ", incVal); - switchCost += incVal; - } - DumpTotalWeight(); -} + // Count the cost of the existing convert. + weight_t cost = isStore ? costOfConvertMaskToVector : costOfConvertVectorToMask; + cost *= blockWeight; -//----------------------------------------------------------------------------- -// UpdateUseWeight: Updates the weighting to take account of a local variable use. 
-// -// Arguments: -// hasConvertFromMask - Is this variable converted to a mask when used -// blockWeight - Weight of the block the use is contained in -// -void Compiler::LclMasksWeight::UpdateUseWeight(bool hasConvertToMask, weight_t blockWeight) -{ - if (hasConvertToMask) - { - // Count the cost of the existing convert vector to mask. - weight_t incVal = blockWeight * costOfConvertVectorToMask; - JITDUMP("Incrementing currentCost by %f. ", incVal); - currentCost += incVal; + JITDUMP("Incrementing currentCost by %f. ", cost); + currentCost += cost; } else { - // Switching would require adding a convert mask to vector. - weight_t incVal = blockWeight * costOfConvertMaskToVector; - JITDUMP("Incrementing switchCost by %f. ", incVal); - switchCost += incVal; + // Switching would require adding a convert. + weight_t cost = isStore ? costOfConvertVectorToMask : costOfConvertMaskToVector; + cost *= blockWeight; + + JITDUMP("Incrementing switchCost by %f. ", cost); + switchCost += cost; } DumpTotalWeight(); } @@ -2501,9 +2480,9 @@ void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) } //----------------------------------------------------------------------------- -// LclMasksCheckLclVarVisitor: Find the user of a lcl var and check if it is a convert to mask +// LclMasksCheckLclVisitor: Find the user of a lcl var and check if it is a convert to mask // -class LclMasksCheckLclVarVisitor final : public GenTreeVisitor +class LclMasksCheckLclVisitor final : public GenTreeVisitor { public: enum @@ -2512,359 +2491,307 @@ class LclMasksCheckLclVarVisitor final : public GenTreeVisitor(compiler) - , foundConversion(false) - , lclNum(lclNum) + LclMasksCheckLclVisitor(Compiler* compiler, GenTreeLclVarCommon* lclOp) + : GenTreeVisitor(compiler) + , lclOp(lclOp) { } Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - // Look for: - // ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) - - GenTree* const lclOp = *use; - - if 
(lclOp->OperIs(GT_LCL_VAR) && (lclOp->AsLclVarCommon()->GetLclNum() == lclNum) && - user->OperIsConvertVectorToMask()) + if ((*use) == lclOp) { - foundConversion = true; - convertOp = user->AsHWIntrinsic(); - return fgWalkResult::WALK_ABORT; - } - - return fgWalkResult::WALK_CONTINUE; - } - - bool foundConversion; - GenTreeHWIntrinsic* convertOp = nullptr; - -private: - unsigned lclNum; -}; - -//----------------------------------------------------------------------------- -// LclMasksUpdateLclVarVisitor: tree visitor to remove conversion to masks for uses of LCL -// -class LclMasksUpdateLclVarVisitor final : public GenTreeVisitor -{ -public: - enum - { - DoPostOrder = true, - UseExecutionOrder = true - }; - - LclMasksUpdateLclVarVisitor( - Compiler* compiler, unsigned lclNum, Statement* stmt, CorInfoType simdBaseJitType, unsigned simdSize) - : GenTreeVisitor(compiler) - , lclNum(lclNum) - , stmt(stmt) - , simdBaseJitType(simdBaseJitType) - , simdSize(simdSize) - { - } - - Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) - { - // Look for: - // user(ConvertVectorToMask(CreateTrueMaskAll, LCL_VAR(lclNum))) - if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2)->OperIs(GT_LCL_VAR) && - ((*use)->AsHWIntrinsic()->Op(2)->AsLclVar()->GetLclNum() == lclNum)) - { - GenTree* const convertOp = *use; - GenTreeLclVar* lclOp = (*use)->AsHWIntrinsic()->Op(2)->AsLclVar(); - - assert(lclOp->gtType != TYP_MASK); - - // Find the location of convertOp in the user - int opNum = 1; - for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) + switch (lclOp->OperGet()) { - if (user->AsHWIntrinsic()->Op(opNum) == convertOp) - { - break; - } - } - assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - - // Fix up the type of the lcl - lclOp->gtType = convertOp->gtType; - - // Remove the convert convertOp - convertOp->gtBashToNOP(); - *use = lclOp; - m_compiler->fgSequenceLocals(stmt); + case GT_STORE_LCL_VAR: + // Look for: + // 
STORE_LCL_VAR(ConvertMaskToVector(x)) -#ifdef DEBUG - JITDUMP("Updated V%02d to be a mask (Removed conversion)\n", lclOp->GetLclNum()); - if (m_compiler->verbose) - { - m_compiler->gtDispTree(*use); - } -#endif - - return fgWalkResult::WALK_ABORT; - } - - // Look for: - // user(LCL_VAR(lclNum))) - else if ((*use)->OperIs(GT_LCL_VAR) && ((*use)->gtType != TYP_MASK) && - ((*use)->AsLclVarCommon()->GetLclNum() == lclNum) && !user->OperIsConvertVectorToMask()) - { - GenTreeLclVar* lclOp = (*use)->AsLclVar(); + if (lclOp->Data()->OperIsConvertMaskToVector()) + { + convertOp = lclOp->Data()->AsHWIntrinsic(); + } + break; - // Fix up the type of the lcl - var_types vectorType = lclOp->gtType; - lclOp->gtType = TYP_MASK; + case GT_LCL_VAR: + // Look for: + // ConvertVectorToMask(LCL_VAR(x))) - // Create a convert to mask node and insert it infront of the lcl. - // There is not enough information in the lcl to get simd types. Instead we reuse the cached simd - // types from the removed convert nodes. - assert(simdBaseJitType != CORINFO_TYPE_UNDEF); - *use = m_compiler->gtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); + if (user->OperIsConvertVectorToMask()) + { + convertOp = user->AsHWIntrinsic(); + } + break; -#ifdef DEBUG - JITDUMP("Updated V%02d to be a mask (Added conversion)\n", lclOp->GetLclNum()); - if (m_compiler->verbose) - { - m_compiler->gtDispTree(*use); + default: + break; } -#endif - return fgWalkResult::WALK_ABORT; } - return fgWalkResult::WALK_CONTINUE; } + GenTreeHWIntrinsic* convertOp = nullptr; private: - unsigned lclNum; - Statement* stmt; - CorInfoType simdBaseJitType; - unsigned simdSize; + GenTreeLclVarCommon* lclOp; }; //----------------------------------------------------------------------------- -// fgLclMasksCheckLclStore: For the given statement, if it is a local store, -// then update the store weights in the table. +// fgLclMasksCheckLcl: For the given local (store or use), update the weights in the table.
// // Arguments: -// stmt - The statement. +// lclOp - The local variable. +// stmt - The statement the local variable is contained in. +// block - The block the local variable is contained in. // weightsTable - table to update. // // Returns: // True if a converted local store was found. // -bool Compiler::fgLclMasksCheckLclStore(Statement* stmt, BasicBlock* const block, LclMasksWeightTable* weightsTable) +bool Compiler::fgLclMasksCheckLcl(GenTreeLclVarCommon* lclOp, + Statement* const stmt, + BasicBlock* const block, + LclMasksWeightTable* weightsTable) { - // Look for: - // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) - - GenTree* tree = stmt->GetRootNode(); - - if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) + // Only these can have conversions. + if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) { return false; } - GenTreeLclVar* lclStore = tree->AsLclVar(); + bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); + // Get the existing weighting (if any). LclMasksWeight weight; - bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); + weightsTable->Lookup(lclOp->GetLclNum(), &weight); - // Check if the store is converted from mask. - bool isConverted = lclStore->Data()->OperIsConvertMaskToVector(); + // Find the parent of the lcl var. + LclMasksCheckLclVisitor ev(this, lclOp); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + bool foundConversion = (ev.convertOp != nullptr); // Update the weights. - JITDUMP("Local Store V%02d at [%06u] has %s conversion. ", lclStore->GetLclNum(), dspTreeID(lclStore), - isConverted ? "mask" : "no"); - weight.UpdateStoreWeight(isConverted, block->getBBWeight(this)); + JITDUMP("Local %s V%02d at [%06u] has %s conversion. ", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp), foundConversion ?
"mask" : "no"); + weight.UpdateWeight(isStore, foundConversion, block->getBBWeight(this)); - if (isConverted) + // Cache the simd type data of the conversion. + if (foundConversion) { - // Cache the simd type data of the convert. - weight.CacheSimdTypes(lclStore->Data()->AsHWIntrinsic()); + assert(ev.convertOp != nullptr); + weight.CacheSimdTypes(ev.convertOp); } // Update the table. - weightsTable->Set(lclStore->GetLclNum(), weight, LclMasksWeightTable::Overwrite); + weightsTable->Set(lclOp->GetLclNum(), weight, LclMasksWeightTable::Overwrite); - return isConverted; + return foundConversion; } //----------------------------------------------------------------------------- -// fgLclMasksCheckLclVar: For the given lcl var, update the weights in -// the table. +// LclMasksUpdateLclVisitor: tree visitor to remove conversion to masks for uses of LCL // -// Arguments: -// lclVar - The local variable. -// stmt - The statement the local variable is contained in. -// stmt - The block the local variable is contained in. -// weightsTable - table to update. -// -void Compiler::fgLclMasksCheckLclVar(GenTreeLclVarCommon* lclVar, - Statement* const stmt, - BasicBlock* const block, - LclMasksWeightTable* weightsTable) +class LclMasksUpdateLclVisitor final : public GenTreeVisitor { - if (!lclVar->OperIs(GT_LCL_VAR)) +public: + enum { - return; - } - - LclMasksWeight weight; - bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); + DoPostOrder = true, + UseExecutionOrder = true + }; - // If there no entry, then the var does not have a local store. 
- if (!found) + LclMasksUpdateLclVisitor( + Compiler* compiler, GenTreeLclVarCommon* lclOp, Statement* stmt, CorInfoType simdBaseJitType, unsigned simdSize) + : GenTreeVisitor(compiler) + , lclOp(lclOp) + , stmt(stmt) + , simdBaseJitType(simdBaseJitType) + , simdSize(simdSize) { - return; } - // Find the parent of the lcl var - LclMasksCheckLclVarVisitor ev(this, lclVar->GetLclNum()); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - - // Update the weights. - JITDUMP("Local Var V%02d at [%06u] has %s conversion. ", lclVar->GetLclNum(), dspTreeID(lclVar), - ev.foundConversion ? "mask" : "no"); - weight.UpdateUseWeight(ev.foundConversion, block->getBBWeight(this)); - - if (ev.foundConversion) + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - // Cache the simd type data of the convert. - assert(ev.convertOp != nullptr); - weight.CacheSimdTypes(ev.convertOp); - } - - // Update the table. - weightsTable->Set(lclVar->GetLclNum(), weight, LclMasksWeightTable::Overwrite); -} - -//----------------------------------------------------------------------------- -// fgLclMasksUpdateLclStore: For the given statement, if it is a local store, -// and the weighting recommends to switch, then update to store as a mask. -// -// Arguments: -// stmt - The statement. -// weightsTable - table to update. -// -// Returns: -// True if a converted local store was found. -// -bool Compiler::fgLclMasksUpdateLclStore(Statement* stmt, LclMasksWeightTable* weightsTable) -{ - // Look for: - // STORELCL(TYP_SIMD, ConvertMaskToVector(mask)) - - GenTree* tree = stmt->GetRootNode(); + switch (lclOp->OperGet()) + { + case GT_STORE_LCL_VAR: + if ((*use) == lclOp) + { + // Either Convert + // use:STORE_LCL_VAR(ConvertMaskToVector(x)) + // to + // use:STORE_LCL_VAR(x) + // + // Or, convert + // use:STORE_LCL_VAR(x) + // to + // use:STORE_LCL_VAR(ConvertVectorToMask(x)) + + // Update the type of the STORELCL - including the lclvar. 
+ assert(lclOp->gtType != TYP_MASK); + lclOp->gtType = TYP_MASK; + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); + assert(varDsc->lvType != TYP_MASK); + varDsc->lvType = TYP_MASK; + + if (lclOp->Data()->OperIsConvertMaskToVector()) + { + // Remove the ConvertMaskToVector - if ((tree->OperGet() != GT_STORE_LCL_VAR) || (!varTypeIsSIMD(tree->gtType))) - { - return false; - } + GenTreeHWIntrinsic* convertOp = lclOp->Data()->AsHWIntrinsic(); + GenTree* maskOp = convertOp->Op(1); - GenTreeLclVar* lclStore = tree->AsLclVar(); + convertOp->gtBashToNOP(); + lclOp->gtOp1 = maskOp; + m_compiler->fgSequenceLocals(stmt); + } + else + { + // Convert the input of the store to a mask. + // There is not enough information in the lcl to get simd types. Instead we reuse the cached + // simd types from the removed convert nodes. + assert(simdBaseJitType != CORINFO_TYPE_UNDEF); + GenTree* convertOp = m_compiler->gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclOp->Data(), + simdBaseJitType, simdSize); + lclOp->Data() = convertOp; + + addedConversion = true; + } - LclMasksWeight weight; - bool found = weightsTable->Lookup(lclStore->GetLclNum(), &weight); - assert(found); + found = true; + } + break; - if (!weight.ShouldSwitch()) - { - JITDUMP("Local Store V%02d at [%06u] will not be converted. ", lclStore->GetLclNum(), dspTreeID(lclStore)); - weight.DumpTotalWeight(); - return false; - } + case GT_LCL_VAR: + if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2) == lclOp) + { + // Convert + // user(use:ConvertVectorToMask(LCL_VAR(x))) + // to + // user(use:LCL_VAR(x)) - JITDUMP("Local Store V%02d at [%06u] will be converted. ", lclStore->GetLclNum(), dspTreeID(lclStore)); - weight.DumpTotalWeight(); + GenTree* const convertOp = *use; - // Update the type of the STORELCL - including the lclvar. 
- lclStore->gtType = TYP_MASK; - LclVarDsc* varDsc = lvaGetDesc(lclStore->GetLclNum()); - varDsc->lvType = TYP_MASK; + // Find the location of convertOp in the user + int opNum = 1; + for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) + { + if (user->AsHWIntrinsic()->Op(opNum) == convertOp) + { + break; + } + } + assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - if (lclStore->Data()->OperIsConvertMaskToVector()) - { - // Remove the ConvertMaskToVector + // Fix up the type of the lcl + assert(lclOp->gtType != TYP_MASK); + lclOp->gtType = convertOp->gtType; - GenTreeHWIntrinsic* convertOp = lclStore->Data()->AsHWIntrinsic(); - GenTree* maskOp = convertOp->Op(1); + // Remove the convert convertOp + convertOp->gtBashToNOP(); + *use = lclOp; + m_compiler->fgSequenceLocals(stmt); - // Remove the convert from the tree. - convertOp->gtBashToNOP(); - lclStore->gtOp1 = maskOp; - fgSequenceLocals(stmt); + found = true; + } + else if (((*use) == lclOp) && (!user->OperIsConvertVectorToMask())) + { + // Convert + // user(use:LCL_VAR(x)) + // to + // user(ConvertMaskToVector(use:LCL_VAR(x))) + + GenTreeLclVar* lclOp = (*use)->AsLclVar(); + + // Fix up the type of the lcl + assert(lclOp->gtType != TYP_MASK); + var_types vectorType = lclOp->gtType; + lclOp->gtType = TYP_MASK; + + // Create a convert mask to vector node and insert it in front of the lcl. + // There is not enough information in the lcl to get simd types. Instead we reuse the cached simd + // types from the removed convert nodes. + assert(simdBaseJitType != CORINFO_TYPE_UNDEF); + *use = m_compiler->gtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); + + addedConversion = true; + found = true; + } + break; - JITDUMP("Updated V%02d to store as mask (Removed conversion)\n", lclStore->GetLclNum()); - } - else - { - // Convert the input of the store to a mask.
- assert(weight.simdBaseJitType != CORINFO_TYPE_UNDEF); - GenTree* convertOp = - gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclStore->Data(), weight.simdBaseJitType, weight.simdSize); - lclStore->Data() = convertOp; + default: + break; + } - JITDUMP("Updated V%02d to store as mask (Added conversion)\n", lclStore->GetLclNum()); + return found ? fgWalkResult::WALK_ABORT : fgWalkResult::WALK_CONTINUE; } -#ifdef DEBUG - if (verbose) - { - gtDispTree(lclStore); - } -#endif +public: + bool addedConversion = false; + bool found = false; - return true; -} +private: + GenTreeLclVarCommon* lclOp; + Statement* stmt; + CorInfoType simdBaseJitType; + unsigned simdSize; +}; //----------------------------------------------------------------------------- -// fgLclMasksUpdateLclVar: For the given lcl var, if the weighting recommends to switch, -// then update to use the source as a mask. +// fgLclMasksUpdateLcl: For the given lcl, if the weighting recommends to switch, then update. // // Arguments: -// lclVar - The local variable. -// stmt - The statement the local vairable is contained in. +// lclOp - The local variable. +// stmt - The statement the local variable is contained in. // weightsTable - table to update. // -void Compiler::fgLclMasksUpdateLclVar(GenTreeLclVarCommon* lclVar, - Statement* const stmt, - LclMasksWeightTable* weightsTable) +void Compiler::fgLclMasksUpdateLcl(GenTreeLclVarCommon* lclOp, Statement* const stmt, LclMasksWeightTable* weightsTable) { - if (!lclVar->OperIs(GT_LCL_VAR)) + + // Only these can have conversions. + if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) { return; } - LclMasksWeight weight; - bool found = weightsTable->Lookup(lclVar->GetLclNum(), &weight); + bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); - // If there no entry, then the var does not have a local store. - if (!found) - { - return; - } + // Get the existing weighting (if any). 
+ LclMasksWeight weight; + bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); + assert(found); if (!weight.ShouldSwitch()) { - JITDUMP("Local Var V%02d at [%06u] will not be converted. ", lclVar->GetLclNum(), dspTreeID(lclVar)); + JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp)); weight.DumpTotalWeight(); return; } - JITDUMP("Local Var V%02d at [%06u] will be converted. ", lclVar->GetLclNum(), dspTreeID(lclVar)); + JITDUMP("Local %s V%02d at [%06u] will be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp)); weight.DumpTotalWeight(); - // Remove or add a mask conversion/ - LclMasksUpdateLclVarVisitor ev(this, lclVar->GetLclNum(), stmt, weight.simdBaseJitType, weight.simdSize); - GenTree* root = stmt->GetRootNode(); + // Remove or add a mask conversion. + LclMasksUpdateLclVisitor ev(this, lclOp, stmt, weight.simdBaseJitType, weight.simdSize); + GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); + + if (ev.found) + { + JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp), ev.addedConversion ? "added" : "removed"); + +#ifdef DEBUG + if (verbose) + { + gtDispTree(lclOp); + } +#endif + } } #endif // TARGET_ARM64 @@ -2927,24 +2854,8 @@ PhaseStatus Compiler::fgOptimizeLclMasks() LclMasksWeightTable weightsTable = LclMasksWeightTable(getAllocator()); - // Find every local store and add them to masksTable. - bool foundConvertingStore = false; - JITDUMP("\n"); - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - foundConvertingStore |= fgLclMasksCheckLclStore(stmt, block, &weightsTable); - } - } - - if (!foundConvertingStore) - { - JITDUMP("Done. No local stores of masks found\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - // Find the uses of every local and add them to masksTable. 
+ // Find every local and add them to weightsTable. + bool foundConversion = false; JITDUMP("\n"); for (BasicBlock* block : Blocks()) { @@ -2952,29 +2863,18 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgLclMasksCheckLclVar(lcl, stmt, block, &weightsTable); + foundConversion |= fgLclMasksCheckLcl(lcl, stmt, block, &weightsTable); } } } - // For each local store, potentially add/remove a conversion. - bool madeChanges = false; - JITDUMP("\n"); - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - madeChanges |= fgLclMasksUpdateLclStore(stmt, &weightsTable); - } - } - - if (!madeChanges) + if (!foundConversion) { - JITDUMP("Done. No local stores converted\n"); + JITDUMP("Done. No conversions of locals found.\n"); return PhaseStatus::MODIFIED_NOTHING; } - // For each Local variable, potentially add/remove a conversion. + // For each Local, potentially add/remove a conversion. JITDUMP("\n"); for (BasicBlock* block : Blocks()) { @@ -2982,7 +2882,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - fgLclMasksUpdateLclVar(lcl, stmt, &weightsTable); + fgLclMasksUpdateLcl(lcl, stmt, &weightsTable); } } } diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index 7d01aa71c2a36d..80cf42b9dce6ef 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -146,7 +146,7 @@ private static void InnerUseMaskAsMaskAndVectorInsideLoop() } // Create a mask. Use it as a vector, then use as a mask inside a loop. - // No conversions will be changed: vector use inside the loop dominates. + // Will be converted: mask use inside the loop dominates. 
[Fact] public static void UseMaskAsVectorAndMaskInsideLoop() { From 7242cee0b2589e18bebd4877b1fdfa212043c20b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 15:16:00 +0000 Subject: [PATCH 30/66] Catch conversions of both types --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fginline.cpp | 2 +- src/coreclr/jit/gentree.cpp | 3 ++- src/coreclr/jit/lclmorph.cpp | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4876c943ec85e8..274bab673f6f54 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10002,7 +10002,7 @@ class Compiler bool compSwitchedToOptimized = false; // Codegen initially was Tier0 but jit switched to FullOpts bool compSwitchedToMinOpts = false; // Codegen initially was Tier1/FullOpts but jit switched to MinOpts bool compSuppressedZeroInit = false; // There are vars with lvSuppressedZeroInit set - bool compConvertMaskToVectorUsed = false; // Does the method have Convert Mask To Vector nodes. + bool compMaskConvertUsed = false; // Does the method have Convert Mask To Vector nodes. // NOTE: These values are only reliable after // the importing is completely finished. 
diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 322001cd2829fc..7a8711d717e816 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1627,7 +1627,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) compQmarkUsed |= InlineeCompiler->compQmarkUsed; compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout; compHasBackwardJump |= InlineeCompiler->compHasBackwardJump; - compConvertMaskToVectorUsed |= InlineeCompiler->compConvertMaskToVectorUsed; + compMaskConvertUsed |= InlineeCompiler->compMaskConvertUsed; lvaGenericsContextInUse |= InlineeCompiler->lvaGenericsContextInUse; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d312fcd2b05efd..2b1287403eedf1 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21419,7 +21419,7 @@ GenTree* Compiler::gtNewSimdCvtMaskToVectorNode(var_types type, { assert(varTypeIsMask(op1)); assert(varTypeIsSIMD(type)); - compConvertMaskToVectorUsed = true; + compMaskConvertUsed = true; #if defined(TARGET_XARCH) return gtNewSimdHWIntrinsicNode(type, op1, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); @@ -21812,6 +21812,7 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, { assert(varTypeIsMask(type)); assert(varTypeIsSIMD(op1)); + compMaskConvertUsed = true; #if defined(TARGET_XARCH) return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_EVEX_ConvertVectorToMask, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index f92fdd97167b39..363381c9957d59 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2846,9 +2846,9 @@ PhaseStatus Compiler::fgOptimizeLclMasks() return PhaseStatus::MODIFIED_NOTHING; } - if (!compConvertMaskToVectorUsed) + if (!compMaskConvertUsed) { - JITDUMP("Skipping. There are no Convert Mask To Vector nodes\n"); + JITDUMP("Skipping. 
There are no converts of locals \n"); return PhaseStatus::MODIFIED_NOTHING; } From d6df5e33862410e6b1ca9c32178dd04f0c30c9e5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 15:20:35 +0000 Subject: [PATCH 31/66] better float printing --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/lclmorph.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 274bab673f6f54..eceb9781b7ec60 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6949,7 +6949,7 @@ class Compiler void DumpTotalWeight() { - JITDUMP("Weighting: {%fc %fs}\n", currentCost, switchCost); + JITDUMP("Weighting: {%.2fc %.2fs}\n", currentCost, switchCost); } void CacheSimdTypes(GenTreeHWIntrinsic* op); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 363381c9957d59..495119f50e169d 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2445,7 +2445,7 @@ void Compiler::LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weigh weight_t cost = isStore ? costOfConvertMaskToVector : costOfConvertVectorToMask; cost *= blockWeight; - JITDUMP("Incrementing currentCost by %f. ", cost); + JITDUMP("Incrementing currentCost by %.2f. ", cost); currentCost += cost; } else @@ -2454,7 +2454,7 @@ void Compiler::LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weigh weight_t cost = isStore ? costOfConvertVectorToMask : costOfConvertMaskToVector; cost *= blockWeight; - JITDUMP("Incrementing switchCost by %f. ", cost); + JITDUMP("Incrementing switchCost by %.2f. 
", cost); switchCost += cost; } DumpTotalWeight(); From 6b907e7fa65af8e2347dbafdb917332ba819973f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 7 Nov 2024 15:39:26 +0000 Subject: [PATCH 32/66] dump the updated tree with conversion --- src/coreclr/jit/lclmorph.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 495119f50e169d..57f00b39a8b228 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2660,7 +2660,8 @@ class LclMasksUpdateLclVisitor final : public GenTreeVisitorfgSequenceLocals(stmt); - found = true; + found = true; + modifiedOp = *use; } else if (((*use) == lclOp) && (!user->OperIsConvertVectorToMask())) { @@ -2718,6 +2720,7 @@ class LclMasksUpdateLclVisitor final : public GenTreeVisitor Date: Thu, 7 Nov 2024 16:31:58 +0000 Subject: [PATCH 33/66] fix formatting --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index eceb9781b7ec60..86f261c0a81273 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10002,7 +10002,7 @@ class Compiler bool compSwitchedToOptimized = false; // Codegen initially was Tier0 but jit switched to FullOpts bool compSwitchedToMinOpts = false; // Codegen initially was Tier1/FullOpts but jit switched to MinOpts bool compSuppressedZeroInit = false; // There are vars with lvSuppressedZeroInit set - bool compMaskConvertUsed = false; // Does the method have Convert Mask To Vector nodes. + bool compMaskConvertUsed = false; // Does the method have Convert Mask To Vector nodes. // NOTE: These values are only reliable after // the importing is completely finished. 
From e562eecbe89abc2b13ba891db2a9aa94234ceb1c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 8 Nov 2024 09:38:11 +0000 Subject: [PATCH 34/66] Update explanation --- src/coreclr/jit/lclmorph.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 57f00b39a8b228..2ca22a82686978 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2822,11 +2822,20 @@ void Compiler::fgLclMasksUpdateLcl(GenTreeLclVarCommon* lclOp, Statement* const // vector x = _ConvertMaskToVector_(CreateMask()); // x = Add(x, y); // -// To account for this, the pass uses a weighting. For each variable, if it is a local store, -// then count the existing cost of every covert to/from mask. Also count the cost for -// switching the variable to store as mask (this may include adding additional conversions -// as well as removing). For each counted instance, take into account the number of -// instructions in the conversion and the weight of the block. +// To optimize this, the pass searches every local variable definition (GT_STORE_LCL_VAR) +// and use (GT_LCL_VAR). A weighting is calculated and kept in a hash table - one entry +// for each lclvar number. The weighting contains two values. The first value is the count of +// of every convert node for the var, each instance multiplied by the number of instructions +// in the convert and the weighting of the block it exists in. The second value assumes the +// local var has been switched to store as a mask and performs the same count. The switch +// will count removes every existing convert and add a convert where there isn't currently +// a convert. +// +// Once every definition and use has been parsed, the parsing runs again. At each step, +// if the weighting for switching that var is lower than the current weighting then switch +// to store as mask and add/remove conversions as required. 
+// +// Limitations: // // Local variables that are defined then immediately used just once may not be saved to a // store. Here a convert to to vector will be used by a convert to mask. These instances will From 659b7d9e99aff0901c50b4652acc5d82696f64e2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 8 Nov 2024 12:32:27 +0000 Subject: [PATCH 35/66] extra tests and remove asserts --- src/coreclr/jit/lclmorph.cpp | 3 - .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 60 +++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 2ca22a82686978..a5a78db946fc91 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2472,8 +2472,6 @@ void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) unsigned newSimdSize = op->GetSimdSize(); assert((newSimdBaseJitType != CORINFO_TYPE_UNDEF)); - assert((simdBaseJitType == CORINFO_TYPE_UNDEF) || - ((simdBaseJitType == newSimdBaseJitType) && (simdSize == newSimdSize))); simdBaseJitType = newSimdBaseJitType; simdSize = newSimdSize; @@ -2633,7 +2631,6 @@ class LclMasksUpdateLclVisitor final : public GenTreeVisitorgtType != TYP_MASK); lclOp->gtType = TYP_MASK; LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); - assert(varDsc->lvType != TYP_MASK); varDsc->lvType = TYP_MASK; if (lclOp->Data()->OperIsConvertMaskToVector()) diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index 80cf42b9dce6ef..e4ab4879328def 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -179,4 +179,64 @@ private static void InnerUseMaskAsVectorAndMaskInsideLoop() } } + [Fact] + public static void CastMaskUseAsVector() + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + InnerCastMaskUseAsVector(); + } + + Thread.Sleep(100); + } + InnerCastMaskUseAsVector(); + } + + [method: 
MethodImpl(MethodImplOptions.NoInlining)] + private static void InnerCastMaskUseAsVector() + { + if (Sve.IsSupported) + { + Vector mask1; + if (Environment.TickCount % 2 == 0) + mask1 = Sve.CreateTrueMaskInt32(); + else + mask1 = Unsafe.BitCast, Vector>(Sve.CreateTrueMaskUInt32()); + Consume(mask1); // Use as vector + } + } + + [Fact] + public static void CastMaskUseAsMask() + { + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + InnerCastMaskUseAsMask(); + } + + Thread.Sleep(100); + } + InnerCastMaskUseAsMask(); + } + + [method: MethodImpl(MethodImplOptions.NoInlining)] + private static void InnerCastMaskUseAsMask() + { + if (Sve.IsSupported) + { + Vector mask1; + if (Environment.TickCount % 2 == 0) + mask1 = Sve.CreateTrueMaskInt32(); + else + mask1 = Unsafe.BitCast, Vector>(Sve.CreateTrueMaskUInt32()); + + Vector vec1 = Vector.Create(25); + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Consume(vec2); + } + } } \ No newline at end of file From 9f54fa85f027e6cbeedde0428edf51228adafd5f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 8 Nov 2024 12:37:12 +0000 Subject: [PATCH 36/66] move pass to lclmasks.cpp --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/lclmasks.cpp | 481 +++++++++++++++++++++++++++++++++ src/coreclr/jit/lclmorph.cpp | 477 -------------------------------- 3 files changed, 482 insertions(+), 477 deletions(-) create mode 100644 src/coreclr/jit/lclmasks.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 68155021d8eb78..33eeda7742fff3 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -142,6 +142,7 @@ set( JIT_SOURCES jithashtable.cpp jitmetadata.cpp layout.cpp + lclmasks.cpp lclmorph.cpp lclvars.cpp likelyclass.cpp diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp new file mode 100644 index 00000000000000..67283afd5ecb2c --- /dev/null +++ b/src/coreclr/jit/lclmasks.cpp @@ -0,0 +1,481 @@ +// Licensed to the .NET 
Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" + +#if defined(TARGET_ARM64) + +//----------------------------------------------------------------------------- +// UpdateWeight: Updates the weighting to take account of a local. +// +// Arguments: +// isStore - Is this a lcl store +// hasConvert - Is this local converted +// blockWeight - Weight of the block the store is contained in +// +void Compiler::LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight) +{ + if (hasConvert) + { + // Count the cost of the existing convert. + weight_t cost = isStore ? costOfConvertMaskToVector : costOfConvertVectorToMask; + cost *= blockWeight; + + JITDUMP("Incrementing currentCost by %.2f. ", cost); + currentCost += cost; + } + else + { + // Switching would require adding a convert. + weight_t cost = isStore ? costOfConvertVectorToMask : costOfConvertMaskToVector; + cost *= blockWeight; + + JITDUMP("Incrementing switchCost by %.2f. 
", cost); + switchCost += cost; + } + DumpTotalWeight(); +} + +//----------------------------------------------------------------------------- +// CacheSimdTypes: Cache the simd types of a hwintrinsic +// +// Arguments: +// op - The HW intrinsic to cache +// +void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) +{ + CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); + unsigned newSimdSize = op->GetSimdSize(); + + assert((newSimdBaseJitType != CORINFO_TYPE_UNDEF)); + + simdBaseJitType = newSimdBaseJitType; + simdSize = newSimdSize; +} + +//----------------------------------------------------------------------------- +// LclMasksCheckLclVisitor: Find the user of a lcl var and check if it is a convert to mask +// +class LclMasksCheckLclVisitor final : public GenTreeVisitor +{ +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true + }; + + LclMasksCheckLclVisitor(Compiler* compiler, GenTreeLclVarCommon* lclOp) + : GenTreeVisitor(compiler) + , lclOp(lclOp) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + if ((*use) == lclOp) + { + switch (lclOp->OperGet()) + { + case GT_STORE_LCL_VAR: + // Look for: + // STORE_LCL_VAR(ConvertMaskToVector(x)) + + if (lclOp->Data()->OperIsConvertMaskToVector()) + { + convertOp = lclOp->Data()->AsHWIntrinsic(); + } + break; + + case GT_LCL_VAR: + // Look for: + // ConvertVectorToMask(LCL_VAR(x))) + + if (user->OperIsConvertVectorToMask()) + { + convertOp = user->AsHWIntrinsic(); + } + break; + + default: + break; + } + return fgWalkResult::WALK_ABORT; + } + return fgWalkResult::WALK_CONTINUE; + } + + GenTreeHWIntrinsic* convertOp = nullptr; + +private: + GenTreeLclVarCommon* lclOp; +}; + +//----------------------------------------------------------------------------- +// fgLclMasksCheckLcl: For the given lcl var, update the weights in the table. +// +// Arguments: +// lclVar - The local variable. +// stmt - The statement the local variable is contained in. 
+// block - The block the local variable is contained in. +// weightsTable - table to update. +// +// Returns: +// True if a converted local store was found. +// +bool Compiler::fgLclMasksCheckLcl(GenTreeLclVarCommon* lclOp, + Statement* const stmt, + BasicBlock* const block, + LclMasksWeightTable* weightsTable) +{ + // Only these can have conversions. + if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) + { + return false; + } + + bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); + + // Get the existing weighting (if any). + LclMasksWeight weight; + weightsTable->Lookup(lclOp->GetLclNum(), &weight); + + // Find the parent of the lcl var. + LclMasksCheckLclVisitor ev(this, lclOp); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + bool foundConversion = (ev.convertOp != nullptr); + + // Update the weights. + JITDUMP("Local %s V%02d at [%06u] has %s conversion. ", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp), foundConversion ? "mask" : "no"); + weight.UpdateWeight(isStore, foundConversion, block->getBBWeight(this)); + + // Cache the simd type data of the conversion. + if (foundConversion) + { + assert(ev.convertOp != nullptr); + weight.CacheSimdTypes(ev.convertOp); + } + + // Update the table. 
+ weightsTable->Set(lclOp->GetLclNum(), weight, LclMasksWeightTable::Overwrite); + + return foundConversion; +} + +//----------------------------------------------------------------------------- +// LclMasksUpdateLclVisitor: tree visitor to remove conversion to masks for uses of LCL +// +class LclMasksUpdateLclVisitor final : public GenTreeVisitor +{ +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true + }; + + LclMasksUpdateLclVisitor( + Compiler* compiler, GenTreeLclVarCommon* lclOp, Statement* stmt, CorInfoType simdBaseJitType, unsigned simdSize) + : GenTreeVisitor(compiler) + , lclOp(lclOp) + , stmt(stmt) + , simdBaseJitType(simdBaseJitType) + , simdSize(simdSize) + { + } + + Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + switch (lclOp->OperGet()) + { + case GT_STORE_LCL_VAR: + if ((*use) == lclOp) + { + // Either Convert + // use:STORE_LCL_VAR(ConvertMaskToVector(x)) + // to + // use:STORE_LCL_VAR(x) + // + // Or, convert + // use:STORE_LCL_VAR(x) + // to + // use:STORE_LCL_VAR(ConvertVectorToMask(x)) + + // Update the type of the STORELCL - including the lclvar. + assert(lclOp->gtType != TYP_MASK); + lclOp->gtType = TYP_MASK; + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); + varDsc->lvType = TYP_MASK; + + if (lclOp->Data()->OperIsConvertMaskToVector()) + { + // Remove the ConvertMaskToVector + + GenTreeHWIntrinsic* convertOp = lclOp->Data()->AsHWIntrinsic(); + GenTree* maskOp = convertOp->Op(1); + + convertOp->gtBashToNOP(); + lclOp->gtOp1 = maskOp; + m_compiler->fgSequenceLocals(stmt); + } + else + { + // Convert the input of the store to a mask. + // There is not enough information in the lcl to get simd types. Instead we reuse the cached + // simd types from the removed convert nodes. 
+ assert(simdBaseJitType != CORINFO_TYPE_UNDEF); + GenTree* convertOp = m_compiler->gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclOp->Data(), + simdBaseJitType, simdSize); + lclOp->Data() = convertOp; + + addedConversion = true; + } + + found = true; + modifiedOp = *use; + } + break; + + case GT_LCL_VAR: + if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2) == lclOp) + { + // Convert + // user(use:ConvertVectorToMask(LCL_VAR(x))) + // to + // user(use:LCL_VAR(x)) + + GenTree* const convertOp = *use; + + // Find the location of convertOp in the user + int opNum = 1; + for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) + { + if (user->AsHWIntrinsic()->Op(opNum) == convertOp) + { + break; + } + } + assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); + + // Fix up the type of the lcl + assert(lclOp->gtType != TYP_MASK); + lclOp->gtType = convertOp->gtType; + + // Remove the convert convertOp + convertOp->gtBashToNOP(); + *use = lclOp; + m_compiler->fgSequenceLocals(stmt); + + found = true; + modifiedOp = *use; + } + else if (((*use) == lclOp) && (!user->OperIsConvertVectorToMask())) + { + // Convert + // user(use:LCL_VAR(x)) + // to + // user(ConvertMaskToVector(use:LCL_VAR(x))) + + GenTreeLclVar* lclOp = (*use)->AsLclVar(); + + // Fix up the type of the lcl + assert(lclOp->gtType != TYP_MASK); + var_types vectorType = lclOp->gtType; + lclOp->gtType = TYP_MASK; + + // Create a convert to mask node and insert it infront of the lcl. + // There is not enough information in the lcl to get simd types. Instead we reuse the cached simd + // types from the removed convert nodes. + assert(simdBaseJitType != CORINFO_TYPE_UNDEF); + *use = m_compiler->gtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); + + addedConversion = true; + found = true; + modifiedOp = *use; + } + break; + + default: + break; + } + + return found ? 
fgWalkResult::WALK_ABORT : fgWalkResult::WALK_CONTINUE; + } + +public: + bool addedConversion = false; + bool found = false; + GenTree* modifiedOp = nullptr; + +private: + GenTreeLclVarCommon* lclOp; + Statement* stmt; + CorInfoType simdBaseJitType; + unsigned simdSize; +}; + +//----------------------------------------------------------------------------- +// fgLclMasksUpdateLcl: For the given lcl, if the weighting recommends to switch, then update. +// +// Arguments: +// lclOp - The local variable. +// stmt - The statement the local variable is contained in. +// weightsTable - table to update. +// +void Compiler::fgLclMasksUpdateLcl(GenTreeLclVarCommon* lclOp, Statement* const stmt, LclMasksWeightTable* weightsTable) +{ + + // Only these can have conversions. + if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) + { + return; + } + + bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); + + // Get the existing weighting (if any). + LclMasksWeight weight; + bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); + assert(found); + + if (!weight.ShouldSwitch()) + { + JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp)); + weight.DumpTotalWeight(); + return; + } + + JITDUMP("Local %s V%02d at [%06u] will be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp)); + weight.DumpTotalWeight(); + + // Remove or add a mask conversion. + LclMasksUpdateLclVisitor ev(this, lclOp, stmt, weight.simdBaseJitType, weight.simdSize); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + + if (ev.found) + { + JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isStore ? "store" : "var", lclOp->GetLclNum(), + dspTreeID(lclOp), ev.addedConversion ? 
"added" : "removed"); + +#ifdef DEBUG + if (verbose) + { + gtDispTree(ev.modifiedOp); + } +#endif + } +} + +#endif // TARGET_ARM64 + +//------------------------------------------------------------------------ +// optLclMasks: Allow locals to be of Mask type +// +// At the C# level, Masks share the same type as a Vector. It's possible for the same +// variable to be used as a mask or vector. Any APIs that return a mask must first convert +// the value to a vector before storing it to a variable. Any uses of a variable as a mask +// must first convert from vector before using it. In many cases this creates unnecessary +// conversions. For variables that live outside the scope of the current method then the +// conversions are required to ensure correctness. However, for local variables where the +// scope is local to the current method, then it is possible to keep the value as a mask, +// by updating all definitions and uses. +// +// In the common case it is expected that uses of masks are consistent - once a variable is +// created as a mask it will continue to be used and updated as a mask. +// +// In the uncommon case, a variable may be created in one type, used as another and/or +// updated to a different type. +// +// For example (the conversion is implicit) +// vector x = _ConvertMaskToVector_(CreateMask()); +// x = Add(x, y); +// +// To optimize this, the pass searches every local variable definition (GT_STORE_LCL_VAR) +// and use (GT_LCL_VAR). A weighting is calculated and kept in a hash table - one entry +// for each lclvar number. The weighting contains two values. The first value is the count of +// of every convert node for the var, each instance multiplied by the number of instructions +// in the convert and the weighting of the block it exists in. The second value assumes the +// local var has been switched to store as a mask and performs the same count. 
The switch +// will count removes every existing convert and add a convert where there isn't currently +// a convert. +// +// Once every definition and use has been parsed, the parsing runs again. At each step, +// if the weighting for switching that var is lower than the current weighting then switch +// to store as mask and add/remove conversions as required. +// +// Limitations: +// +// Local variables that are defined then immediately used just once may not be saved to a +// store. Here a convert to to vector will be used by a convert to mask. These instances will +// be caught in the lowering phase. +// +// This weighting does not account for re-definition. A variable may first be created as a +// mask used as such, then later in the method defined as a vector and used as such from +// then on. This can be worked around at the user level by encouraging users not to reuse +// variable names. +// +// Returns: +// Suitable phase status +// +PhaseStatus Compiler::fgOptimizeLclMasks() +{ +#if defined(TARGET_ARM64) + + if (opts.OptimizationDisabled()) + { + JITDUMP("Skipping. Optimizations Disabled\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + if (!compMaskConvertUsed) + { + JITDUMP("Skipping. There are no converts of locals \n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + LclMasksWeightTable weightsTable = LclMasksWeightTable(getAllocator()); + + // Find every local and add them to weightsTable. + bool foundConversion = false; + JITDUMP("\n"); + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + foundConversion |= fgLclMasksCheckLcl(lcl, stmt, block, &weightsTable); + } + } + } + + if (!foundConversion) + { + JITDUMP("Done. No conversions of locals found.\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + // For each Local, potentially add/remove a conversion. 
+ JITDUMP("\n"); + for (BasicBlock* block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + { + fgLclMasksUpdateLcl(lcl, stmt, &weightsTable); + } + } + } + + return PhaseStatus::MODIFIED_EVERYTHING; + +#else + return PhaseStatus::MODIFIED_NOTHING; +#endif // TARGET_ARM64 +} diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index a5a78db946fc91..90750945082aea 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -2426,480 +2426,3 @@ void Compiler::fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec) lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); } } - -#if defined(TARGET_ARM64) - -//----------------------------------------------------------------------------- -// UpdateWeight: Updates the weighting to take account of a local. -// -// Arguments: -// isStore - Is this a lcl store -// hasConvert - Is this local converted -// blockWeight - Weight of the block the store is contained in -// -void Compiler::LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight) -{ - if (hasConvert) - { - // Count the cost of the existing convert. - weight_t cost = isStore ? costOfConvertMaskToVector : costOfConvertVectorToMask; - cost *= blockWeight; - - JITDUMP("Incrementing currentCost by %.2f. ", cost); - currentCost += cost; - } - else - { - // Switching would require adding a convert. - weight_t cost = isStore ? costOfConvertVectorToMask : costOfConvertMaskToVector; - cost *= blockWeight; - - JITDUMP("Incrementing switchCost by %.2f. 
", cost); - switchCost += cost; - } - DumpTotalWeight(); -} - -//----------------------------------------------------------------------------- -// CacheSimdTypes: Cache the simd types of a hwintrinsic -// -// Arguments: -// op - The HW intrinsic to cache -// -void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) -{ - CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); - unsigned newSimdSize = op->GetSimdSize(); - - assert((newSimdBaseJitType != CORINFO_TYPE_UNDEF)); - - simdBaseJitType = newSimdBaseJitType; - simdSize = newSimdSize; -} - -//----------------------------------------------------------------------------- -// LclMasksCheckLclVisitor: Find the user of a lcl var and check if it is a convert to mask -// -class LclMasksCheckLclVisitor final : public GenTreeVisitor -{ -public: - enum - { - DoPostOrder = true, - UseExecutionOrder = true - }; - - LclMasksCheckLclVisitor(Compiler* compiler, GenTreeLclVarCommon* lclOp) - : GenTreeVisitor(compiler) - , lclOp(lclOp) - { - } - - Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) - { - if ((*use) == lclOp) - { - switch (lclOp->OperGet()) - { - case GT_STORE_LCL_VAR: - // Look for: - // STORE_LCL_VAR(ConvertMaskToVector(x)) - - if (lclOp->Data()->OperIsConvertMaskToVector()) - { - convertOp = lclOp->Data()->AsHWIntrinsic(); - } - break; - - case GT_LCL_VAR: - // Look for: - // ConvertVectorToMask(LCL_VAR(x))) - - if (user->OperIsConvertVectorToMask()) - { - convertOp = user->AsHWIntrinsic(); - } - break; - - default: - break; - } - return fgWalkResult::WALK_ABORT; - } - return fgWalkResult::WALK_CONTINUE; - } - - GenTreeHWIntrinsic* convertOp = nullptr; - -private: - GenTreeLclVarCommon* lclOp; -}; - -//----------------------------------------------------------------------------- -// fgLclMasksCheckLclVar: For the given lcl var, update the weights in the table. -// -// Arguments: -// lclVar - The local variable. -// stmt - The statement the local variable is contained in. 
-// block - The block the local variable is contained in. -// weightsTable - table to update. -// -// Returns: -// True if a converted local store was found. -// -bool Compiler::fgLclMasksCheckLcl(GenTreeLclVarCommon* lclOp, - Statement* const stmt, - BasicBlock* const block, - LclMasksWeightTable* weightsTable) -{ - // Only these can have conversions. - if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) - { - return false; - } - - bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); - - // Get the existing weighting (if any). - LclMasksWeight weight; - weightsTable->Lookup(lclOp->GetLclNum(), &weight); - - // Find the parent of the lcl var. - LclMasksCheckLclVisitor ev(this, lclOp); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - bool foundConversion = (ev.convertOp != nullptr); - - // Update the weights. - JITDUMP("Local %s V%02d at [%06u] has %s conversion. ", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp), foundConversion ? "mask" : "no"); - weight.UpdateWeight(isStore, foundConversion, block->getBBWeight(this)); - - // Cache the simd type data of the conversion. - if (foundConversion) - { - assert(ev.convertOp != nullptr); - weight.CacheSimdTypes(ev.convertOp); - } - - // Update the table. 
- weightsTable->Set(lclOp->GetLclNum(), weight, LclMasksWeightTable::Overwrite); - - return foundConversion; -} - -//----------------------------------------------------------------------------- -// LclMasksUpdateLclVisitor: tree visitor to remove conversion to masks for uses of LCL -// -class LclMasksUpdateLclVisitor final : public GenTreeVisitor -{ -public: - enum - { - DoPostOrder = true, - UseExecutionOrder = true - }; - - LclMasksUpdateLclVisitor( - Compiler* compiler, GenTreeLclVarCommon* lclOp, Statement* stmt, CorInfoType simdBaseJitType, unsigned simdSize) - : GenTreeVisitor(compiler) - , lclOp(lclOp) - , stmt(stmt) - , simdBaseJitType(simdBaseJitType) - , simdSize(simdSize) - { - } - - Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) - { - switch (lclOp->OperGet()) - { - case GT_STORE_LCL_VAR: - if ((*use) == lclOp) - { - // Either Convert - // use:STORE_LCL_VAR(ConvertMaskToVector(x)) - // to - // use:STORE_LCL_VAR(x) - // - // Or, convert - // use:STORE_LCL_VAR(x) - // to - // use:STORE_LCL_VAR(ConvertVectorToMask(x)) - - // Update the type of the STORELCL - including the lclvar. - assert(lclOp->gtType != TYP_MASK); - lclOp->gtType = TYP_MASK; - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); - varDsc->lvType = TYP_MASK; - - if (lclOp->Data()->OperIsConvertMaskToVector()) - { - // Remove the ConvertMaskToVector - - GenTreeHWIntrinsic* convertOp = lclOp->Data()->AsHWIntrinsic(); - GenTree* maskOp = convertOp->Op(1); - - convertOp->gtBashToNOP(); - lclOp->gtOp1 = maskOp; - m_compiler->fgSequenceLocals(stmt); - } - else - { - // Convert the input of the store to a mask. - // There is not enough information in the lcl to get simd types. Instead we reuse the cached - // simd types from the removed convert nodes. 
- assert(simdBaseJitType != CORINFO_TYPE_UNDEF); - GenTree* convertOp = m_compiler->gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclOp->Data(), - simdBaseJitType, simdSize); - lclOp->Data() = convertOp; - - addedConversion = true; - } - - found = true; - modifiedOp = *use; - } - break; - - case GT_LCL_VAR: - if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2) == lclOp) - { - // Convert - // user(use:ConvertVectorToMask(LCL_VAR(x))) - // to - // user(use:LCL_VAR(x)) - - GenTree* const convertOp = *use; - - // Find the location of convertOp in the user - int opNum = 1; - for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) - { - if (user->AsHWIntrinsic()->Op(opNum) == convertOp) - { - break; - } - } - assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - - // Fix up the type of the lcl - assert(lclOp->gtType != TYP_MASK); - lclOp->gtType = convertOp->gtType; - - // Remove the convert convertOp - convertOp->gtBashToNOP(); - *use = lclOp; - m_compiler->fgSequenceLocals(stmt); - - found = true; - modifiedOp = *use; - } - else if (((*use) == lclOp) && (!user->OperIsConvertVectorToMask())) - { - // Convert - // user(use:LCL_VAR(x)) - // to - // user(ConvertMaskToVector(use:LCL_VAR(x))) - - GenTreeLclVar* lclOp = (*use)->AsLclVar(); - - // Fix up the type of the lcl - assert(lclOp->gtType != TYP_MASK); - var_types vectorType = lclOp->gtType; - lclOp->gtType = TYP_MASK; - - // Create a convert to mask node and insert it infront of the lcl. - // There is not enough information in the lcl to get simd types. Instead we reuse the cached simd - // types from the removed convert nodes. - assert(simdBaseJitType != CORINFO_TYPE_UNDEF); - *use = m_compiler->gtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); - - addedConversion = true; - found = true; - modifiedOp = *use; - } - break; - - default: - break; - } - - return found ? 
fgWalkResult::WALK_ABORT : fgWalkResult::WALK_CONTINUE; - } - -public: - bool addedConversion = false; - bool found = false; - GenTree* modifiedOp = nullptr; - -private: - GenTreeLclVarCommon* lclOp; - Statement* stmt; - CorInfoType simdBaseJitType; - unsigned simdSize; -}; - -//----------------------------------------------------------------------------- -// fgLclMasksUpdateLcl: For the given lcl, if the weighting recommends to switch, then update. -// -// Arguments: -// lclOp - The local variable. -// stmt - The statement the local variable is contained in. -// weightsTable - table to update. -// -void Compiler::fgLclMasksUpdateLcl(GenTreeLclVarCommon* lclOp, Statement* const stmt, LclMasksWeightTable* weightsTable) -{ - - // Only these can have conversions. - if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) - { - return; - } - - bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); - - // Get the existing weighting (if any). - LclMasksWeight weight; - bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); - assert(found); - - if (!weight.ShouldSwitch()) - { - JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp)); - weight.DumpTotalWeight(); - return; - } - - JITDUMP("Local %s V%02d at [%06u] will be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp)); - weight.DumpTotalWeight(); - - // Remove or add a mask conversion. - LclMasksUpdateLclVisitor ev(this, lclOp, stmt, weight.simdBaseJitType, weight.simdSize); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - - if (ev.found) - { - JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp), ev.addedConversion ? 
"added" : "removed"); - -#ifdef DEBUG - if (verbose) - { - gtDispTree(ev.modifiedOp); - } -#endif - } -} - -#endif // TARGET_ARM64 - -//------------------------------------------------------------------------ -// optLclMasks: Allow locals to be of Mask type -// -// At the C# level, Masks share the same type as a Vector. It's possible for the same -// variable to be used as a mask or vector. Any APIs that return a mask must first convert -// the value to a vector before storing it to a variable. Any uses of a variable as a mask -// must first convert from vector before using it. In many cases this creates unnecessary -// conversions. For variables that live outside the scope of the current method then the -// conversions are required to ensure correctness. However, for local variables where the -// scope is local to the current method, then it is possible to keep the value as a mask, -// by updating all definitions and uses. -// -// In the common case it is expected that uses of masks are consistent - once a variable is -// created as a mask it will continue to be used and updated as a mask. -// -// In the uncommon case, a variable may be created in one type, used as another and/or -// updated to a different type. -// -// For example (the conversion is implicit) -// vector x = _ConvertMaskToVector_(CreateMask()); -// x = Add(x, y); -// -// To optimize this, the pass searches every local variable definition (GT_STORE_LCL_VAR) -// and use (GT_LCL_VAR). A weighting is calculated and kept in a hash table - one entry -// for each lclvar number. The weighting contains two values. The first value is the count of -// of every convert node for the var, each instance multiplied by the number of instructions -// in the convert and the weighting of the block it exists in. The second value assumes the -// local var has been switched to store as a mask and performs the same count. 
The switch -// will count removes every existing convert and add a convert where there isn't currently -// a convert. -// -// Once every definition and use has been parsed, the parsing runs again. At each step, -// if the weighting for switching that var is lower than the current weighting then switch -// to store as mask and add/remove conversions as required. -// -// Limitations: -// -// Local variables that are defined then immediately used just once may not be saved to a -// store. Here a convert to to vector will be used by a convert to mask. These instances will -// be caught in the lowering phase. -// -// This weighting does not account for re-definition. A variable may first be created as a -// mask used as such, then later in the method defined as a vector and used as such from -// then on. This can be worked around at the user level by encouraging users not to reuse -// variable names. -// -// Returns: -// Suitable phase status -// -PhaseStatus Compiler::fgOptimizeLclMasks() -{ -#if defined(TARGET_ARM64) - - if (opts.OptimizationDisabled()) - { - JITDUMP("Skipping. Optimizations Disabled\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - if (!compMaskConvertUsed) - { - JITDUMP("Skipping. There are no converts of locals \n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - LclMasksWeightTable weightsTable = LclMasksWeightTable(getAllocator()); - - // Find every local and add them to weightsTable. - bool foundConversion = false; - JITDUMP("\n"); - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) - { - foundConversion |= fgLclMasksCheckLcl(lcl, stmt, block, &weightsTable); - } - } - } - - if (!foundConversion) - { - JITDUMP("Done. No conversions of locals found.\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - // For each Local, potentially add/remove a conversion. 
- JITDUMP("\n"); - for (BasicBlock* block : Blocks()) - { - for (Statement* const stmt : block->Statements()) - { - for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) - { - fgLclMasksUpdateLcl(lcl, stmt, &weightsTable); - } - } - } - - return PhaseStatus::MODIFIED_EVERYTHING; - -#else - return PhaseStatus::MODIFIED_NOTHING; -#endif // TARGET_ARM64 -} From 7ef5f6bcd68b01ea1f51c7c585296d2bf66dae7a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 8 Nov 2024 13:47:55 +0000 Subject: [PATCH 37/66] Use visitors to iterate all nodes --- src/coreclr/jit/compiler.h | 41 --- src/coreclr/jit/lclmasks.cpp | 501 ++++++++++++++++++----------------- 2 files changed, 252 insertions(+), 290 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 86f261c0a81273..b642ba985615cc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6920,47 +6920,6 @@ class Compiler PhaseStatus fgOptimizeLclMasks(); -#if defined(TARGET_ARM64) - - struct LclMasksWeight - { - // For the given variable, the cost of storing as vector. - weight_t currentCost = 0.0; - - // For the given variable, the cost of storing as mask. - weight_t switchCost = 0.0; - - // Conversion of mask to vector is one instruction. - static const weight_t costOfConvertMaskToVector = 1.0; - - // Conversion of vector to mask is two instructions. - static const weight_t costOfConvertVectorToMask = 2.0; - - // The simd types of the Lcl Store after conversion to vector. 
- CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned simdSize = 0; - - void UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight); - - bool ShouldSwitch() - { - return currentCost > switchCost; - } - - void DumpTotalWeight() - { - JITDUMP("Weighting: {%.2fc %.2fs}\n", currentCost, switchCost); - } - - void CacheSimdTypes(GenTreeHWIntrinsic* op); - }; - - typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; - - bool fgLclMasksCheckLcl(GenTreeLclVarCommon* lclVar, Statement* const stmt, BasicBlock* const block, LclMasksWeightTable *weightsTable); - void fgLclMasksUpdateLcl(GenTreeLclVarCommon* lclVar, Statement* const stmt, LclMasksWeightTable *weightsTable); -#endif // TARGET_ARM64 - PhaseStatus PhysicalPromotion(); PhaseStatus fgForwardSub(); diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 67283afd5ecb2c..c25215ef6d5973 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -5,6 +5,37 @@ #if defined(TARGET_ARM64) +struct LclMasksWeight +{ + // For the given variable, the cost of storing as vector. + weight_t currentCost = 0.0; + + // For the given variable, the cost of storing as mask. + weight_t switchCost = 0.0; + + // Conversion of mask to vector is one instruction. + static constexpr const weight_t costOfConvertMaskToVector = 1.0; + + // Conversion of vector to mask is two instructions. + static constexpr const weight_t costOfConvertVectorToMask = 2.0; + + // The simd types of the Lcl Store after conversion to vector. 
+ CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + unsigned simdSize = 0; + + void UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight); + + void DumpTotalWeight() + { + JITDUMP("Weighting: {%.2fc %.2fs}\n", currentCost, switchCost); + } + + void CacheSimdTypes(GenTreeHWIntrinsic* op); +}; + +typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; + + //----------------------------------------------------------------------------- // UpdateWeight: Updates the weighting to take account of a local. // @@ -13,7 +44,7 @@ // hasConvert - Is this local converted // blockWeight - Weight of the block the store is contained in // -void Compiler::LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight) +void LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight) { if (hasConvert) { @@ -42,7 +73,7 @@ void Compiler::LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weigh // Arguments: // op - The HW intrinsic to cache // -void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) +void LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) { CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); unsigned newSimdSize = op->GetSimdSize(); @@ -54,7 +85,7 @@ void Compiler::LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) } //----------------------------------------------------------------------------- -// LclMasksCheckLclVisitor: Find the user of a lcl var and check if it is a convert to mask +// LclMasksCheckLclVisitor: Find all lcl var definitions and uses. For each one, update the weighting. 
// class LclMasksCheckLclVisitor final : public GenTreeVisitor { @@ -65,104 +96,89 @@ class LclMasksCheckLclVisitor final : public GenTreeVisitor(compiler) - , lclOp(lclOp) + , bbWeight(bbWeight) + , weightsTable(weightsTable) { } Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - if ((*use) == lclOp) + GenTreeHWIntrinsic* convertOp = nullptr; + + bool isLocalStore = false; + bool isLocalUse = false; + bool hasConversion = false; + + switch ((*use)->OperGet()) { - switch (lclOp->OperGet()) - { - case GT_STORE_LCL_VAR: - // Look for: - // STORE_LCL_VAR(ConvertMaskToVector(x)) - - if (lclOp->Data()->OperIsConvertMaskToVector()) - { - convertOp = lclOp->Data()->AsHWIntrinsic(); - } - break; + case GT_STORE_LCL_VAR: + isLocalStore = true; - case GT_LCL_VAR: - // Look for: - // ConvertVectorToMask(LCL_VAR(x))) + // Look for: + // use:STORE_LCL_VAR(ConvertMaskToVector(x)) - if (user->OperIsConvertVectorToMask()) - { - convertOp = user->AsHWIntrinsic(); - } - break; + if ((*use)->AsLclVar()->Data()->OperIsConvertMaskToVector()) + { + convertOp = (*use)->AsLclVar()->Data()->AsHWIntrinsic(); + hasConversion = true; + } + break; - default: - break; - } - return fgWalkResult::WALK_ABORT; - } - return fgWalkResult::WALK_CONTINUE; - } + case GT_LCL_VAR: + isLocalUse = true; - GenTreeHWIntrinsic* convertOp = nullptr; + // Look for: + // user:ConvertVectorToMask(use:LCL_VAR(x))) -private: - GenTreeLclVarCommon* lclOp; -}; + if (user->OperIsConvertVectorToMask()) + { + convertOp = user->AsHWIntrinsic(); + hasConversion = true; + } + break; -//----------------------------------------------------------------------------- -// fgLclMasksCheckLcl: For the given lcl var, update the weights in the table. -// -// Arguments: -// lclVar - The local variable. -// stmt - The statement the local variable is contained in. -// block - The block the local variable is contained in. -// weightsTable - table to update. 
-// -// Returns: -// True if a converted local store was found. -// -bool Compiler::fgLclMasksCheckLcl(GenTreeLclVarCommon* lclOp, - Statement* const stmt, - BasicBlock* const block, - LclMasksWeightTable* weightsTable) -{ - // Only these can have conversions. - if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) - { - return false; - } + default: + break; + } - bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); + if (isLocalStore || isLocalUse) + { + GenTreeLclVarCommon* lclOp = (*use)->AsLclVarCommon(); - // Get the existing weighting (if any). - LclMasksWeight weight; - weightsTable->Lookup(lclOp->GetLclNum(), &weight); + // Get the existing weighting (if any). + LclMasksWeight weight; + weightsTable->Lookup(lclOp->GetLclNum(), &weight); - // Find the parent of the lcl var. - LclMasksCheckLclVisitor ev(this, lclOp); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - bool foundConversion = (ev.convertOp != nullptr); + // Update the weights. + JITDUMP("Local %s V%02d at [%06u] has %s conversion. ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), + m_compiler->dspTreeID(lclOp), hasConversion ? "mask" : "no"); + weight.UpdateWeight(isLocalStore, hasConversion, bbWeight); - // Update the weights. - JITDUMP("Local %s V%02d at [%06u] has %s conversion. ", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp), foundConversion ? "mask" : "no"); - weight.UpdateWeight(isStore, foundConversion, block->getBBWeight(this)); + // Cache the simd type data of the conversion. + if (hasConversion) + { + assert(convertOp != nullptr); + weight.CacheSimdTypes(convertOp); + } - // Cache the simd type data of the conversion. - if (foundConversion) - { - assert(ev.convertOp != nullptr); - weight.CacheSimdTypes(ev.convertOp); + // Update the table. + weightsTable->Set(lclOp->GetLclNum(), weight, LclMasksWeightTable::Overwrite); + + foundConversions |= hasConversion; + } + + return fgWalkResult::WALK_CONTINUE; } - // Update the table. 
- weightsTable->Set(lclOp->GetLclNum(), weight, LclMasksWeightTable::Overwrite); + + bool foundConversions = false; - return foundConversion; -} +private: + weight_t bbWeight; + LclMasksWeightTable* weightsTable; +}; //----------------------------------------------------------------------------- // LclMasksUpdateLclVisitor: tree visitor to remove conversion to masks for uses of LCL @@ -177,199 +193,179 @@ class LclMasksUpdateLclVisitor final : public GenTreeVisitor(compiler) - , lclOp(lclOp) , stmt(stmt) - , simdBaseJitType(simdBaseJitType) - , simdSize(simdSize) + , weightsTable(weightsTable) { } Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - switch (lclOp->OperGet()) + GenTreeLclVarCommon* lclOp = nullptr; + bool isLocalStore = false; + bool isLocalUse = false; + bool addConversion = false; + bool removeConversion = false; + + if ((*use)->OperIs(GT_STORE_LCL_VAR) && (*use)->AsLclVarCommon()->Data()->OperIsConvertMaskToVector()) { - case GT_STORE_LCL_VAR: - if ((*use) == lclOp) - { - // Either Convert - // use:STORE_LCL_VAR(ConvertMaskToVector(x)) - // to - // use:STORE_LCL_VAR(x) - // - // Or, convert - // use:STORE_LCL_VAR(x) - // to - // use:STORE_LCL_VAR(ConvertVectorToMask(x)) - - // Update the type of the STORELCL - including the lclvar. - assert(lclOp->gtType != TYP_MASK); - lclOp->gtType = TYP_MASK; - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); - varDsc->lvType = TYP_MASK; - - if (lclOp->Data()->OperIsConvertMaskToVector()) - { - // Remove the ConvertMaskToVector - - GenTreeHWIntrinsic* convertOp = lclOp->Data()->AsHWIntrinsic(); - GenTree* maskOp = convertOp->Op(1); - - convertOp->gtBashToNOP(); - lclOp->gtOp1 = maskOp; - m_compiler->fgSequenceLocals(stmt); - } - else - { - // Convert the input of the store to a mask. - // There is not enough information in the lcl to get simd types. Instead we reuse the cached - // simd types from the removed convert nodes. 
- assert(simdBaseJitType != CORINFO_TYPE_UNDEF); - GenTree* convertOp = m_compiler->gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclOp->Data(), - simdBaseJitType, simdSize); - lclOp->Data() = convertOp; - - addedConversion = true; - } - - found = true; - modifiedOp = *use; - } - break; + // Found + // use:STORE_LCL_VAR(ConvertMaskToVector(x)) + lclOp = (*use)->AsLclVarCommon(); + isLocalStore = true; + removeConversion = true; + } + else if ((*use)->OperIs(GT_STORE_LCL_VAR) && !(*use)->AsLclVarCommon()->Data()->OperIsConvertMaskToVector()) + { + // Found + // use:STORE_LCL_VAR(x) + lclOp = (*use)->AsLclVarCommon(); + isLocalStore = true; + addConversion = true; + } + else if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2)->OperIs(GT_LCL_VAR)) + { + // Found + // user(use:ConvertVectorToMask(LCL_VAR(x))) + lclOp = (*use)->AsHWIntrinsic()->Op(2)->AsLclVarCommon(); + isLocalUse = true; + removeConversion = true; + } + else if ((*use)->OperIs(GT_LCL_VAR) && (!user->OperIsConvertVectorToMask())) + { + // Found + // user(use:LCL_VAR(x)) + lclOp = (*use)->AsLclVar(); + isLocalUse = true; + addConversion = true; + } + else + { + // Found something else + return fgWalkResult::WALK_CONTINUE; + } - case GT_LCL_VAR: - if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2) == lclOp) - { - // Convert - // user(use:ConvertVectorToMask(LCL_VAR(x))) - // to - // user(use:LCL_VAR(x)) - - GenTree* const convertOp = *use; - - // Find the location of convertOp in the user - int opNum = 1; - for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) - { - if (user->AsHWIntrinsic()->Op(opNum) == convertOp) - { - break; - } - } - assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - - // Fix up the type of the lcl - assert(lclOp->gtType != TYP_MASK); - lclOp->gtType = convertOp->gtType; - - // Remove the convert convertOp - convertOp->gtBashToNOP(); - *use = lclOp; - m_compiler->fgSequenceLocals(stmt); - - found = true; - modifiedOp = 
*use; - } - else if (((*use) == lclOp) && (!user->OperIsConvertVectorToMask())) - { - // Convert - // user(use:LCL_VAR(x)) - // to - // user(ConvertMaskToVector(use:LCL_VAR(x))) - - GenTreeLclVar* lclOp = (*use)->AsLclVar(); - - // Fix up the type of the lcl - assert(lclOp->gtType != TYP_MASK); - var_types vectorType = lclOp->gtType; - lclOp->gtType = TYP_MASK; - - // Create a convert to mask node and insert it infront of the lcl. - // There is not enough information in the lcl to get simd types. Instead we reuse the cached simd - // types from the removed convert nodes. - assert(simdBaseJitType != CORINFO_TYPE_UNDEF); - *use = m_compiler->gtNewSimdCvtMaskToVectorNode(vectorType, lclOp, simdBaseJitType, simdSize); - - addedConversion = true; - found = true; - modifiedOp = *use; - } - break; + assert(isLocalStore || isLocalUse); + assert(addConversion || removeConversion); + assert(lclOp != nullptr); - default: - break; + // Get the existing weighting. + LclMasksWeight weight; + bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); + assert(found); + + // Quit if the cost of changing is higher. + if (weight.currentCost <= weight.switchCost) + { + JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), + Compiler::dspTreeID(lclOp)); + weight.DumpTotalWeight(); + return fgWalkResult::WALK_CONTINUE; } - return found ? fgWalkResult::WALK_ABORT : fgWalkResult::WALK_CONTINUE; - } + JITDUMP("Local %s V%02d at [%06u] will be converted. ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), + Compiler::dspTreeID(lclOp)); + weight.DumpTotalWeight(); -public: - bool addedConversion = false; - bool found = false; - GenTree* modifiedOp = nullptr; + // Fix up the type of the lcl and the lclvar. 
+ assert(lclOp->gtType != TYP_MASK); + var_types lclOrigType = lclOp->gtType; + lclOp->gtType = TYP_MASK; + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); + varDsc->lvType = TYP_MASK; + -private: - GenTreeLclVarCommon* lclOp; - Statement* stmt; - CorInfoType simdBaseJitType; - unsigned simdSize; -}; + // Add or remove a conversion -//----------------------------------------------------------------------------- -// fgLclMasksUpdateLcl: For the given lcl, if the weighting recommends to switch, then update. -// -// Arguments: -// lclOp - The local variable. -// stmt - The statement the local variable is contained in. -// weightsTable - table to update. -// -void Compiler::fgLclMasksUpdateLcl(GenTreeLclVarCommon* lclOp, Statement* const stmt, LclMasksWeightTable* weightsTable) -{ + if (isLocalStore && removeConversion) + { + // Convert + // use:STORE_LCL_VAR(ConvertMaskToVector(x)) + // to + // use:STORE_LCL_VAR(x) - // Only these can have conversions. - if (!lclOp->OperIs(GT_STORE_LCL_VAR) && !lclOp->OperIs(GT_LCL_VAR)) - { - return; - } + GenTreeHWIntrinsic* convertOp = lclOp->Data()->AsHWIntrinsic(); - bool isStore = lclOp->OperIs(GT_STORE_LCL_VAR); + lclOp->gtOp1 = convertOp->Op(1); + convertOp->gtBashToNOP(); + m_compiler->fgSequenceLocals(stmt); + } - // Get the existing weighting (if any). - LclMasksWeight weight; - bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); - assert(found); + else if (isLocalStore && addConversion) + { + // Convert + // use:STORE_LCL_VAR(x) + // to + // use:STORE_LCL_VAR(ConvertVectorToMask(x)) + + // There is not enough information in the lcl to get simd types. Instead reuse the cached + // simd types from the removed convert nodes. 
+ assert(weight.simdBaseJitType != CORINFO_TYPE_UNDEF); + GenTree* convertOp = m_compiler->gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclOp->Data(), + weight.simdBaseJitType, weight.simdSize); + lclOp->Data() = convertOp; + } - if (!weight.ShouldSwitch()) - { - JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp)); - weight.DumpTotalWeight(); - return; - } + else if (isLocalUse && removeConversion) + { + // Convert + // user(use:ConvertVectorToMask(LCL_VAR(x))) + // to + // user(use:LCL_VAR(x)) + + GenTree* const convertOp = *use; + + // Find the location of convertOp in the user + int opNum = 1; + for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) + { + if (user->AsHWIntrinsic()->Op(opNum) == convertOp) + { + break; + } + } + assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - JITDUMP("Local %s V%02d at [%06u] will be converted. ", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp)); - weight.DumpTotalWeight(); + // Remove the convert convertOp + convertOp->gtBashToNOP(); + *use = lclOp; + m_compiler->fgSequenceLocals(stmt); + } - // Remove or add a mask conversion. - LclMasksUpdateLclVisitor ev(this, lclOp, stmt, weight.simdBaseJitType, weight.simdSize); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); + else if (isLocalUse && addConversion) + { + // Convert + // user(use:LCL_VAR(x)) + // to + // user(ConvertMaskToVector(use:LCL_VAR(x))) + + // There is not enough information in the lcl to get simd types. Instead reuse the cached simd + // types from the removed convert nodes. + assert(weight.simdBaseJitType != CORINFO_TYPE_UNDEF); + *use = m_compiler->gtNewSimdCvtMaskToVectorNode(lclOrigType, lclOp, weight.simdBaseJitType, weight.simdSize); + } - if (ev.found) - { - JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isStore ? "store" : "var", lclOp->GetLclNum(), - dspTreeID(lclOp), ev.addedConversion ? 
"added" : "removed"); + JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isLocalStore ? "store" : "var", lclOp->GetLclNum(), + m_compiler->dspTreeID(lclOp), addConversion ? "added" : "removed"); #ifdef DEBUG - if (verbose) + if (m_compiler->verbose) { - gtDispTree(ev.modifiedOp); + m_compiler->gtDispTree(*use); } #endif + + return fgWalkResult::WALK_CONTINUE; } -} + +public: + +private: + Statement* stmt; + LclMasksWeightTable* weightsTable; +}; + #endif // TARGET_ARM64 @@ -447,9 +443,13 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (Statement* const stmt : block->Statements()) { - for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + GenTreeLclVarCommon* firstLcl = *stmt->LocalsTreeList().begin(); + if (firstLcl != nullptr) { - foundConversion |= fgLclMasksCheckLcl(lcl, stmt, block, &weightsTable); + LclMasksCheckLclVisitor ev(this, block->getBBWeight(this), &weightsTable); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + foundConversion |= ev.foundConversions; } } } @@ -466,9 +466,12 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (Statement* const stmt : block->Statements()) { - for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) + GenTreeLclVarCommon* firstLcl = *stmt->LocalsTreeList().begin(); + if (firstLcl != nullptr) { - fgLclMasksUpdateLcl(lcl, stmt, &weightsTable); + LclMasksUpdateLclVisitor ev(this, stmt, &weightsTable); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); } } } From 04f0eb88808490b6fa6ef69d1ef2efe1d5ca1200 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 8 Nov 2024 17:27:23 +0000 Subject: [PATCH 38/66] Rename visitors --- src/coreclr/jit/lclmasks.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index c25215ef6d5973..65c2df8f9964f9 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -85,9 +85,9 @@ void 
LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) } //----------------------------------------------------------------------------- -// LclMasksCheckLclVisitor: Find all lcl var definitions and uses. For each one, update the weighting. +// LclMasksCheckVisitor: Find all lcl var definitions and uses. For each one, update the weighting. // -class LclMasksCheckLclVisitor final : public GenTreeVisitor +class LclMasksCheckVisitor final : public GenTreeVisitor { public: enum @@ -96,8 +96,8 @@ class LclMasksCheckLclVisitor final : public GenTreeVisitor(compiler) + LclMasksCheckVisitor(Compiler* compiler, weight_t bbWeight, LclMasksWeightTable* weightsTable) + : GenTreeVisitor(compiler) , bbWeight(bbWeight) , weightsTable(weightsTable) { @@ -181,9 +181,9 @@ class LclMasksCheckLclVisitor final : public GenTreeVisitor +class LclMasksUpdateVisitor final : public GenTreeVisitor { public: enum @@ -192,9 +192,9 @@ class LclMasksUpdateLclVisitor final : public GenTreeVisitor(compiler) + : GenTreeVisitor(compiler) , stmt(stmt) , weightsTable(weightsTable) { @@ -446,7 +446,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() GenTreeLclVarCommon* firstLcl = *stmt->LocalsTreeList().begin(); if (firstLcl != nullptr) { - LclMasksCheckLclVisitor ev(this, block->getBBWeight(this), &weightsTable); + LclMasksCheckVisitor ev(this, block->getBBWeight(this), &weightsTable); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); foundConversion |= ev.foundConversions; @@ -469,7 +469,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() GenTreeLclVarCommon* firstLcl = *stmt->LocalsTreeList().begin(); if (firstLcl != nullptr) { - LclMasksUpdateLclVisitor ev(this, stmt, &weightsTable); + LclMasksUpdateVisitor ev(this, stmt, &weightsTable); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); } From 5abaaa148c90dc19cc195d47756fc48ed65b094c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 8 Nov 2024 17:32:24 +0000 Subject: [PATCH 39/66] fix formatting --- 
src/coreclr/jit/lclmasks.cpp | 70 +++++++++++++++++------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 65c2df8f9964f9..f7113bb774f36f 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -21,7 +21,7 @@ struct LclMasksWeight // The simd types of the Lcl Store after conversion to vector. CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned simdSize = 0; + unsigned simdSize = 0; void UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight); @@ -35,7 +35,6 @@ struct LclMasksWeight typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; - //----------------------------------------------------------------------------- // UpdateWeight: Updates the weighting to take account of a local. // @@ -107,8 +106,8 @@ class LclMasksCheckVisitor final : public GenTreeVisitor { GenTreeHWIntrinsic* convertOp = nullptr; - bool isLocalStore = false; - bool isLocalUse = false; + bool isLocalStore = false; + bool isLocalUse = false; bool hasConversion = false; switch ((*use)->OperGet()) @@ -121,7 +120,7 @@ class LclMasksCheckVisitor final : public GenTreeVisitor if ((*use)->AsLclVar()->Data()->OperIsConvertMaskToVector()) { - convertOp = (*use)->AsLclVar()->Data()->AsHWIntrinsic(); + convertOp = (*use)->AsLclVar()->Data()->AsHWIntrinsic(); hasConversion = true; } break; @@ -134,7 +133,7 @@ class LclMasksCheckVisitor final : public GenTreeVisitor if (user->OperIsConvertVectorToMask()) { - convertOp = user->AsHWIntrinsic(); + convertOp = user->AsHWIntrinsic(); hasConversion = true; } break; @@ -172,11 +171,10 @@ class LclMasksCheckVisitor final : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } - bool foundConversions = false; private: - weight_t bbWeight; + weight_t bbWeight; LclMasksWeightTable* weightsTable; }; @@ -192,8 +190,7 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor UseExecutionOrder = true }; - 
LclMasksUpdateVisitor( - Compiler* compiler, Statement* stmt, LclMasksWeightTable* weightsTable) + LclMasksUpdateVisitor(Compiler* compiler, Statement* stmt, LclMasksWeightTable* weightsTable) : GenTreeVisitor(compiler) , stmt(stmt) , weightsTable(weightsTable) @@ -202,42 +199,42 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { - GenTreeLclVarCommon* lclOp = nullptr; - bool isLocalStore = false; - bool isLocalUse = false; - bool addConversion = false; - bool removeConversion = false; + GenTreeLclVarCommon* lclOp = nullptr; + bool isLocalStore = false; + bool isLocalUse = false; + bool addConversion = false; + bool removeConversion = false; if ((*use)->OperIs(GT_STORE_LCL_VAR) && (*use)->AsLclVarCommon()->Data()->OperIsConvertMaskToVector()) { // Found // use:STORE_LCL_VAR(ConvertMaskToVector(x)) - lclOp = (*use)->AsLclVarCommon(); - isLocalStore = true; + lclOp = (*use)->AsLclVarCommon(); + isLocalStore = true; removeConversion = true; } else if ((*use)->OperIs(GT_STORE_LCL_VAR) && !(*use)->AsLclVarCommon()->Data()->OperIsConvertMaskToVector()) { // Found // use:STORE_LCL_VAR(x) - lclOp = (*use)->AsLclVarCommon(); - isLocalStore = true; + lclOp = (*use)->AsLclVarCommon(); + isLocalStore = true; addConversion = true; } else if ((*use)->OperIsConvertVectorToMask() && (*use)->AsHWIntrinsic()->Op(2)->OperIs(GT_LCL_VAR)) { // Found // user(use:ConvertVectorToMask(LCL_VAR(x))) - lclOp = (*use)->AsHWIntrinsic()->Op(2)->AsLclVarCommon(); - isLocalUse = true; + lclOp = (*use)->AsHWIntrinsic()->Op(2)->AsLclVarCommon(); + isLocalUse = true; removeConversion = true; } else if ((*use)->OperIs(GT_LCL_VAR) && (!user->OperIsConvertVectorToMask())) { // Found // user(use:LCL_VAR(x)) - lclOp = (*use)->AsLclVar(); - isLocalUse = true; + lclOp = (*use)->AsLclVar(); + isLocalUse = true; addConversion = true; } else @@ -258,8 +255,8 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // Quit if 
the cost of changing is higher. if (weight.currentCost <= weight.switchCost) { - JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), - Compiler::dspTreeID(lclOp)); + JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isLocalStore ? "store" : "var", + lclOp->GetLclNum(), Compiler::dspTreeID(lclOp)); weight.DumpTotalWeight(); return fgWalkResult::WALK_CONTINUE; } @@ -271,10 +268,9 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // Fix up the type of the lcl and the lclvar. assert(lclOp->gtType != TYP_MASK); var_types lclOrigType = lclOp->gtType; - lclOp->gtType = TYP_MASK; - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); - varDsc->lvType = TYP_MASK; - + lclOp->gtType = TYP_MASK; + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); + varDsc->lvType = TYP_MASK; // Add or remove a conversion @@ -287,7 +283,7 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor GenTreeHWIntrinsic* convertOp = lclOp->Data()->AsHWIntrinsic(); - lclOp->gtOp1 = convertOp->Op(1); + lclOp->gtOp1 = convertOp->Op(1); convertOp->gtBashToNOP(); m_compiler->fgSequenceLocals(stmt); } @@ -343,11 +339,12 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // There is not enough information in the lcl to get simd types. Instead reuse the cached simd // types from the removed convert nodes. assert(weight.simdBaseJitType != CORINFO_TYPE_UNDEF); - *use = m_compiler->gtNewSimdCvtMaskToVectorNode(lclOrigType, lclOp, weight.simdBaseJitType, weight.simdSize); + *use = + m_compiler->gtNewSimdCvtMaskToVectorNode(lclOrigType, lclOp, weight.simdBaseJitType, weight.simdSize); } - JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isLocalStore ? "store" : "var", lclOp->GetLclNum(), - m_compiler->dspTreeID(lclOp), addConversion ? "added" : "removed"); + JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isLocalStore ? 
"store" : "var", + lclOp->GetLclNum(), m_compiler->dspTreeID(lclOp), addConversion ? "added" : "removed"); #ifdef DEBUG if (m_compiler->verbose) @@ -362,11 +359,10 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor public: private: - Statement* stmt; + Statement* stmt; LclMasksWeightTable* weightsTable; }; - #endif // TARGET_ARM64 //------------------------------------------------------------------------ @@ -447,7 +443,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() if (firstLcl != nullptr) { LclMasksCheckVisitor ev(this, block->getBBWeight(this), &weightsTable); - GenTree* root = stmt->GetRootNode(); + GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); foundConversion |= ev.foundConversions; } @@ -470,7 +466,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() if (firstLcl != nullptr) { LclMasksUpdateVisitor ev(this, stmt, &weightsTable); - GenTree* root = stmt->GetRootNode(); + GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); } } From 960e9f170455cf245c48f77ee9020eca0b8e8207 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 11:37:28 +0000 Subject: [PATCH 40/66] Add checks for LCL_ADDR --- src/coreclr/jit/lclmasks.cpp | 38 +++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index f7113bb774f36f..3789aae1c17b92 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -13,6 +13,9 @@ struct LclMasksWeight // For the given variable, the cost of storing as mask. weight_t switchCost = 0.0; + // The weighting is invalid. + bool invalid = false; + // Conversion of mask to vector is one instruction. static constexpr const weight_t costOfConvertMaskToVector = 1.0; @@ -25,9 +28,16 @@ struct LclMasksWeight void UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight); + void InvalidateWeight() + { + JITDUMP("Invalidating weight. 
\n"); + invalid = true; + DumpTotalWeight(); + } + void DumpTotalWeight() { - JITDUMP("Weighting: {%.2fc %.2fs}\n", currentCost, switchCost); + JITDUMP("Weighting: %s{%.2fc %.2fs}\n", invalid ? "Invalid" : "", currentCost, switchCost); } void CacheSimdTypes(GenTreeHWIntrinsic* op); @@ -108,6 +118,7 @@ class LclMasksCheckVisitor final : public GenTreeVisitor bool isLocalStore = false; bool isLocalUse = false; + bool isInvalid = false; bool hasConversion = false; switch ((*use)->OperGet()) @@ -138,11 +149,15 @@ class LclMasksCheckVisitor final : public GenTreeVisitor } break; + case GT_LCL_ADDR: + isInvalid = true; + break; + default: break; } - if (isLocalStore || isLocalUse) + if (isLocalStore || isLocalUse || isInvalid) { GenTreeLclVarCommon* lclOp = (*use)->AsLclVarCommon(); @@ -151,9 +166,18 @@ class LclMasksCheckVisitor final : public GenTreeVisitor weightsTable->Lookup(lclOp->GetLclNum(), &weight); // Update the weights. - JITDUMP("Local %s V%02d at [%06u] has %s conversion. ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), - m_compiler->dspTreeID(lclOp), hasConversion ? "mask" : "no"); - weight.UpdateWeight(isLocalStore, hasConversion, bbWeight); + JITDUMP("Local %s V%02d at [%06u] ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), + m_compiler->dspTreeID(lclOp)); + if (isInvalid) + { + JITDUMP("cannot be converted. "); + weight.InvalidateWeight(); + } + else + { + JITDUMP("has %s conversion. ", hasConversion ? "mask" : "no"); + weight.UpdateWeight(isLocalStore, hasConversion, bbWeight); + } // Cache the simd type data of the conversion. if (hasConversion) @@ -252,8 +276,8 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); assert(found); - // Quit if the cost of changing is higher. - if (weight.currentCost <= weight.switchCost) + // Quit if the cost of changing is higher or is invalid. 
+ if (weight.currentCost <= weight.switchCost || weight.invalid) { JITDUMP("Local %s V%02d at [%06u] will not be converted. ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), Compiler::dspTreeID(lclOp)); From 206c6634c0b464bed5db4134f324529f78ca89d7 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 12:39:25 +0000 Subject: [PATCH 41/66] Add config option --- src/coreclr/jit/jitconfigvalues.h | 3 ++- src/coreclr/jit/lclmasks.cpp | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 23158d49342dec..e7a2ba20abfe4a 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -571,7 +571,8 @@ OPT_CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value OPT_CONFIG_STRING(JitOptRepeatRange, W("JitOptRepeatRange")) // Enable JitOptRepeat based on method hash range -OPT_CONFIG_INTEGER(JitDoIfConversion, W("JitDoIfConversion"), 1) // Perform If conversion +OPT_CONFIG_INTEGER(JitDoIfConversion, W("JitDoIfConversion"), 1) // Perform If conversion +OPT_CONFIG_INTEGER(JitDoOptimizeLclMasks, W("JitDoOptimizeLclMasks"), 1) // Perform optimization of local masks RELEASE_CONFIG_INTEGER(JitEnableOptRepeat, W("JitEnableOptRepeat"), 1) // If zero, do not allow JitOptRepeat RELEASE_CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat")) // Runs optimizer multiple times on specified methods diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 3789aae1c17b92..59ebe4318c1f51 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -448,9 +448,17 @@ PhaseStatus Compiler::fgOptimizeLclMasks() return PhaseStatus::MODIFIED_NOTHING; } +#if defined(DEBUG) + if (JitConfig.JitDoOptimizeLclMasks() == 0) + { + JITDUMP("Skipping. Disable by config option\n"); + return PhaseStatus::MODIFIED_NOTHING; + } +#endif + if (!compMaskConvertUsed) { - JITDUMP("Skipping. 
There are no converts of locals \n"); + JITDUMP("Skipping. There are no converts of locals\n"); return PhaseStatus::MODIFIED_NOTHING; } From c54561817f6d89742049d13e6116bc9cca049300 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 13:55:35 +0000 Subject: [PATCH 42/66] Single Fact for all the tests --- .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 127 ++++-------------- 1 file changed, 26 insertions(+), 101 deletions(-) diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index e4ab4879328def..e36a162f791a7c 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -12,7 +12,7 @@ using System.Threading; using Xunit; -public class AcrossAndCselToAcross +public class ChangeMatchUse { [MethodImpl(MethodImplOptions.NoInlining)] private static void Consume(T value) { } @@ -24,22 +24,37 @@ private static void Consume(T value, T2 value2) { } // Create a mask. Use it as a mask. // Conversion of mask1 will be removed. [Fact] - public static void UseMaskAsMask() + public static void ChangeMatchUseTests() { for (int i = 0; i < 4; i++) { for (int j = 0; j < 200; j++) { - InnerUseMaskAsMask(); + UseMaskAsMask(); + UseMaskAsVector(); + UseMaskAsMaskAndVector(); + UseMaskAsMaskAndVectorInsideLoop(); + UseMaskAsVectorAndMaskInsideLoop(); + CastMaskUseAsVector(); + CastMaskUseAsMask(); } Thread.Sleep(100); } - InnerUseMaskAsMask(); + UseMaskAsMask(); + UseMaskAsVector(); + UseMaskAsMaskAndVector(); + UseMaskAsMaskAndVectorInsideLoop(); + UseMaskAsVectorAndMaskInsideLoop(); + CastMaskUseAsVector(); + CastMaskUseAsMask(); } + + // Create a mask. Use it as a mask. + // Conversion of mask1 will be removed. [method: MethodImpl(MethodImplOptions.NoInlining)] - private static void InnerUseMaskAsMask() + private static void UseMaskAsMask() { if (Sve.IsSupported) { @@ -52,23 +67,8 @@ private static void InnerUseMaskAsMask() // Create a mask. Use it as a vector. 
// No conversions will be changed: Mask->Vector is optimal. - [Fact] - public static void UseMaskAsVector() - { - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 200; j++) - { - InnerUseMaskAsVector(); - } - - Thread.Sleep(100); - } - InnerUseMaskAsVector(); - } - [method: MethodImpl(MethodImplOptions.NoInlining)] - private static void InnerUseMaskAsVector() + private static void UseMaskAsVector() { if (Sve.IsSupported) { @@ -81,23 +81,8 @@ private static void InnerUseMaskAsVector() // Create a mask. Use it as a mask, then use as a vector. // Mask1 conversions will be switched. - [Fact] - public static void UseMaskAsMaskAndVector() - { - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 200; j++) - { - InnerUseMaskAsMaskAndVector(); - } - - Thread.Sleep(100); - } - InnerUseMaskAsMaskAndVector(); - } - [method: MethodImpl(MethodImplOptions.NoInlining)] - private static void InnerUseMaskAsMaskAndVector() + private static void UseMaskAsMaskAndVector() { if (Sve.IsSupported) { @@ -112,23 +97,8 @@ private static void InnerUseMaskAsMaskAndVector() // Create a mask. Use it as a mask, then use as a vector inside a loop. // No conversions will be changed: vector use inside the loop dominates. - [Fact] - public static void UseMaskAsMaskAndVectorInsideLoop() - { - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 200; j++) - { - InnerUseMaskAsMaskAndVectorInsideLoop(); - } - - Thread.Sleep(100); - } - InnerUseMaskAsMaskAndVectorInsideLoop(); - } - [method: MethodImpl(MethodImplOptions.NoInlining)] - private static void InnerUseMaskAsMaskAndVectorInsideLoop() + private static void UseMaskAsMaskAndVectorInsideLoop() { if (Sve.IsSupported) { @@ -147,23 +117,8 @@ private static void InnerUseMaskAsMaskAndVectorInsideLoop() // Create a mask. Use it as a vector, then use as a mask inside a loop. // Will be converted: mask use inside the loop dominates. 
- [Fact] - public static void UseMaskAsVectorAndMaskInsideLoop() - { - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 200; j++) - { - InnerUseMaskAsVectorAndMaskInsideLoop(); - } - - Thread.Sleep(100); - } - InnerUseMaskAsVectorAndMaskInsideLoop(); - } - [method: MethodImpl(MethodImplOptions.NoInlining)] - private static void InnerUseMaskAsVectorAndMaskInsideLoop() + private static void UseMaskAsVectorAndMaskInsideLoop() { if (Sve.IsSupported) { @@ -179,23 +134,8 @@ private static void InnerUseMaskAsVectorAndMaskInsideLoop() } } - [Fact] - public static void CastMaskUseAsVector() - { - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 200; j++) - { - InnerCastMaskUseAsVector(); - } - - Thread.Sleep(100); - } - InnerCastMaskUseAsVector(); - } - [method: MethodImpl(MethodImplOptions.NoInlining)] - private static void InnerCastMaskUseAsVector() + private static void CastMaskUseAsVector() { if (Sve.IsSupported) { @@ -208,23 +148,8 @@ private static void InnerCastMaskUseAsVector() } } - [Fact] - public static void CastMaskUseAsMask() - { - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 200; j++) - { - InnerCastMaskUseAsMask(); - } - - Thread.Sleep(100); - } - InnerCastMaskUseAsMask(); - } - [method: MethodImpl(MethodImplOptions.NoInlining)] - private static void InnerCastMaskUseAsMask() + private static void CastMaskUseAsMask() { if (Sve.IsSupported) { From b247ed1d85e56dcfe8762a65db4a8712a49e18ea Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:10:00 +0000 Subject: [PATCH 43/66] Only check statements where there is a local of type TYP_SIMD16/TYP_MASK --- src/coreclr/jit/lclmasks.cpp | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 59ebe4318c1f51..23ee3d9225910c 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -471,13 +471,18 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for 
(Statement* const stmt : block->Statements()) { - GenTreeLclVarCommon* firstLcl = *stmt->LocalsTreeList().begin(); - if (firstLcl != nullptr) + // Only check statements where there is a local of type TYP_SIMD16/TYP_MASK. + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - LclMasksCheckVisitor ev(this, block->getBBWeight(this), &weightsTable); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); - foundConversion |= ev.foundConversions; + if (lcl->gtType == TYP_SIMD16 || lcl->gtType == TYP_MASK) + { + // Parse the entire statement. + LclMasksCheckVisitor ev(this, block->getBBWeight(this), &weightsTable); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + foundConversion |= ev.foundConversions; + break; + } } } } @@ -494,12 +499,17 @@ PhaseStatus Compiler::fgOptimizeLclMasks() { for (Statement* const stmt : block->Statements()) { - GenTreeLclVarCommon* firstLcl = *stmt->LocalsTreeList().begin(); - if (firstLcl != nullptr) + // Only check statements where there is a local of type TYP_SIMD16/TYP_MASK. + for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - LclMasksUpdateVisitor ev(this, stmt, &weightsTable); - GenTree* root = stmt->GetRootNode(); - ev.WalkTree(&root, nullptr); + if (lcl->gtType == TYP_SIMD16 || lcl->gtType == TYP_MASK) + { + // Parse the entire statement. 
+ LclMasksUpdateVisitor ev(this, stmt, &weightsTable); + GenTree* root = stmt->GetRootNode(); + ev.WalkTree(&root, nullptr); + break; + } } } } From 0f453003da56e7c1d8de9fad1979320fce68d0d1 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:17:51 +0000 Subject: [PATCH 44/66] Call fgSequenceLocals() once per statement --- src/coreclr/jit/lclmasks.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 23ee3d9225910c..4e14143caabf47 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -309,7 +309,6 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor lclOp->gtOp1 = convertOp->Op(1); convertOp->gtBashToNOP(); - m_compiler->fgSequenceLocals(stmt); } else if (isLocalStore && addConversion) @@ -350,7 +349,6 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // Remove the convert convertOp convertOp->gtBashToNOP(); *use = lclOp; - m_compiler->fgSequenceLocals(stmt); } else if (isLocalUse && addConversion) @@ -377,10 +375,12 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor } #endif + updatedConversions = true; return fgWalkResult::WALK_CONTINUE; } public: + bool updatedConversions = false; private: Statement* stmt; @@ -508,6 +508,10 @@ PhaseStatus Compiler::fgOptimizeLclMasks() LclMasksUpdateVisitor ev(this, stmt, &weightsTable); GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); + if (ev.updatedConversions) + { + fgSequenceLocals(stmt); + } break; } } From 0666cc28c4f6128f6a6bac5d7e17000c97a226a2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:19:52 +0000 Subject: [PATCH 45/66] Use JitSmallPrimitiveKeyFuncs --- src/coreclr/jit/lclmasks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 4e14143caabf47..61af166865797a 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp 
@@ -43,7 +43,7 @@ struct LclMasksWeight void CacheSimdTypes(GenTreeHWIntrinsic* op); }; -typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; +typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; //----------------------------------------------------------------------------- // UpdateWeight: Updates the weighting to take account of a local. From 43cc1853319fea8e41e23c9c57dd7d1b6b4038b0 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:30:51 +0000 Subject: [PATCH 46/66] allow for nullptr user --- src/coreclr/jit/lclmasks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 61af166865797a..dc77e62aeac407 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -253,7 +253,7 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor isLocalUse = true; removeConversion = true; } - else if ((*use)->OperIs(GT_LCL_VAR) && (!user->OperIsConvertVectorToMask())) + else if ((*use)->OperIs(GT_LCL_VAR) && ((user == nullptr) || !user->OperIsConvertVectorToMask())) { // Found // user(use:LCL_VAR(x)) From cf3ceed642a542f5c0bb009f1e849736333e2007 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:38:34 +0000 Subject: [PATCH 47/66] Remove uses of gtBashToNOP() --- src/coreclr/jit/lclmasks.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index dc77e62aeac407..d8191866ca87ab 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -305,10 +305,7 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // to // use:STORE_LCL_VAR(x) - GenTreeHWIntrinsic* convertOp = lclOp->Data()->AsHWIntrinsic(); - - lclOp->gtOp1 = convertOp->Op(1); - convertOp->gtBashToNOP(); + lclOp->Data() = lclOp->Data()->AsHWIntrinsic()->Op(1); } else if (isLocalStore && addConversion) @@ -347,7 +344,6 @@ class LclMasksUpdateVisitor final : public 
GenTreeVisitor assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); // Remove the convert convertOp - convertOp->gtBashToNOP(); *use = lclOp; } From 0a3a31a8ba5fcc504bb6c153cffe1fea31706612 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:41:11 +0000 Subject: [PATCH 48/66] Use DISPTREE --- src/coreclr/jit/lclmasks.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index d8191866ca87ab..6668edc7b10bb5 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -363,13 +363,7 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor JITDUMP("Updated %s V%02d at [%06u] to mask (%s conversion)\n", isLocalStore ? "store" : "var", lclOp->GetLclNum(), m_compiler->dspTreeID(lclOp), addConversion ? "added" : "removed"); - -#ifdef DEBUG - if (m_compiler->verbose) - { - m_compiler->gtDispTree(*use); - } -#endif + DISPTREE(*use); updatedConversions = true; return fgWalkResult::WALK_CONTINUE; From 6d1451946b630770f708082648f86a7ad87ac7b6 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:46:34 +0000 Subject: [PATCH 49/66] update asserts --- src/coreclr/jit/lclmasks.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 6668edc7b10bb5..c8734c3202cfd0 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -267,8 +267,8 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } - assert(isLocalStore || isLocalUse); - assert(addConversion || removeConversion); + assert(isLocalStore != isLocalUse); + assert(addConversion != removeConversion); assert(lclOp != nullptr); // Get the existing weighting. 
From e243baa890c8fcb4a4ad6319bbae7c04e90fa393 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:48:57 +0000 Subject: [PATCH 50/66] Remove searching of convertOp --- src/coreclr/jit/lclmasks.cpp | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index c8734c3202cfd0..13f289481de658 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -318,9 +318,8 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // There is not enough information in the lcl to get simd types. Instead reuse the cached // simd types from the removed convert nodes. assert(weight.simdBaseJitType != CORINFO_TYPE_UNDEF); - GenTree* convertOp = m_compiler->gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclOp->Data(), - weight.simdBaseJitType, weight.simdSize); - lclOp->Data() = convertOp; + lclOp->Data() = m_compiler->gtNewSimdCvtVectorToMaskNode(TYP_MASK, lclOp->Data(), weight.simdBaseJitType, + weight.simdSize); } else if (isLocalUse && removeConversion) @@ -330,20 +329,6 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // to // user(use:LCL_VAR(x)) - GenTree* const convertOp = *use; - - // Find the location of convertOp in the user - int opNum = 1; - for (; opNum <= user->AsHWIntrinsic()->GetOperandCount(); opNum++) - { - if (user->AsHWIntrinsic()->Op(opNum) == convertOp) - { - break; - } - } - assert(opNum <= user->AsHWIntrinsic()->GetOperandCount()); - - // Remove the convert convertOp *use = lclOp; } From 1c64781adbbba2fd45f620b97546cc0644c328f5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 14:53:43 +0000 Subject: [PATCH 51/66] remove "method" in tests --- src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index e36a162f791a7c..7a93a3012f7b2e 100644 --- 
a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -53,7 +53,7 @@ public static void ChangeMatchUseTests() // Create a mask. Use it as a mask. // Conversion of mask1 will be removed. - [method: MethodImpl(MethodImplOptions.NoInlining)] + [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMask() { if (Sve.IsSupported) @@ -67,7 +67,7 @@ private static void UseMaskAsMask() // Create a mask. Use it as a vector. // No conversions will be changed: Mask->Vector is optimal. - [method: MethodImpl(MethodImplOptions.NoInlining)] + [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsVector() { if (Sve.IsSupported) @@ -81,7 +81,7 @@ private static void UseMaskAsVector() // Create a mask. Use it as a mask, then use as a vector. // Mask1 conversions will be switched. - [method: MethodImpl(MethodImplOptions.NoInlining)] + [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMaskAndVector() { if (Sve.IsSupported) @@ -97,7 +97,7 @@ private static void UseMaskAsMaskAndVector() // Create a mask. Use it as a mask, then use as a vector inside a loop. // No conversions will be changed: vector use inside the loop dominates. - [method: MethodImpl(MethodImplOptions.NoInlining)] + [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMaskAndVectorInsideLoop() { if (Sve.IsSupported) @@ -117,7 +117,7 @@ private static void UseMaskAsMaskAndVectorInsideLoop() // Create a mask. Use it as a vector, then use as a mask inside a loop. // Will be converted: mask use inside the loop dominates. 
- [method: MethodImpl(MethodImplOptions.NoInlining)] + [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsVectorAndMaskInsideLoop() { if (Sve.IsSupported) @@ -134,7 +134,7 @@ private static void UseMaskAsVectorAndMaskInsideLoop() } } - [method: MethodImpl(MethodImplOptions.NoInlining)] + [MethodImpl(MethodImplOptions.NoInlining)] private static void CastMaskUseAsVector() { if (Sve.IsSupported) @@ -148,7 +148,7 @@ private static void CastMaskUseAsVector() } } - [method: MethodImpl(MethodImplOptions.NoInlining)] + [MethodImpl(MethodImplOptions.NoInlining)] private static void CastMaskUseAsMask() { if (Sve.IsSupported) From 8563352622a002a81f9a3cb323a025d6ecf3e7c3 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 16:02:47 +0000 Subject: [PATCH 52/66] Use LookupPointerOrAdd() --- src/coreclr/jit/lclmasks.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 13f289481de658..4235e42a883d3f 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -162,8 +162,8 @@ class LclMasksCheckVisitor final : public GenTreeVisitor GenTreeLclVarCommon* lclOp = (*use)->AsLclVarCommon(); // Get the existing weighting (if any). - LclMasksWeight weight; - weightsTable->Lookup(lclOp->GetLclNum(), &weight); + LclMasksWeight defaultWeight; + LclMasksWeight* weight = weightsTable->LookupPointerOrAdd(lclOp->GetLclNum(), defaultWeight); // Update the weights. JITDUMP("Local %s V%02d at [%06u] ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), @@ -171,23 +171,23 @@ class LclMasksCheckVisitor final : public GenTreeVisitor if (isInvalid) { JITDUMP("cannot be converted. "); - weight.InvalidateWeight(); + weight->InvalidateWeight(); } else { JITDUMP("has %s conversion. ", hasConversion ? 
"mask" : "no"); - weight.UpdateWeight(isLocalStore, hasConversion, bbWeight); + weight->UpdateWeight(isLocalStore, hasConversion, bbWeight); } // Cache the simd type data of the conversion. if (hasConversion) { assert(convertOp != nullptr); - weight.CacheSimdTypes(convertOp); + weight->CacheSimdTypes(convertOp); } // Update the table. - weightsTable->Set(lclOp->GetLclNum(), weight, LclMasksWeightTable::Overwrite); + weightsTable->Set(lclOp->GetLclNum(), *weight, LclMasksWeightTable::Overwrite); foundConversions |= hasConversion; } From 3c90ec8d3e5ad56185fb13903413beb7a394f3b5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 17:43:52 +0000 Subject: [PATCH 53/66] Remove Set() to table --- src/coreclr/jit/lclmasks.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 4235e42a883d3f..95e2c132927dfa 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -186,9 +186,6 @@ class LclMasksCheckVisitor final : public GenTreeVisitor weight->CacheSimdTypes(convertOp); } - // Update the table. - weightsTable->Set(lclOp->GetLclNum(), *weight, LclMasksWeightTable::Overwrite); - foundConversions |= hasConversion; } From 82d959da892268559e8af7d3fce22ebc8b89f4c1 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 11 Nov 2024 17:46:31 +0000 Subject: [PATCH 54/66] fix formatting --- src/coreclr/jit/lclmasks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/lclmasks.cpp index 95e2c132927dfa..14b8cff0528518 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/lclmasks.cpp @@ -162,7 +162,7 @@ class LclMasksCheckVisitor final : public GenTreeVisitor GenTreeLclVarCommon* lclOp = (*use)->AsLclVarCommon(); // Get the existing weighting (if any). 
- LclMasksWeight defaultWeight; + LclMasksWeight defaultWeight; LclMasksWeight* weight = weightsTable->LookupPointerOrAdd(lclOp->GetLclNum(), defaultWeight); // Update the weights. From 41c3d37212297a1e7a4099b2f97bd667e30f2297 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 12 Nov 2024 12:14:19 +0000 Subject: [PATCH 55/66] Rename all functions --- src/coreclr/jit/CMakeLists.txt | 2 +- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/compphases.h | 2 +- src/coreclr/jit/jitconfigvalues.h | 5 +- ...lmasks.cpp => optimizemaskconversions.cpp} | 56 +++++++++---------- 6 files changed, 35 insertions(+), 34 deletions(-) rename src/coreclr/jit/{lclmasks.cpp => optimizemaskconversions.cpp} (88%) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 33eeda7742fff3..f5d1a20a730948 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -142,7 +142,6 @@ set( JIT_SOURCES jithashtable.cpp jitmetadata.cpp layout.cpp - lclmasks.cpp lclmorph.cpp lclvars.cpp likelyclass.cpp @@ -157,6 +156,7 @@ set( JIT_SOURCES objectalloc.cpp optcse.cpp optimizebools.cpp + optimizemaskconversions.cpp optimizer.cpp patchpoint.cpp phase.cpp diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 32d1d960bcc3c2..3f166355455a45 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4799,7 +4799,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Optimize away conversions to/from masks in local variables. // - DoPhase(this, PHASE_LCL_MASKS, &Compiler::fgOptimizeLclMasks); + DoPhase(this, PHASE_OPTIMIZE_MASK_CONVERSIONS, &Compiler::fgOptimizeMaskConversions); // Do an early pass of liveness for forward sub and morph. This data is // valid until after morph. 
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b642ba985615cc..16b3197f122108 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6918,7 +6918,7 @@ class Compiler bool fgExposeUnpropagatedLocals(bool propagatedAny, class LocalEqualsLocalAddrAssertions* assertions); void fgExposeLocalsInBitVec(BitVec_ValArg_T bitVec); - PhaseStatus fgOptimizeLclMasks(); + PhaseStatus fgOptimizeMaskConversions(); PhaseStatus PhysicalPromotion(); diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 2f8396ddc0f1c2..92369555c86ce9 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -45,7 +45,7 @@ CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH, "Update flow graph early pa CompPhaseNameMacro(PHASE_DFS_BLOCKS, "DFS blocks and remove dead code",false, -1, false) CompPhaseNameMacro(PHASE_DFS_BLOCKS2, "DFS blocks and remove dead code 2",false, -1, false) CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", false, -1, false) -CompPhaseNameMacro(PHASE_LCL_MASKS, "Local masks", false, -1, false) +CompPhaseNameMacro(PHASE_OPTIMIZE_MASK_CONVERSIONS, "Optimize mask conversions", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_LIVENESS, "Early liveness", false, -1, false) CompPhaseNameMacro(PHASE_PHYSICAL_PROMOTION, "Physical promotion", false, -1, false) CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", false, -1, false) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index e7a2ba20abfe4a..68d960874df321 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -571,8 +571,9 @@ OPT_CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value OPT_CONFIG_STRING(JitOptRepeatRange, W("JitOptRepeatRange")) // Enable JitOptRepeat based on method hash range -OPT_CONFIG_INTEGER(JitDoIfConversion, W("JitDoIfConversion"), 1) // Perform If conversion -OPT_CONFIG_INTEGER(JitDoOptimizeLclMasks, 
W("JitDoOptimizeLclMasks"), 1) // Perform optimization of local masks +OPT_CONFIG_INTEGER(JitDoIfConversion, W("JitDoIfConversion"), 1) // Perform If conversion +OPT_CONFIG_INTEGER(JitDoOptimizeMaskConversions, W("JitDoOptimizeMaskConversions"), 1) // Perform optimization of mask + // conversions RELEASE_CONFIG_INTEGER(JitEnableOptRepeat, W("JitEnableOptRepeat"), 1) // If zero, do not allow JitOptRepeat RELEASE_CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat")) // Runs optimizer multiple times on specified methods diff --git a/src/coreclr/jit/lclmasks.cpp b/src/coreclr/jit/optimizemaskconversions.cpp similarity index 88% rename from src/coreclr/jit/lclmasks.cpp rename to src/coreclr/jit/optimizemaskconversions.cpp index 14b8cff0528518..9ca336d6aa11ff 100644 --- a/src/coreclr/jit/lclmasks.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -5,7 +5,7 @@ #if defined(TARGET_ARM64) -struct LclMasksWeight +struct MaskConversionsWeight { // For the given variable, the cost of storing as vector. weight_t currentCost = 0.0; @@ -43,7 +43,7 @@ struct LclMasksWeight void CacheSimdTypes(GenTreeHWIntrinsic* op); }; -typedef JitHashTable, LclMasksWeight> LclMasksWeightTable; +typedef JitHashTable, MaskConversionsWeight> MaskConversionsWeightTable; //----------------------------------------------------------------------------- // UpdateWeight: Updates the weighting to take account of a local. 
@@ -53,7 +53,7 @@ typedef JitHashTable, LclMasksWeig // hasConvert - Is this local converted // blockWeight - Weight of the block the store is contained in // -void LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight) +void MaskConversionsWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockWeight) { if (hasConvert) { @@ -82,7 +82,7 @@ void LclMasksWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t blockW // Arguments: // op - The HW intrinsic to cache // -void LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) +void MaskConversionsWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) { CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); unsigned newSimdSize = op->GetSimdSize(); @@ -94,9 +94,9 @@ void LclMasksWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) } //----------------------------------------------------------------------------- -// LclMasksCheckVisitor: Find all lcl var definitions and uses. For each one, update the weighting. +// MaskConversionsCheckVisitor: Find all lcl var definitions and uses. For each one, update the weighting. // -class LclMasksCheckVisitor final : public GenTreeVisitor +class MaskConversionsCheckVisitor final : public GenTreeVisitor { public: enum @@ -105,8 +105,8 @@ class LclMasksCheckVisitor final : public GenTreeVisitor UseExecutionOrder = true }; - LclMasksCheckVisitor(Compiler* compiler, weight_t bbWeight, LclMasksWeightTable* weightsTable) - : GenTreeVisitor(compiler) + MaskConversionsCheckVisitor(Compiler* compiler, weight_t bbWeight, MaskConversionsWeightTable* weightsTable) + : GenTreeVisitor(compiler) , bbWeight(bbWeight) , weightsTable(weightsTable) { @@ -162,8 +162,8 @@ class LclMasksCheckVisitor final : public GenTreeVisitor GenTreeLclVarCommon* lclOp = (*use)->AsLclVarCommon(); // Get the existing weighting (if any). 
- LclMasksWeight defaultWeight; - LclMasksWeight* weight = weightsTable->LookupPointerOrAdd(lclOp->GetLclNum(), defaultWeight); + MaskConversionsWeight defaultWeight; + MaskConversionsWeight* weight = weightsTable->LookupPointerOrAdd(lclOp->GetLclNum(), defaultWeight); // Update the weights. JITDUMP("Local %s V%02d at [%06u] ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), @@ -195,14 +195,14 @@ class LclMasksCheckVisitor final : public GenTreeVisitor bool foundConversions = false; private: - weight_t bbWeight; - LclMasksWeightTable* weightsTable; + weight_t bbWeight; + MaskConversionsWeightTable* weightsTable; }; //----------------------------------------------------------------------------- -// LclMasksUpdateVisitor: tree visitor to remove conversion to masks for uses of LCL +// MaskConversionsUpdateVisitor: tree visitor to remove conversion to masks for uses of LCL // -class LclMasksUpdateVisitor final : public GenTreeVisitor +class MaskConversionsUpdateVisitor final : public GenTreeVisitor { public: enum @@ -211,8 +211,8 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor UseExecutionOrder = true }; - LclMasksUpdateVisitor(Compiler* compiler, Statement* stmt, LclMasksWeightTable* weightsTable) - : GenTreeVisitor(compiler) + MaskConversionsUpdateVisitor(Compiler* compiler, Statement* stmt, MaskConversionsWeightTable* weightsTable) + : GenTreeVisitor(compiler) , stmt(stmt) , weightsTable(weightsTable) { @@ -269,8 +269,8 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor assert(lclOp != nullptr); // Get the existing weighting. - LclMasksWeight weight; - bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); + MaskConversionsWeight weight; + bool found = weightsTable->Lookup(lclOp->GetLclNum(), &weight); assert(found); // Quit if the cost of changing is higher or is invalid. 
@@ -355,14 +355,14 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor bool updatedConversions = false; private: - Statement* stmt; - LclMasksWeightTable* weightsTable; + Statement* stmt; + MaskConversionsWeightTable* weightsTable; }; #endif // TARGET_ARM64 //------------------------------------------------------------------------ -// optLclMasks: Allow locals to be of Mask type +// fgOptimizeMaskConversions: Allow locals to be of Mask type // // At the C# level, Masks share the same type as a Vector. It's possible for the same // variable to be used as a mask or vector. Any APIs that return a mask must first convert @@ -410,7 +410,7 @@ class LclMasksUpdateVisitor final : public GenTreeVisitor // Returns: // Suitable phase status // -PhaseStatus Compiler::fgOptimizeLclMasks() +PhaseStatus Compiler::fgOptimizeMaskConversions() { #if defined(TARGET_ARM64) @@ -421,7 +421,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() } #if defined(DEBUG) - if (JitConfig.JitDoOptimizeLclMasks() == 0) + if (JitConfig.JitDoOptimizeMaskConversions() == 0) { JITDUMP("Skipping. Disable by config option\n"); return PhaseStatus::MODIFIED_NOTHING; @@ -434,7 +434,7 @@ PhaseStatus Compiler::fgOptimizeLclMasks() return PhaseStatus::MODIFIED_NOTHING; } - LclMasksWeightTable weightsTable = LclMasksWeightTable(getAllocator()); + MaskConversionsWeightTable weightsTable = MaskConversionsWeightTable(getAllocator()); // Find every local and add them to weightsTable. bool foundConversion = false; @@ -449,8 +449,8 @@ PhaseStatus Compiler::fgOptimizeLclMasks() if (lcl->gtType == TYP_SIMD16 || lcl->gtType == TYP_MASK) { // Parse the entire statement. 
- LclMasksCheckVisitor ev(this, block->getBBWeight(this), &weightsTable); - GenTree* root = stmt->GetRootNode(); + MaskConversionsCheckVisitor ev(this, block->getBBWeight(this), &weightsTable); + GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); foundConversion |= ev.foundConversions; break; @@ -477,8 +477,8 @@ PhaseStatus Compiler::fgOptimizeLclMasks() if (lcl->gtType == TYP_SIMD16 || lcl->gtType == TYP_MASK) { // Parse the entire statement. - LclMasksUpdateVisitor ev(this, stmt, &weightsTable); - GenTree* root = stmt->GetRootNode(); + MaskConversionsUpdateVisitor ev(this, stmt, &weightsTable); + GenTree* root = stmt->GetRootNode(); ev.WalkTree(&root, nullptr); if (ev.updatedConversions) { From ca9bf30ceaacf17174ac41bc9a58d4cb92cd26c5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 12 Nov 2024 12:57:12 +0000 Subject: [PATCH 56/66] Use TypeIs() --- src/coreclr/jit/optimizemaskconversions.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index 9ca336d6aa11ff..d3ac94938df5f7 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -446,7 +446,7 @@ PhaseStatus Compiler::fgOptimizeMaskConversions() // Only check statements where there is a local of type TYP_SIMD16/TYP_MASK. for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - if (lcl->gtType == TYP_SIMD16 || lcl->gtType == TYP_MASK) + if (lcl->TypeIs(TYP_SIMD16, TYP_MASK)) { // Parse the entire statement. MaskConversionsCheckVisitor ev(this, block->getBBWeight(this), &weightsTable); @@ -474,7 +474,7 @@ PhaseStatus Compiler::fgOptimizeMaskConversions() // Only check statements where there is a local of type TYP_SIMD16/TYP_MASK. for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList()) { - if (lcl->gtType == TYP_SIMD16 || lcl->gtType == TYP_MASK) + if (lcl->TypeIs(TYP_SIMD16, TYP_MASK)) { // Parse the entire statement. 
MaskConversionsUpdateVisitor ev(this, stmt, &weightsTable); From 518dabf1b2e64e12a52da4b69f6dbcd133bc26ba Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 12 Nov 2024 13:30:50 +0000 Subject: [PATCH 57/66] invalidate if cached simdtype differs --- src/coreclr/jit/optimizemaskconversions.cpp | 24 ++++++++++++++----- .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 4 ++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index d3ac94938df5f7..95ba10d8d6ea0e 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -30,7 +30,7 @@ struct MaskConversionsWeight void InvalidateWeight() { - JITDUMP("Invalidating weight. \n"); + JITDUMP("Invalidating weight. "); invalid = true; DumpTotalWeight(); } @@ -40,7 +40,7 @@ struct MaskConversionsWeight JITDUMP("Weighting: %s{%.2fc %.2fs}\n", invalid ? "Invalid" : "", currentCost, switchCost); } - void CacheSimdTypes(GenTreeHWIntrinsic* op); + void CacheSimdTypes(GenTreeHWIntrinsic* op, unsigned lclnum); }; typedef JitHashTable, MaskConversionsWeight> MaskConversionsWeightTable; @@ -81,16 +81,28 @@ void MaskConversionsWeight::UpdateWeight(bool isStore, bool hasConvert, weight_t // // Arguments: // op - The HW intrinsic to cache +// lclnum - The local using the op // -void MaskConversionsWeight::CacheSimdTypes(GenTreeHWIntrinsic* op) +void MaskConversionsWeight::CacheSimdTypes(GenTreeHWIntrinsic* op, unsigned lclnum) { CorInfoType newSimdBaseJitType = op->GetSimdBaseJitType(); unsigned newSimdSize = op->GetSimdSize(); assert((newSimdBaseJitType != CORINFO_TYPE_UNDEF)); - simdBaseJitType = newSimdBaseJitType; - simdSize = newSimdSize; + if (simdBaseJitType == CORINFO_TYPE_UNDEF) + { + // Types have not already been cached. Set them. 
+ simdBaseJitType = newSimdBaseJitType; + simdSize = newSimdSize; + } + else if ((simdBaseJitType != newSimdBaseJitType) || (simdSize != newSimdSize)) + { + // Type mismatch with existing cached type. + JITDUMP("Local V%02d has different types: (%d, %d) vs (%d, %d). ", lclnum, simdBaseJitType, simdSize, + newSimdBaseJitType, newSimdSize); + InvalidateWeight(); + } } //----------------------------------------------------------------------------- @@ -183,7 +195,7 @@ class MaskConversionsCheckVisitor final : public GenTreeVisitorCacheSimdTypes(convertOp); + weight->CacheSimdTypes(convertOp, lclOp->GetLclNum()); } foundConversions |= hasConversion; diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index 7a93a3012f7b2e..a1998ba325c151 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -134,6 +134,8 @@ private static void UseMaskAsVectorAndMaskInsideLoop() } } + // Create a mask, potentially bitcasting it. Use it as a vector. + // No conversion due to the bitcasting. [MethodImpl(MethodImplOptions.NoInlining)] private static void CastMaskUseAsVector() { @@ -148,6 +150,8 @@ private static void CastMaskUseAsVector() } } + // Create a mask, potentially bitcasting it. Use it as a mask. + // No conversion due to the bitcasting. 
[MethodImpl(MethodImplOptions.NoInlining)] private static void CastMaskUseAsMask() { From 6d8e36d715cf73f3916f6d7019e00e04d8d0e047 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 12 Nov 2024 14:00:20 +0000 Subject: [PATCH 58/66] use constructor for weightsTable Change-Id: I884307955274dac90bf1b30c5dd44be1e2917d49 --- src/coreclr/jit/optimizemaskconversions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index 95ba10d8d6ea0e..3ed706cf5d6632 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -446,7 +446,7 @@ PhaseStatus Compiler::fgOptimizeMaskConversions() return PhaseStatus::MODIFIED_NOTHING; } - MaskConversionsWeightTable weightsTable = MaskConversionsWeightTable(getAllocator()); + MaskConversionsWeightTable weightsTable(getAllocator()); // Find every local and add them to weightsTable. bool foundConversion = false; From 4f09a3ccc833bd0001fa642771a2ea9feb9955b2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 12 Nov 2024 14:57:16 +0000 Subject: [PATCH 59/66] check for address exposed variables --- src/coreclr/jit/optimizemaskconversions.cpp | 26 +++++++++---------- .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 23 +++++++++++++++- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index 3ed706cf5d6632..a75af67ba86088 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -161,35 +161,33 @@ class MaskConversionsCheckVisitor final : public GenTreeVisitorAsLclVarCommon(); + GenTreeLclVarCommon* lclOp = (*use)->AsLclVarCommon(); + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclOp->GetLclNum()); // Get the existing weighting (if any). 
MaskConversionsWeight defaultWeight; MaskConversionsWeight* weight = weightsTable->LookupPointerOrAdd(lclOp->GetLclNum(), defaultWeight); - // Update the weights. JITDUMP("Local %s V%02d at [%06u] ", isLocalStore ? "store" : "var", lclOp->GetLclNum(), m_compiler->dspTreeID(lclOp)); - if (isInvalid) + + // Cannot convert any locals with an exposed address. + if (varDsc->IsAddressExposed()) { - JITDUMP("cannot be converted. "); + JITDUMP("is address exposed elsewhere. "); weight->InvalidateWeight(); + return fgWalkResult::WALK_CONTINUE; } - else - { - JITDUMP("has %s conversion. ", hasConversion ? "mask" : "no"); - weight->UpdateWeight(isLocalStore, hasConversion, bbWeight); - } + + // Update the weights. + JITDUMP("has %s conversion. ", hasConversion ? "mask" : "no"); + weight->UpdateWeight(isLocalStore, hasConversion, bbWeight); // Cache the simd type data of the conversion. if (hasConversion) diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index a1998ba325c151..9e7b9d9e101e0a 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -20,6 +20,9 @@ private static void Consume(T value) { } [MethodImpl(MethodImplOptions.NoInlining)] private static void Consume(T value, T2 value2) { } + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ConsumrAddressExposed(ref Vector value) {} + // Create a mask. Use it as a mask. // Conversion of mask1 will be removed. @@ -37,6 +40,7 @@ public static void ChangeMatchUseTests() UseMaskAsVectorAndMaskInsideLoop(); CastMaskUseAsVector(); CastMaskUseAsMask(); + UseMaskAsMaskAndRef(); } Thread.Sleep(100); @@ -48,6 +52,7 @@ public static void ChangeMatchUseTests() UseMaskAsVectorAndMaskInsideLoop(); CastMaskUseAsVector(); CastMaskUseAsMask(); + UseMaskAsMaskAndRef(); } @@ -168,4 +173,20 @@ private static void CastMaskUseAsMask() Consume(vec2); } } -} \ No newline at end of file + + // Create a mask. 
Use it as a mask and a reference. + // No conversion due to the reference. + [MethodImpl(MethodImplOptions.NoInlining)] + private static void UseMaskAsMaskAndRef() + { + if (Sve.IsSupported) + { + Vector mask1 = Sve.CreateFalseMaskDouble(); // Create lcl mask + Vector vec1 = Vector.Create(1.3); + ConsumrAddressExposed(ref mask1); // Use as ref + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Consume(vec2); + } + } + +} From 9fc3b65eb789de80ea62dbbdf037661ca7eb458e Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Nov 2024 10:24:42 +0000 Subject: [PATCH 60/66] Add allocator CMK_MaskConversionOpt --- src/coreclr/jit/compmemkind.h | 1 + src/coreclr/jit/optimizemaskconversions.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 0221eadb067492..959176dcc965ad 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -64,6 +64,7 @@ CompMemKindMacro(TailMergeThrows) CompMemKindMacro(EarlyProp) CompMemKindMacro(ZeroInit) CompMemKindMacro(Pgo) +CompMemKindMacro(MaskConversionOpt) //clang-format on #undef CompMemKindMacro diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index a75af67ba86088..3d48dbf556bc86 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -444,7 +444,7 @@ PhaseStatus Compiler::fgOptimizeMaskConversions() return PhaseStatus::MODIFIED_NOTHING; } - MaskConversionsWeightTable weightsTable(getAllocator()); + MaskConversionsWeightTable weightsTable(getAllocator(CMK_MaskConversionOpt)); // Find every local and add them to weightsTable. 
bool foundConversion = false; From 06b2e72845612c6502115768abae51dd007d8ffd Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Nov 2024 10:26:10 +0000 Subject: [PATCH 61/66] Simplify ChangeMatchUse.csproj --- src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs | 2 +- src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index 9e7b9d9e101e0a..801e0c54983ec6 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -27,7 +27,7 @@ private static void ConsumrAddressExposed(ref Vector value) {} // Create a mask. Use it as a mask. // Conversion of mask1 will be removed. [Fact] - public static void ChangeMatchUseTests() + public static void TestEntryPoint() { for (int i = 0; i < 4; i++) { diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj index f3063063f494a9..1352ebe3277bc7 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj @@ -1,16 +1,9 @@ True - None - $(NoWarn);SYSLIB5003 - true - - true + $(NoWarn),SYSLIB5003 - - - - \ No newline at end of file + From 316ee5ec1210f1b4f21dce882fcd96c4b1d97854 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Nov 2024 10:29:03 +0000 Subject: [PATCH 62/66] Hoist Sve check in testing --- .../JIT/opt/LocalMasks/ChangeMatchUse.cs | 169 ++++++++---------- 1 file changed, 72 insertions(+), 97 deletions(-) diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index 801e0c54983ec6..69ee309defd101 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -23,51 +23,47 @@ private static void Consume(T value, T2 value2) { } [MethodImpl(MethodImplOptions.NoInlining)] 
private static void ConsumrAddressExposed(ref Vector value) {} - - // Create a mask. Use it as a mask. - // Conversion of mask1 will be removed. [Fact] public static void TestEntryPoint() { - for (int i = 0; i < 4; i++) + if (Sve.IsSupported) { - for (int j = 0; j < 200; j++) + for (int i = 0; i < 4; i++) { - UseMaskAsMask(); - UseMaskAsVector(); - UseMaskAsMaskAndVector(); - UseMaskAsMaskAndVectorInsideLoop(); - UseMaskAsVectorAndMaskInsideLoop(); - CastMaskUseAsVector(); - CastMaskUseAsMask(); - UseMaskAsMaskAndRef(); + for (int j = 0; j < 200; j++) + { + UseMaskAsMask(); + UseMaskAsVector(); + UseMaskAsMaskAndVector(); + UseMaskAsMaskAndVectorInsideLoop(); + UseMaskAsVectorAndMaskInsideLoop(); + CastMaskUseAsVector(); + CastMaskUseAsMask(); + UseMaskAsMaskAndRef(); + } + + Thread.Sleep(100); } - - Thread.Sleep(100); + UseMaskAsMask(); + UseMaskAsVector(); + UseMaskAsMaskAndVector(); + UseMaskAsMaskAndVectorInsideLoop(); + UseMaskAsVectorAndMaskInsideLoop(); + CastMaskUseAsVector(); + CastMaskUseAsMask(); + UseMaskAsMaskAndRef(); } - UseMaskAsMask(); - UseMaskAsVector(); - UseMaskAsMaskAndVector(); - UseMaskAsMaskAndVectorInsideLoop(); - UseMaskAsVectorAndMaskInsideLoop(); - CastMaskUseAsVector(); - CastMaskUseAsMask(); - UseMaskAsMaskAndRef(); } - // Create a mask. Use it as a mask. // Conversion of mask1 will be removed. [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMask() { - if (Sve.IsSupported) - { - Vector mask1 = Sve.CreateWhileLessThanMask64Bit(2, 9); // Create lcl mask - Vector vec1 = Vector.Create(5); - Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask - Consume(vec2); - } + Vector mask1 = Sve.CreateWhileLessThanMask64Bit(2, 9); // Create lcl mask + Vector vec1 = Vector.Create(5); + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Consume(vec2); } // Create a mask. Use it as a vector. 
@@ -75,13 +71,10 @@ private static void UseMaskAsMask() [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsVector() { - if (Sve.IsSupported) - { - Vector mask1 = Sve.CreateFalseMaskInt16(); // Create lcl mask - Vector vec1 = Vector.Create(9); - Vector vec2 = Sve.Add(vec1, mask1); // Use as vector - Consume(vec2); - } + Vector mask1 = Sve.CreateFalseMaskInt16(); // Create lcl mask + Vector vec1 = Vector.Create(9); + Vector vec2 = Sve.Add(vec1, mask1); // Use as vector + Consume(vec2); } // Create a mask. Use it as a mask, then use as a vector. @@ -89,15 +82,12 @@ private static void UseMaskAsVector() [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMaskAndVector() { - if (Sve.IsSupported) - { - Vector mask1 = Sve.CreateWhileLessThanOrEqualMask8Bit(2, 9); // Create lcl mask - Vector vec1 = Vector.Create(3); - Vector vec2 = Vector.Create(4); - Vector vec3 = Sve.ConditionalExtractAfterLastActiveElement(mask1, vec1, vec2); // Use as mask - Vector vec4 = Sve.PopCount(mask1); // Use as vector - Consume(vec3, vec4); - } + Vector mask1 = Sve.CreateWhileLessThanOrEqualMask8Bit(2, 9); // Create lcl mask + Vector vec1 = Vector.Create(3); + Vector vec2 = Vector.Create(4); + Vector vec3 = Sve.ConditionalExtractAfterLastActiveElement(mask1, vec1, vec2); // Use as mask + Vector vec4 = Sve.PopCount(mask1); // Use as vector + Consume(vec3, vec4); } // Create a mask. Use it as a mask, then use as a vector inside a loop. 
@@ -105,18 +95,15 @@ private static void UseMaskAsMaskAndVector() [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMaskAndVectorInsideLoop() { - if (Sve.IsSupported) - { - Vector mask1 = Sve.CreateFalseMaskInt16(); // Create lcl mask - Vector vec1 = Vector.Create(3); - Vector vec2 = Vector.Create(4); - Vector vec3 = Sve.Splice(mask1, vec1, vec2); // Use as mask + Vector mask1 = Sve.CreateFalseMaskInt16(); // Create lcl mask + Vector vec1 = Vector.Create(3); + Vector vec2 = Vector.Create(4); + Vector vec3 = Sve.Splice(mask1, vec1, vec2); // Use as mask - for (int i = 0; i < 100; i++) - { - Vector vec4 = Sve.ReverseElement8(mask1); // Use as vector - Consume(vec3, vec4); - } + for (int i = 0; i < 100; i++) + { + Vector vec4 = Sve.ReverseElement8(mask1); // Use as vector + Consume(vec3, vec4); } } @@ -125,17 +112,14 @@ private static void UseMaskAsMaskAndVectorInsideLoop() [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsVectorAndMaskInsideLoop() { - if (Sve.IsSupported) - { - Vector vec1 = Vector.Create(7); // Create lcl vector - Vector vec2 = Vector.Create(3); - Vector vec3 = Sve.Add(vec1, vec2); // Use as vector + Vector vec1 = Vector.Create(7); // Create lcl vector + Vector vec2 = Vector.Create(3); + Vector vec3 = Sve.Add(vec1, vec2); // Use as vector - for (int i = 0; i < 100; i++) - { - Vector vec4 = Sve.Compact(vec1, vec3); // Use as mask - Consume(vec3, vec4); - } + for (int i = 0; i < 100; i++) + { + Vector vec4 = Sve.Compact(vec1, vec3); // Use as mask + Consume(vec3, vec4); } } @@ -144,15 +128,12 @@ private static void UseMaskAsVectorAndMaskInsideLoop() [MethodImpl(MethodImplOptions.NoInlining)] private static void CastMaskUseAsVector() { - if (Sve.IsSupported) - { - Vector mask1; - if (Environment.TickCount % 2 == 0) - mask1 = Sve.CreateTrueMaskInt32(); - else - mask1 = Unsafe.BitCast, Vector>(Sve.CreateTrueMaskUInt32()); - Consume(mask1); // Use as vector - } + Vector mask1; + if (Environment.TickCount % 2 == 
0) + mask1 = Sve.CreateTrueMaskInt32(); + else + mask1 = Unsafe.BitCast, Vector>(Sve.CreateTrueMaskUInt32()); + Consume(mask1); // Use as vector } // Create a mask, potentially bitcasting it. Use it as a mask. @@ -160,18 +141,15 @@ private static void CastMaskUseAsVector() [MethodImpl(MethodImplOptions.NoInlining)] private static void CastMaskUseAsMask() { - if (Sve.IsSupported) - { - Vector mask1; - if (Environment.TickCount % 2 == 0) - mask1 = Sve.CreateTrueMaskInt32(); - else - mask1 = Unsafe.BitCast, Vector>(Sve.CreateTrueMaskUInt32()); - - Vector vec1 = Vector.Create(25); - Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask - Consume(vec2); - } + Vector mask1; + if (Environment.TickCount % 2 == 0) + mask1 = Sve.CreateTrueMaskInt32(); + else + mask1 = Unsafe.BitCast, Vector>(Sve.CreateTrueMaskUInt32()); + + Vector vec1 = Vector.Create(25); + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Consume(vec2); } // Create a mask. Use it as a mask and a reference. @@ -179,14 +157,11 @@ private static void CastMaskUseAsMask() [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMaskAndRef() { - if (Sve.IsSupported) - { - Vector mask1 = Sve.CreateFalseMaskDouble(); // Create lcl mask - Vector vec1 = Vector.Create(1.3); - ConsumrAddressExposed(ref mask1); // Use as ref - Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask - Consume(vec2); - } + Vector mask1 = Sve.CreateFalseMaskDouble(); // Create lcl mask + Vector vec1 = Vector.Create(1.3); + ConsumrAddressExposed(ref mask1); // Use as ref + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Consume(vec2); } } From bc1956697cbc778de1b824407c3379224e0353c4 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Nov 2024 10:42:58 +0000 Subject: [PATCH 63/66] Check for parameters and OSR locals --- src/coreclr/jit/optimizemaskconversions.cpp | 17 +++++++++++++++++ src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs | 13 +++++++++++++ 2 files changed, 30 insertions(+) diff --git 
a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index 3d48dbf556bc86..5cfcb2fb2d4e93 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -184,6 +184,23 @@ class MaskConversionsCheckVisitor final : public GenTreeVisitorInvalidateWeight(); return fgWalkResult::WALK_CONTINUE; } + // TODO: For both parameters and OSR locals, these could potentially be converted. + // Instead of retyping the existing locals, for each def create a new local store + // in the new type and update all the uses to use the new store. For parameters + // and OSR locals add a single initial conversion in an initial basic block. Take + // this into account in the weighting. + else if (varDsc->lvIsParam) + { + JITDUMP("is parameter. "); + weight->InvalidateWeight(); + return fgWalkResult::WALK_CONTINUE; + } + else if (varDsc->lvIsOSRLocal) + { + JITDUMP("is OSR local. "); + weight->InvalidateWeight(); + return fgWalkResult::WALK_CONTINUE; + } // Update the weights. JITDUMP("has %s conversion. ", hasConversion ? "mask" : "no"); diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs index 69ee309defd101..d96691ad8c7ffe 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs @@ -28,6 +28,8 @@ public static void TestEntryPoint() { if (Sve.IsSupported) { + Vector vec = Vector.Create(77); + for (int i = 0; i < 4; i++) { for (int j = 0; j < 200; j++) @@ -40,6 +42,7 @@ public static void TestEntryPoint() CastMaskUseAsVector(); CastMaskUseAsMask(); UseMaskAsMaskAndRef(); + UseParamAsMask(vec); } Thread.Sleep(100); @@ -52,6 +55,7 @@ public static void TestEntryPoint() CastMaskUseAsVector(); CastMaskUseAsMask(); UseMaskAsMaskAndRef(); + UseParamAsMask(vec); } } @@ -164,4 +168,13 @@ private static void UseMaskAsMaskAndRef() Consume(vec2); } + // Take a vector parameter. 
Use it as a mask + // No conversion due to it being a parameter. + [MethodImpl(MethodImplOptions.NoInlining)] + private static void UseParamAsMask(Vector vecParam) + { + Vector vec1 = Vector.Create(73); + Vector vec2 = Sve.Compact(vecParam, vec1); // Use as mask + Consume(vec2); + } } From 8073bf9d2e4e0f10389d1f69e15668b432f8223a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Nov 2024 12:23:01 +0000 Subject: [PATCH 64/66] rename tests --- .../ChangeMatchUse.cs => MaskConversions/ChangeMaskUse.cs} | 5 +++-- .../ChangeMaskUse.csproj} | 0 2 files changed, 3 insertions(+), 2 deletions(-) rename src/tests/JIT/opt/{LocalMasks/ChangeMatchUse.cs => MaskConversions/ChangeMaskUse.cs} (98%) rename src/tests/JIT/opt/{LocalMasks/ChangeMatchUse.csproj => MaskConversions/ChangeMaskUse.csproj} (100%) diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs b/src/tests/JIT/opt/MaskConversions/ChangeMaskUse.cs similarity index 98% rename from src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs rename to src/tests/JIT/opt/MaskConversions/ChangeMaskUse.cs index d96691ad8c7ffe..2212ce93e2d2e4 100644 --- a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.cs +++ b/src/tests/JIT/opt/MaskConversions/ChangeMaskUse.cs @@ -1,7 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// Unit tests for the local masks optimization +// Unit tests for the masks conversion optimization +// Uses vectors as masks and vice versa. 
using System; using System.Numerics; @@ -12,7 +13,7 @@ using System.Threading; using Xunit; -public class ChangeMatchUse +public class ChangeMaskUse { [MethodImpl(MethodImplOptions.NoInlining)] private static void Consume(T value) { } diff --git a/src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj b/src/tests/JIT/opt/MaskConversions/ChangeMaskUse.csproj similarity index 100% rename from src/tests/JIT/opt/LocalMasks/ChangeMatchUse.csproj rename to src/tests/JIT/opt/MaskConversions/ChangeMaskUse.csproj From e22f5cefa48c779d7b3f9b067f0eb0416ca5d11c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 13 Nov 2024 15:13:56 +0000 Subject: [PATCH 65/66] Don't convert uses of masks as vectors --- src/coreclr/jit/optimizemaskconversions.cpp | 17 +++-- .../JIT/opt/MaskConversions/ChangeMaskUse.cs | 25 ++----- src/tests/JIT/opt/MaskConversions/MaskUse.cs | 73 +++++++++++++++++++ .../JIT/opt/MaskConversions/MaskUse.csproj | 9 +++ 4 files changed, 100 insertions(+), 24 deletions(-) create mode 100644 src/tests/JIT/opt/MaskConversions/MaskUse.cs create mode 100644 src/tests/JIT/opt/MaskConversions/MaskUse.csproj diff --git a/src/coreclr/jit/optimizemaskconversions.cpp b/src/coreclr/jit/optimizemaskconversions.cpp index 5cfcb2fb2d4e93..c779e08eb20ce6 100644 --- a/src/coreclr/jit/optimizemaskconversions.cpp +++ b/src/coreclr/jit/optimizemaskconversions.cpp @@ -184,11 +184,18 @@ class MaskConversionsCheckVisitor final : public GenTreeVisitorInvalidateWeight(); return fgWalkResult::WALK_CONTINUE; } - // TODO: For both parameters and OSR locals, these could potentially be converted. - // Instead of retyping the existing locals, for each def create a new local store - // in the new type and update all the uses to use the new store. For parameters - // and OSR locals add a single initial conversion in an initial basic block. Take - // this into account in the weighting. + // TODO: Converting to a mask loses data - as each field is only a single bit. 
+ // For parameters, OSR locals, and locals which are used as vectors, then they + // cannot be stored as a mask as data will be lost. + // For all of these, conversions could be done by creating a new store of type mask. + // Then uses as mask could be converted to type mask and pointed to use the new + // definition. The weighting would need updating to take this into account. + else if (isLocalUse && !hasConversion) + { + JITDUMP("is used as vector. "); + weight->InvalidateWeight(); + return fgWalkResult::WALK_CONTINUE; + } else if (varDsc->lvIsParam) { JITDUMP("is parameter. "); diff --git a/src/tests/JIT/opt/MaskConversions/ChangeMaskUse.cs b/src/tests/JIT/opt/MaskConversions/ChangeMaskUse.cs index 2212ce93e2d2e4..db46a9bcd7a0fb 100644 --- a/src/tests/JIT/opt/MaskConversions/ChangeMaskUse.cs +++ b/src/tests/JIT/opt/MaskConversions/ChangeMaskUse.cs @@ -22,7 +22,7 @@ private static void Consume(T value) { } private static void Consume(T value, T2 value2) { } [MethodImpl(MethodImplOptions.NoInlining)] - private static void ConsumrAddressExposed(ref Vector value) {} + private static void ConsumeAddressExposed(ref Vector value) {} [Fact] public static void TestEntryPoint() @@ -35,7 +35,6 @@ public static void TestEntryPoint() { for (int j = 0; j < 200; j++) { - UseMaskAsMask(); UseMaskAsVector(); UseMaskAsMaskAndVector(); UseMaskAsMaskAndVectorInsideLoop(); @@ -48,7 +47,6 @@ public static void TestEntryPoint() Thread.Sleep(100); } - UseMaskAsMask(); UseMaskAsVector(); UseMaskAsMaskAndVector(); UseMaskAsMaskAndVectorInsideLoop(); @@ -60,19 +58,8 @@ public static void TestEntryPoint() } } - // Create a mask. Use it as a mask. - // Conversion of mask1 will be removed. - [MethodImpl(MethodImplOptions.NoInlining)] - private static void UseMaskAsMask() - { - Vector mask1 = Sve.CreateWhileLessThanMask64Bit(2, 9); // Create lcl mask - Vector vec1 = Vector.Create(5); - Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask - Consume(vec2); - } - // Create a mask.
Use it as a vector. - // No conversions will be changed: Mask->Vector is optimal. + // No conversions will be changed: use as vector. [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsVector() { @@ -83,7 +70,7 @@ private static void UseMaskAsVector() } // Create a mask. Use it as a mask, then use as a vector. - // Mask1 conversions will be switched. + // No conversions will be changed: use as vector. [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMaskAndVector() { @@ -96,7 +83,7 @@ private static void UseMaskAsMaskAndVector() } // Create a mask. Use it as a mask, then use as a vector inside a loop. - // No conversions will be changed: vector use inside the loop dominates. + // No conversions will be changed: use as vector. [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsMaskAndVectorInsideLoop() { @@ -113,7 +100,7 @@ private static void UseMaskAsMaskAndVectorInsideLoop() } // Create a mask. Use it as a vector, then use as a mask inside a loop. - // Will be converted: mask use inside the loop dominates. + // No conversions will be changed: use as vector. [MethodImpl(MethodImplOptions.NoInlining)] private static void UseMaskAsVectorAndMaskInsideLoop() { @@ -164,7 +151,7 @@ private static void UseMaskAsMaskAndRef() { Vector mask1 = Sve.CreateFalseMaskDouble(); // Create lcl mask Vector vec1 = Vector.Create(1.3); - ConsumrAddressExposed(ref mask1); // Use as ref + ConsumeAddressExposed(ref mask1); // Use as ref Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask Consume(vec2); } diff --git a/src/tests/JIT/opt/MaskConversions/MaskUse.cs b/src/tests/JIT/opt/MaskConversions/MaskUse.cs new file mode 100644 index 00000000000000..f03e1c63a2755d --- /dev/null +++ b/src/tests/JIT/opt/MaskConversions/MaskUse.cs @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +// Unit tests for the masks conversion optimization +// Uses vectors as masks and vice versa. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Threading; +using Xunit; + +public class ChangeMaskUse +{ + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Consume(T value) { } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Consume(T value, T2 value2) { } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ConsumrAddressExposed(ref Vector value) {} + + [Fact] + public static void TestEntryPoint() + { + if (Sve.IsSupported) + { + Vector vec = Vector.Create(77); + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 200; j++) + { + UseMask1(); + UseMask2(); + } + + Thread.Sleep(100); + } + UseMask1(); + UseMask2(); + } + } + + // Create a mask and use it as a mask. + // Mask will be used without going to local. Conversions will be optimised during lowering. + [MethodImpl(MethodImplOptions.NoInlining)] + private static void UseMask1() + { + Vector mask1 = Sve.CreateWhileLessThanMask64Bit(2, 9); // Create local mask + Vector vec1 = Vector.Create(5); + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Consume(vec2); + } + + // Create a mask and use it as a mask twice. + // Will be switched to store as mask. 
+ [MethodImpl(MethodImplOptions.NoInlining)] + private static void UseMask2() + { + Vector mask1 = Sve.CreateTrueMaskInt32(SveMaskPattern.LargestMultipleOf3); // Create local mask + Vector vec1 = Vector.Create(5); + Vector vec2 = Sve.Compact(mask1, vec1); // Use as mask + Vector vec3 = Sve.ConditionalExtractAfterLastActiveElement(mask1, vec1, vec1); // Use as mask + Consume(vec2, vec3); + } + + +} diff --git a/src/tests/JIT/opt/MaskConversions/MaskUse.csproj b/src/tests/JIT/opt/MaskConversions/MaskUse.csproj new file mode 100644 index 00000000000000..1352ebe3277bc7 --- /dev/null +++ b/src/tests/JIT/opt/MaskConversions/MaskUse.csproj @@ -0,0 +1,9 @@ + + + True + $(NoWarn),SYSLIB5003 + + + + + From 0d603994057a9641791822b160ac21bf0dece8c4 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 Nov 2024 09:46:58 +0000 Subject: [PATCH 66/66] fix formatting --- src/coreclr/jit/jitconfigvalues.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 7cd4ead945e3dd..d63762eef2ec2b 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -571,9 +571,9 @@ OPT_CONFIG_INTEGER(JitDoValueNumber, "JitDoValueNumber", 1) // Perform value num OPT_CONFIG_STRING(JitOptRepeatRange, "JitOptRepeatRange") // Enable JitOptRepeat based on method hash range -OPT_CONFIG_INTEGER(JitDoIfConversion, "JitDoIfConversion", 1) // Perform If conversion +OPT_CONFIG_INTEGER(JitDoIfConversion, "JitDoIfConversion", 1) // Perform If conversion OPT_CONFIG_INTEGER(JitDoOptimizeMaskConversions, "JitDoOptimizeMaskConversions", 1) // Perform optimization of mask - // conversions + // conversions RELEASE_CONFIG_INTEGER(JitEnableOptRepeat, "JitEnableOptRepeat", 1) // If zero, do not allow JitOptRepeat RELEASE_CONFIG_METHODSET(JitOptRepeat, "JitOptRepeat") // Runs optimizer multiple times on specified methods