From bdde1b7398d22f5b2e3ae798417f07b4ab4bf480 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:39:56 -0800 Subject: [PATCH 01/13] faster ReadOneFast --- src/coreclr/inc/gcinfodecoder.h | 38 ++++++++++++++------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index 36e86a3efe059b..ea2f3d6aeee325 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -265,6 +265,7 @@ class BitStreamReader m_pCurrent = m_pBuffer = dac_cast((size_t)dac_cast(pBuffer) & ~((size_t)sizeof(size_t)-1)); m_RelPos = m_InitialRelPos = (int)((size_t)dac_cast(pBuffer) % sizeof(size_t)) * 8/*BITS_PER_BYTE*/; + m_current = *m_pCurrent >> m_RelPos; } BitStreamReader(const BitStreamReader& other) @@ -275,6 +276,7 @@ class BitStreamReader m_InitialRelPos = other.m_InitialRelPos; m_pCurrent = other.m_pCurrent; m_RelPos = other.m_RelPos; + m_current = other.m_current; } const BitStreamReader& operator=(const BitStreamReader& other) @@ -285,6 +287,7 @@ class BitStreamReader m_InitialRelPos = other.m_InitialRelPos; m_pCurrent = other.m_pCurrent; m_RelPos = other.m_RelPos; + m_current = other.m_current; return *this; } @@ -295,33 +298,35 @@ class BitStreamReader _ASSERTE(numBits > 0 && numBits <= BITS_PER_SIZE_T); - size_t result = (*m_pCurrent) >> m_RelPos; + size_t result = m_current; + m_current >>= numBits; int newRelPos = m_RelPos + numBits; if(newRelPos >= BITS_PER_SIZE_T) { m_pCurrent++; + m_current = *m_pCurrent; newRelPos -= BITS_PER_SIZE_T; - if(newRelPos > 0) - { - size_t extraBits = (*m_pCurrent) << (numBits - newRelPos); - result ^= extraBits; - } + size_t extraBits = m_current << (numBits - newRelPos); + result |= extraBits; + m_current >>= newRelPos; } m_RelPos = newRelPos; - result &= SAFE_SHIFT_LEFT(1, numBits) - 1; + result &= ((size_t)-1 >> (BITS_PER_SIZE_T - numBits)); return result; } - // This version reads one bit, returning zero/non-zero (not 0/1) + // This version reads one bit // NOTE: This routine is perf-critical __forceinline size_t ReadOneFast() { SUPPORTS_DAC; - size_t result = (*m_pCurrent) & (((size_t)1) << m_RelPos); + size_t result = m_current & 1; + m_current >>= 1; if(++m_RelPos == BITS_PER_SIZE_T) { m_pCurrent++; + m_current = *m_pCurrent; m_RelPos = 0; } return result; @@ -339,6 +344,7 @@ class BitStreamReader size_t adjPos = pos + m_InitialRelPos; m_pCurrent = m_pBuffer + adjPos / BITS_PER_SIZE_T; m_RelPos = (int)(adjPos % BITS_PER_SIZE_T); + m_current = *m_pCurrent >> m_RelPos; _ASSERTE(GetCurrentPos() == pos); } @@ -349,19 +355,6 @@ class BitStreamReader SetCurrentPos(GetCurrentPos() + numBitsToSkip); } - __forceinline void AlignUpToByte() - { - if(m_RelPos <= BITS_PER_SIZE_T - 8) - { - m_RelPos = (m_RelPos + 7) & ~7; - } - else - { - m_RelPos = 0; - m_pCurrent++; - } - } - __forceinline size_t ReadBitAtPos( size_t pos ) { size_t adjPos = pos + m_InitialRelPos; @@ -422,6 +415,7 @@ class BitStreamReader int m_InitialRelPos; PTR_size_t m_pCurrent; int m_RelPos; + size_t m_current; }; struct GcSlotDesc From 12b491bd321bc8ce4142c71b4a87cfee93458147 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 10 Dec 2023 10:25:46 -0800 Subject: [PATCH 02/13] lzcnt --- src/coreclr/inc/gcinfotypes.h | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index 6ba89ff00058c4..a560e7cf043fba 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -9,6 +9,10 @@ #include "gcinfo.h" #endif +#ifdef _MSC_VER +#include +#endif // _MSC_VER + // ***************************************************************************** // WARNING!!!: These values and code are also used by SOS in the diagnostics // repo. Should updated in a backwards and forwards compatible way. @@ -43,14 +47,25 @@ __forceinline size_t SAFE_SHIFT_RIGHT(size_t x, size_t count) inline UINT32 CeilOfLog2(size_t x) { + // we are ok even if bsr is used vs. lzcnt _ASSERTE(x > 0); - UINT32 result = (x & (x - 1)) ? 1 : 0; - while (x != 1) - { - result++; - x >>= 1; - } - return result; + + x = (x << 1) - 1; +#ifdef TARGET_64BIT +#ifdef _MSC_VER + UINT32 lzcountCeil = (UINT32)__lzcnt64((unsigned __int64)x); +#else // _MSC_VER + UINT32 lzcountCeil = (UINT32)__builtin_clzl((unsigned long)x); +#endif // _MSC_VER +#else // TARGET_64BIT +#ifdef _MSC_VER + UINT32 lzcountCeil = (UINT32)__lzcnt((unsigned int)x); +#else // _MSC_VER + UINT32 lzcountCeil = (UINT32)__builtin_clz((unsigned int)x); +#endif // _MSC_VER +#endif + + return BITS_PER_SIZE_T - lzcountCeil; } enum GcSlotFlags From 9f4c0c302fffee909dd15bf6789f7fbc8a2da78f Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 11 Dec 2023 09:25:05 -0800 Subject: [PATCH 03/13] linear search for safepoints --- src/coreclr/inc/gcinfodecoder.h | 3 +- src/coreclr/vm/gcinfodecoder.cpp | 79 +++++++++++++++++++++----------- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index ea2f3d6aeee325..e96bfbf40cffa6 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -584,7 +584,8 @@ class GcInfoDecoder #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED UINT32 m_NumSafePoints; UINT32 m_SafePointIndex; - UINT32 FindSafePoint(UINT32 codeOffset); + UINT32 NarrowSafePointSearch(size_t savedPos, UINT32 breakOffset, UINT32* searchEnd); + UINT32 FindSafePoint(UINT32 codeOffset); #endif UINT32 m_NumInterruptibleRanges; diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index d738f8fb67a909..81950d5e1d4357 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -394,43 +394,70 @@ bool GcInfoDecoder::IsSafePoint(UINT32 codeOffset) } -UINT32 GcInfoDecoder::FindSafePoint(UINT32 breakOffset) +// Repositioning within a bit stream is relatively involved, compared to sequential read, +// so we prefer linear search unless the number of safepoints is too high. +// The limit is not very significant as most methods will have just a few safe points. +// At 32, even if a single point is 16bit encoded (64K method length), +// the whole run will be under 64 bytes, so likely we will stay in the same cache line. +#define MAX_LINEAR_SEARCH 32 + +NOINLINE +UINT32 GcInfoDecoder::NarrowSafePointSearch(size_t savedPos, UINT32 breakOffset, UINT32* searchEnd) { - if(m_NumSafePoints == 0) - return 0; + INT32 low = 0; + INT32 high = (INT32)m_NumSafePoints; - const size_t savedPos = m_Reader.GetCurrentPos(); const UINT32 numBitsPerOffset = CeilOfLog2(NORMALIZE_CODE_OFFSET(m_CodeLength)); - UINT32 result = m_NumSafePoints; + while (high - low > MAX_LINEAR_SEARCH) + { + const INT32 mid = (low + high) / 2; + _ASSERTE(mid >= 0 && mid < (INT32)m_NumSafePoints); + m_Reader.SetCurrentPos(savedPos + (UINT32)mid * numBitsPerOffset); + UINT32 midSpOffset = (UINT32)m_Reader.Read(numBitsPerOffset); + if (breakOffset < midSpOffset) + high = mid; + else + low = mid; + } + + m_Reader.SetCurrentPos(savedPos +(UINT32)low * numBitsPerOffset); + *searchEnd = high; + return low; +} + +UINT32 GcInfoDecoder::FindSafePoint(UINT32 breakOffset) +{ + _ASSERTE(m_NumSafePoints > 0); #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Safepoints are encoded with a -1 adjustment - // but normalizing them masks off the low order bit - // Thus only bother looking if the address is odd - if ((breakOffset & 1) != 0) + _ASSERTE((breakOffset & 1) != 0); #endif - { - const UINT32 normBreakOffset = NORMALIZE_CODE_OFFSET(breakOffset); - INT32 low = 0; - INT32 high = (INT32)m_NumSafePoints; + UINT32 result = m_NumSafePoints; + const size_t savedPos = m_Reader.GetCurrentPos(); + const UINT32 normBreakOffset = NORMALIZE_CODE_OFFSET(breakOffset); - while(low < high) + UINT32 linearSearchStart = 0; + UINT32 linearSearchEnd = m_NumSafePoints; + if (linearSearchEnd - linearSearchStart > MAX_LINEAR_SEARCH) + { + linearSearchStart = NarrowSafePointSearch(savedPos, normBreakOffset, &linearSearchEnd); + } + + const UINT32 numBitsPerOffset = CeilOfLog2(NORMALIZE_CODE_OFFSET(m_CodeLength)); + for (UINT32 i = linearSearchStart; i < linearSearchEnd; i++) + { + UINT32 spOffset = (UINT32)m_Reader.Read(numBitsPerOffset); + if(spOffset == normBreakOffset) { - const INT32 mid = (low+high)/2; - _ASSERTE(mid >= 0 && mid < (INT32)m_NumSafePoints); - m_Reader.SetCurrentPos(savedPos + (UINT32)mid * numBitsPerOffset); - UINT32 normOffset = (UINT32)m_Reader.Read(numBitsPerOffset); - if(normOffset == normBreakOffset) - { - result = (UINT32) mid; - break; - } + result = i; + break; + } - if(normOffset < normBreakOffset) - low = mid+1; - else - high = mid; + if (spOffset > normBreakOffset) + { + break; } } From 43875b681d4bbbef542d938d13b7756a7c0c8cd2 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 17 Dec 2023 11:49:06 -0800 Subject: [PATCH 04/13] do not "decode" individual bool flags --- src/coreclr/inc/gcinfodecoder.h | 9 +------ src/coreclr/vm/gcinfodecoder.cpp | 46 +++++++++++--------------------- 2 files changed, 16 insertions(+), 39 deletions(-) diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index e96bfbf40cffa6..f60fe2716b54dd 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -559,15 +559,8 @@ class GcInfoDecoder UINT32 m_InstructionOffset; // Pre-decoded information + GcInfoHeaderFlags m_headerFlags; bool m_IsInterruptible; - bool m_IsVarArg; - bool m_GenericSecretParamIsMD; - bool m_GenericSecretParamIsMT; -#ifdef TARGET_AMD64 - bool m_WantsReportOnlyLeaf; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - bool m_HasTailCalls; -#endif // TARGET_AMD64 INT32 m_GSCookieStackSlot; INT32 m_ReversePInvokeFrameStackSlot; UINT32 m_ValidRangeStart; diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 81950d5e1d4357..d11a023d649197 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -109,36 +109,20 @@ GcInfoDecoder::GcInfoDecoder( // Pre-decode information //-------------------------------------------- - GcInfoHeaderFlags headerFlags; bool slimHeader = (m_Reader.ReadOneFast() == 0); // Use flag mask to bail out early if we already decoded all the pieces that caller requested int remainingFlags = flags == DECODE_EVERYTHING ? ~0 : flags; if (slimHeader) { - headerFlags = (GcInfoHeaderFlags)(m_Reader.ReadOneFast() ? GC_INFO_HAS_STACK_BASE_REGISTER : 0); + m_headerFlags = (GcInfoHeaderFlags)(m_Reader.ReadOneFast() ? GC_INFO_HAS_STACK_BASE_REGISTER : 0); } else { int numFlagBits = (m_Version == 1) ? GC_INFO_FLAGS_BIT_SIZE_VERSION_1 : GC_INFO_FLAGS_BIT_SIZE; - headerFlags = (GcInfoHeaderFlags) m_Reader.Read(numFlagBits); + m_headerFlags = (GcInfoHeaderFlags) m_Reader.Read(numFlagBits); } - m_IsVarArg = headerFlags & GC_INFO_IS_VARARG; - int hasGSCookie = headerFlags & GC_INFO_HAS_GS_COOKIE; - int hasPSPSym = headerFlags & GC_INFO_HAS_PSP_SYM; - int hasGenericsInstContext = (headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) != GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE; - m_GenericSecretParamIsMD = (headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) == GC_INFO_HAS_GENERICS_INST_CONTEXT_MD; - m_GenericSecretParamIsMT = (headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) == GC_INFO_HAS_GENERICS_INST_CONTEXT_MT; - int hasStackBaseRegister = headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER; -#ifdef TARGET_AMD64 - m_WantsReportOnlyLeaf = ((headerFlags & GC_INFO_WANTS_REPORT_ONLY_LEAF) != 0); -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - m_HasTailCalls = ((headerFlags & GC_INFO_HAS_TAILCALLS) != 0); -#endif // TARGET_AMD64 - int hasEncInfo = headerFlags & GC_INFO_HAS_EDIT_AND_CONTINUE_INFO; - int hasReversePInvokeFrame = headerFlags & GC_INFO_REVERSE_PINVOKE_FRAME; - int returnKindBits = (slimHeader) ? SIZE_OF_RETURN_KIND_IN_SLIM_HEADER : SIZE_OF_RETURN_KIND_IN_FAT_HEADER; m_ReturnKind = (ReturnKind)((UINT32)m_Reader.Read(returnKindBits)); @@ -162,7 +146,7 @@ GcInfoDecoder::GcInfoDecoder( return; } - if (hasGSCookie) + if (m_headerFlags & GC_INFO_HAS_GS_COOKIE) { // Note that normalization as a code offset can be different than // normalization as code length @@ -176,7 +160,7 @@ GcInfoDecoder::GcInfoDecoder( m_ValidRangeEnd = (UINT32) DENORMALIZE_CODE_OFFSET(normCodeLength - normEpilogSize); _ASSERTE(m_ValidRangeStart < m_ValidRangeEnd); } - else if (hasGenericsInstContext) + else if ((m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) != GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE) { // Decode prolog information UINT32 normPrologSize = (UINT32) m_Reader.DecodeVarLengthUnsigned(NORM_PROLOG_SIZE_ENCBASE) + 1; @@ -197,7 +181,7 @@ GcInfoDecoder::GcInfoDecoder( } // Decode the offset to the GS cookie. - if(hasGSCookie) + if(m_headerFlags & GC_INFO_HAS_GS_COOKIE) { m_GSCookieStackSlot = (INT32) DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(GS_COOKIE_STACK_SLOT_ENCBASE)); } @@ -215,7 +199,7 @@ GcInfoDecoder::GcInfoDecoder( // Decode the offset to the PSPSym. // The PSPSym is relative to the caller SP on IA64 and the initial stack pointer before any stack allocation on X64 (InitialSP). - if(hasPSPSym) + if(m_headerFlags & GC_INFO_HAS_PSP_SYM) { m_PSPSymStackSlot = (INT32) DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(PSP_SYM_STACK_SLOT_ENCBASE)); } @@ -232,7 +216,7 @@ GcInfoDecoder::GcInfoDecoder( } // Decode the offset to the generics type context. - if(hasGenericsInstContext) + if((m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) != GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE) { m_GenericsInstContextStackSlot = (INT32) DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE)); } @@ -248,7 +232,7 @@ GcInfoDecoder::GcInfoDecoder( return; } - if(hasStackBaseRegister) + if(m_headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER) { if (slimHeader) { @@ -264,7 +248,7 @@ GcInfoDecoder::GcInfoDecoder( m_StackBaseRegister = NO_STACK_BASE_REGISTER; } - if (hasEncInfo) + if (m_headerFlags & GC_INFO_HAS_EDIT_AND_CONTINUE_INFO) { m_SizeOfEditAndContinuePreservedArea = (UINT32) m_Reader.DecodeVarLengthUnsigned(SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE); #ifdef TARGET_ARM64 @@ -286,7 +270,7 @@ GcInfoDecoder::GcInfoDecoder( return; } - if (hasReversePInvokeFrame) + if (m_headerFlags & GC_INFO_REVERSE_PINVOKE_FRAME) { m_ReversePInvokeFrameStackSlot = (INT32)DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(REVERSE_PINVOKE_FRAME_ENCBASE)); } @@ -364,13 +348,13 @@ bool GcInfoDecoder::IsInterruptible() bool GcInfoDecoder::HasMethodDescGenericsInstContext() { _ASSERTE( m_Flags & DECODE_GENERICS_INST_CONTEXT ); - return m_GenericSecretParamIsMD; + return (m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) == GC_INFO_HAS_GENERICS_INST_CONTEXT_MD; } bool GcInfoDecoder::HasMethodTableGenericsInstContext() { _ASSERTE( m_Flags & DECODE_GENERICS_INST_CONTEXT ); - return m_GenericSecretParamIsMT; + return (m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) == GC_INFO_HAS_GENERICS_INST_CONTEXT_MT; } #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED @@ -560,14 +544,14 @@ INT32 GcInfoDecoder::GetPSPSymStackSlot() bool GcInfoDecoder::GetIsVarArg() { _ASSERTE( m_Flags & DECODE_VARARG ); - return m_IsVarArg; + return m_headerFlags & GC_INFO_IS_VARARG; } #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) bool GcInfoDecoder::HasTailCalls() { _ASSERTE( m_Flags & DECODE_HAS_TAILCALLS ); - return m_HasTailCalls; + return ((headerFlags & GC_INFO_HAS_TAILCALLS) != 0); } #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 @@ -575,7 +559,7 @@ bool GcInfoDecoder::WantsReportOnlyLeaf() { // Only AMD64 with JIT64 can return false here. #ifdef TARGET_AMD64 - return m_WantsReportOnlyLeaf; + return ((m_headerFlags & GC_INFO_WANTS_REPORT_ONLY_LEAF) != 0); #else return true; #endif From 7bd79a6270eb417eeddc3f16e5eaaeda9d5644e3 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 17 Dec 2023 13:50:52 -0800 Subject: [PATCH 05/13] factored out predecoding of fat header --- src/coreclr/inc/gcinfodecoder.h | 2 + src/coreclr/vm/gcinfodecoder.cpp | 209 +++++++++++++++++++------------ 2 files changed, 131 insertions(+), 80 deletions(-) diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index f60fe2716b54dd..738ea151c21746 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -592,6 +592,8 @@ class GcInfoDecoder #endif UINT32 m_Version; + bool PredecodeFatHeader(int remainingFlags); + static bool SetIsInterruptibleCB (UINT32 startOffset, UINT32 stopOffset, void * hCallback); OBJECTREF* GetRegisterSlot( diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index d11a023d649197..7c95c80a9f4e9c 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -83,49 +83,13 @@ bool GcInfoDecoder::SetIsInterruptibleCB (UINT32 startOffset, UINT32 stopOffset, return fStop; } -GcInfoDecoder::GcInfoDecoder( - GCInfoToken gcInfoToken, - GcInfoDecoderFlags flags, - UINT32 breakOffset - ) - : m_Reader(dac_cast(gcInfoToken.Info)) - , m_InstructionOffset(breakOffset) - , m_IsInterruptible(false) - , m_ReturnKind(RT_Illegal) -#ifdef _DEBUG - , m_Flags( flags ) - , m_GcInfoAddress(dac_cast(gcInfoToken.Info)) -#endif - , m_Version(gcInfoToken.Version) +// returns true if we decoded enough; +bool GcInfoDecoder::PredecodeFatHeader(int remainingFlags) { - _ASSERTE( (flags & (DECODE_INTERRUPTIBILITY | DECODE_GC_LIFETIMES)) || (0 == breakOffset) ); - - // The current implementation doesn't support the two flags together - _ASSERTE( - ((flags & (DECODE_INTERRUPTIBILITY | DECODE_GC_LIFETIMES)) != (DECODE_INTERRUPTIBILITY | DECODE_GC_LIFETIMES)) - ); - - //-------------------------------------------- - // Pre-decode information - //-------------------------------------------- - - bool slimHeader = (m_Reader.ReadOneFast() == 0); - // Use flag mask to bail out early if we already decoded all the pieces that caller requested - int remainingFlags = flags == DECODE_EVERYTHING ? ~0 : flags; - - if (slimHeader) - { - m_headerFlags = (GcInfoHeaderFlags)(m_Reader.ReadOneFast() ? GC_INFO_HAS_STACK_BASE_REGISTER : 0); - } - else - { - int numFlagBits = (m_Version == 1) ? GC_INFO_FLAGS_BIT_SIZE_VERSION_1 : GC_INFO_FLAGS_BIT_SIZE; - m_headerFlags = (GcInfoHeaderFlags) m_Reader.Read(numFlagBits); - } + int numFlagBits = (m_Version == 1) ? GC_INFO_FLAGS_BIT_SIZE_VERSION_1 : GC_INFO_FLAGS_BIT_SIZE; + m_headerFlags = (GcInfoHeaderFlags)m_Reader.Read(numFlagBits); - int returnKindBits = (slimHeader) ? SIZE_OF_RETURN_KIND_IN_SLIM_HEADER : SIZE_OF_RETURN_KIND_IN_FAT_HEADER; - m_ReturnKind = - (ReturnKind)((UINT32)m_Reader.Read(returnKindBits)); + m_ReturnKind = (ReturnKind)((UINT32)m_Reader.Read(SIZE_OF_RETURN_KIND_IN_FAT_HEADER)); remainingFlags &= ~(DECODE_RETURN_KIND | DECODE_VARARG); #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) @@ -134,16 +98,15 @@ GcInfoDecoder::GcInfoDecoder( if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } - m_CodeLength = (UINT32) DENORMALIZE_CODE_LENGTH((UINT32) m_Reader.DecodeVarLengthUnsigned(CODE_LENGTH_ENCBASE)); - + m_CodeLength = (UINT32)DENORMALIZE_CODE_LENGTH((UINT32)m_Reader.DecodeVarLengthUnsigned(CODE_LENGTH_ENCBASE)); remainingFlags &= ~DECODE_CODE_LENGTH; if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } if (m_headerFlags & GC_INFO_HAS_GS_COOKIE) @@ -153,18 +116,18 @@ GcInfoDecoder::GcInfoDecoder( UINT32 normCodeLength = NORMALIZE_CODE_OFFSET(m_CodeLength); // Decode prolog/epilog information - UINT32 normPrologSize = (UINT32) m_Reader.DecodeVarLengthUnsigned(NORM_PROLOG_SIZE_ENCBASE) + 1; - UINT32 normEpilogSize = (UINT32) m_Reader.DecodeVarLengthUnsigned(NORM_EPILOG_SIZE_ENCBASE); + UINT32 normPrologSize = (UINT32)m_Reader.DecodeVarLengthUnsigned(NORM_PROLOG_SIZE_ENCBASE) + 1; + UINT32 normEpilogSize = (UINT32)m_Reader.DecodeVarLengthUnsigned(NORM_EPILOG_SIZE_ENCBASE); - m_ValidRangeStart = (UINT32) DENORMALIZE_CODE_OFFSET(normPrologSize); - m_ValidRangeEnd = (UINT32) DENORMALIZE_CODE_OFFSET(normCodeLength - normEpilogSize); + m_ValidRangeStart = (UINT32)DENORMALIZE_CODE_OFFSET(normPrologSize); + m_ValidRangeEnd = (UINT32)DENORMALIZE_CODE_OFFSET(normCodeLength - normEpilogSize); _ASSERTE(m_ValidRangeStart < m_ValidRangeEnd); } else if ((m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) != GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE) { // Decode prolog information - UINT32 normPrologSize = (UINT32) m_Reader.DecodeVarLengthUnsigned(NORM_PROLOG_SIZE_ENCBASE) + 1; - m_ValidRangeStart = (UINT32) DENORMALIZE_CODE_OFFSET(normPrologSize); + UINT32 normPrologSize = (UINT32)m_Reader.DecodeVarLengthUnsigned(NORM_PROLOG_SIZE_ENCBASE) + 1; + m_ValidRangeStart = (UINT32)DENORMALIZE_CODE_OFFSET(normPrologSize); // satisfy asserts that assume m_GSCookieValidRangeStart != 0 ==> m_GSCookieValidRangeStart < m_GSCookieValidRangeEnd m_ValidRangeEnd = m_ValidRangeStart + 1; } @@ -177,48 +140,48 @@ GcInfoDecoder::GcInfoDecoder( if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } // Decode the offset to the GS cookie. - if(m_headerFlags & GC_INFO_HAS_GS_COOKIE) + if (m_headerFlags & GC_INFO_HAS_GS_COOKIE) { - m_GSCookieStackSlot = (INT32) DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(GS_COOKIE_STACK_SLOT_ENCBASE)); + m_GSCookieStackSlot = (INT32)DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(GS_COOKIE_STACK_SLOT_ENCBASE)); } else { - m_GSCookieStackSlot = NO_GS_COOKIE; + m_GSCookieStackSlot = NO_GS_COOKIE; } remainingFlags &= ~DECODE_GS_COOKIE; if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } // Decode the offset to the PSPSym. // The PSPSym is relative to the caller SP on IA64 and the initial stack pointer before any stack allocation on X64 (InitialSP). - if(m_headerFlags & GC_INFO_HAS_PSP_SYM) + if (m_headerFlags & GC_INFO_HAS_PSP_SYM) { - m_PSPSymStackSlot = (INT32) DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(PSP_SYM_STACK_SLOT_ENCBASE)); + m_PSPSymStackSlot = (INT32)DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(PSP_SYM_STACK_SLOT_ENCBASE)); } else { - m_PSPSymStackSlot = NO_PSP_SYM; + m_PSPSymStackSlot = NO_PSP_SYM; } remainingFlags &= ~DECODE_PSP_SYM; if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } // Decode the offset to the generics type context. - if((m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) != GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE) + if ((m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) != GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE) { - m_GenericsInstContextStackSlot = (INT32) DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE)); + m_GenericsInstContextStackSlot = (INT32)DENORMALIZE_STACK_SLOT(m_Reader.DecodeVarLengthSigned(GENERICS_INST_CONTEXT_STACK_SLOT_ENCBASE)); } else { @@ -229,19 +192,12 @@ GcInfoDecoder::GcInfoDecoder( if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } - if(m_headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER) + if (m_headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER) { - if (slimHeader) - { - m_StackBaseRegister = (UINT32) DENORMALIZE_STACK_BASE_REGISTER(0); - } - else - { - m_StackBaseRegister = (UINT32) DENORMALIZE_STACK_BASE_REGISTER(m_Reader.DecodeVarLengthUnsigned(STACK_BASE_REGISTER_ENCBASE)); - } + m_StackBaseRegister = (UINT32)DENORMALIZE_STACK_BASE_REGISTER(m_Reader.DecodeVarLengthUnsigned(STACK_BASE_REGISTER_ENCBASE)); } else { @@ -250,9 +206,9 @@ GcInfoDecoder::GcInfoDecoder( if (m_headerFlags & GC_INFO_HAS_EDIT_AND_CONTINUE_INFO) { - m_SizeOfEditAndContinuePreservedArea = (UINT32) m_Reader.DecodeVarLengthUnsigned(SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE); + m_SizeOfEditAndContinuePreservedArea = (UINT32)m_Reader.DecodeVarLengthUnsigned(SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE); #ifdef TARGET_ARM64 - m_SizeOfEditAndContinueFixedStackFrame = (UINT32) m_Reader.DecodeVarLengthUnsigned(SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE); + m_SizeOfEditAndContinueFixedStackFrame = (UINT32)m_Reader.DecodeVarLengthUnsigned(SIZE_OF_EDIT_AND_CONTINUE_FIXED_STACK_FRAME_ENCBASE); #endif } else @@ -267,7 +223,7 @@ GcInfoDecoder::GcInfoDecoder( if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } if (m_headerFlags & GC_INFO_REVERSE_PINVOKE_FRAME) @@ -283,20 +239,113 @@ GcInfoDecoder::GcInfoDecoder( if (remainingFlags == 0) { // Bail, if we've decoded enough, - return; + return true; } #ifdef FIXED_STACK_PARAMETER_SCRATCH_AREA - if (slimHeader) + m_SizeOfStackOutgoingAndScratchArea = (UINT32)DENORMALIZE_SIZE_OF_STACK_AREA(m_Reader.DecodeVarLengthUnsigned(SIZE_OF_STACK_AREA_ENCBASE)); +#endif // FIXED_STACK_PARAMETER_SCRATCH_AREA + + return false; +} + +GcInfoDecoder::GcInfoDecoder( + GCInfoToken gcInfoToken, + GcInfoDecoderFlags flags, + UINT32 breakOffset + ) + : m_Reader(dac_cast(gcInfoToken.Info)) + , m_InstructionOffset(breakOffset) + , m_IsInterruptible(false) + , m_ReturnKind(RT_Illegal) +#ifdef _DEBUG + , m_Flags( flags ) + , m_GcInfoAddress(dac_cast(gcInfoToken.Info)) +#endif + , m_Version(gcInfoToken.Version) +{ + _ASSERTE( (flags & (DECODE_INTERRUPTIBILITY | DECODE_GC_LIFETIMES)) || (0 == breakOffset) ); + + // The current implementation doesn't support the two flags together + _ASSERTE( + ((flags & (DECODE_INTERRUPTIBILITY | DECODE_GC_LIFETIMES)) != (DECODE_INTERRUPTIBILITY | DECODE_GC_LIFETIMES)) + ); + + //-------------------------------------------- + // Pre-decode information + //-------------------------------------------- + + bool slimHeader = (m_Reader.ReadOneFast() == 0); + // Use flag mask to bail out early if we already decoded all the pieces that caller requested + int remainingFlags = flags == DECODE_EVERYTHING ? ~0 : flags; + + if (!slimHeader) { - m_SizeOfStackOutgoingAndScratchArea = 0; + if (PredecodeFatHeader(remainingFlags)) + return; } else { - m_SizeOfStackOutgoingAndScratchArea = (UINT32)DENORMALIZE_SIZE_OF_STACK_AREA(m_Reader.DecodeVarLengthUnsigned(SIZE_OF_STACK_AREA_ENCBASE)); - } + m_headerFlags = (GcInfoHeaderFlags)(m_Reader.ReadOneFast() ? GC_INFO_HAS_STACK_BASE_REGISTER : 0); + m_ReturnKind = (ReturnKind)((UINT32)m_Reader.Read(SIZE_OF_RETURN_KIND_IN_SLIM_HEADER)); + + remainingFlags &= ~(DECODE_RETURN_KIND | DECODE_VARARG); +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + remainingFlags &= ~DECODE_HAS_TAILCALLS; +#endif + if (remainingFlags == 0) + { + // Bail, if we've decoded enough, + return; + } + + m_CodeLength = (UINT32)DENORMALIZE_CODE_LENGTH((UINT32)m_Reader.DecodeVarLengthUnsigned(CODE_LENGTH_ENCBASE)); + + // + // predecoding the rest of slim header does not require any reading. + // + + m_ValidRangeStart = m_ValidRangeEnd = 0; + m_GSCookieStackSlot = NO_GS_COOKIE; + m_PSPSymStackSlot = NO_PSP_SYM; + m_GenericsInstContextStackSlot = NO_GENERICS_INST_CONTEXT; + + if (m_headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER) + { + m_StackBaseRegister = (UINT32)DENORMALIZE_STACK_BASE_REGISTER(0); + } + else + { + m_StackBaseRegister = NO_STACK_BASE_REGISTER; + } + + m_SizeOfEditAndContinuePreservedArea = NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA; +#ifdef TARGET_ARM64 + m_SizeOfEditAndContinueFixedStackFrame = 0; +#endif + + m_ReversePInvokeFrameStackSlot = NO_REVERSE_PINVOKE_FRAME; + +#ifdef FIXED_STACK_PARAMETER_SCRATCH_AREA + m_SizeOfStackOutgoingAndScratchArea = 0; #endif // FIXED_STACK_PARAMETER_SCRATCH_AREA + remainingFlags &= ~(DECODE_CODE_LENGTH + | DECODE_PROLOG_LENGTH + | DECODE_GS_COOKIE + | DECODE_PSP_SYM + | DECODE_GENERICS_INST_CONTEXT + | DECODE_EDIT_AND_CONTINUE + | DECODE_REVERSE_PINVOKE_VAR + ); + + if (remainingFlags == 0) + { + // Bail, if we've decoded enough, + return; + } + } + #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED m_NumSafePoints = (UINT32) DENORMALIZE_NUM_SAFE_POINTS(m_Reader.DecodeVarLengthUnsigned(NUM_SAFE_POINTS_ENCBASE)); #endif From e443e8279f9592647a898c7c2cbe2588389e1e4a Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:54:32 -0800 Subject: [PATCH 06/13] tweaks and comments --- src/coreclr/vm/gcinfodecoder.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 7c95c80a9f4e9c..e5b4dfac616d8f 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -83,7 +83,7 @@ bool GcInfoDecoder::SetIsInterruptibleCB (UINT32 startOffset, UINT32 stopOffset, return fStop; } -// returns true if we decoded enough; +// returns true if we decoded all that was asked; bool GcInfoDecoder::PredecodeFatHeader(int remainingFlags) { int numFlagBits = (m_Version == 1) ? GC_INFO_FLAGS_BIT_SIZE_VERSION_1 : GC_INFO_FLAGS_BIT_SIZE; @@ -286,13 +286,24 @@ GcInfoDecoder::GcInfoDecoder( } else { - m_headerFlags = (GcInfoHeaderFlags)(m_Reader.ReadOneFast() ? GC_INFO_HAS_STACK_BASE_REGISTER : 0); + if (m_Reader.ReadOneFast()) + { + m_headerFlags = GC_INFO_HAS_STACK_BASE_REGISTER; + m_StackBaseRegister = (UINT32)DENORMALIZE_STACK_BASE_REGISTER(0); + } + else + { + m_headerFlags = (GcInfoHeaderFlags)0; + m_StackBaseRegister = NO_STACK_BASE_REGISTER; + } + m_ReturnKind = (ReturnKind)((UINT32)m_Reader.Read(SIZE_OF_RETURN_KIND_IN_SLIM_HEADER)); remainingFlags &= ~(DECODE_RETURN_KIND | DECODE_VARARG); #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) remainingFlags &= ~DECODE_HAS_TAILCALLS; #endif + if (remainingFlags == 0) { // Bail, if we've decoded enough, @@ -309,17 +320,8 @@ GcInfoDecoder::GcInfoDecoder( m_GSCookieStackSlot = NO_GS_COOKIE; m_PSPSymStackSlot = NO_PSP_SYM; m_GenericsInstContextStackSlot = NO_GENERICS_INST_CONTEXT; - - if (m_headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER) - { - m_StackBaseRegister = (UINT32)DENORMALIZE_STACK_BASE_REGISTER(0); - } - else - { - m_StackBaseRegister = NO_STACK_BASE_REGISTER; - } - m_SizeOfEditAndContinuePreservedArea = NO_SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA; + #ifdef TARGET_ARM64 m_SizeOfEditAndContinueFixedStackFrame = 0; #endif @@ -427,7 +429,7 @@ bool GcInfoDecoder::IsSafePoint(UINT32 codeOffset) } -// Repositioning within a bit stream is relatively involved, compared to sequential read, +// Repositioning within a bit stream is an involved operation, compared to sequential read, // so we prefer linear search unless the number of safepoints is too high. // The limit is not very significant as most methods will have just a few safe points. // At 32, even if a single point is 16bit encoded (64K method length), From a7ca3a5ad69a8ed8a928405b51bc71144e6dcaf1 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 18 Dec 2023 16:20:17 -0800 Subject: [PATCH 07/13] fix arm64 build --- src/coreclr/inc/gcinfotypes.h | 9 ++++++--- src/coreclr/vm/gcinfodecoder.cpp | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index a560e7cf043fba..7546e0645342cf 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -47,19 +47,22 @@ __forceinline size_t SAFE_SHIFT_RIGHT(size_t x, size_t count) inline UINT32 CeilOfLog2(size_t x) { - // we are ok even if bsr is used vs. lzcnt + // we want lzcnt, but bsr is ok too _ASSERTE(x > 0); x = (x << 1) - 1; + #ifdef TARGET_64BIT #ifdef _MSC_VER - UINT32 lzcountCeil = (UINT32)__lzcnt64((unsigned __int64)x); + DWORD lzcountCeil; + _BitScanReverse64(&lzcountCeil, (unsigned long)x); #else // _MSC_VER UINT32 lzcountCeil = (UINT32)__builtin_clzl((unsigned long)x); #endif // _MSC_VER #else // TARGET_64BIT #ifdef _MSC_VER - UINT32 lzcountCeil = (UINT32)__lzcnt((unsigned int)x); + DWORD lzcountCeil; + _BitScanReverse(&lzcountCeil, (unsigned long)x); #else // _MSC_VER UINT32 lzcountCeil = (UINT32)__builtin_clz((unsigned int)x); #endif // _MSC_VER diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index e5b4dfac616d8f..636c42c54d9ead 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -602,7 +602,7 @@ bool GcInfoDecoder::GetIsVarArg() bool GcInfoDecoder::HasTailCalls() { _ASSERTE( m_Flags & DECODE_HAS_TAILCALLS ); - return ((headerFlags & GC_INFO_HAS_TAILCALLS) != 0); + return ((m_headerFlags & GC_INFO_HAS_TAILCALLS) != 0); } #endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 From 14ef9e1683f902cb1974f43aed250afabf1d4ab8 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 18 Dec 2023 21:26:27 -0800 Subject: [PATCH 08/13] use intrinsics correctly --- src/coreclr/inc/gcinfotypes.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index 7546e0645342cf..b60ef374adc942 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -47,28 +47,31 @@ __forceinline size_t SAFE_SHIFT_RIGHT(size_t x, size_t count) inline UINT32 CeilOfLog2(size_t x) { - // we want lzcnt, but bsr is ok too + // it is ok to use bsr or clz unconditionally _ASSERTE(x > 0); x = (x << 1) - 1; #ifdef TARGET_64BIT #ifdef _MSC_VER - DWORD lzcountCeil; - _BitScanReverse64(&lzcountCeil, (unsigned long)x); + DWORD result; + _BitScanReverse64(&result, (unsigned long)x); + return (UINT32)result; #else // _MSC_VER - UINT32 lzcountCeil = (UINT32)__builtin_clzl((unsigned long)x); + // LZCNT returns index starting from MSB, whereas BSR gives the index from LSB. + // 63 ^ BSR here is equivalent to 63 - BSR since the BSR result is always between 0 and 63. + // This saves an instruction, as subtraction from constant requires either MOV/SUB or NEG/ADD. + return (UINT32)63 ^ (UINT32)__builtin_clzl((unsigned long)x); #endif // _MSC_VER #else // TARGET_64BIT #ifdef _MSC_VER - DWORD lzcountCeil; - _BitScanReverse(&lzcountCeil, (unsigned long)x); + DWORD result; + _BitScanReverse(&result, (unsigned int)x); + return (UINT32)result; #else // _MSC_VER - UINT32 lzcountCeil = (UINT32)__builtin_clz((unsigned int)x); + return (UINT32)31 ^ (UINT32)__builtin_clz((unsigned int)x); #endif // _MSC_VER #endif - - return BITS_PER_SIZE_T - lzcountCeil; } enum GcSlotFlags From 05b269baeac124e2fb4b7e84b2f23879197a4089 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 19 Dec 2023 10:54:07 -0800 Subject: [PATCH 09/13] no need to search for safepoints when asked only for interruptibility --- src/coreclr/inc/gcinfotypes.h | 2 +- src/coreclr/vm/gcinfodecoder.cpp | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index b60ef374adc942..ee46330119baed 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -59,7 +59,7 @@ inline UINT32 CeilOfLog2(size_t x) return (UINT32)result; #else // _MSC_VER // LZCNT returns index starting from MSB, whereas BSR gives the index from LSB. - // 63 ^ BSR here is equivalent to 63 - BSR since the BSR result is always between 0 and 63. + // 63 ^ LZCNT here is equivalent to 63 - LZCNT since the LZCNT result is always between 0 and 63. // This saves an instruction, as subtraction from constant requires either MOV/SUB or NEG/ADD. return (UINT32)63 ^ (UINT32)__builtin_clzl((unsigned long)x); #endif // _MSC_VER diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 636c42c54d9ead..a03d96b1dd1661 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -350,6 +350,7 @@ GcInfoDecoder::GcInfoDecoder( #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED m_NumSafePoints = (UINT32) DENORMALIZE_NUM_SAFE_POINTS(m_Reader.DecodeVarLengthUnsigned(NUM_SAFE_POINTS_ENCBASE)); + m_SafePointIndex = m_NumSafePoints; #endif if (slimHeader) @@ -362,18 +363,14 @@ GcInfoDecoder::GcInfoDecoder( } #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - if(flags & (DECODE_INTERRUPTIBILITY | DECODE_GC_LIFETIMES)) + if(flags & (DECODE_GC_LIFETIMES)) { if(m_NumSafePoints) { m_SafePointIndex = FindSafePoint(m_InstructionOffset); } - else - { - m_SafePointIndex = 0; - } } - else if(flags & DECODE_FOR_RANGES_CALLBACK) + else if(flags & (DECODE_FOR_RANGES_CALLBACK | DECODE_INTERRUPTIBILITY)) { // Note that normalization as a code offset can be different than // normalization as code length From 5106b833fd387c045a4d5ff7b82239b656760127 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 19 Dec 2023 13:59:32 -0800 Subject: [PATCH 10/13] fix for arm --- src/coreclr/vm/gcinfodecoder.cpp | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index a03d96b1dd1661..08c56661edf075 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -415,10 +415,9 @@ bool GcInfoDecoder::IsSafePoint(UINT32 codeOffset) if(m_NumSafePoints == 0) return false; -#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Safepoints are encoded with a -1 adjustment codeOffset--; -#endif + size_t savedPos = m_Reader.GetCurrentPos(); UINT32 safePointIndex = FindSafePoint(codeOffset); m_Reader.SetCurrentPos(savedPos); @@ -461,35 +460,36 @@ UINT32 GcInfoDecoder::NarrowSafePointSearch(size_t savedPos, UINT32 breakOffset, UINT32 GcInfoDecoder::FindSafePoint(UINT32 breakOffset) { _ASSERTE(m_NumSafePoints > 0); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // Safepoints are encoded with a -1 adjustment - _ASSERTE((breakOffset & 1) != 0); -#endif - UINT32 result = m_NumSafePoints; const size_t savedPos = m_Reader.GetCurrentPos(); - const UINT32 normBreakOffset = NORMALIZE_CODE_OFFSET(breakOffset); - - UINT32 linearSearchStart = 0; - UINT32 linearSearchEnd = m_NumSafePoints; - if (linearSearchEnd - linearSearchStart > MAX_LINEAR_SEARCH) - { - linearSearchStart = NarrowSafePointSearch(savedPos, normBreakOffset, &linearSearchEnd); - } - const UINT32 numBitsPerOffset = CeilOfLog2(NORMALIZE_CODE_OFFSET(m_CodeLength)); - for (UINT32 i = linearSearchStart; i < linearSearchEnd; i++) + +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // Safepoints are encoded with a -1 adjustment + if ((breakOffset & 1) != 0) +#endif { - UINT32 spOffset = (UINT32)m_Reader.Read(numBitsPerOffset); - if(spOffset == normBreakOffset) + const UINT32 normBreakOffset = NORMALIZE_CODE_OFFSET(breakOffset); + UINT32 linearSearchStart = 0; + UINT32 linearSearchEnd = m_NumSafePoints; + if (linearSearchEnd - linearSearchStart > MAX_LINEAR_SEARCH) { - result = i; - break; + linearSearchStart = NarrowSafePointSearch(savedPos, normBreakOffset, &linearSearchEnd); } - if (spOffset > normBreakOffset) + for (UINT32 i = linearSearchStart; i < linearSearchEnd; i++) { - break; + UINT32 spOffset = (UINT32)m_Reader.Read(numBitsPerOffset); + if (spOffset == normBreakOffset) + { + result = i; + break; + } + + if (spOffset > normBreakOffset) + { + break; + } } } From 902ed79c8e13b4986bc1739de2188b7c7ca8f22d Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 19 Dec 2023 20:32:43 -0800 Subject: [PATCH 11/13] small tweak for DecodeVarLengthUnsigned --- src/coreclr/inc/gcinfodecoder.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index 738ea151c21746..34af8c53055687 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -369,17 +369,17 @@ class BitStreamReader // See the corresponding methods on BitStreamWriter for more information on the format //-------------------------------------------------------------------------- - inline size_t DecodeVarLengthUnsigned( int base ) + size_t DecodeVarLengthUnsignedMore ( int base ) { _ASSERTE((base > 0) && (base < (int)BITS_PER_SIZE_T)); size_t numEncodings = size_t{ 1 } << base; - size_t result = 0; - for(int shift=0; ; shift+=base) + size_t result = numEncodings; + for(int shift=base; ; shift+=base) { _ASSERTE(shift+base <= (int)BITS_PER_SIZE_T); size_t currentChunk = Read(base+1); - result |= (currentChunk & (numEncodings-1)) << shift; + result ^= (currentChunk & (numEncodings-1)) << shift; if(!(currentChunk & numEncodings)) { // Extension bit is not set, we're done. @@ -388,6 +388,19 @@ class BitStreamReader } } + __forceinline size_t DecodeVarLengthUnsigned(int base) + { + _ASSERTE((base > 0) && (base < (int)BITS_PER_SIZE_T)); + + size_t result = Read(base + 1); + if (result & ((size_t)1 << base)) + { + result ^= DecodeVarLengthUnsignedMore(base); + } + + return result; + } + inline SSIZE_T DecodeVarLengthSigned( int base ) { _ASSERTE((base > 0) && (base < (int)BITS_PER_SIZE_T)); From 6ffcdafc6040314ad6a2cf13152468b416c76521 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 26 Dec 2023 16:48:06 -0800 Subject: [PATCH 12/13] removed SAFE_SHIFT_LEFT, SAFE_SHIFT_RIGHT --- src/coreclr/inc/gcinfoencoder.h | 2 +- src/coreclr/inc/gcinfotypes.h | 19 ------------------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/src/coreclr/inc/gcinfoencoder.h b/src/coreclr/inc/gcinfoencoder.h index ea71e14d8dbfe0..5085e5971a8ebd 100644 --- a/src/coreclr/inc/gcinfoencoder.h +++ b/src/coreclr/inc/gcinfoencoder.h @@ -285,7 +285,7 @@ class BitStreamWriter // Writes bits knowing that they will all fit in the current memory slot inline void WriteInCurrentSlot( size_t data, UINT32 count ) { - data &= SAFE_SHIFT_LEFT(1, count) - 1; + data &= ((size_t)-1 >> (BITS_PER_SIZE_T - count)); data <<= (BITS_PER_SIZE_T - m_FreeBitsInCurrentSlot); *m_pCurrentSlot |= data; } diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index ee46330119baed..7457063d47eb3f 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -26,25 +26,6 @@ #define BITS_PER_SIZE_T ((int)sizeof(size_t)*8) - -//-------------------------------------------------------------------------------- -// It turns out, that ((size_t)x) << y == x, when y is not a literal -// and its value is BITS_PER_SIZE_T -// I guess the processor only shifts of the right operand modulo BITS_PER_SIZE_T -// In many cases, we want the above operation to yield 0, -// hence the following macros -//-------------------------------------------------------------------------------- -__forceinline size_t SAFE_SHIFT_LEFT(size_t x, size_t count) -{ - _ASSERTE(count <= BITS_PER_SIZE_T); - return (x << 1) << (count - 1); -} -__forceinline size_t SAFE_SHIFT_RIGHT(size_t x, size_t count) -{ - _ASSERTE(count <= BITS_PER_SIZE_T); - return (x >> 1) >> (count - 1); -} - inline UINT32 CeilOfLog2(size_t x) { // it is ok to use bsr or clz unconditionally From 53e523742622281623720648ae1e58fdd9ad8164 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 27 Dec 2023 22:23:58 -0800 Subject: [PATCH 13/13] undo unnecessary change. --- src/coreclr/vm/gcinfodecoder.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 08c56661edf075..c42845654014e4 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -415,9 +415,10 @@ bool GcInfoDecoder::IsSafePoint(UINT32 codeOffset) if(m_NumSafePoints == 0) return false; +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // Safepoints are encoded with a -1 adjustment codeOffset--; - +#endif size_t savedPos = m_Reader.GetCurrentPos(); UINT32 safePointIndex = FindSafePoint(codeOffset); m_Reader.SetCurrentPos(savedPos);