From b74ee38e929f25d8dc3368be678d693dde4bef1f Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Dec 2023 17:06:26 -0800 Subject: [PATCH 1/6] indexed unwind info search --- .../Runtime/windows/CoffNativeCodeManager.cpp | 131 +++++++++++------- .../Runtime/windows/CoffNativeCodeManager.h | 5 + 2 files changed, 84 insertions(+), 52 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index cc409cf2167a16..79a5ed59315a32 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -164,63 +164,72 @@ static PTR_VOID GetUnwindDataBlob(TADDR moduleBase, PTR_RUNTIME_FUNCTION pRuntim #endif } +// index nodes are searched linearly. +// 16 * sizeof(uint32_t) == 64, which is a typical cache line size +// thus we expect at most one cache miss on every level of the index +#define INDEX_BRANCHING_FACTOR 16 +// T_RUNTIME_FUNCTION is larger than uint32_t, so we have a smaller granularity at last level +#define FUNCTABLE_INDEX_GRANULARITY 8 +#define INDEX_ALIGNMENT 64 CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase, PTR_VOID pvManagedCodeStartRange, uint32_t cbManagedCodeRange, PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, uint32_t nRuntimeFunctionTable, PTR_PTR_VOID pClasslibFunctions, uint32_t nClasslibFunctions) : m_moduleBase(moduleBase), - m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), - m_pRuntimeFunctionTable(pRuntimeFunctionTable), m_nRuntimeFunctionTable(nRuntimeFunctionTable), - m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) + m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), + m_pRuntimeFunctionTable(pRuntimeFunctionTable), m_nRuntimeFunctionTable(nRuntimeFunctionTable), + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) { -} - -CoffNativeCodeManager::~CoffNativeCodeManager() -{ -} - -static int LookupUnwindInfoForMethod(uint32_t relativePc, - PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, - int low, - int high) -{ - // Binary search the RUNTIME_FUNCTION table - // Use linear search once we get down to a small number of elements - // to avoid Binary search overhead. - while (high - low > 10) + // max offset is beyond the range of managed methods. + int maxOffset = (int)((TADDR)pvManagedCodeStartRange + cbManagedCodeRange - moduleBase); + + // lets build the index for the runtime table. for every granule that has elements we will have an index entry + uint32_t indexSize = (nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1) / FUNCTABLE_INDEX_GRANULARITY; + uint32_t indexCount = 0; + uint32_t* index = m_indices[indexCount++] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); + if (!index) + abort(); // can't allocate some modest amount of memory at startup + + // in every index N we will put the lowest value from the granule N + 1 + // when we will scan the value N in the indices and see that it is higher than the target, we will know + // that the granule N must be scnned for the entry as the next granule will have higher addresses. + for (uint32_t i = 1; i < indexSize; i++) { - int middle = low + (high - low) / 2; - - PTR_RUNTIME_FUNCTION pFunctionEntry = pRuntimeFunctionTable + middle; - if (relativePc < pFunctionEntry->BeginAddress) - { - high = middle - 1; - } - else - { - low = middle; - } + _ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < nRuntimeFunctionTable); + index[i - 1] = pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress; } - for (int i = low; i < high; i++) + // we put the maxOffset at the end of the index. + // there is no N + 1 granule to get the value from, so the last slot will contain the sentinel. + index[indexSize - 1] = maxOffset; + + // Now build the N-ary tree of indices. + // At branching factor 16 a program with 32K methods will have 3 sub-index levels. + uint32_t* prevIdx = index; + while (indexSize > INDEX_BRANCHING_FACTOR) { - PTR_RUNTIME_FUNCTION pNextFunctionEntry = pRuntimeFunctionTable + (i + 1); - if (relativePc < pNextFunctionEntry->BeginAddress) + uint32_t prevSize = indexSize; + indexSize = (indexSize + INDEX_BRANCHING_FACTOR - 1) / INDEX_BRANCHING_FACTOR; + index = m_indices[indexCount++] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); + if (!index) + abort(); // can't allocate some modest amout of memory at startup + + for (uint32_t i = 1; i < indexSize; i++) { - high = i; - break; + _ASSERTE(i * INDEX_BRANCHING_FACTOR < prevSize); + index[i - 1] = prevIdx[i * INDEX_BRANCHING_FACTOR]; } - } - PTR_RUNTIME_FUNCTION pFunctionEntry = pRuntimeFunctionTable + high; - if (relativePc >= pFunctionEntry->BeginAddress) - { - return high; + index[indexSize - 1] = maxOffset; + prevIdx = index; } - ASSERT_UNCONDITIONALLY("Invalid code address"); - return -1; + m_indexCount = indexCount; +} + +CoffNativeCodeManager::~CoffNativeCodeManager() +{ } struct CoffNativeMethodInfo @@ -233,6 +242,33 @@ struct CoffNativeMethodInfo // Ensure that CoffNativeMethodInfo fits into the space reserved by MethodInfo static_assert(sizeof(CoffNativeMethodInfo) <= sizeof(MethodInfo), "CoffNativeMethodInfo too big"); +int CoffNativeCodeManager::LookupUnwindInfoIdx(uint32_t relativePc) +{ + uint32_t idx = 0; + for (int j = m_indexCount - 1; j >= 0; j--) + { + uint32_t* index = m_indices[j]; + idx *= INDEX_BRANCHING_FACTOR; + + while ((uint32_t)index[idx] < relativePc) + idx++; + } + + for (idx *= FUNCTABLE_INDEX_GRANULARITY; idx < m_nRuntimeFunctionTable; idx++) + { + uint32_t curAddr = m_pRuntimeFunctionTable[idx].BeginAddress; + if (curAddr == relativePc) + return idx; + + if (curAddr > relativePc) + return idx - 1; + } + + // We can only get here if called with invalid address or m_pRuntimeFunctionTable is corrupted. + // Either way we cannot recover as we expect every managed method to have a method info. + UNREACHABLE(); +} + bool CoffNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, MethodInfo * pMethodInfoOut) { @@ -244,16 +280,10 @@ bool CoffNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, } CoffNativeMethodInfo * pMethodInfo = (CoffNativeMethodInfo *)pMethodInfoOut; - TADDR relativePC = dac_cast(ControlPC) - m_moduleBase; - - int MethodIndex = LookupUnwindInfoForMethod((uint32_t)relativePC, m_pRuntimeFunctionTable, - 0, m_nRuntimeFunctionTable - 1); - if (MethodIndex < 0) - return false; + int MethodIndex = LookupUnwindInfoIdx((uint32_t)relativePC); PTR_RUNTIME_FUNCTION pRuntimeFunction = m_pRuntimeFunctionTable + MethodIndex; - pMethodInfo->runtimeFunction = pRuntimeFunction; // The runtime function could correspond to a funclet. We need to get to the @@ -965,10 +995,7 @@ PTR_VOID CoffNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) } TADDR relativePC = dac_cast(ControlPC) - m_moduleBase; - - int MethodIndex = LookupUnwindInfoForMethod((uint32_t)relativePC, m_pRuntimeFunctionTable, 0, m_nRuntimeFunctionTable - 1); - if (MethodIndex < 0) - return NULL; + int MethodIndex = LookupUnwindInfoIdx((uint32_t)relativePC); PTR_RUNTIME_FUNCTION pRuntimeFunction = m_pRuntimeFunctionTable + MethodIndex; diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index 445a998fdb00bc..5d898215de9446 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -44,6 +44,11 @@ class CoffNativeCodeManager : public ICodeManager PTR_PTR_VOID m_pClasslibFunctions; uint32_t m_nClasslibFunctions; + int m_indexCount; + uint32_t* m_indices[8]; + + int LookupUnwindInfoIdx(uint32_t relativePc); + public: CoffNativeCodeManager(TADDR moduleBase, PTR_VOID pvManagedCodeStartRange, uint32_t cbManagedCodeRange, From 588b65af9a1eb73b3593e76bb7cf3ca4ef4298a7 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Fri, 22 Dec 2023 10:53:07 -0800 Subject: [PATCH 2/6] handle locations inside the last method --- .../Runtime/windows/CoffNativeCodeManager.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index 79a5ed59315a32..79b1bdd6d01660 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -264,9 +264,13 @@ int CoffNativeCodeManager::LookupUnwindInfoIdx(uint32_t relativePc) return idx - 1; } - // We can only get here if called with invalid address or m_pRuntimeFunctionTable is corrupted. - // Either way we cannot recover as we expect every managed method to have a method info. - UNREACHABLE(); + // we can only get here if we are looking for a location inside the very last managed function. + _ASSERTE(m_pRuntimeFunctionTable[idx - 1].BeginAddress < relativePc); +#if defined(TARGET_AMD64) + _ASSERTE(m_pRuntimeFunctionTable[idx - 1].EndAddress > relativePc); +#endif + + return idx - 1; } bool CoffNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, From 90cace2c28f804f78466080d9909d6e3526d4faa Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sat, 23 Dec 2023 15:57:46 -0800 Subject: [PATCH 3/6] PR feedback --- .../Runtime/windows/CoffNativeCodeManager.cpp | 60 ++++++++++++------- .../Runtime/windows/CoffNativeCodeManager.h | 4 +- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index 79b1bdd6d01660..0ad12459aad6c6 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -179,25 +179,45 @@ CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase, : m_moduleBase(moduleBase), m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), m_pRuntimeFunctionTable(pRuntimeFunctionTable), m_nRuntimeFunctionTable(nRuntimeFunctionTable), - m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions) + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions), m_indexCount(0) +{ +} + +CoffNativeCodeManager::~CoffNativeCodeManager() +{ + for (uint32_t i = 0; i < m_indexCount; i++) + { + uint32_t* ptr = m_indices[i]; + if (ptr) + { + _aligned_free(ptr); + m_indices[i] = nullptr; + } + } + + m_indexCount = 0; +} + +bool CoffNativeCodeManager::InitFuncTableIndex() { // max offset is beyond the range of managed methods. - int maxOffset = (int)((TADDR)pvManagedCodeStartRange + cbManagedCodeRange - moduleBase); + int maxOffset = (int)((TADDR)m_pvManagedCodeStartRange + m_cbManagedCodeRange - m_moduleBase); // lets build the index for the runtime table. for every granule that has elements we will have an index entry - uint32_t indexSize = (nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1) / FUNCTABLE_INDEX_GRANULARITY; - uint32_t indexCount = 0; - uint32_t* index = m_indices[indexCount++] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); + uint32_t indexSize = (m_nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1) / FUNCTABLE_INDEX_GRANULARITY; + uint32_t* index = m_indices[m_indexCount] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); if (!index) - abort(); // can't allocate some modest amount of memory at startup + return false; + + m_indexCount++; // in every index N we will put the lowest value from the granule N + 1 // when we will scan the value N in the indices and see that it is higher than the target, we will know - // that the granule N must be scnned for the entry as the next granule will have higher addresses. + // that the granule N must be scanned for the entry as the next granule will have higher addresses. for (uint32_t i = 1; i < indexSize; i++) { - _ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < nRuntimeFunctionTable); - index[i - 1] = pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress; + _ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable); + index[i - 1] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress; } // we put the maxOffset at the end of the index. @@ -211,9 +231,11 @@ CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase, { uint32_t prevSize = indexSize; indexSize = (indexSize + INDEX_BRANCHING_FACTOR - 1) / INDEX_BRANCHING_FACTOR; - index = m_indices[indexCount++] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); + index = m_indices[m_indexCount] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); if (!index) - abort(); // can't allocate some modest amout of memory at startup + return false; + + m_indexCount++; for (uint32_t i = 1; i < indexSize; i++) { @@ -225,11 +247,7 @@ CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase, prevIdx = index; } - m_indexCount = indexCount; -} - -CoffNativeCodeManager::~CoffNativeCodeManager() -{ + return true; } struct CoffNativeMethodInfo @@ -257,15 +275,12 @@ int CoffNativeCodeManager::LookupUnwindInfoIdx(uint32_t relativePc) for (idx *= FUNCTABLE_INDEX_GRANULARITY; idx < m_nRuntimeFunctionTable; idx++) { uint32_t curAddr = m_pRuntimeFunctionTable[idx].BeginAddress; - if (curAddr == relativePc) - return idx; - if (curAddr > relativePc) return idx - 1; } // we can only get here if we are looking for a location inside the very last managed function. - _ASSERTE(m_pRuntimeFunctionTable[idx - 1].BeginAddress < relativePc); + _ASSERTE(m_pRuntimeFunctionTable[idx - 1].BeginAddress <= relativePc); #if defined(TARGET_AMD64) _ASSERTE(m_pRuntimeFunctionTable[idx - 1].EndAddress > relativePc); #endif @@ -999,7 +1014,7 @@ PTR_VOID CoffNativeCodeManager::GetAssociatedData(PTR_VOID ControlPC) } TADDR relativePC = dac_cast(ControlPC) - m_moduleBase; - int MethodIndex = LookupUnwindInfoIdx((uint32_t)relativePC); + int MethodIndex = LookupUnwindInfoIdx((uint32_t)relativePC); PTR_RUNTIME_FUNCTION pRuntimeFunction = m_pRuntimeFunctionTable + MethodIndex; @@ -1039,6 +1054,9 @@ bool RhRegisterOSModule(void * pModule, if (pCoffNativeCodeManager == nullptr) return false; + if (!pCoffNativeCodeManager->InitFuncTableIndex()) + return false; + RegisterCodeManager(pCoffNativeCodeManager, pvManagedCodeStartRange, cbManagedCodeRange); if (!RegisterUnboxingStubs(pvUnboxingStubsStartRange, cbUnboxingStubsRange)) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index 5d898215de9446..2f53c1f359b7bd 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -44,7 +44,7 @@ class CoffNativeCodeManager : public ICodeManager PTR_PTR_VOID m_pClasslibFunctions; uint32_t m_nClasslibFunctions; - int m_indexCount; + uint32_t m_indexCount; uint32_t* m_indices[8]; int LookupUnwindInfoIdx(uint32_t relativePc); @@ -56,6 +56,8 @@ class CoffNativeCodeManager : public ICodeManager PTR_PTR_VOID pClasslibFunctions, uint32_t nClasslibFunctions); ~CoffNativeCodeManager(); + bool InitFuncTableIndex(); + // // Code manager methods // From a985d18c0f5f5fd67a04f7615edca0293b18f4aa Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 24 Dec 2023 10:34:37 -0800 Subject: [PATCH 4/6] build index lazily --- .../Runtime/windows/CoffNativeCodeManager.cpp | 57 +++++++++++++------ .../Runtime/windows/CoffNativeCodeManager.h | 4 +- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index 0ad12459aad6c6..ea05cc1e9d01b7 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -173,13 +173,14 @@ static PTR_VOID GetUnwindDataBlob(TADDR moduleBase, PTR_RUNTIME_FUNCTION pRuntim #define INDEX_ALIGNMENT 64 CoffNativeCodeManager::CoffNativeCodeManager(TADDR moduleBase, - PTR_VOID pvManagedCodeStartRange, uint32_t cbManagedCodeRange, - PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, uint32_t nRuntimeFunctionTable, - PTR_PTR_VOID pClasslibFunctions, uint32_t nClasslibFunctions) + PTR_VOID pvManagedCodeStartRange, uint32_t cbManagedCodeRange, + PTR_RUNTIME_FUNCTION pRuntimeFunctionTable, uint32_t nRuntimeFunctionTable, + PTR_PTR_VOID pClasslibFunctions, uint32_t nClasslibFunctions) : m_moduleBase(moduleBase), m_pvManagedCodeStartRange(pvManagedCodeStartRange), m_cbManagedCodeRange(cbManagedCodeRange), m_pRuntimeFunctionTable(pRuntimeFunctionTable), m_nRuntimeFunctionTable(nRuntimeFunctionTable), - m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions), m_indexCount(0) + m_pClasslibFunctions(pClasslibFunctions), m_nClasslibFunctions(nClasslibFunctions), + m_initializedIndices(0), m_indexCount(0), m_indices{ 0 } { } @@ -198,12 +199,8 @@ CoffNativeCodeManager::~CoffNativeCodeManager() m_indexCount = 0; } -bool CoffNativeCodeManager::InitFuncTableIndex() +bool CoffNativeCodeManager::AllocFuncTableIndex() { - // max offset is beyond the range of managed methods. - int maxOffset = (int)((TADDR)m_pvManagedCodeStartRange + m_cbManagedCodeRange - m_moduleBase); - - // lets build the index for the runtime table. for every granule that has elements we will have an index entry uint32_t indexSize = (m_nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1) / FUNCTABLE_INDEX_GRANULARITY; uint32_t* index = m_indices[m_indexCount] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); if (!index) @@ -211,6 +208,31 @@ bool CoffNativeCodeManager::InitFuncTableIndex() m_indexCount++; + while (indexSize > INDEX_BRANCHING_FACTOR) + { + uint32_t prevSize = indexSize; + indexSize = (indexSize + INDEX_BRANCHING_FACTOR - 1) / INDEX_BRANCHING_FACTOR; + index = m_indices[m_indexCount] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); + if (!index) + return false; + + m_indexCount++; + } + + return true; +} + +NOINLINE +uint32_t** CoffNativeCodeManager::InitFuncTableIndex() +{ + // max offset is beyond the range of managed methods. + int maxOffset = (int)((TADDR)m_pvManagedCodeStartRange + m_cbManagedCodeRange - m_moduleBase); + + // lets build the index for the runtime table. for every granule that has elements we will have an index entry + uint32_t indexSize = (m_nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1) / FUNCTABLE_INDEX_GRANULARITY; + uint32_t indexCount = 0; + uint32_t* index = m_indices[indexCount++]; + // in every index N we will put the lowest value from the granule N + 1 // when we will scan the value N in the indices and see that it is higher than the target, we will know // that the granule N must be scanned for the entry as the next granule will have higher addresses. @@ -231,11 +253,7 @@ bool CoffNativeCodeManager::InitFuncTableIndex() { uint32_t prevSize = indexSize; indexSize = (indexSize + INDEX_BRANCHING_FACTOR - 1) / INDEX_BRANCHING_FACTOR; - index = m_indices[m_indexCount] = (uint32_t*)_aligned_malloc(indexSize * sizeof(uint32_t), INDEX_ALIGNMENT); - if (!index) - return false; - - m_indexCount++; + index = m_indices[indexCount++]; for (uint32_t i = 1; i < indexSize; i++) { @@ -247,7 +265,8 @@ bool CoffNativeCodeManager::InitFuncTableIndex() prevIdx = index; } - return true; + WriteRelease64((LONG64*)&m_initializedIndices, (LONG64)m_indices); + return m_initializedIndices; } struct CoffNativeMethodInfo @@ -262,10 +281,14 @@ static_assert(sizeof(CoffNativeMethodInfo) <= sizeof(MethodInfo), "CoffNativeMet int CoffNativeCodeManager::LookupUnwindInfoIdx(uint32_t relativePc) { + uint32_t** indices = m_initializedIndices; + if (!indices) + indices = InitFuncTableIndex(); + uint32_t idx = 0; for (int j = m_indexCount - 1; j >= 0; j--) { - uint32_t* index = m_indices[j]; + uint32_t* index = indices[j]; idx *= INDEX_BRANCHING_FACTOR; while ((uint32_t)index[idx] < relativePc) @@ -1054,7 +1077,7 @@ bool RhRegisterOSModule(void * pModule, if (pCoffNativeCodeManager == nullptr) return false; - if (!pCoffNativeCodeManager->InitFuncTableIndex()) + if (!pCoffNativeCodeManager->AllocFuncTableIndex()) return false; RegisterCodeManager(pCoffNativeCodeManager, pvManagedCodeStartRange, cbManagedCodeRange); diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index 2f53c1f359b7bd..d2a99834a0ff1f 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -44,6 +44,7 @@ class CoffNativeCodeManager : public ICodeManager PTR_PTR_VOID m_pClasslibFunctions; uint32_t m_nClasslibFunctions; + uint32_t** volatile m_initializedIndices; uint32_t m_indexCount; uint32_t* m_indices[8]; @@ -56,7 +57,8 @@ class CoffNativeCodeManager : public ICodeManager PTR_PTR_VOID pClasslibFunctions, uint32_t nClasslibFunctions); ~CoffNativeCodeManager(); - bool InitFuncTableIndex(); + bool AllocFuncTableIndex(); + uint32_t** InitFuncTableIndex(); // // Code manager methods From 17285888dc78d5ca871db338b657f18677ddb542 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 24 Dec 2023 15:27:20 -0800 Subject: [PATCH 5/6] make concurrent threads help with building the index --- .../Runtime/windows/CoffNativeCodeManager.cpp | 45 +++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index ea05cc1e9d01b7..e5408dd2fc191b 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -206,6 +206,7 @@ bool CoffNativeCodeManager::AllocFuncTableIndex() if (!index) return false; + memset(index, 0, indexSize * sizeof(uint32_t)); m_indexCount++; while (indexSize > INDEX_BRANCHING_FACTOR) @@ -216,6 +217,7 @@ bool CoffNativeCodeManager::AllocFuncTableIndex() if (!index) return false; + memset(index, 0, indexSize * sizeof(uint32_t)); m_indexCount++; } @@ -228,26 +230,46 @@ uint32_t** CoffNativeCodeManager::InitFuncTableIndex() // max offset is beyond the range of managed methods. int maxOffset = (int)((TADDR)m_pvManagedCodeStartRange + m_cbManagedCodeRange - m_moduleBase); + // It is possible to see several threads come here at once. + // We can spin-wait for one thread to do the work or just let all threads do the initialization. + // Either way it will take roughly the same time as for the first thread to complete the work. + // Yet we can make this complete faster if threads help each other by working on different + // parts of the index. + uint32_t perThreadBias = (uint32_t)(((size_t)&perThreadBias * 11400714819323198485ul) >> 32); + // lets build the index for the runtime table. for every granule that has elements we will have an index entry uint32_t indexSize = (m_nRuntimeFunctionTable + FUNCTABLE_INDEX_GRANULARITY - 1) / FUNCTABLE_INDEX_GRANULARITY; uint32_t indexCount = 0; uint32_t* index = m_indices[indexCount++]; - // in every index N we will put the lowest value from the granule N + 1 + // every index N will contain the lowest value from the granule N + 1 // when we will scan the value N in the indices and see that it is higher than the target, we will know - // that the granule N must be scanned for the entry as the next granule will have higher addresses. - for (uint32_t i = 1; i < indexSize; i++) + // that the granule N must be searched for the entry as the next granule will have higher addresses. + uint32_t start = (perThreadBias % indexSize) | 1; + for (uint32_t i = start; i < indexSize; i++) { - _ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable); - index[i - 1] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress; + if (index[i - 1] == 0) + { + _ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable); + index[i - 1] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress; + } + } + + for (uint32_t i = 1; i < start; i++) + { + if (index[i - 1] == 0) + { + _ASSERTE(i * FUNCTABLE_INDEX_GRANULARITY < m_nRuntimeFunctionTable); + index[i - 1] = m_pRuntimeFunctionTable[i * FUNCTABLE_INDEX_GRANULARITY].BeginAddress; + } } // we put the maxOffset at the end of the index. // there is no N + 1 granule to get the value from, so the last slot will contain the sentinel. index[indexSize - 1] = maxOffset; - // Now build the N-ary tree of indices. - // At branching factor 16 a program with 32K methods will have 3 sub-index levels. + // Now build an N-ary tree of indices. + // Example: at branching factor 16 a program with 32K methods will have 3 sub-index levels. uint32_t* prevIdx = index; while (indexSize > INDEX_BRANCHING_FACTOR) { @@ -255,7 +277,14 @@ uint32_t** CoffNativeCodeManager::InitFuncTableIndex() indexSize = (indexSize + INDEX_BRANCHING_FACTOR - 1) / INDEX_BRANCHING_FACTOR; index = m_indices[indexCount++]; - for (uint32_t i = 1; i < indexSize; i++) + start = (perThreadBias % indexSize) | 1; + for (uint32_t i = start; i < indexSize; i++) + { + _ASSERTE(i * INDEX_BRANCHING_FACTOR < prevSize); + index[i - 1] = prevIdx[i * INDEX_BRANCHING_FACTOR]; + } + + for (uint32_t i = 1; i < start; i++) { _ASSERTE(i * INDEX_BRANCHING_FACTOR < prevSize); index[i - 1] = prevIdx[i * INDEX_BRANCHING_FACTOR]; From 9c7152a0060ae549c13706933926d3dd614ad6d8 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 25 Dec 2023 11:13:36 -0800 Subject: [PATCH 6/6] couple tweaks and comments --- src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp | 1 + src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index e5408dd2fc191b..283926cabdedab 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -308,6 +308,7 @@ struct CoffNativeMethodInfo // Ensure that CoffNativeMethodInfo fits into the space reserved by MethodInfo static_assert(sizeof(CoffNativeMethodInfo) <= sizeof(MethodInfo), "CoffNativeMethodInfo too big"); +FORCEINLINE int CoffNativeCodeManager::LookupUnwindInfoIdx(uint32_t relativePc) { uint32_t** indices = m_initializedIndices; diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index d2a99834a0ff1f..b7ddd0fe6741a8 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -44,6 +44,8 @@ class CoffNativeCodeManager : public ICodeManager PTR_PTR_VOID m_pClasslibFunctions; uint32_t m_nClasslibFunctions; + // used to publish a reference to the index once initialized. + // if the reference is not null, the index can be accessed through it. uint32_t** volatile m_initializedIndices; uint32_t m_indexCount; uint32_t* m_indices[8];