From 825547ffad5920c6c2731383217e75801c47d354 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Sat, 23 Dec 2023 00:07:19 -0800 Subject: [PATCH 1/7] Update coredistools Update to the 1.2.0 version of the coredistools package. Also, implement a basic late disassembler for RyuJIT based on coredistools (previously, this depended on closed-source msvcdis). Various changes: 1. Remove unused AddressMap in SPMI. Removed a bunch of cases in the NearDiffer that were using it. 2. Change arm64 emitter unit tests to not require `verbose` 3. Always build in the late disassembler under DEBUG. It's only used when `JitLateDisasm` is set. 4. Implement new arm32 callback mechanism for movw/movt handling in the NearDiffer. --- src/coreclr/inc/coredistools.h | 86 ++++- src/coreclr/jit/codegenarm64test.cpp | 3 - src/coreclr/jit/codegenlinear.cpp | 2 +- src/coreclr/jit/disasm.cpp | 179 ++++++++- src/coreclr/jit/disasm.h | 22 +- src/coreclr/jit/jit.h | 19 +- .../tools/superpmi/superpmi-shared/agnostic.h | 6 - .../superpmi-shared/compileresult.cpp | 63 ---- .../superpmi/superpmi-shared/compileresult.h | 5 - .../superpmi/superpmi-shared/crlwmlist.h | 1 - .../superpmi-shared/methodcontext.cpp | 35 +- .../superpmi/superpmi-shared/methodcontext.h | 2 +- .../superpmi/superpmi-shared/spmiutil.cpp | 89 +++++ .../tools/superpmi/superpmi-shared/spmiutil.h | 7 +- .../tools/superpmi/superpmi/neardiffer.cpp | 355 ++++++++++-------- .../tools/superpmi/superpmi/neardiffer.h | 6 + 16 files changed, 572 insertions(+), 308 deletions(-) diff --git a/src/coreclr/inc/coredistools.h b/src/coreclr/inc/coredistools.h index 10e9687e67d447..5016e836f771ba 100644 --- a/src/coreclr/inc/coredistools.h +++ b/src/coreclr/inc/coredistools.h @@ -3,9 +3,9 @@ //===--------- coredistools.h - Dissassembly tools for CoreClr ------------===// // -// Core Disassembly Tools API Version 1.0.1-prerelease +// Core Disassembly Tools API Version 1.2.0 // Disassembly tools required by CoreCLR for utilities like -// GCStress and SuperPMI +// GCStress, SuperPMI, and R2RDump. //===----------------------------------------------------------------------===// #if !defined(_COREDISTOOLS_H_) @@ -26,6 +26,10 @@ #define DllIface EXTERN_C __declspec(dllimport) #endif // defined(DllInterfaceExporter) #else + +// Disable "warning: default label in switch which covers all enumeration values [-Wcovered-switch-default]" +#pragma clang diagnostic ignored "-Wcovered-switch-default" + #if !defined(__cdecl) #if defined(__i386__) #define __cdecl __attribute__((cdecl)) @@ -41,7 +45,8 @@ enum TargetArch { Target_X86, Target_X64, Target_Thumb, - Target_Arm64 + Target_Arm64, + Target_LoongArch64 }; struct CorDisasm; @@ -60,7 +65,7 @@ struct PrintControl { // The type of a custom function provided by the user to determine // if two offsets are considered equivalent wrt diffing code blocks. // Offset1 and Offset2 are the two offsets to be compared. -// BlockOffset is the offest of the instructions (that contain Offset1 +// BlockOffset is the offset of the instructions (that contain Offset1 // and Offset2) from the beginning of their respective code blocks. // InstructionLength is the length of the current instruction being // compared for equivalency. @@ -68,6 +73,20 @@ typedef bool(__cdecl *OffsetComparator)(const void *UserData, size_t BlockOffset size_t InstructionLength, uint64_t Offset1, uint64_t Offset2); +// If an OffsetMunger function is defined, it is called before the OffsetComparator. +// If it returns `true` then: +// 1. the instructions are considered equivalent +// 2. the offsets have been decoded and "munged" (changed), and +// *Offset1 and *Offset2 are set to the values to use. +// 3. *SkipInstructions1 instructions in code stream 1 are skipped +// 4. *SkipInstructions2 instructions in code stream 2 are skipped +// +// This is typically used on arm32 to treat "movw/movt" as a single instruction +// generating a single constant. Similarly, for arm64 mov/movk/movk/movk sequences. +typedef bool(__cdecl *OffsetMunger)(const void *UserData, size_t BlockOffset, + size_t InstructionLength, uint64_t* Offset1, uint64_t* Offset2, + uint32_t* SkipInstructions1, uint32_t* SkipInstructions2); + // The Export/Import definitions for CoreDistools library are defined below. // A typedef for each interface function's type is defined in order to aid // the importer. @@ -76,6 +95,10 @@ typedef bool(__cdecl *OffsetComparator)(const void *UserData, size_t BlockOffset typedef CorDisasm * __cdecl InitDisasm_t(enum TargetArch Target); DllIface InitDisasm_t InitDisasm; +// Initialize the disassembler, using buffered print controls +typedef CorDisasm * __cdecl InitBufferedDisasm_t(enum TargetArch Target); +DllIface InitBufferedDisasm_t InitBufferedDisasm; + // Initialize the disassembler using custom print controls typedef CorDisasm * __cdecl NewDisasm_t(enum TargetArch Target, const PrintControl *PControl); @@ -85,6 +108,28 @@ DllIface NewDisasm_t NewDisasm; typedef void __cdecl FinishDisasm_t(const CorDisasm *Disasm); DllIface FinishDisasm_t FinishDisasm; +// Initialize a code differ using buffered output. +typedef CorDisasm * __cdecl InitBufferedDiffer_t(enum TargetArch Target, + const OffsetComparator Comparator); +DllIface InitBufferedDiffer_t InitBufferedDiffer; + +// Initialize the Code Differ +typedef CorAsmDiff * __cdecl NewDiffer_t(enum TargetArch Target, + const PrintControl *PControl, + const OffsetComparator Comparator); +DllIface NewDiffer_t NewDiffer; + +// Initialize the Code Differ, with an offset munger. +typedef CorAsmDiff * __cdecl NewDiffer2_t(enum TargetArch Target, + const PrintControl *PControl, + const OffsetComparator Comparator, + const OffsetMunger Munger); +DllIface NewDiffer2_t NewDiffer2; + +// Delete the Code Differ +typedef void __cdecl FinishDiff_t(const CorAsmDiff *AsmDiff); +DllIface FinishDiff_t FinishDiff; + // DisasmInstruction -- Disassemble one instruction // Arguments: // Disasm -- The Disassembler @@ -100,22 +145,27 @@ typedef size_t __cdecl DisasmInstruction_t(const CorDisasm *Disasm, const uint8_t *Bytes, size_t Maxlength); DllIface DisasmInstruction_t DisasmInstruction; -// Initialize the Code Differ -typedef CorAsmDiff * __cdecl NewDiffer_t(enum TargetArch Target, - const PrintControl *PControl, - const OffsetComparator Comparator); -DllIface NewDiffer_t NewDiffer; - -// Delete the Code Differ -typedef void __cdecl FinishDiff_t(const CorAsmDiff *AsmDiff); -DllIface FinishDiff_t FinishDiff; +// DumpInstruction -- Disassemble one instruction and output it +// Arguments: +// Disasm -- The Disassembler +// Address -- The address at which the bytes of the instruction +// are intended to execute +// Bytes -- Pointer to the actual bytes which need to be disassembled +// MaxLength -- Number of bytes available in Bytes buffer +// Returns: +// -- The Size of the disassembled instruction +// -- Zero on failure +typedef size_t __cdecl DumpInstruction_t(const CorDisasm *Disasm, + const uint8_t *Address, const uint8_t *Bytes, + size_t Maxlength); +DllIface DumpInstruction_t DumpInstruction; // NearDiffCodeBlocks -- Compare two code blocks for semantic // equivalence // Arguments: // AsmDiff -- The Asm-differ // UserData -- Any data the user wishes to pass through into -// the OffsetComparator +// the OffsetComparator/OffsetMunger // Address1 -- Address at which first block will execute // Bytes1 -- Pointer to the actual bytes of the first block // Size1 -- The size of the first block @@ -145,4 +195,12 @@ typedef void __cdecl DumpDiffBlocks_t(const CorAsmDiff *AsmDiff, const uint8_t *Bytes2, size_t Size2); DllIface DumpDiffBlocks_t DumpDiffBlocks; +// Get a pointer to the buffered output buffer. +typedef const char* __cdecl GetOutputBuffer_t(); +DllIface GetOutputBuffer_t GetOutputBuffer; + +// Clear the buffered output buffer. +typedef void __cdecl ClearOutputBuffer_t(); +DllIface ClearOutputBuffer_t ClearOutputBuffer; + #endif // !defined(_COREDISTOOLS_H_) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2497b196edec7f..2dad168edd770a 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -24,7 +24,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ void CodeGen::genArm64EmitterUnitTestsGeneral() { - assert(verbose); emitter* theEmitter = GetEmitter(); // @@ -1824,7 +1823,6 @@ void CodeGen::genArm64EmitterUnitTestsGeneral() */ void CodeGen::genArm64EmitterUnitTestsAdvSimd() { - assert(verbose); emitter* theEmitter = GetEmitter(); //////////////////////////////////////////////////////////////////////////////// @@ -4557,7 +4555,6 @@ void CodeGen::genArm64EmitterUnitTestsAdvSimd() */ void CodeGen::genArm64EmitterUnitTestsSve() { - assert(verbose); emitter* theEmitter = GetEmitter(); // diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 962a7476c5363d..2fe96023d1e3a6 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2682,7 +2682,7 @@ void CodeGen::genEmitterUnitTests() { const WCHAR* unitTestSection = JitConfig.JitDumpEmitUnitTests(); - if ((!verbose) || (unitTestSection == nullptr)) + if (unitTestSection == nullptr) { return; } diff --git a/src/coreclr/jit/disasm.cpp b/src/coreclr/jit/disasm.cpp index fd5c98eb068810..9b301c94fcb478 100644 --- a/src/coreclr/jit/disasm.cpp +++ b/src/coreclr/jit/disasm.cpp @@ -2,13 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. /*********************************************************************** * -* File: dis.cpp +* File: disasm.cpp * - -* -* File Comments: -* -* This file handles disassembly. It is adapted from the MS linker. +* This file handles disassembly for the "late disassembler". * ***********************************************************************/ @@ -21,6 +17,11 @@ #ifdef LATE_DISASM /*****************************************************************************/ +#ifdef USE_COREDISTOOLS +// TODO: make this local to DisAssembler class and pass it on to coredistools? +FILE* disAsmFileCorDisTools; +#endif // USE_COREDISTOOLS + // Define DISASM_DEBUG to get verbose output of late disassembler inner workings. //#define DISASM_DEBUG #ifdef DISASM_DEBUG @@ -37,6 +38,8 @@ /*****************************************************************************/ +#ifdef USE_MSVCDIS + #define MAX_CLASSNAME_LENGTH 1024 #if defined(HOST_AMD64) @@ -75,7 +78,9 @@ linker, \ "/ALTERNATENAME:__imp_?PfncchfixupSet@DIS@@QAEP6GIPBV1@_KIPAGIPA_K@ZP6GI01I2I3@Z@Z=__imp_?PfncchfixupSet@DIS@@QAEP6GIPBV1@_KIPA_WIPA_K@ZP6GI01I2I3@Z@Z") -#endif +#endif // HOST_* + +#endif // USE_MSVCDIS /***************************************************************************** * Given an absolute address from the beginning of the code @@ -98,6 +103,8 @@ typedef struct codeBlk codeFix* cbFixupLst; } * codeBlkPtr; +#ifdef USE_MSVCDIS + /***************************************************************************** * The following is the callback for jump label and direct function calls fixups. * "addr" represents the address of jump that has to be @@ -856,6 +863,8 @@ size_t DisAssembler::disCchRegMember(const DIS* pdis, DIS::REGA reg, _In_reads_( #endif // 0 } +#endif // USE_MSVCDIS + /***************************************************************************** * Helper function to lazily create a map from code address to CORINFO_METHOD_HANDLE. */ @@ -892,6 +901,8 @@ AddrToAddrMap* DisAssembler::GetRelocationMap() return disRelocationMap; } +#ifdef USE_MSVCDIS + /***************************************************************************** * Return the count of bytes disassembled. */ @@ -1250,7 +1261,7 @@ void DisAssembler::DisasmBuffer(FILE* pfile, bool printit) #elif defined(TARGET_AMD64) pdis = DIS::PdisNew(DIS::distX8664); #elif defined(TARGET_ARM64) - pdis = DIS::PdisNew(DIS::distArm64); + pdis = DIS::PdisNew(DIS::distArm64); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -1337,6 +1348,8 @@ void DisAssembler::DisasmBuffer(FILE* pfile, bool printit) delete pdis; } +#endif // USE_MSVCDIS + /***************************************************************************** * Given a linear offset into the code, find a pointer to the actual code (either in the hot or cold section) * @@ -1418,12 +1431,12 @@ void DisAssembler::disSetMethod(size_t addr, CORINFO_METHOD_HANDLE methHnd) if (disComp->eeGetHelperNum(methHnd)) { DISASM_DUMP("Helper function: %p => %p\n", addr, methHnd); - GetHelperAddrToMethodHandleMap()->Set(addr, methHnd); + GetHelperAddrToMethodHandleMap()->Set(addr, methHnd, AddrToMethodHandleMap::SetKind::Overwrite); } else { DISASM_DUMP("Function: %p => %p\n", addr, methHnd); - GetAddrToMethodHandleMap()->Set(addr, methHnd); + GetAddrToMethodHandleMap()->Set(addr, methHnd, AddrToMethodHandleMap::SetKind::Overwrite); } } @@ -1517,6 +1530,7 @@ void DisAssembler::disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* coldCo disLabels = new (disComp, CMK_DebugOnly) BYTE[disTotalCodeSize](); DisasmBuffer(disAsmFile, /* printIt */ true); + fprintf(disAsmFile, "\n"); if (disAsmFile != jitstdout()) @@ -1544,6 +1558,145 @@ void DisAssembler::disOpenForLateDisAsm(const char* curMethodName, const char* c disCurClassName = curClassName; } +#ifdef USE_COREDISTOOLS + +// Helper functions to print messages from CoreDisTools Library +// The file/linenumber information is from this helper itself, +// since we are only linking with the CoreDisTools library. + +// Currently all loggers are the same +#define LOGGER(L) \ + \ +static void __cdecl CorDisToolsLog##L(const char* msg, ...) \ + \ +{ \ + va_list argList; \ + va_start(argList, msg); \ + vflogf(disAsmFileCorDisTools, msg, argList); \ + va_end(argList); \ + fprintf(disAsmFileCorDisTools, "\n"); \ + \ +} + +LOGGER(VERBOSE) +LOGGER(ERROR) +LOGGER(WARNING) + +const PrintControl CorPrinter = {CorDisToolsLogERROR, CorDisToolsLogWARNING, CorDisToolsLogVERBOSE, + CorDisToolsLogVERBOSE}; + +NewDisasm_t* g_PtrNewDisasm = nullptr; +DumpInstruction_t* g_PtrDumpInstruction = nullptr; +FinishDisasm_t* g_PtrFinishDisasm = nullptr; + +// Coredistools disassembler initialization. +// +// Returns true on success, false on failure. +// +bool DisAssembler::InitCoredistoolsDisasm() +{ + const WCHAR* coreDisToolsLibrary = MAKEDLLNAME_W("coredistools"); + +#ifdef TARGET_UNIX + // Unix will require the full path to coredistools. Assume that the + // location is next to the full path to the superpmi.so. + + WCHAR coreCLRLoadedPath[MAX_LONGPATH]; + HMODULE result = 0; + int returnVal = ::GetModuleFileNameW(result, coreCLRLoadedPath, MAX_LONGPATH); + + if (returnVal == 0) + { + printf("GetModuleFileNameW failed (0x%08x)", ::GetLastError()); + return false; + } + + WCHAR* ptr = (WCHAR*)u16_strrchr(coreCLRLoadedPath, '/'); + + // Move past the / character. + ptr = ptr + 1; + + const WCHAR* coreDisToolsLibraryName = MAKEDLLNAME_W("coredistools"); + ::wcscpy_s(ptr, &coreCLRLoadedPath[MAX_LONGPATH] - ptr, coreDisToolsLibraryName); + coreDisToolsLibrary = coreCLRLoadedPath; +#endif // TARGET_UNIX + + HMODULE hCoreDisToolsLib = ::LoadLibraryExW(coreDisToolsLibrary, NULL, 0); + if (hCoreDisToolsLib == 0) + { + printf("LoadLibrary(%s) failed (0x%08x)", MAKEDLLNAME_A("coredistools"), ::GetLastError()); + return false; + } + + g_PtrNewDisasm = (NewDisasm_t*)::GetProcAddress(hCoreDisToolsLib, "NewDisasm"); + if (g_PtrNewDisasm == nullptr) + { + printf("GetProcAddress 'NewDisasm' failed (0x%08x)", ::GetLastError()); + return false; + } + g_PtrDumpInstruction = (DumpInstruction_t*)::GetProcAddress(hCoreDisToolsLib, "DumpInstruction"); + if (g_PtrDumpInstruction == nullptr) + { + printf("GetProcAddress 'DumpInstruction' failed (0x%08x)", ::GetLastError()); + return false; + } + g_PtrFinishDisasm = (FinishDisasm_t*)::GetProcAddress(hCoreDisToolsLib, "FinishDisasm"); + if (g_PtrFinishDisasm == nullptr) + { + printf("GetProcAddress 'FinishDisasm' failed (0x%08x)", ::GetLastError()); + return false; + } + + TargetArch coreDisTargetArchitecture = Target_Host; + +#if defined(TARGET_ARM64) + coreDisTargetArchitecture = Target_Arm64; +#elif defined(TARGET_ARM) + coreDisTargetArchitecture = Target_Thumb; +#elif defined(TARGET_X86) + coreDisTargetArchitecture = Target_X86; +#elif defined(TARGET_AMD64) + coreDisTargetArchitecture = Target_X64; +#else +#error Unsupported target for LATE_DISASM with USE_COREDISTOOLS +#endif + + corDisasm = (*g_PtrNewDisasm)(coreDisTargetArchitecture, &CorPrinter); + + disAsmFileCorDisTools = nullptr; + + return true; +} + +void DisAssembler::DisasmBuffer(FILE* pfile, bool printit) +{ + disAsmFileCorDisTools = pfile; + + auto disasmRegion = [&](size_t executionAddress, size_t blockAddress, size_t blockSize) { + uint8_t* address = (uint8_t*)executionAddress; + uint8_t* codeBytes = (uint8_t*)blockAddress; + size_t codeSizeBytes = blockSize; + while (codeSizeBytes > 0) + { + size_t instrLen = (*g_PtrDumpInstruction)(corDisasm, address, codeBytes, codeSizeBytes); + if (instrLen == 0) + { + // error + break; + } + assert(instrLen <= codeSizeBytes); + address += instrLen; + codeBytes += instrLen; + codeSizeBytes -= instrLen; + } + }; + + disasmRegion(0, disHotCodeBlock, disHotCodeSize); + disasmRegion(disHotCodeSize, disColdCodeBlock, disColdCodeSize); +} + +#endif // USE_COREDISTOOLS + /*****************************************************************************/ void DisAssembler::disInit(Compiler* pComp) @@ -1557,6 +1710,12 @@ void DisAssembler::disInit(Compiler* pComp) disRelocationMap = nullptr; disDiffable = false; disAsmFile = nullptr; + +#ifdef USE_COREDISTOOLS + InitCoredistoolsDisasm(); + +// TODO: call g_PtrFinishDisasm sometime? +#endif // USE_COREDISTOOLS } /*****************************************************************************/ diff --git a/src/coreclr/jit/disasm.h b/src/coreclr/jit/disasm.h index d5f9fc72378919..b6553f9f6809ec 100644 --- a/src/coreclr/jit/disasm.h +++ b/src/coreclr/jit/disasm.h @@ -6,7 +6,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX DisAsm XX XX XX -XX The dis-assembler to display the native code generated XX +XX The "late disassembler" to display the native code generated XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -18,6 +18,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ #ifdef LATE_DISASM +#ifdef USE_COREDISTOOLS +#include "coredistools.h" +#endif // USE_COREDISTOOLS + +#ifdef USE_MSVCDIS + // free() is deprecated (we should only allocate and free memory through CLR hosting interfaces) // and is redefined in clrhost.h to cause a compiler error. // We don't call free(), but this function is mentioned in STL headers included by msvcdis.h @@ -55,6 +61,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ +#endif // USE_MSVCDIS + #ifdef HOST_64BIT template struct SizeTKeyFuncs : JitLargePrimitiveKeyFuncs @@ -176,6 +184,8 @@ class DisAssembler #pragma warning(pop) } +#ifdef USE_MSVCDIS + /* Callbacks from msdis */ static size_t __stdcall disCchAddr( @@ -220,6 +230,16 @@ class DisAssembler bool printit = false, bool dispOffs = false, bool dispCodeBytes = false); + +#endif // USE_MSVCDIS + +#ifdef USE_COREDISTOOLS + + bool InitCoredistoolsDisasm(); + + CorDisasm* corDisasm; + +#endif // USE_COREDISTOOLS }; /*****************************************************************************/ diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 3c29b6ae5c4330..d129db5edd39d3 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -304,6 +304,17 @@ typedef ptrdiff_t ssize_t; #include "utils.h" #include "targetosarch.h" +// Late disassembly is OFF by default. Can be turned ON by +// adding /DLATE_DISASM=1 on the command line. +// Always OFF in the non-debug version +#ifdef DEBUG +#define LATE_DISASM 1 +#define USE_COREDISTOOLS +#endif // DEBUG +#if defined(LATE_DISASM) && (LATE_DISASM == 0) +#undef LATE_DISASM +#endif + #ifdef DEBUG #define INDEBUG(x) x #define DEBUGARG(x) , x @@ -455,14 +466,6 @@ class GlobalJitOptions /*****************************************************************************/ -// Late disassembly is OFF by default. Can be turned ON by -// adding /DLATE_DISASM=1 on the command line. -// Always OFF in the non-debug version - -#if defined(LATE_DISASM) && (LATE_DISASM == 0) -#undef LATE_DISASM -#endif - /*****************************************************************************/ /*****************************************************************************/ diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 82bcb14d3a169d..ec8f9a9b04110c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -704,12 +704,6 @@ struct allocGCInfoDetails void* retval; }; -struct Agnostic_AddressMap -{ - DWORDLONG Address; - DWORD size; -}; - struct Agnostic_AllocGCInfo { DWORDLONG size; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp index f9ecfa487c4e13..3c6653c41a1c3a 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp @@ -935,23 +935,6 @@ void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG b { DWORDLONG key = tmp.target; int index = rc->mc->GetRelocTypeHint->GetIndex(key); - if (index == -1) - { - // See if the original address is in the replay address map. This happens for - // relocations on static field addresses found via getFieldInfo(). - void* origAddr = repAddressMap((void*)tmp.target); - if ((origAddr != (void*)-1) && (origAddr != nullptr)) - { - key = CastPointer(origAddr); - index = rc->mc->GetRelocTypeHint->GetIndex(key); - if (index != -1) - { - LogDebug(" Using address map: target %016" PRIX64 ", original target %016" PRIX64, - tmp.target, key); - } - } - } - if (index != -1) { WORD retVal = (WORD)rc->mc->GetRelocTypeHint->Get(key); @@ -1133,52 +1116,6 @@ const char* CompileResult::repProcessName() return nullptr; } -void CompileResult::recAddressMap(void* originalAddress, void* replayAddress, unsigned int size) -{ - if (AddressMap == nullptr) - AddressMap = new LightWeightMap(); - - Agnostic_AddressMap value; - - value.Address = CastPointer(originalAddress); - value.size = (DWORD)size; - - AddressMap->Add(CastPointer(replayAddress), value); -} -void CompileResult::dmpAddressMap(DWORDLONG key, const Agnostic_AddressMap& value) -{ - printf("AddressMap key %016" PRIX64 ", value addr-%016" PRIX64 ", size-%u", key, value.Address, value.size); -} -void* CompileResult::repAddressMap(void* replayAddress) -{ - if (AddressMap == nullptr) - return nullptr; - - int index = AddressMap->GetIndex(CastPointer(replayAddress)); - - if (index != -1) - { - Agnostic_AddressMap value; - value = AddressMap->Get(CastPointer(replayAddress)); - return (void*)value.Address; - } - - return nullptr; -} -void* CompileResult::searchAddressMap(void* newAddress) -{ - if (AddressMap == nullptr) - return (void*)-1; - for (unsigned int i = 0; i < AddressMap->GetCount(); i++) - { - DWORDLONG replayAddress = AddressMap->GetRawKeys()[i]; - Agnostic_AddressMap value = AddressMap->Get(replayAddress); - if ((replayAddress <= CastPointer(newAddress)) && (CastPointer(newAddress) < (replayAddress + value.size))) - return (void*)(value.Address + (CastPointer(newAddress) - replayAddress)); - } - return (void*)-1; -} - void CompileResult::recReserveUnwindInfo(BOOL isFunclet, BOOL isColdCode, ULONG unwindSize) { if (ReserveUnwindInfo == nullptr) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h index a4d129c3e0880a..b7be4dcd89279e 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h @@ -187,11 +187,6 @@ class CompileResult void dmpProcessName(DWORD key, DWORD value); const char* repProcessName(); - void recAddressMap(void* original_address, void* replay_address, unsigned int size); - void dmpAddressMap(DWORDLONG key, const Agnostic_AddressMap& value); - void* repAddressMap(void* replay_address); - void* searchAddressMap(void* replay_address); - void recReserveUnwindInfo(BOOL isFunclet, BOOL isColdCode, ULONG unwindSize); void dmpReserveUnwindInfo(DWORD key, const Agnostic_ReserveUnwindInfo& value); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/crlwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/crlwmlist.h index 60b6b8de44dea0..1fe5aa5dc0dda5 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/crlwmlist.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/crlwmlist.h @@ -16,7 +16,6 @@ #define DENSELWM(map, value) LWM(map, this_is_an_error, value) #endif -LWM(AddressMap, DWORDLONG, Agnostic_AddressMap) LWM(AllocGCInfo, DWORD, Agnostic_AllocGCInfo) LWM(AllocMem, DWORD, Agnostic_AllocMemDetails) DENSELWM(AllocUnwindInfo, Agnostic_AllocUnwindInfo) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index f5bfebab55301c..a891e6f0e55cad 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -1569,7 +1569,7 @@ void MethodContext::repGetCallInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken, if (pResult->kind == CORINFO_VIRTUALCALL_STUB) { - cr->CallTargetTypes->Add(CastPointer(pResult->codePointerLookup.constLookup.addr), + cr->CallTargetTypes->Add(CastPointer(pResult->stubLookup.constLookup.addr), (DWORD)CORINFO_VIRTUALCALL_STUB); } pResult->instParamLookup.accessType = (InfoAccessType)value.instParamLookup.accessType; @@ -6222,7 +6222,7 @@ WORD MethodContext::repGetRelocTypeHint(void* target) { DWORDLONG key = CastPointer(target); - if (GetRelocTypeHint == nullptr) + if ((GetRelocTypeHint == nullptr) || (GetRelocTypeHint->GetIndex(key) == -1)) { #ifdef sparseMC LogDebug("Sparse - repGetRelocTypeHint yielding fake answer..."); @@ -6231,42 +6231,17 @@ WORD MethodContext::repGetRelocTypeHint(void* target) LogException(EXCEPTIONCODE_MC, "Didn't find %016" PRIX64 "", key); #endif } - if (GetRelocTypeHint->GetIndex(key) == -1) - { - void* origAddr = cr->repAddressMap((void*)target); - if (origAddr != (void*)-1 && origAddr != nullptr) - { - if (GetRelocTypeHint->GetIndex(CastPointer(origAddr)) == -1) - target = origAddr; - } - else - { -#ifdef sparseMC - LogDebug("Sparse - repGetRelocTypeHint yielding fake answer..."); - return 65535; -#else - LogException(EXCEPTIONCODE_MC, "Didn't find %016" PRIX64 "", key); -#endif - } - } int index = GetRelocTypeHint->GetIndex(key); WORD retVal = 0; if (index == -1) { - void* subtarget = cr->searchAddressMap(target); - - int index2 = GetRelocTypeHint->GetIndex(CastPointer(subtarget)); - if (index2 == -1) - { - // __debugbreak(); // seems like a source of pain - retVal = IMAGE_REL_BASED_REL32; - } - else - retVal = (WORD)GetRelocTypeHint->Get(CastPointer(subtarget)); + retVal = IMAGE_REL_BASED_REL32; } else + { retVal = (WORD)GetRelocTypeHint->Get(key); + } DEBUG_REP(dmpGetRelocTypeHint(key, (DWORD)retVal)); return retVal; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h index 2af725f63f7aec..eac0d4190bd537 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h @@ -1038,7 +1038,7 @@ enum mcPackets //Packet_SatisfiesClassConstraints = 110, Packet_SatisfiesMethodConstraints = 111, Packet_DoesFieldBelongToClass = 112, - PacketCR_AddressMap = 113, + //PacketCR_AddressMap = 113, PacketCR_AllocGCInfo = 114, PacketCR_AllocMem = 115, PacketCR_CallLog = 116, diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index b21fc595293859..bb3c93e6e50b6c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -390,6 +390,95 @@ void PutArm64MovkConstant(UINT32* p, unsigned con) *p = (*p & ~(0xffff << 5)) | ((con & 0xffff) << 5); } +// GetArm32MovwConstant / GetArm32MovtConstant: Decode Arm32 movw / movt instructions, e.g.: +// 4b f2 33 40 movw r0, #46131 +// c0 f2 79 30 movt r0, #889 +// +// Return `true` if the instruction pointed to by `p` is a movw/movt, `false` otherwise. +// If true, fill out the target register in `*pReg`, constant in `*pCon`. + +bool GetArm32MovwConstant(UINT32* p, unsigned* pReg, unsigned* pCon) +{ + // This decodes the "MOV (immediate)" instruction, Encoding T3, from the ARM manual section A8.8.102. + if (!Is32BitThumb2Instruction((UINT16*)p)) + { + return false; + } + + // A Thumb-2 instruction is one or two 16-bit words (in little-endian format). + UINT16 instr1 = *(UINT16*)p; + UINT16 instr2 = *((UINT16*)p + 1); + UINT32 instr = (instr1 << 16) | instr2; + if ((instr & 0xfbf08000) != 0xf2400000) + { + return false; + } + + *pReg = (instr & 0xf00) >> 8; + *pCon = ExtractArm32MovImm(instr); + return true; +} + +bool GetArm32MovtConstant(UINT32* p, unsigned* pReg, unsigned* pCon) +{ + // This decodes the "MOVT" instruction, Encoding T1, from the ARM manual section A8.8.106. + if (!Is32BitThumb2Instruction((UINT16*)p)) + { + return false; + } + + // A Thumb-2 instruction is one or two 16-bit words (in little-endian format). + UINT16 instr1 = *(UINT16*)p; + UINT16 instr2 = *((UINT16*)p + 1); + UINT32 instr = (instr1 << 16) | instr2; + if ((instr & 0xfbf08000) != 0xf2C00000) + { + return false; + } + + *pReg = (instr & 0xf00) >> 8; + *pCon = ExtractArm32MovImm(instr); + return true; +} + +// Is the instruction we're pointing to an Arm32 (Thumb-2) 32-bit instruction? +bool Is32BitThumb2Instruction(UINT16* p) +{ + UINT16 instr1 = *p; + if ((instr1 & 0xf800) < 0xe800) + { + // Not a 32-bit instruction + return false; + } + return true; +} + +// Extract the immediate value from a movw/movt instruction encoding. +UINT32 ExtractArm32MovImm(UINT32 instr) +{ + UINT32 imm4 = (instr >> 16) & 0xf; + UINT32 i = (instr >> 26) & 0x1; + UINT32 imm3 = (instr >> 12) & 0x7; + UINT32 imm8 = instr & 0xff; + return (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; +} + +// PutArm32MovtConstant: set the constant field in an Arm32 `movt` instruction. +// `*p` points to a `movt` instruction. `con` must be a 16-bit constant. +void PutArm32MovtConstant(UINT32* p, unsigned con) +{ + UINT32 imm4 = (con >> 12) & 0xf; + UINT32 i = (con >> 11) & 0x1; + UINT32 imm3 = (con >> 8) & 0x7; + UINT32 imm8 = con & 0xff; + UINT16 instr1 = *(UINT16*)p; + UINT16 instr2 = *((UINT16*)p + 1); + UINT32 instr = (instr1 << 16) | instr2; + instr = (instr & 0xfbf08f00) | (imm4 << 16) | (i << 26) | (imm3 << 12) | imm8; + *(UINT16*)p = (UINT16)(instr >> 16); + *((UINT16*)p + 1) = (UINT16)instr; +} + template static std::string getFromPrinter(TPrint print) { diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h index 4b13202f5e2933..df54e0c5b3c08c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h @@ -85,9 +85,14 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24); bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon); bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift); - void PutArm64MovkConstant(UINT32* p, unsigned con); +bool GetArm32MovwConstant(UINT32* p, unsigned* pReg, unsigned* pCon); +bool GetArm32MovtConstant(UINT32* p, unsigned* pReg, unsigned* pCon); +bool Is32BitThumb2Instruction(UINT16* p); +UINT32 ExtractArm32MovImm(UINT32 instr); +void PutArm32MovtConstant(UINT32* p, unsigned con); + template inline constexpr unsigned ArrLen(T (&)[size]) { diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index 923d3d087015ef..987029940437e2 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -49,7 +49,7 @@ const PrintControl CorPrinter = {CorDisToolsLogERROR, CorDisToolsLogWARNING, Cor #endif // USE_COREDISTOOLS #ifdef USE_COREDISTOOLS -NewDiffer_t* g_PtrNewDiffer = nullptr; +NewDiffer2_t* g_PtrNewDiffer2 = nullptr; FinishDiff_t* g_PtrFinishDiff = nullptr; NearDiffCodeBlocks_t* g_PtrNearDiffCodeBlocks = nullptr; DumpDiffBlocks_t* g_PtrDumpDiffBlocks = nullptr; @@ -98,10 +98,10 @@ bool NearDiffer::InitAsmDiff() return false; } - g_PtrNewDiffer = (NewDiffer_t*)::GetProcAddress(hCoreDisToolsLib, "NewDiffer"); - if (g_PtrNewDiffer == nullptr) + g_PtrNewDiffer2 = (NewDiffer2_t*)::GetProcAddress(hCoreDisToolsLib, "NewDiffer2"); + if (g_PtrNewDiffer2 == nullptr) { - LogError("GetProcAddress 'NewDiffer' failed (0x%08x)", ::GetLastError()); + LogError("GetProcAddress 'NewDiffer2' failed (0x%08x)", ::GetLastError()); return false; } g_PtrFinishDiff = (FinishDiff_t*)::GetProcAddress(hCoreDisToolsLib, "FinishDiff"); @@ -124,6 +124,7 @@ bool NearDiffer::InitAsmDiff() } TargetArch coreDisTargetArchitecture = Target_Host; + OffsetMunger Munger = nullptr; // Only arm32 uses this feature currently if (TargetArchitecture != nullptr) { @@ -138,6 +139,7 @@ bool NearDiffer::InitAsmDiff() else if ((0 == _stricmp(TargetArchitecture, "arm")) || (0 == _stricmp(TargetArchitecture, "arm32"))) { coreDisTargetArchitecture = Target_Thumb; + Munger = NearDiffer::CoreDisMungeOffsetsCallback; } else if (0 == _stricmp(TargetArchitecture, "arm64")) { @@ -148,7 +150,8 @@ bool NearDiffer::InitAsmDiff() LogError("Illegal target architecture '%s'", TargetArchitecture); } } - corAsmDiff = (*g_PtrNewDiffer)(coreDisTargetArchitecture, &CorPrinter, NearDiffer::CoreDisCompareOffsetsCallback); + corAsmDiff = (*g_PtrNewDiffer2)(coreDisTargetArchitecture, &CorPrinter, + NearDiffer::CoreDisCompareOffsetsCallback, Munger); } #endif // USE_COREDISTOOLS @@ -162,6 +165,13 @@ bool __cdecl NearDiffer::CoreDisCompareOffsetsCallback( { return compareOffsets(payload, blockOffset, instrLen, offset1, offset2); } + +// static +bool __cdecl NearDiffer::CoreDisMungeOffsetsCallback( + const void* payload, size_t blockOffset, size_t instrLen, uint64_t* offset1, uint64_t* offset2, uint32_t* skip1, uint32_t* skip2) +{ + return mungeOffsets(payload, blockOffset, instrLen, offset1, offset2, skip1, skip2); +} #endif // USE_COREDISTOOLS // @@ -318,147 +328,91 @@ struct DiffData size_t otherCodeBlockSize2; }; -// -// NearDiff Offset Comparator. -// Determine whether two syntactically different constants are -// semantically equivalent, using certain heuristics. -// -bool NearDiffer::compareOffsets( - const void* payload, size_t blockOffset, size_t instrLen, uint64_t offset1, uint64_t offset2) +bool NearDiffer::mungeOffsets( + const void* payload, size_t blockOffset, size_t instrLen, uint64_t* offset1, uint64_t* offset2, uint32_t* skip1, uint32_t* skip2) { - // The trivial case - if (offset1 == offset2) - { - return true; - } - const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture(); - const DiffData* data = (const DiffData*)payload; - size_t ip1 = data->originalBlock1 + blockOffset; - size_t ip2 = data->originalBlock2 + blockOffset; - size_t ipRelOffset1 = ip1 + instrLen + (size_t)offset1; - size_t ipRelOffset2 = ip2 + instrLen + (size_t)offset2; - - // Case where we have a call into flat address -- the most common case. - size_t gOffset1 = ipRelOffset1; - size_t gOffset2 = ipRelOffset2; - if ((DWORD)gOffset1 == - (DWORD)gOffset2) // make sure the lower 32bits match (best we can do in the current replay form) - return true; - - // Case where we have an offset into the read only section (e.g. loading a float value) - size_t roOffset1a = (size_t)offset1 - data->originalDataBlock1; - size_t roOffset2a = (size_t)offset2 - data->originalDataBlock2; - if ((roOffset1a == roOffset2a) && - (roOffset1a < data->datablockSize1)) // Confirm its an offset that fits inside our RoRegion - return true; + const DiffData* data = (const DiffData*)payload; - // This case is written to catch IP-relative offsets to the RO data-section - // For example: + // On Thumb-2, we often have movw/movt pairs to generate a 32-bit address. Treat these pairs as + // a single instruction. // - size_t roOffset1b = ipRelOffset1 - data->originalDataBlock1; - size_t roOffset2b = ipRelOffset2 - data->originalDataBlock2; - if ((roOffset1b == roOffset2b) && - (roOffset1b < data->datablockSize1)) // Confirm its an offset that fits inside our RoRegion - return true; - - // Case where we push an address to our own code section. - size_t gOffset1a = (size_t)offset1 - data->originalBlock1; - size_t gOffset2a = (size_t)offset2 - data->originalBlock2; - if ((gOffset1a == gOffset2a) && (gOffset1a < data->blocksize1)) // Confirm its in our code region - return true; - - // Case where we push an address in the other codeblock. - size_t gOffset1b = (size_t)offset1 - data->otherCodeBlock1; - size_t gOffset2b = (size_t)offset2 - data->otherCodeBlock2; - if ((gOffset1b == gOffset2b) && (gOffset1b < data->otherCodeBlockSize1)) // Confirm it's in the other code region - return true; + if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM) + { + bool pair1 = false, pair2 = false; + unsigned reg1_w = 0, reg1_t; + unsigned reg2_w = 0, reg2_t; + unsigned con1_w, con1_t; + unsigned con2_w, con2_t; - // Case where we have an offset into the hot codeblock from the cold code block (why?) - size_t ocOffset1 = ipRelOffset1 - data->otherCodeBlock1; - size_t ocOffset2 = ipRelOffset2 - data->otherCodeBlock2; - if (ocOffset1 == ocOffset2) // Would be nice to check to see if it fits in the other code block - return true; + UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset); + UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset); + UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1); + UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2); - // In the below, it seems rather odd to pass artifacts from cr1 into queries for cr2. - // - // One would generally expect to ask if cr1->map(artifact1) == cr2->map(artifact2). - // Leaving things this way for now as it seems to work. + uint64_t addr1 = 0; + uint64_t addr2 = 0; - // VSD calling case. - size_t Offset1 = (ipRelOffset1 - 8); - if (data->cr2->CallTargetTypes->GetIndex((DWORDLONG)Offset1) != -1) - { - // This logging is too noisy, so disable it. - // LogVerbose("Found VSD callsite, did softer compare than ideal"); - return true; - } + // Look for a movw/movt address pattern in code stream 1. - // x86 VSD calling cases. - size_t Offset1b = (size_t)offset1 - 4; - size_t Offset2b = (size_t)offset2; - if (data->cr2->CallTargetTypes->GetIndex((DWORDLONG)Offset1b) != -1) - { - // This logging is too noisy, so disable it. - // LogVerbose("Found VSD callsite, did softer compare than ideal"); - return true; - } - if (data->cr2->CallTargetTypes->GetIndex((DWORDLONG)Offset2b) != -1) - { - // This logging is too noisy, so disable it. - // LogVerbose("Found VSD callsite, did softer compare than ideal"); - return true; - } + if ((iaddr1 < iaddr1end) && + GetArm32MovwConstant(iaddr1, ®1_w, &con1_w)) + { + if ((iaddr1 + 1 < iaddr1end) && + GetArm32MovtConstant(iaddr1 + 1, ®1_t, &con1_t) && + (reg1_w == reg1_t)) + { + addr1 = con1_w + (con1_t << 16); + pair1 = true; + } + } - // Case might be a field address that we handed out to handle inlined values being loaded into - // a register as an immediate value (and where the address is encoded as an indirect immediate load) - size_t realTargetAddr = (size_t)data->cr2->searchAddressMap((void*)gOffset2); - if (realTargetAddr == gOffset1) - return true; + // Look for a movw/movt address pattern in code stream 2. - // Case might be a field address that we handed out to handle inlined values being loaded into - // a register as an immediate value (and where the address is encoded and loaded by immediate into a register) - realTargetAddr = (size_t)data->cr2->searchAddressMap((void*)offset2); - if (realTargetAddr == offset1) - return true; - if (realTargetAddr == 0x424242) // this offset matches what we got back from a getTailCallCopyArgsThunk - return true; + if ((iaddr2 < iaddr2end) && + GetArm32MovwConstant(iaddr2, ®2_w, &con2_w)) + { + if ((iaddr2 + 1 < iaddr2end) && + GetArm32MovtConstant(iaddr2 + 1, ®2_t, &con2_t) && + (reg2_w == reg2_t)) + { + addr2 = con2_w + (con2_t << 16); + pair2 = true; + } + } - realTargetAddr = (size_t)data->cr2->searchAddressMap((void*)(gOffset2)); - if (realTargetAddr != (size_t)-1) // we know this was passed out as a bbloc - return true; + if (pair1 && pair2 && (reg1_w == reg2_w)) + { + // Use the constructed constant instead. + *offset1 = addr1; + *offset2 = addr2; + *skip1 = *skip2 = 1; // skip the `movt` instruction + return true; + } + } - // A new clause that tries to handle the case where neither cr1 or cr2 is the - // recorded CR (diff case with two jits, neither of which is the one used for - // collection) +#if 0 + // We might want to do something similar for mov/movk/movk/movk sequences on Arm64. The following code was + // previously used, but currently is not active. // - size_t mapped1 = (size_t)data->cr1->searchAddressMap((void*)offset1); - size_t mapped2 = (size_t)data->cr2->searchAddressMap((void*)offset2); - - if ((mapped1 == mapped2) && (mapped1 != (size_t)-1)) - return true; - - // There are some cases on arm64 where we generate multiple instruction register construction of addresses - // but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is - // allocPgoInstrumentationBySchema(), which returns an address the JIT writes into the code stream - // (used to store dynamic PGO probe data). + // One difference is we might see a different number of movk on arm64 depending on the the data. Our "hack" + // of zeroing out the constant so they compare equal doesn't work well. In this case, we really do want + // a callback to the disassembler to skip the instructions. // + // Another solution is to have the coredistools disassembler have this logic, but that seems like the wrong + // place for that logic, and it's hard to update coredistools. + // // The instruction sequence is something like this: // mov x0, #63408 // movk x0, #23602, lsl #16 // movk x0, #606, lsl #32 // - // Here, we try to match this sequence and look it up in the address map. - // // Since the mov/movk sequence is specific to the replay address constant, we don't assume the baseline // and diff have the same number of instructions (e.g., it's possible to skip a `movk` if it is zero). // - // Some version of this logic might apply to ARM as well. - // if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64) { - bool movk2_1 = false, movk3_1 = false; - bool movk2_2 = false, movk3_2 = false; + int numMovk_1 = 0, numMovk_2 = 0; unsigned reg1_1 = 0, reg2_1, reg3_1, reg4_1; unsigned reg1_2 = 0, reg2_2, reg3_2, reg4_2; @@ -472,8 +426,8 @@ bool NearDiffer::compareOffsets( UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1); UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2); - DWORDLONG addr1 = 0; - DWORDLONG addr2 = 0; + uint64_t addr1 = 0; + uint64_t addr2 = 0; // Look for a mov/movk address pattern in code stream 1. @@ -485,21 +439,22 @@ bool NearDiffer::compareOffsets( GetArm64MovkConstant(iaddr1 + 1, ®2_1, &con2_1, &shift2_1) && (reg1_1 == reg2_1)) { - addr1 = (DWORDLONG)con1_1 + ((DWORDLONG)con2_1 << shift2_1); + ++numMovk_1; + addr1 = (uint64_t)con1_1 + ((uint64_t)con2_1 << shift2_1); if ((iaddr1 + 2 < iaddr1end) && GetArm64MovkConstant(iaddr1 + 2, ®3_1, &con3_1, &shift3_1) && (reg1_1 == reg3_1)) { - movk2_1 = true; - addr1 += (DWORDLONG)con3_1 << shift3_1; + ++numMovk_1; + addr1 += (uint64_t)con3_1 << shift3_1; if ((iaddr1 + 3 < iaddr1end) && GetArm64MovkConstant(iaddr1 + 3, ®4_1, &con4_1, &shift4_1) && (reg1_1 == reg4_1)) { - movk3_1 = true; - addr1 += (DWORDLONG)con4_1 << shift4_1; + ++numMovk_1; + addr1 += (uint64_t)con4_1 << shift4_1; } } } @@ -515,62 +470,134 @@ bool NearDiffer::compareOffsets( GetArm64MovkConstant(iaddr2 + 1, ®2_2, &con2_2, &shift2_2) && (reg1_2 == reg2_2)) { - addr2 = (DWORDLONG)con1_2 + ((DWORDLONG)con2_2 << shift2_2); + ++numMovk_2; + addr2 = (uint64_t)con1_2 + ((uint64_t)con2_2 << shift2_2); if ((iaddr2 + 2 < iaddr2end) && GetArm64MovkConstant(iaddr2 + 2, ®3_2, &con3_2, &shift3_2) && (reg1_2 == reg3_2)) { - movk2_2 = true; - addr2 += (DWORDLONG)con3_2 << shift3_2; + ++numMovk_2; + addr2 += (uint64_t)con3_2 << shift3_2; if ((iaddr2 + 3 < iaddr2end) && GetArm64MovkConstant(iaddr2 + 3, ®4_2, &con4_2, &shift4_2) && (reg1_2 == reg4_2)) { - movk3_2 = true; - addr2 += (DWORDLONG)con4_2 << shift4_2; + ++numMovk_2; + addr2 += (uint64_t)con4_2 << shift4_2; } } } } - // Check the constants. We don't need to check 'addr1 == addr2' because if that were - // true we wouldn't have gotten here. - // // Note: when replaying on a 32-bit platform, we must have - // movk2_1 == movk2_2 == movk3_1 == movk3_2 == false + // numMovk_1 == numMovk_2 == 1 if ((addr1 != 0) && (addr2 != 0) && (reg1_1 == reg1_2)) { - DWORDLONG mapped1 = (DWORDLONG)data->cr1->searchAddressMap((void*)addr1); - DWORDLONG mapped2 = (DWORDLONG)data->cr2->searchAddressMap((void*)addr2); - if ((mapped1 == mapped2) && (mapped1 != (DWORDLONG)-1)) - { - // Now, zero out the constants in the `movk` instructions so when the disassembler - // gets to them, they compare equal. - PutArm64MovkConstant(iaddr1 + 1, 0); - PutArm64MovkConstant(iaddr2 + 1, 0); - if (movk2_1) - { - PutArm64MovkConstant(iaddr1 + 2, 0); - } - if (movk2_2) - { - PutArm64MovkConstant(iaddr2 + 2, 0); - } - if (movk3_1) - { - PutArm64MovkConstant(iaddr1 + 3, 0); - } - if (movk3_2) - { - PutArm64MovkConstant(iaddr2 + 3, 0); - } - return true; - } + offset1 = addr1; + offset2 = addr2; + + *skip1 = numMovk_1; + *skip2 = numMovk_2; + + return true; } } +#endif // 0 + + return false; +} + +// +// NearDiff Offset Comparator. +// Determine whether two syntactically different constants are +// semantically equivalent, using certain heuristics. +// +bool NearDiffer::compareOffsets( + const void* payload, size_t blockOffset, size_t instrLen, uint64_t offset1, uint64_t offset2) +{ + // The trivial case + if (offset1 == offset2) + { + return true; + } + + const DiffData* data = (const DiffData*)payload; + size_t ip1 = data->originalBlock1 + blockOffset; + size_t ip2 = data->originalBlock2 + blockOffset; + size_t ipRelOffset1 = ip1 + instrLen + (size_t)offset1; + size_t ipRelOffset2 = ip2 + instrLen + (size_t)offset2; + + // Case where we have a call into flat address -- the most common case. + size_t gOffset1 = ipRelOffset1; + size_t gOffset2 = ipRelOffset2; + if ((DWORD)gOffset1 == + (DWORD)gOffset2) // make sure the lower 32bits match (best we can do in the current replay form) + return true; + + // Case where we have an offset into the read only section (e.g. loading a float value) + size_t roOffset1a = (size_t)offset1 - data->originalDataBlock1; + size_t roOffset2a = (size_t)offset2 - data->originalDataBlock2; + if ((roOffset1a == roOffset2a) && + (roOffset1a < data->datablockSize1)) // Confirm its an offset that fits inside our RoRegion + return true; + + // Case for IP-relative offset to the RO data-section + size_t roOffset1b = ipRelOffset1 - data->originalDataBlock1; + size_t roOffset2b = ipRelOffset2 - data->originalDataBlock2; + if ((roOffset1b == roOffset2b) && + (roOffset1b < data->datablockSize1)) // Confirm its an offset that fits inside our RoRegion + return true; + + // Case where we have an address to our own code section. + size_t gOffset1a = (size_t)offset1 - data->originalBlock1; + size_t gOffset2a = (size_t)offset2 - data->originalBlock2; + if ((gOffset1a == gOffset2a) && (gOffset1a < data->blocksize1)) // Confirm its in our code region + return true; + + // Case where we have an address in the other codeblock. + size_t gOffset1b = (size_t)offset1 - data->otherCodeBlock1; + size_t gOffset2b = (size_t)offset2 - data->otherCodeBlock2; + if ((gOffset1b == gOffset2b) && (gOffset1b < data->otherCodeBlockSize1)) // Confirm it's in the other code region + return true; + + // Case where we have an offset into the hot codeblock from the cold code block (why?) + size_t ocOffset1 = ipRelOffset1 - data->otherCodeBlock1; + size_t ocOffset2 = ipRelOffset2 - data->otherCodeBlock2; + if (ocOffset1 == ocOffset2) // Would be nice to check to see if it fits in the other code block + return true; + + // In the below, it seems rather odd to pass artifacts from cr1 into queries for cr2. + // + // One would generally expect to ask if cr1->map(artifact1) == cr2->map(artifact2). + // Leaving things this way for now as it seems to work. + + // VSD calling case. + size_t Offset1 = (ipRelOffset1 - 8); + if (data->cr2->CallTargetTypes->GetIndex((DWORDLONG)Offset1) != -1) + { + // This logging is too noisy, so disable it. + // LogVerbose("Found VSD callsite, did softer compare than ideal"); + return true; + } + + // x86 VSD calling cases. + size_t Offset1b = (size_t)offset1 - 4; + size_t Offset2b = (size_t)offset2; + if (data->cr2->CallTargetTypes->GetIndex((DWORDLONG)Offset1b) != -1) + { + // This logging is too noisy, so disable it. + // LogVerbose("Found VSD callsite, did softer compare than ideal"); + return true; + } + if (data->cr2->CallTargetTypes->GetIndex((DWORDLONG)Offset2b) != -1) + { + // This logging is too noisy, so disable it. + // LogVerbose("Found VSD callsite, did softer compare than ideal"); + return true; + } return false; } diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.h b/src/coreclr/tools/superpmi/superpmi/neardiffer.h index 1353af27907e01..6e8ef32a805dfa 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.h +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.h @@ -71,11 +71,17 @@ class NearDiffer static bool compareOffsets( const void* payload, size_t blockOffset, size_t instrLen, uint64_t offset1, uint64_t offset2); + static bool mungeOffsets( + const void* payload, size_t blockOffset, size_t instrLen, uint64_t* offset1, uint64_t* offset2, uint32_t* skip1, uint32_t* skip2); + #ifdef USE_COREDISTOOLS static bool __cdecl CoreDisCompareOffsetsCallback( const void* payload, size_t blockOffset, size_t instrLen, uint64_t offset1, uint64_t offset2); + static bool __cdecl CoreDisMungeOffsetsCallback( + const void* payload, size_t blockOffset, size_t instrLen, uint64_t* offset1, uint64_t* offset2, uint32_t* skip1, uint32_t* skip2); + CorAsmDiff* corAsmDiff; #endif // USE_COREDISTOOLS From cea862c16b511678a5c8551d74931a5efcff0ab7 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Sat, 23 Dec 2023 00:18:40 -0800 Subject: [PATCH 2/7] Allow fallback to coredistools 1.1.0 --- .../tools/superpmi/superpmi/neardiffer.cpp | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index 987029940437e2..07ed29b3852169 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -27,16 +27,13 @@ static void LogFromCoreDisToolsHelper(LogLevel level, const char* msg, va_list a Logger::LogVprintf(__func__, __FILE__, __LINE__, level, argList, msg); } -#define LOGGER(L) \ - \ -static void __cdecl CorDisToolsLog##L(const char* msg, ...) \ - \ -{ \ - va_list argList; \ - va_start(argList, msg); \ - LogFromCoreDisToolsHelper(LOGLEVEL_##L, msg, argList); \ - va_end(argList); \ - \ +#define LOGGER(L) \ +static void __cdecl CorDisToolsLog##L(const char* msg, ...) \ +{ \ + va_list argList; \ + va_start(argList, msg); \ + LogFromCoreDisToolsHelper(LOGLEVEL_##L, msg, argList); \ + va_end(argList); \ } LOGGER(VERBOSE) @@ -49,6 +46,7 @@ const PrintControl CorPrinter = {CorDisToolsLogERROR, CorDisToolsLogWARNING, Cor #endif // USE_COREDISTOOLS #ifdef USE_COREDISTOOLS +NewDiffer_t* g_PtrNewDiffer = nullptr; // For temporary backwards-compatibility with older coredistools NewDiffer2_t* g_PtrNewDiffer2 = nullptr; FinishDiff_t* g_PtrFinishDiff = nullptr; NearDiffCodeBlocks_t* g_PtrNearDiffCodeBlocks = nullptr; @@ -98,11 +96,17 @@ bool NearDiffer::InitAsmDiff() return false; } + g_PtrNewDiffer = (NewDiffer_t*)::GetProcAddress(hCoreDisToolsLib, "NewDiffer"); + if (g_PtrNewDiffer == nullptr) + { + LogError("GetProcAddress 'NewDiffer' failed (0x%08x)", ::GetLastError()); + return false; + } g_PtrNewDiffer2 = (NewDiffer2_t*)::GetProcAddress(hCoreDisToolsLib, "NewDiffer2"); if (g_PtrNewDiffer2 == nullptr) { - LogError("GetProcAddress 'NewDiffer2' failed (0x%08x)", ::GetLastError()); - return false; + // Just a warning; fall back to NewDiffer + LogWarning("GetProcAddress 'NewDiffer2' failed (0x%08x)", ::GetLastError()); } g_PtrFinishDiff = (FinishDiff_t*)::GetProcAddress(hCoreDisToolsLib, "FinishDiff"); if (g_PtrFinishDiff == nullptr) @@ -150,8 +154,17 @@ bool NearDiffer::InitAsmDiff() LogError("Illegal target architecture '%s'", TargetArchitecture); } } - corAsmDiff = (*g_PtrNewDiffer2)(coreDisTargetArchitecture, &CorPrinter, - NearDiffer::CoreDisCompareOffsetsCallback, Munger); + + if (g_PtrNewDiffer2 == nullptr) + { + corAsmDiff = (*g_PtrNewDiffer)(coreDisTargetArchitecture, &CorPrinter, + NearDiffer::CoreDisCompareOffsetsCallback); + } + else + { + corAsmDiff = (*g_PtrNewDiffer2)(coreDisTargetArchitecture, &CorPrinter, + NearDiffer::CoreDisCompareOffsetsCallback, Munger); + } } #endif // USE_COREDISTOOLS From 475d189063335ca36f5c4586e0e03079ebe5ffe4 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Sat, 23 Dec 2023 10:45:52 -0800 Subject: [PATCH 3/7] Fixes --- src/coreclr/jit/codegenlinear.cpp | 4 ++-- src/coreclr/jit/disasm.cpp | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 2fe96023d1e3a6..c995a895be915a 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2668,8 +2668,8 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc) /***************************************************************************** * Unit testing of the emitter: If JitDumpEmitUnitTests is set, generate * a bunch of instructions, then: - * Use DOTNET_JitLateDisasm=* to see if the late disassembler thinks the instructions as the same as we do. - * Or, use DOTNET_JitRawHexCode and DOTNET_JitRawHexCodeFile and dissassemble the output file. + * Use DOTNET_JitLateDisasm=* to see if the late disassembler thinks the instructions are the same as we do. + * Or, use DOTNET_JitRawHexCode and DOTNET_JitRawHexCodeFile and disassemble the output file. * * Possible values for JitDumpEmitUnitTests: * Amd64: all, sse2 diff --git a/src/coreclr/jit/disasm.cpp b/src/coreclr/jit/disasm.cpp index 9b301c94fcb478..4eb90d34f1b57a 100644 --- a/src/coreclr/jit/disasm.cpp +++ b/src/coreclr/jit/disasm.cpp @@ -1480,7 +1480,7 @@ void DisAssembler::disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* coldCo #endif // !DEBUG #ifdef DEBUG - const wchar_t* fileName = JitConfig.JitLateDisasmTo(); + const WCHAR* fileName = JitConfig.JitLateDisasmTo(); if (fileName != nullptr) { errno_t ec = _wfopen_s(&disAsmFile, fileName, W("a+")); @@ -1566,16 +1566,13 @@ void DisAssembler::disOpenForLateDisAsm(const char* curMethodName, const char* c // Currently all loggers are the same #define LOGGER(L) \ - \ static void __cdecl CorDisToolsLog##L(const char* msg, ...) \ - \ -{ \ +{ \ va_list argList; \ va_start(argList, msg); \ vflogf(disAsmFileCorDisTools, msg, argList); \ va_end(argList); \ fprintf(disAsmFileCorDisTools, "\n"); \ - \ } LOGGER(VERBOSE) From 6cc6304a132d857d546d15071d29d9685c9f7514 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Sat, 23 Dec 2023 10:47:35 -0800 Subject: [PATCH 4/7] Formatting --- src/coreclr/jit/disasm.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/disasm.cpp b/src/coreclr/jit/disasm.cpp index 4eb90d34f1b57a..3b91e649466082 100644 --- a/src/coreclr/jit/disasm.cpp +++ b/src/coreclr/jit/disasm.cpp @@ -1566,14 +1566,14 @@ void DisAssembler::disOpenForLateDisAsm(const char* curMethodName, const char* c // Currently all loggers are the same #define LOGGER(L) \ -static void __cdecl CorDisToolsLog##L(const char* msg, ...) \ -{ \ + static void __cdecl CorDisToolsLog##L(const char* msg, ...) \ + { \ va_list argList; \ va_start(argList, msg); \ vflogf(disAsmFileCorDisTools, msg, argList); \ va_end(argList); \ fprintf(disAsmFileCorDisTools, "\n"); \ -} + } LOGGER(VERBOSE) LOGGER(ERROR) From 39af8d8c5b0a866ea563a041b5cf64dabac1cd9a Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Fri, 29 Dec 2023 14:12:49 -0800 Subject: [PATCH 5/7] Fixes 1. Fix Risc-V build: only build LATE_DISASM for platforms where coredistool support exists. 2. Defer load and initialization of coredistools.dll library for LATE_DISASM until we actually need it. 3. For arm64, in late disasm, if there is a failure, try to recover by skipping one 4-byte instruction. --- src/coreclr/inc/coredistools.h | 4 +- src/coreclr/jit/disasm.cpp | 41 +++++++++++++++++-- src/coreclr/jit/disasm.h | 1 + src/coreclr/jit/jit.h | 9 ++-- .../tools/superpmi/superpmi/neardiffer.cpp | 6 +-- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/src/coreclr/inc/coredistools.h b/src/coreclr/inc/coredistools.h index 5016e836f771ba..2f63178125b73c 100644 --- a/src/coreclr/inc/coredistools.h +++ b/src/coreclr/inc/coredistools.h @@ -1,9 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -//===--------- coredistools.h - Dissassembly tools for CoreClr ------------===// +//===--------- coredistools.h - Disassembly tools for CoreClr ------------===// // -// Core Disassembly Tools API Version 1.2.0 +// Core Disassembly Tools API Version 1.3.0 // Disassembly tools required by CoreCLR for utilities like // GCStress, SuperPMI, and R2RDump. //===----------------------------------------------------------------------===// diff --git a/src/coreclr/jit/disasm.cpp b/src/coreclr/jit/disasm.cpp index 3b91e649466082..fcdc8c6deba512 100644 --- a/src/coreclr/jit/disasm.cpp +++ b/src/coreclr/jit/disasm.cpp @@ -1471,6 +1471,10 @@ void DisAssembler::disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* coldCo return; } +#ifdef USE_COREDISTOOLS + InitCoredistoolsDisasm(); +#endif // USE_COREDISTOOLS + #ifdef DEBUG // Should we make it diffable? disDiffable = disComp->opts.dspDiffable; @@ -1592,6 +1596,15 @@ FinishDisasm_t* g_PtrFinishDisasm = nullptr; // bool DisAssembler::InitCoredistoolsDisasm() { + // TODO: call g_PtrFinishDisasm sometime? + if (disCoreDisToolsLibraryInitialized) + { + // We've already loaded the library and created a disassembler. + return true; + } + + disCoreDisToolsLibraryInitialized = true; // we only initialize once, even if it fails. + const WCHAR* coreDisToolsLibrary = MAKEDLLNAME_W("coredistools"); #ifdef TARGET_UNIX @@ -1669,6 +1682,9 @@ void DisAssembler::DisasmBuffer(FILE* pfile, bool printit) { disAsmFileCorDisTools = pfile; + unsigned errorCount = 0; + static const unsigned maxErrorCount = 50; + auto disasmRegion = [&](size_t executionAddress, size_t blockAddress, size_t blockSize) { uint8_t* address = (uint8_t*)executionAddress; uint8_t* codeBytes = (uint8_t*)blockAddress; @@ -1679,6 +1695,27 @@ void DisAssembler::DisasmBuffer(FILE* pfile, bool printit) if (instrLen == 0) { // error + ++errorCount; +#ifdef TARGET_ARM64 + // For arm64, with fixed-size instructions, try to recover by skipping one + // instruction and continuing. + if (codeSizeBytes >= 4) + { + CorDisToolsLogERROR("%x: %02x %02x %02x %02x", address, codeBytes[0], codeBytes[1], codeBytes[2], + codeBytes[3]); + address += 4; + codeBytes += 4; + codeSizeBytes -= 4; + } + if (errorCount < maxErrorCount) + { + continue; + } + else + { + CorDisToolsLogERROR("Too many failures"); + } +#endif // TARGET_ARM64 break; } assert(instrLen <= codeSizeBytes); @@ -1709,9 +1746,7 @@ void DisAssembler::disInit(Compiler* pComp) disAsmFile = nullptr; #ifdef USE_COREDISTOOLS - InitCoredistoolsDisasm(); - -// TODO: call g_PtrFinishDisasm sometime? + disCoreDisToolsLibraryInitialized = false; #endif // USE_COREDISTOOLS } diff --git a/src/coreclr/jit/disasm.h b/src/coreclr/jit/disasm.h index b6553f9f6809ec..9a81d3abd3764b 100644 --- a/src/coreclr/jit/disasm.h +++ b/src/coreclr/jit/disasm.h @@ -236,6 +236,7 @@ class DisAssembler #ifdef USE_COREDISTOOLS bool InitCoredistoolsDisasm(); + bool disCoreDisToolsLibraryInitialized = false; CorDisasm* corDisasm; diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index d129db5edd39d3..32e10789993d2e 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -304,13 +304,16 @@ typedef ptrdiff_t ssize_t; #include "utils.h" #include "targetosarch.h" -// Late disassembly is OFF by default. Can be turned ON by -// adding /DLATE_DISASM=1 on the command line. -// Always OFF in the non-debug version +// The late disassembler is built in for certain platforms, for DEBUG builds. It is enabled by using +// DOTNET_JitLateDisasm. It can be built in for non-DEBUG builds if desired. + +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_X86) || defined(TARGET_AMD64) #ifdef DEBUG #define LATE_DISASM 1 #define USE_COREDISTOOLS #endif // DEBUG +#endif // platforms + #if defined(LATE_DISASM) && (LATE_DISASM == 0) #undef LATE_DISASM #endif diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index 07ed29b3852169..f32ffa331a5b89 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -128,7 +128,7 @@ bool NearDiffer::InitAsmDiff() } TargetArch coreDisTargetArchitecture = Target_Host; - OffsetMunger Munger = nullptr; // Only arm32 uses this feature currently + OffsetMunger munger = nullptr; // Only arm32 uses this feature currently if (TargetArchitecture != nullptr) { @@ -143,7 +143,7 @@ bool NearDiffer::InitAsmDiff() else if ((0 == _stricmp(TargetArchitecture, "arm")) || (0 == _stricmp(TargetArchitecture, "arm32"))) { coreDisTargetArchitecture = Target_Thumb; - Munger = NearDiffer::CoreDisMungeOffsetsCallback; + munger = NearDiffer::CoreDisMungeOffsetsCallback; } else if (0 == _stricmp(TargetArchitecture, "arm64")) { @@ -163,7 +163,7 @@ bool NearDiffer::InitAsmDiff() else { corAsmDiff = (*g_PtrNewDiffer2)(coreDisTargetArchitecture, &CorPrinter, - NearDiffer::CoreDisCompareOffsetsCallback, Munger); + NearDiffer::CoreDisCompareOffsetsCallback, munger); } } #endif // USE_COREDISTOOLS From 518fc12164572b11d4f2d9037887e36f1c5e6efc Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Tue, 2 Jan 2024 19:51:12 -0800 Subject: [PATCH 6/7] Be smarter about initializing and uninitializing coredistools In particular, only load the coredistools library once, not once per function to disassemble. --- src/coreclr/jit/compiler.cpp | 6 ++ src/coreclr/jit/disasm.cpp | 139 ++++++++++++++++++++++++----------- src/coreclr/jit/disasm.h | 14 +++- 3 files changed, 115 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5948a966f3c6ef..05b676230c6ae0 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1994,6 +1994,12 @@ void Compiler::compInit(ArenaAllocator* pAlloc, void Compiler::compDone() { +#ifdef LATE_DISASM + if (codeGen != nullptr) + { + codeGen->getDisAssembler().disDone(); + } +#endif // LATE_DISASM } void* Compiler::compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ diff --git a/src/coreclr/jit/disasm.cpp b/src/coreclr/jit/disasm.cpp index fcdc8c6deba512..8d7111d015ce7d 100644 --- a/src/coreclr/jit/disasm.cpp +++ b/src/coreclr/jit/disasm.cpp @@ -18,8 +18,8 @@ /*****************************************************************************/ #ifdef USE_COREDISTOOLS -// TODO: make this local to DisAssembler class and pass it on to coredistools? -FILE* disAsmFileCorDisTools; +// The loggers have no way to pass a context variable, so this is a global. +FILE* g_disAsmFileCorDisTools; #endif // USE_COREDISTOOLS // Define DISASM_DEBUG to get verbose output of late disassembler inner workings. @@ -1574,9 +1574,9 @@ void DisAssembler::disOpenForLateDisAsm(const char* curMethodName, const char* c { \ va_list argList; \ va_start(argList, msg); \ - vflogf(disAsmFileCorDisTools, msg, argList); \ + vflogf(g_disAsmFileCorDisTools, msg, argList); \ va_end(argList); \ - fprintf(disAsmFileCorDisTools, "\n"); \ + fprintf(g_disAsmFileCorDisTools, "\n"); \ } LOGGER(VERBOSE) @@ -1586,74 +1586,110 @@ LOGGER(WARNING) const PrintControl CorPrinter = {CorDisToolsLogERROR, CorDisToolsLogWARNING, CorDisToolsLogVERBOSE, CorDisToolsLogVERBOSE}; -NewDisasm_t* g_PtrNewDisasm = nullptr; -DumpInstruction_t* g_PtrDumpInstruction = nullptr; -FinishDisasm_t* g_PtrFinishDisasm = nullptr; +LONG DisAssembler::s_disCoreDisToolsLibraryInitializing = 0; // 0 = not initializing; 1 = initializing +bool DisAssembler::s_disCoreDisToolsLibraryInitialized = false; +bool DisAssembler::s_disCoreDisToolsLibraryLoadSuccessful = false; +NewDisasm_t* DisAssembler::s_PtrNewDisasm = nullptr; +DumpInstruction_t* DisAssembler::s_PtrDumpInstruction = nullptr; +FinishDisasm_t* DisAssembler::s_PtrFinishDisasm = nullptr; -// Coredistools disassembler initialization. +// Load coredistools library and find the entrypoints. // // Returns true on success, false on failure. // -bool DisAssembler::InitCoredistoolsDisasm() +bool DisAssembler::InitCoredistoolsLibrary() { - // TODO: call g_PtrFinishDisasm sometime? - if (disCoreDisToolsLibraryInitialized) + while (InterlockedCompareExchange(&s_disCoreDisToolsLibraryInitializing, 1, 0) != 0) + { + // Wait for initialization to be complete + } + + if (s_disCoreDisToolsLibraryInitialized) { - // We've already loaded the library and created a disassembler. - return true; + // We've already loaded the library. Was it successful? + InterlockedExchange(&s_disCoreDisToolsLibraryInitializing, 0); // unlock initialization + return s_disCoreDisToolsLibraryLoadSuccessful; } - disCoreDisToolsLibraryInitialized = true; // we only initialize once, even if it fails. + s_disCoreDisToolsLibraryInitialized = true; // we only initialize once, even if it fails. + s_disCoreDisToolsLibraryLoadSuccessful = false; // assume the worst - const WCHAR* coreDisToolsLibrary = MAKEDLLNAME_W("coredistools"); + const WCHAR* coreDisToolsLibraryNameW = MAKEDLLNAME_W("coredistools"); + const CHAR* coreDisToolsLibraryNameA = MAKEDLLNAME_A("coredistools"); + + HMODULE hCoreDisToolsLib = NULL; #ifdef TARGET_UNIX // Unix will require the full path to coredistools. Assume that the - // location is next to the full path to the superpmi.so. + // location is next to the full path to the JIT. + + WCHAR coreCLRLoadedPath[MAX_LONGPATH]; + int returnVal = ::GetModuleFileNameW(NULL, coreCLRLoadedPath, MAX_LONGPATH); - WCHAR coreCLRLoadedPath[MAX_LONGPATH]; - HMODULE result = 0; - int returnVal = ::GetModuleFileNameW(result, coreCLRLoadedPath, MAX_LONGPATH); + WCHAR* ptr; + WCHAR* coreDisToolsLibrary; if (returnVal == 0) { printf("GetModuleFileNameW failed (0x%08x)", ::GetLastError()); - return false; + goto FinishedInitializing; } - WCHAR* ptr = (WCHAR*)u16_strrchr(coreCLRLoadedPath, '/'); + ptr = (WCHAR*)u16_strrchr(coreCLRLoadedPath, '/'); // Move past the / character. ptr = ptr + 1; - const WCHAR* coreDisToolsLibraryName = MAKEDLLNAME_W("coredistools"); - ::wcscpy_s(ptr, &coreCLRLoadedPath[MAX_LONGPATH] - ptr, coreDisToolsLibraryName); + ::wcscpy_s(ptr, &coreCLRLoadedPath[MAX_LONGPATH] - ptr, coreDisToolsLibraryNameW); coreDisToolsLibrary = coreCLRLoadedPath; -#endif // TARGET_UNIX +#else // TARGET_UNIX + const WCHAR* coreDisToolsLibrary = coreDisToolsLibraryNameW; +#endif // !TARGET_UNIX - HMODULE hCoreDisToolsLib = ::LoadLibraryExW(coreDisToolsLibrary, NULL, 0); + // We never call FreeLibrary because we don't unload it until the JIT itself is unloaded. + hCoreDisToolsLib = ::LoadLibraryExW(coreDisToolsLibrary, NULL, 0); if (hCoreDisToolsLib == 0) { - printf("LoadLibrary(%s) failed (0x%08x)", MAKEDLLNAME_A("coredistools"), ::GetLastError()); - return false; + printf("LoadLibrary(%s) failed (0x%08x)", coreDisToolsLibraryNameA, ::GetLastError()); + goto FinishedInitializing; } - g_PtrNewDisasm = (NewDisasm_t*)::GetProcAddress(hCoreDisToolsLib, "NewDisasm"); - if (g_PtrNewDisasm == nullptr) + s_PtrNewDisasm = (NewDisasm_t*)::GetProcAddress(hCoreDisToolsLib, "NewDisasm"); + if (s_PtrNewDisasm == nullptr) { printf("GetProcAddress 'NewDisasm' failed (0x%08x)", ::GetLastError()); - return false; + goto FinishedInitializing; } - g_PtrDumpInstruction = (DumpInstruction_t*)::GetProcAddress(hCoreDisToolsLib, "DumpInstruction"); - if (g_PtrDumpInstruction == nullptr) + s_PtrDumpInstruction = (DumpInstruction_t*)::GetProcAddress(hCoreDisToolsLib, "DumpInstruction"); + if (s_PtrDumpInstruction == nullptr) { printf("GetProcAddress 'DumpInstruction' failed (0x%08x)", ::GetLastError()); - return false; + goto FinishedInitializing; } - g_PtrFinishDisasm = (FinishDisasm_t*)::GetProcAddress(hCoreDisToolsLib, "FinishDisasm"); - if (g_PtrFinishDisasm == nullptr) + s_PtrFinishDisasm = (FinishDisasm_t*)::GetProcAddress(hCoreDisToolsLib, "FinishDisasm"); + if (s_PtrFinishDisasm == nullptr) { printf("GetProcAddress 'FinishDisasm' failed (0x%08x)", ::GetLastError()); + goto FinishedInitializing; + } + + s_disCoreDisToolsLibraryLoadSuccessful = true; // We made it! + +// done initializing + +FinishedInitializing: + InterlockedExchange(&s_disCoreDisToolsLibraryInitializing, 0); // unlock initialization + return s_disCoreDisToolsLibraryLoadSuccessful; +} + +// Coredistools disassembler initialization. +// +// Returns true on success, false on failure. +// +bool DisAssembler::InitCoredistoolsDisasm() +{ + if (!InitCoredistoolsLibrary()) + { return false; } @@ -1662,7 +1698,7 @@ bool DisAssembler::InitCoredistoolsDisasm() #if defined(TARGET_ARM64) coreDisTargetArchitecture = Target_Arm64; #elif defined(TARGET_ARM) - coreDisTargetArchitecture = Target_Thumb; + coreDisTargetArchitecture = Target_Thumb; #elif defined(TARGET_X86) coreDisTargetArchitecture = Target_X86; #elif defined(TARGET_AMD64) @@ -1671,16 +1707,27 @@ bool DisAssembler::InitCoredistoolsDisasm() #error Unsupported target for LATE_DISASM with USE_COREDISTOOLS #endif - corDisasm = (*g_PtrNewDisasm)(coreDisTargetArchitecture, &CorPrinter); - - disAsmFileCorDisTools = nullptr; + corDisasm = (*s_PtrNewDisasm)(coreDisTargetArchitecture, &CorPrinter); return true; } +// Coredistools disassembler shutdown +// +void DisAssembler::DoneCoredistoolsDisasm() +{ + if (corDisasm == nullptr) + { + return; + } + + (*s_PtrFinishDisasm)(corDisasm); + corDisasm = nullptr; +} + void DisAssembler::DisasmBuffer(FILE* pfile, bool printit) { - disAsmFileCorDisTools = pfile; + g_disAsmFileCorDisTools = pfile; unsigned errorCount = 0; static const unsigned maxErrorCount = 50; @@ -1691,7 +1738,7 @@ void DisAssembler::DisasmBuffer(FILE* pfile, bool printit) size_t codeSizeBytes = blockSize; while (codeSizeBytes > 0) { - size_t instrLen = (*g_PtrDumpInstruction)(corDisasm, address, codeBytes, codeSizeBytes); + size_t instrLen = (*s_PtrDumpInstruction)(corDisasm, address, codeBytes, codeSizeBytes); if (instrLen == 0) { // error @@ -1746,7 +1793,15 @@ void DisAssembler::disInit(Compiler* pComp) disAsmFile = nullptr; #ifdef USE_COREDISTOOLS - disCoreDisToolsLibraryInitialized = false; + g_disAsmFileCorDisTools = nullptr; + corDisasm = nullptr; +#endif // USE_COREDISTOOLS +} + +void DisAssembler::disDone() +{ +#ifdef USE_COREDISTOOLS + DoneCoredistoolsDisasm(); #endif // USE_COREDISTOOLS } diff --git a/src/coreclr/jit/disasm.h b/src/coreclr/jit/disasm.h index 9a81d3abd3764b..f56028b3ae4179 100644 --- a/src/coreclr/jit/disasm.h +++ b/src/coreclr/jit/disasm.h @@ -86,6 +86,9 @@ class DisAssembler // Constructor void disInit(Compiler* pComp); + // Destructor + void disDone(); + // Initialize the class for the current method being generated. void disOpenForLateDisAsm(const char* curMethodName, const char* curClassName, PCCOR_SIGNATURE sig); @@ -235,9 +238,16 @@ class DisAssembler #ifdef USE_COREDISTOOLS - bool InitCoredistoolsDisasm(); - bool disCoreDisToolsLibraryInitialized = false; + bool InitCoredistoolsLibrary(); // Load the coredistools library + static LONG s_disCoreDisToolsLibraryInitializing; // 0 = not initializing; 1 = initializing + static bool s_disCoreDisToolsLibraryInitialized; + static bool s_disCoreDisToolsLibraryLoadSuccessful; + static NewDisasm_t* s_PtrNewDisasm; + static DumpInstruction_t* s_PtrDumpInstruction; + static FinishDisasm_t* s_PtrFinishDisasm; + bool InitCoredistoolsDisasm(); // Prepare for disassembly + void DoneCoredistoolsDisasm(); // Done with disassembly CorDisasm* corDisasm; #endif // USE_COREDISTOOLS From a4bc6089be88ba8b3aa53d3768342a3829ee4ace Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Tue, 2 Jan 2024 20:05:44 -0800 Subject: [PATCH 7/7] Update to 1.4.0 --- src/coreclr/inc/coredistools.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/coreclr/inc/coredistools.h b/src/coreclr/inc/coredistools.h index 2f63178125b73c..f26693f98d513b 100644 --- a/src/coreclr/inc/coredistools.h +++ b/src/coreclr/inc/coredistools.h @@ -3,7 +3,7 @@ //===--------- coredistools.h - Disassembly tools for CoreClr ------------===// // -// Core Disassembly Tools API Version 1.3.0 +// Core Disassembly Tools API Version 1.4.0 // Disassembly tools required by CoreCLR for utilities like // GCStress, SuperPMI, and R2RDump. //===----------------------------------------------------------------------===// @@ -26,10 +26,6 @@ #define DllIface EXTERN_C __declspec(dllimport) #endif // defined(DllInterfaceExporter) #else - -// Disable "warning: default label in switch which covers all enumeration values [-Wcovered-switch-default]" -#pragma clang diagnostic ignored "-Wcovered-switch-default" - #if !defined(__cdecl) #if defined(__i386__) #define __cdecl __attribute__((cdecl))