From 9e6059ea8e62b7ca0e2c0a248fefca2cbfa3e2b9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Nov 2023 14:46:50 -0800 Subject: [PATCH 01/14] try regMask to unsigned __int128 for linux/arm64 --- src/coreclr/jit/target.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 638e55d56c7431..009a469cc1ed46 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -209,8 +209,14 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; +#elif defined(TARGET_ARM64) +#if defined(HOST_UNIX) +typedef unsigned __int128 regMaskTP; +#else +typedef unsigned __int64 regMaskTP; +#endif // HOST_UNIX #else typedef unsigned regMaskTP; #endif From 247de7e944bb0197eac3700b1e57ca110ba56e06 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Nov 2023 15:24:49 -0800 Subject: [PATCH 02/14] other fixes --- src/coreclr/jit/compiler.hpp | 20 ++++++++++++++++++++ src/coreclr/jit/target.h | 12 ++++++++++++ src/coreclr/jit/utils.cpp | 16 ++++++++++++++++ src/coreclr/jit/utils.h | 11 +++++++++++ 4 files changed, 59 insertions(+) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 47c9007ddee4fc..dd68c08ee99ca1 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -136,6 +136,14 @@ inline unsigned genLog2(unsigned __int64 value) return BitOperations::BitScanForward(value); } +#ifdef HOST_UNIX +inline unsigned genLog2(unsigned __int128 value) +{ + // assert(genExactlyOneBit(value)); + return BitOperations::BitScanForward(value); +} +#endif + #ifdef __APPLE__ inline unsigned genLog2(size_t value) { @@ -157,6 
+165,18 @@ inline unsigned uhi32(unsigned __int64 value) return static_cast(value >> 32); } +#ifdef HOST_UNIX +/***************************************************************************** + * + * A rather simple routine that counts the number of bits in a given number. + */ + +inline unsigned genCountBits(unsigned __int128 bits) +{ + return BitOperations::PopCount(static_cast(bits)); +} +#endif + /***************************************************************************** * * A rather simple routine that counts the number of bits in a given number. diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 009a469cc1ed46..66b831c0ff9e5f 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -80,7 +80,11 @@ inline bool compUnixX86Abi() #define CSE_CONST_SHARED_LOW_BITS 12 #elif defined(TARGET_ARM64) +#ifdef HOST_UNIX +#define REGMASK_BITS 128 +#else #define REGMASK_BITS 64 +#endif #define CSE_CONST_SHARED_LOW_BITS 12 #elif defined(TARGET_LOONGARCH64) @@ -139,7 +143,11 @@ enum _regNumber_enum : unsigned ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) }; +#ifdef HOST_UNIX +enum _regMask_enum : unsigned __int128 +#else enum _regMask_enum : unsigned __int64 +#endif { RBM_NONE = 0, #define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, @@ -234,7 +242,11 @@ typedef unsigned regMaskSmall; #define REG_MASK_INT_FMT "%08X" #define REG_MASK_ALL_FMT "%08X" #else +#if defined(HOST_UNIX) && defined(TARGET_ARM64) +typedef unsigned __int128 regMaskSmall; +#else typedef unsigned __int64 regMaskSmall; +#endif #define REG_MASK_INT_FMT "%04llX" #define REG_MASK_ALL_FMT "%016llX" #endif diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 099155e85f0b98..55edbcc9e198f5 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -3332,6 +3332,22 @@ uint32_t BitOperations::PopCount(uint64_t value) #endif } +#ifdef HOST_UNIX 
+//------------------------------------------------------------------------ +// BitOperations::PopCount: Returns the population count (number of bits set) of a mask. +// +// Arguments: +// value - the value +// +// Return Value: +// The population count (number of bits set) of value +// +uint32_t BitOperations::PopCount(unsigned __int128 value) +{ + return BitOperations::PopCount(static_cast(value)); +} +#endif + //------------------------------------------------------------------------ // BitOperations::ReverseBits: Reverses the bits in an integer value // diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 747daf9d719d46..247b0f1778541e 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -827,6 +827,13 @@ class BitOperations #endif } +#ifdef HOST_UNIX + FORCEINLINE static uint32_t BitScanForward(unsigned __int128 value) + { + return BitScanForward(static_cast(value)); + } +#endif + //------------------------------------------------------------------------ // BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit // (MSB) for a set bit (1) @@ -881,6 +888,10 @@ class BitOperations static uint32_t PopCount(uint64_t value); +#ifdef HOST_UNIX + static uint32_t PopCount(unsigned __int128 value); +#endif + static uint32_t ReverseBits(uint32_t value); static uint64_t ReverseBits(uint64_t value); From 193cdf1e8057ec986e283b2c7351cff1377174d4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Nov 2023 15:27:31 -0800 Subject: [PATCH 03/14] jit format --- src/coreclr/jit/target.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 66b831c0ff9e5f..cee4d1e8cc3214 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -226,7 +226,7 @@ typedef unsigned __int128 regMaskTP; typedef unsigned __int64 regMaskTP; #endif // HOST_UNIX #else -typedef unsigned regMaskTP; +typedef unsigned regMaskTP; 
#endif #if REGMASK_BITS == 8 @@ -234,7 +234,7 @@ typedef unsigned char regMaskSmall; #define REG_MASK_INT_FMT "%02X" #define REG_MASK_ALL_FMT "%02X" #elif REGMASK_BITS == 16 -typedef unsigned short regMaskSmall; +typedef unsigned short regMaskSmall; #define REG_MASK_INT_FMT "%04X" #define REG_MASK_ALL_FMT "%04X" #elif REGMASK_BITS == 32 From 8cbc94c7f6b6a42a45edf3419af8c024948675b5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 Nov 2023 22:38:17 +0000 Subject: [PATCH 04/14] add extra printf for BitScanForward --- src/coreclr/jit/utils.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 247b0f1778541e..e094fae7d3cda0 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -828,9 +828,25 @@ class BitOperations } #ifdef HOST_UNIX + static void print128x(unsigned __int128 n) + { + printf("%lx : ", static_cast(n)); + uint64_t lo = n; + uint64_t hi = (n >> 64); + if (hi) + { + printf("%lx", hi); + printf("%lx", lo); + } + printf("%lx", lo); + } + FORCEINLINE static uint32_t BitScanForward(unsigned __int128 value) { - return BitScanForward(static_cast(value)); + uint32_t result = BitScanForward(static_cast(value)); + print128x(value); + printf(", answer= %u\n", value, result); + return result; } #endif From 745d1da45470d9c328559c996d92ec888e315b8e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 Nov 2023 22:43:12 +0000 Subject: [PATCH 05/14] Add printf and fixes --- src/coreclr/jit/lsra.cpp | 6 +++++ src/coreclr/jit/lsra.h | 2 ++ src/coreclr/jit/lsraarm64.cpp | 43 +++++++++++++++++++++++++---------- src/coreclr/jit/lsrabuild.cpp | 11 ++++++++- src/coreclr/jit/utils.h | 8 ++++--- 5 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 391682418ed1e2..80796f11378f8d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4838,6 +4838,12 @@ void 
LinearScan::allocateRegisters() { RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; + + if (compiler->verbose && currentRefPosition.rpNum == 4) + { + // __builtin_debugtrap(); + printf("here\n"); + } // TODO: Can we combine this with the freeing of registers below? It might // mess with the dump, since this was previously being done before the call below // to dumpRegRecords. diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index d0f7aeb8369695..8387692a2e23ad 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -29,6 +29,8 @@ const unsigned int MaxLocation = UINT_MAX; const unsigned int MaxInternalRegisters = 8; const unsigned int RegisterTypeCount = 2; +#define UINT128(hi, lo) (((__uint128_t) (hi)) << 64 | (lo)) + /***************************************************************************** * Register types *****************************************************************************/ diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index aed2de96d5e306..437a19f5ce859b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -76,7 +76,9 @@ void LinearScan::assignConsecutiveRegisters(RefPosition* firstRefPosition, regNu assert(firstRefPosition->refType != RefTypeUpperVectorRestore); INDEBUG(int refPosCount = 1); - consecutiveRegsInUseThisLocation = (((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned); + consecutiveRegsInUseThisLocation = (UINT128(0, ((1ULL << firstRefPosition->regCount) - 1)) << firstRegAssigned); + printf("consecutiveRegsInUseThisLocation: "); + BitOperations::print128x(consecutiveRegsInUseThisLocation); while (consecutiveRefPosition != nullptr) { @@ -192,8 +194,12 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that. 
#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + regMaskTP selectionStartMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1); \ + printf("selectionStartMask: "); \ + BitOperations::print128x(selectionStartMask); \ + regMaskTP selectionEndMask = UINT128(0, (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1); \ + printf("selectionEndMask: "); \ + BitOperations::print128x(selectionEndMask); \ consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; @@ -201,12 +207,18 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, do { + printf("currAvailableRegs: "); + BitOperations::print128x(currAvailableRegs); // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(currAvailableRegs)); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; - + regAvailableStartIndex = BitOperations::BitScanForward(currAvailableRegs); + printf("regAvailableStartIndex: %u\n", regAvailableStartIndex); + regMaskTP startMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1); + printf("startMask: "); + BitOperations::print128x(startMask); // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + regMaskTP maskProcessed = UINT128(0, 0xFFFFFFFF) & ~(currAvailableRegs | startMask); + printf("maskProcessed: "); + BitOperations::print128x(maskProcessed); // From regAvailableStart, find the first unavailable register (bit `0`). 
if (maskProcessed == RBM_NONE) @@ -220,9 +232,12 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } else { - regAvailableEndIndex = BitOperations::BitScanForward(static_cast(maskProcessed)); + regAvailableEndIndex = BitOperations::BitScanForward(maskProcessed); } - regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + printf("regAvailableEndIndex: %u\n", regAvailableEndIndex); + regMaskTP endMask = UINT128(0, (1ULL << regAvailableEndIndex) - 1); + printf("endMask: "); + BitOperations::print128x(consecutiveRegsInUseThisLocation); // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available. // If they are equal to or greater than our register requirements, then add all of them to the result. @@ -328,11 +343,13 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC regMaskTP unprocessedRegs = consecutiveCandidates; unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; int maxSpillRegs = registersNeeded; - regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + regMaskTP registersNeededMask = UINT128(0, (1ULL << registersNeeded) - 1); + printf("registersNeededMask: "); + BitOperations::print128x(registersNeededMask); do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(unprocessedRegs)); + regAvailableStartIndex = BitOperations::BitScanForward(unprocessedRegs); // For the current range, find how many registers are free vs. 
busy regMaskTP maskForCurRange = RBM_NONE; @@ -356,7 +373,9 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC if (shouldCheckForRounding) { unsigned int roundedRegistersNeeded = registersNeeded - (63 - regAvailableStartIndex + 1); - maskForCurRange = (1ULL << roundedRegistersNeeded) - 1; + maskForCurRange = UINT128(0, (1ULL << roundedRegistersNeeded) - 1); + printf("maskForCurRange: "); + BitOperations::print128x(maskForCurRange); } maskForCurRange |= (registersNeededMask << regAvailableStartIndex); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 7d9803c645799d..55ac56e91e9d86 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2799,7 +2799,14 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } - if (availableRegCount < (sizeof(regMaskTP) * 8)) + if ((sizeof(regMaskTP) * 8) > 64) + { + // Mask out the bits that are between 64 ~ availableRegCount + // unsigned __int128 a = ((UINT128(1, 0) << 64) - 1); + unsigned __int64 b = ~0; + actualRegistersMask = b; + } + else if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount actualRegistersMask = (1ULL << availableRegCount) - 1; @@ -2808,6 +2815,8 @@ void LinearScan::buildIntervals() { actualRegistersMask = ~RBM_NONE; } + printf("actualRegistersMask: "); + BitOperations::print128x(actualRegistersMask); #ifdef DEBUG // Make sure we don't have any blocks that were not visited diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index e094fae7d3cda0..df5defd71072ed 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -838,14 +838,16 @@ class BitOperations printf("%lx", hi); printf("%lx", lo); } - printf("%lx", lo); + else + { + printf("%lx", lo); + } + printf("\n"); } FORCEINLINE static uint32_t BitScanForward(unsigned __int128 value) { uint32_t result = BitScanForward(static_cast(value)); - print128x(value); - printf(", 
answer= %u\n", value, result); return result; } #endif From 077d122671468110ea9aa55da7681ca335154572 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 Nov 2023 22:45:06 +0000 Subject: [PATCH 06/14] remove all printf --- src/coreclr/jit/lsraarm64.cpp | 20 -------------------- src/coreclr/jit/lsrabuild.cpp | 2 -- 2 files changed, 22 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 437a19f5ce859b..695f98c5389057 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -77,8 +77,6 @@ void LinearScan::assignConsecutiveRegisters(RefPosition* firstRefPosition, regNu INDEBUG(int refPosCount = 1); consecutiveRegsInUseThisLocation = (UINT128(0, ((1ULL << firstRefPosition->regCount) - 1)) << firstRegAssigned); - printf("consecutiveRegsInUseThisLocation: "); - BitOperations::print128x(consecutiveRegsInUseThisLocation); while (consecutiveRefPosition != nullptr) { @@ -195,11 +193,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // is safe to assign any of those registers, but not beyond that. 
#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ regMaskTP selectionStartMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1); \ - printf("selectionStartMask: "); \ - BitOperations::print128x(selectionStartMask); \ regMaskTP selectionEndMask = UINT128(0, (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1); \ - printf("selectionEndMask: "); \ - BitOperations::print128x(selectionEndMask); \ consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; @@ -207,18 +201,11 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, do { - printf("currAvailableRegs: "); - BitOperations::print128x(currAvailableRegs); // From LSB, find the first available register (bit `1`) regAvailableStartIndex = BitOperations::BitScanForward(currAvailableRegs); - printf("regAvailableStartIndex: %u\n", regAvailableStartIndex); regMaskTP startMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1); - printf("startMask: "); - BitOperations::print128x(startMask); // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. regMaskTP maskProcessed = UINT128(0, 0xFFFFFFFF) & ~(currAvailableRegs | startMask); - printf("maskProcessed: "); - BitOperations::print128x(maskProcessed); // From regAvailableStart, find the first unavailable register (bit `0`). if (maskProcessed == RBM_NONE) @@ -234,10 +221,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { regAvailableEndIndex = BitOperations::BitScanForward(maskProcessed); } - printf("regAvailableEndIndex: %u\n", regAvailableEndIndex); regMaskTP endMask = UINT128(0, (1ULL << regAvailableEndIndex) - 1); - printf("endMask: "); - BitOperations::print128x(consecutiveRegsInUseThisLocation); // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available. 
// If they are equal to or greater than our register requirements, then add all of them to the result. @@ -344,8 +328,6 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; int maxSpillRegs = registersNeeded; regMaskTP registersNeededMask = UINT128(0, (1ULL << registersNeeded) - 1); - printf("registersNeededMask: "); - BitOperations::print128x(registersNeededMask); do { // From LSB, find the first available register (bit `1`) @@ -374,8 +356,6 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC { unsigned int roundedRegistersNeeded = registersNeeded - (63 - regAvailableStartIndex + 1); maskForCurRange = UINT128(0, (1ULL << roundedRegistersNeeded) - 1); - printf("maskForCurRange: "); - BitOperations::print128x(maskForCurRange); } maskForCurRange |= (registersNeededMask << regAvailableStartIndex); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 55ac56e91e9d86..1f1866047a6d14 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2815,8 +2815,6 @@ void LinearScan::buildIntervals() { actualRegistersMask = ~RBM_NONE; } - printf("actualRegistersMask: "); - BitOperations::print128x(actualRegistersMask); #ifdef DEBUG // Make sure we don't have any blocks that were not visited From 5fddd8d345129a8c936851ddff75b05a708781ac Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Nov 2023 07:37:12 -0800 Subject: [PATCH 07/14] fix the build error --- src/coreclr/jit/lsra.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 8387692a2e23ad..34763ff0ed2a7b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -29,7 +29,11 @@ const unsigned int MaxLocation = UINT_MAX; const unsigned int MaxInternalRegisters = 8; const unsigned int RegisterTypeCount = 2; -#define UINT128(hi, lo) (((__uint128_t) (hi)) << 64 | 
(lo)) +#if defined(TARGET_ARM64) and defined(HOST_UNIX) +#define UINT128(hi, lo) (((__uint128_t)(hi)) << 64 | (lo)) +#else +#define UINT128(hi, lo) lo +#endif // TARGET_ARM64 && HOST_UNIX /***************************************************************************** * Register types From 956ac77ce6e15ae618766f7bbb538a2bd0ae200f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Nov 2023 07:38:27 -0800 Subject: [PATCH 08/14] jit format --- src/coreclr/jit/lsra.cpp | 1 - src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/utils.h | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 80796f11378f8d..1ebf2ba4c2397d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4838,7 +4838,6 @@ void LinearScan::allocateRegisters() { RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; - if (compiler->verbose && currentRefPosition.rpNum == 4) { // __builtin_debugtrap(); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 1f1866047a6d14..38017cf52ea5bf 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2803,7 +2803,7 @@ void LinearScan::buildIntervals() { // Mask out the bits that are between 64 ~ availableRegCount // unsigned __int128 a = ((UINT128(1, 0) << 64) - 1); - unsigned __int64 b = ~0; + unsigned __int64 b = ~0; actualRegistersMask = b; } else if (availableRegCount < (sizeof(regMaskTP) * 8)) diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index df5defd71072ed..54957672a4440a 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -841,10 +841,10 @@ class BitOperations else { printf("%lx", lo); - } + } printf("\n"); } - + FORCEINLINE static uint32_t BitScanForward(unsigned __int128 value) { uint32_t result = BitScanForward(static_cast(value)); From 3d72de5f3e2627319ee233a0a9bf3bb481a2b3c3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Nov 2023 07:51:05 -0800 
Subject: [PATCH 09/14] remove a print statement --- src/coreclr/jit/lsra.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 1ebf2ba4c2397d..391682418ed1e2 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4838,11 +4838,6 @@ void LinearScan::allocateRegisters() { RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; - if (compiler->verbose && currentRefPosition.rpNum == 4) - { - // __builtin_debugtrap(); - printf("here\n"); - } // TODO: Can we combine this with the freeing of registers below? It might // mess with the dump, since this was previously being done before the call below // to dumpRegRecords. From 5540aaa3df7ea35eae7cd60a8b1fee86127641d8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Nov 2023 08:11:40 -0800 Subject: [PATCH 10/14] add another missing ifdef --- src/coreclr/jit/lsrabuild.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 38017cf52ea5bf..a447e57005a385 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2799,6 +2799,7 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } +#if defined(HOST_UNIX) && defined(TARGET_ARM64) if ((sizeof(regMaskTP) * 8) > 64) { // Mask out the bits that are between 64 ~ availableRegCount @@ -2806,7 +2807,9 @@ void LinearScan::buildIntervals() unsigned __int64 b = ~0; actualRegistersMask = b; } - else if (availableRegCount < (sizeof(regMaskTP) * 8)) + else +#endif // HOST_UNIX && TARGET_ARM64 + if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount actualRegistersMask = (1ULL << availableRegCount) - 1; From c6337257ce6ed2c47b11582297c79f68fa33c686 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Nov 2023 08:59:34 -0800 Subject: [PATCH 11/14] Add HAS_PRIMITIVE_128 --- src/coreclr/jit/compiler.hpp | 17 
++++++++--------- src/coreclr/jit/jit.h | 6 ++++++ src/coreclr/jit/lsra.h | 4 ++-- src/coreclr/jit/lsrabuild.cpp | 4 ++-- src/coreclr/jit/target.h | 14 +++++++------- src/coreclr/jit/utils.cpp | 4 ++-- src/coreclr/jit/utils.h | 8 ++++---- 7 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index dd68c08ee99ca1..bfd85cc66af9c2 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -136,14 +136,6 @@ inline unsigned genLog2(unsigned __int64 value) return BitOperations::BitScanForward(value); } -#ifdef HOST_UNIX -inline unsigned genLog2(unsigned __int128 value) -{ - // assert(genExactlyOneBit(value)); - return BitOperations::BitScanForward(value); -} -#endif - #ifdef __APPLE__ inline unsigned genLog2(size_t value) { @@ -165,7 +157,14 @@ inline unsigned uhi32(unsigned __int64 value) return static_cast(value >> 32); } -#ifdef HOST_UNIX +#if HAS_PRIMITIVE_128 + +inline unsigned genLog2(unsigned __int128 value) +{ + // assert(genExactlyOneBit(value)); + return BitOperations::BitScanForward(value); +} + /***************************************************************************** * * A rather simple routine that counts the number of bits in a given number. 
diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 3c29b6ae5c4330..5a59c7f6d62d82 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -757,6 +757,12 @@ inline size_t unsigned_abs(__int64 x) #define FEATURE_LOOP_ALIGN 0 #endif +#if defined(TARGET_ARM64) && defined(HOST_UNIX) +#define HAS_PRIMITIVE_128 1 +#else +#define HAS_PRIMITIVE_128 0 +#endif + #define CLFLG_MAXOPT \ (CLFLG_CSE | CLFLG_REGVAR | CLFLG_RNGCHKOPT | CLFLG_DEADSTORE | CLFLG_CODEMOTION | CLFLG_QMARK | CLFLG_TREETRANS | \ CLFLG_INLINING | CLFLG_STRUCTPROMOTE) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 34763ff0ed2a7b..4eb8bc101fb690 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -29,11 +29,11 @@ const unsigned int MaxLocation = UINT_MAX; const unsigned int MaxInternalRegisters = 8; const unsigned int RegisterTypeCount = 2; -#if defined(TARGET_ARM64) and defined(HOST_UNIX) +#if HAS_PRIMITIVE_128 #define UINT128(hi, lo) (((__uint128_t)(hi)) << 64 | (lo)) #else #define UINT128(hi, lo) lo -#endif // TARGET_ARM64 && HOST_UNIX +#endif // HAS_PRIMITIVE_128 /***************************************************************************** * Register types diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index a447e57005a385..67f3a232e0e8da 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2799,7 +2799,7 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } -#if defined(HOST_UNIX) && defined(TARGET_ARM64) +#if HAS_PRIMITIVE_128 if ((sizeof(regMaskTP) * 8) > 64) { // Mask out the bits that are between 64 ~ availableRegCount @@ -2808,7 +2808,7 @@ void LinearScan::buildIntervals() actualRegistersMask = b; } else -#endif // HOST_UNIX && TARGET_ARM64 +#endif // HAS_PRIMITIVE_128 if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 
cee4d1e8cc3214..70c4791b4e0562 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -80,11 +80,11 @@ inline bool compUnixX86Abi() #define CSE_CONST_SHARED_LOW_BITS 12 #elif defined(TARGET_ARM64) -#ifdef HOST_UNIX +#if HAS_PRIMITIVE_128 #define REGMASK_BITS 128 #else #define REGMASK_BITS 64 -#endif +#endif // HAS_PRIMITIVE_128 #define CSE_CONST_SHARED_LOW_BITS 12 #elif defined(TARGET_LOONGARCH64) @@ -143,11 +143,11 @@ enum _regNumber_enum : unsigned ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) }; -#ifdef HOST_UNIX +#if HAS_PRIMITIVE_128 enum _regMask_enum : unsigned __int128 #else enum _regMask_enum : unsigned __int64 -#endif +#endif // HAS_PRIMITIVE_128 { RBM_NONE = 0, #define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, @@ -220,11 +220,11 @@ enum _regMask_enum : unsigned #if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; #elif defined(TARGET_ARM64) -#if defined(HOST_UNIX) +#if HAS_PRIMITIVE_128 typedef unsigned __int128 regMaskTP; #else typedef unsigned __int64 regMaskTP; -#endif // HOST_UNIX +#endif // HAS_PRIMITIVE_128 #else typedef unsigned regMaskTP; #endif @@ -242,7 +242,7 @@ typedef unsigned regMaskSmall; #define REG_MASK_INT_FMT "%08X" #define REG_MASK_ALL_FMT "%08X" #else -#if defined(HOST_UNIX) && defined(TARGET_ARM64) +#if HAS_PRIMITIVE_128 typedef unsigned __int128 regMaskSmall; #else typedef unsigned __int64 regMaskSmall; diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 55edbcc9e198f5..685896690004e2 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -3332,7 +3332,7 @@ uint32_t BitOperations::PopCount(uint64_t value) #endif } -#ifdef HOST_UNIX +#if HAS_PRIMITIVE_128 //------------------------------------------------------------------------ // BitOperations::PopCount: Returns the population count (number of bits set) of a mask. 
// @@ -3346,7 +3346,7 @@ uint32_t BitOperations::PopCount(unsigned __int128 value) { return BitOperations::PopCount(static_cast(value)); } -#endif +#endif // HAS_PRIMITIVE_128 //------------------------------------------------------------------------ // BitOperations::ReverseBits: Reverses the bits in an integer value diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 54957672a4440a..8a5441970556bf 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -827,7 +827,7 @@ class BitOperations #endif } -#ifdef HOST_UNIX +#if HAS_PRIMITIVE_128 static void print128x(unsigned __int128 n) { printf("%lx : ", static_cast(n)); @@ -850,7 +850,7 @@ class BitOperations uint32_t result = BitScanForward(static_cast(value)); return result; } -#endif +#endif // HAS_PRIMITIVE_128 //------------------------------------------------------------------------ // BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit @@ -906,9 +906,9 @@ class BitOperations static uint32_t PopCount(uint64_t value); -#ifdef HOST_UNIX +#if HAS_PRIMITIVE_128 static uint32_t PopCount(unsigned __int128 value); -#endif +#endif // HAS_PRIMITIVE_128 static uint32_t ReverseBits(uint32_t value); From aeebfd00959f277ad47c3b543d1aee9782e5d8da Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Nov 2023 09:02:48 -0800 Subject: [PATCH 12/14] jit format --- src/coreclr/jit/compiler.hpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index bfd85cc66af9c2..963c2b63e46da3 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -174,7 +174,7 @@ inline unsigned genCountBits(unsigned __int128 bits) { return BitOperations::PopCount(static_cast(bits)); } -#endif +#endif // HAS_PRIMITIVE_128 /***************************************************************************** * diff --git 
a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 67f3a232e0e8da..3016434192d502 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2809,7 +2809,7 @@ void LinearScan::buildIntervals() } else #endif // HAS_PRIMITIVE_128 - if (availableRegCount < (sizeof(regMaskTP) * 8)) + if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount actualRegistersMask = (1ULL << availableRegCount) - 1; From 50c9c62e45d1b6987f541f3de5121b470e63c9b3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Nov 2023 10:13:50 -0800 Subject: [PATCH 13/14] #define is not detected in utils.h --- src/coreclr/jit/utils.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 8a5441970556bf..05ae66d36b2836 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -827,7 +827,7 @@ class BitOperations #endif } -#if HAS_PRIMITIVE_128 +#if defined(TARGET_ARM64) && defined(HOST_UNIX) static void print128x(unsigned __int128 n) { printf("%lx : ", static_cast(n)); @@ -850,7 +850,7 @@ class BitOperations uint32_t result = BitScanForward(static_cast(value)); return result; } -#endif // HAS_PRIMITIVE_128 +#endif // TARGET_ARM64 && HOST_UNIX //------------------------------------------------------------------------ // BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit @@ -906,9 +906,9 @@ class BitOperations static uint32_t PopCount(uint64_t value); -#if HAS_PRIMITIVE_128 +#if defined(TARGET_ARM64) && defined(HOST_UNIX) static uint32_t PopCount(unsigned __int128 value); -#endif // HAS_PRIMITIVE_128 +#endif // TARGET_ARM64 && HOST_UNIX static uint32_t ReverseBits(uint32_t value); From d013337fc193457282dccdadfcf23a67eba4c102 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 16 Nov 2023 00:25:08 -0800 Subject: [PATCH 14/14] actually enable __int128 --- 
src/coreclr/jit/compiler.hpp | 4 ++-- src/coreclr/jit/jit.h | 6 ------ src/coreclr/jit/target.h | 6 ++++++ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 963c2b63e46da3..25ee67aa32a303 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -172,9 +172,9 @@ inline unsigned genLog2(unsigned __int128 value) inline unsigned genCountBits(unsigned __int128 bits) { - return BitOperations::PopCount(static_cast(bits)); + return BitOperations::PopCount(bits); } -#endif // HAS_PRIMITIVE_128 +#endif /***************************************************************************** * diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 5a59c7f6d62d82..3c29b6ae5c4330 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -757,12 +757,6 @@ inline size_t unsigned_abs(__int64 x) #define FEATURE_LOOP_ALIGN 0 #endif -#if defined(TARGET_ARM64) && defined(HOST_UNIX) -#define HAS_PRIMITIVE_128 1 -#else -#define HAS_PRIMITIVE_128 0 -#endif - #define CLFLG_MAXOPT \ (CLFLG_CSE | CLFLG_REGVAR | CLFLG_RNGCHKOPT | CLFLG_DEADSTORE | CLFLG_CODEMOTION | CLFLG_QMARK | CLFLG_TREETRANS | \ CLFLG_INLINING | CLFLG_STRUCTPROMOTE) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 70c4791b4e0562..cd120189675841 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -64,6 +64,12 @@ inline bool compUnixX86Abi() #error Unsupported or unset target architecture #endif +#if defined(TARGET_ARM64) && defined(HOST_UNIX) +#define HAS_PRIMITIVE_128 1 +#else +#define HAS_PRIMITIVE_128 0 +#endif + /*****************************************************************************/ // The following are intended to capture only those #defines that cannot be replaced // with static const members of Target