From 4ebf40f32ee9b8611e5911d35ceb8da6ddb4f8e9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 29 Mar 2023 12:12:32 -0700 Subject: [PATCH 1/2] Updating some places to cover xmm16-xmm31 --- docs/design/coreclr/botr/clr-abi.md | 56 +++++++++++----------- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/emitxarch.cpp | 11 +++-- src/coreclr/jit/lclvars.cpp | 2 +- src/coreclr/jit/unwindamd64.cpp | 72 +++++++++++++++++++++++++++++ 5 files changed, 109 insertions(+), 34 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 91f312721aedcc..14801dd8b6b897 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -711,34 +711,34 @@ The general rules outlined in the System V x86_64 ABI documentation are followed 6. The following table describes register usage according to the System V x86_64 ABI ``` -| Register | Usage | Preserved across | -| | | function calls | -|--------------|-----------------------------------------|-------------------| -| %rax | temporary register; with variable argu- | No | -| | ments passes information about the | | -| | number of SSE registers used; | | -| | 1st return argument | | -| %rbx | callee-saved register; optionally used | Yes | -| | as base pointer | | -| %rcx | used to pass 4st integer argument to | No | -| | to functions | | -| %rdx | used to pass 3rd argument to functions | No | -| | 2nd return register | | -| %rsp | stack pointer | Yes | -| %rbp | callee-saved register; optionally used | Yes | -| | as frame pointer | | -| %rsi | used to pass 2nd argument to functions | No | -| %rdi | used to pass 1st argument to functions | No | -| %r8 | used to pass 5th argument to functions | No | -| %r9 | used to pass 6th argument to functions | No | -| %r10 | temporary register, used for passing a | No | -| | function's static chain pointer | | -| %r11 | temporary register | No | -| %r12-%r15 | callee-saved registers | Yes | -| %xmm0-%xmm1 | used to pass and return floating point | No | -| | arguments | | -| %xmm2-%xmm7 | used to pass floating point arguments | No | -| %xmm8-%xmm15 | temporary registers | No | +| Register | Usage | Preserved across | +| | | function calls | +|---------------|-----------------------------------------|-------------------| +| %rax | temporary register; with variable argu- | No | +| | ments passes information about the | | +| | number of SSE registers used; | | +| | 1st return argument | | +| %rbx | callee-saved register; optionally used | Yes | +| | as base pointer | | +| %rcx | used to pass 4st integer argument to | No | +| | to functions | | +| %rdx | used to pass 3rd argument to functions | No | +| | 2nd return register | | +| %rsp | stack pointer | Yes | +| %rbp | callee-saved register; optionally used | Yes | +| | as frame pointer | | +| %rsi | used to pass 2nd argument to functions | No | +| %rdi | used to pass 1st argument to functions | No | +| %r8 | used to pass 5th argument to functions | No | +| %r9 | used to pass 6th argument to functions | No | +| %r10 | temporary register, used for passing a | No | +| | function's static chain pointer | | +| %r11 | temporary register | No | +| %r12-%r15 | callee-saved registers | Yes | +| %xmm0-%xmm1 | used to pass and return floating point | No | +| | arguments | | +| %xmm2-%xmm7 | used to pass floating point arguments | No | +| %xmm8-%xmm31 | temporary registers | No | ``` # Calling convention specifics for x86 diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 9fd3a655149913..dda09ea43cbce5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5345,7 +5345,7 @@ void CodeGen::genFinalizeFrame() #if defined(TARGET_XARCH) // Compute the count of callee saved float regs saved on stack. - // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15) + // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm31) // regs are stack allocated and preserved in their stack locations. compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED; maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 4fab45b3b94b9e..dfbf51e22afdb3 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -9669,14 +9669,14 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) return emitXMMregName(reg); case EA_8BYTE: - if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) + if (IsXMMReg(reg)) { return emitXMMregName(reg); } break; case EA_4BYTE: - if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) + if (IsXMMReg(reg)) { return emitXMMregName(reg); } @@ -9759,6 +9759,9 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) switch (EA_SIZE(attr)) { + case EA_64BYTE: + return emitZMMregName(reg); + case EA_32BYTE: return emitYMMregName(reg); @@ -9766,14 +9769,14 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) return emitXMMregName(reg); case EA_8BYTE: - if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) + if (IsXMMReg(reg)) { return emitXMMregName(reg); } break; case EA_4BYTE: - if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) + if (IsXMMReg(reg)) { return emitXMMregName(reg); } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 38620131777271..da54964aaaa738 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -6391,7 +6391,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() lvaIncrementFrameSize(extraSlotSize); } - // In case of Amd64 compCalleeRegsPushed does not include float regs (Xmm6-xmm15) that + // In case of Amd64 compCalleeRegsPushed does not include float regs (xmm6-xmm31) that // need to be pushed. But Amd64 doesn't support push/pop of xmm registers. // Instead we need to allocate space for them on the stack and save them in prolog. // Therefore, we consider xmm registers being saved while computing stack offsets diff --git a/src/coreclr/jit/unwindamd64.cpp b/src/coreclr/jit/unwindamd64.cpp index fa5f9df5ae333c..0896e38d2b03aa 100644 --- a/src/coreclr/jit/unwindamd64.cpp +++ b/src/coreclr/jit/unwindamd64.cpp @@ -119,6 +119,78 @@ short Compiler::mapRegNumToDwarfReg(regNumber reg) case REG_XMM15: dwarfReg = 32; break; + case REG_XMM16: + dwarfReg = 67; + break; + case REG_XMM17: + dwarfReg = 68; + break; + case REG_XMM18: + dwarfReg = 69; + break; + case REG_XMM19: + dwarfReg = 70; + break; + case REG_XMM20: + dwarfReg = 71; + break; + case REG_XMM21: + dwarfReg = 72; + break; + case REG_XMM22: + dwarfReg = 73; + break; + case REG_XMM23: + dwarfReg = 74; + break; + case REG_XMM24: + dwarfReg = 75; + break; + case REG_XMM25: + dwarfReg = 76; + break; + case REG_XMM26: + dwarfReg = 77; + break; + case REG_XMM27: + dwarfReg = 78; + break; + case REG_XMM28: + dwarfReg = 79; + break; + case REG_XMM29: + dwarfReg = 80; + break; + case REG_XMM30: + dwarfReg = 81; + break; + case REG_XMM31: + dwarfReg = 82; + break; + case REG_K0: + dwarfReg = 118; + break; + case REG_K1: + dwarfReg = 119; + break; + case REG_K2: + dwarfReg = 120; + break; + case REG_K3: + dwarfReg = 121; + break; + case REG_K4: + dwarfReg = 122; + break; + case REG_K5: + dwarfReg = 123; + break; + case REG_K6: + dwarfReg = 124; + break; + case REG_K7: + dwarfReg = 125; + break; default: noway_assert(!"unexpected REG_NUM"); } From eaf0cc394f9bd29e00e4ee70915105736cba215c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 29 Mar 2023 12:53:46 -0700 Subject: [PATCH 2/2] Remove XMM0-XMM31 and K0-K7 from mapRegNumToDwarfReg --- src/coreclr/jit/unwindamd64.cpp | 120 -------------------------------- 1 file changed, 120 deletions(-) diff --git a/src/coreclr/jit/unwindamd64.cpp b/src/coreclr/jit/unwindamd64.cpp index 0896e38d2b03aa..b5fdf604e55ed8 100644 --- a/src/coreclr/jit/unwindamd64.cpp +++ b/src/coreclr/jit/unwindamd64.cpp @@ -71,126 +71,6 @@ short Compiler::mapRegNumToDwarfReg(regNumber reg) case REG_R15: dwarfReg = 15; break; - case REG_XMM0: - dwarfReg = 17; - break; - case REG_XMM1: - dwarfReg = 18; - break; - case REG_XMM2: - dwarfReg = 19; - break; - case REG_XMM3: - dwarfReg = 20; - break; - case REG_XMM4: - dwarfReg = 21; - break; - case REG_XMM5: - dwarfReg = 22; - break; - case REG_XMM6: - dwarfReg = 23; - break; - case REG_XMM7: - dwarfReg = 24; - break; - case REG_XMM8: - dwarfReg = 25; - break; - case REG_XMM9: - dwarfReg = 26; - break; - case REG_XMM10: - dwarfReg = 27; - break; - case REG_XMM11: - dwarfReg = 28; - break; - case REG_XMM12: - dwarfReg = 29; - break; - case REG_XMM13: - dwarfReg = 30; - break; - case REG_XMM14: - dwarfReg = 31; - break; - case REG_XMM15: - dwarfReg = 32; - break; - case REG_XMM16: - dwarfReg = 67; - break; - case REG_XMM17: - dwarfReg = 68; - break; - case REG_XMM18: - dwarfReg = 69; - break; - case REG_XMM19: - dwarfReg = 70; - break; - case REG_XMM20: - dwarfReg = 71; - break; - case REG_XMM21: - dwarfReg = 72; - break; - case REG_XMM22: - dwarfReg = 73; - break; - case REG_XMM23: - dwarfReg = 74; - break; - case REG_XMM24: - dwarfReg = 75; - break; - case REG_XMM25: - dwarfReg = 76; - break; - case REG_XMM26: - dwarfReg = 77; - break; - case REG_XMM27: - dwarfReg = 78; - break; - case REG_XMM28: - dwarfReg = 79; - break; - case REG_XMM29: - dwarfReg = 80; - break; - case REG_XMM30: - dwarfReg = 81; - break; - case REG_XMM31: - dwarfReg = 82; - break; - case REG_K0: - dwarfReg = 118; - break; - case REG_K1: - dwarfReg = 119; - break; - case REG_K2: - dwarfReg = 120; - break; - case REG_K3: - dwarfReg = 121; - break; - case REG_K4: - dwarfReg = 122; - break; - case REG_K5: - dwarfReg = 123; - break; - case REG_K6: - dwarfReg = 124; - break; - case REG_K7: - dwarfReg = 125; - break; default: noway_assert(!"unexpected REG_NUM"); }