diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index 745e0f6a363cf6..e23aaa7c2333bd 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -555,26 +555,25 @@ static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equ #define DT_RISCV64_MAX_BREAKPOINTS 8 #define DT_RISCV64_MAX_WATCHPOINTS 2 -typedef DECLSPEC_ALIGN(16) struct { +typedef struct DECLSPEC_ALIGN(16) { // // Control flags. // /* +0x000 */ DWORD ContextFlags; - /* +0x004 */ DWORD Fcsr; // // Integer registers // - DWORD64 ZR; - DWORD64 RA; - DWORD64 SP; - DWORD64 GP; - DWORD64 TP; + DWORD64 R0; + DWORD64 Ra; + DWORD64 Sp; + DWORD64 Gp; + DWORD64 Tp; DWORD64 T0; DWORD64 T1; DWORD64 T2; - DWORD64 FP; + DWORD64 Fp; DWORD64 S1; DWORD64 A0; DWORD64 A1; @@ -598,12 +597,13 @@ typedef DECLSPEC_ALIGN(16) struct { DWORD64 T4; DWORD64 T5; DWORD64 T6; - DWORD64 PC; + DWORD64 Pc; // // Floating Point Registers // ULONGLONG F[32]; + DWORD Fcsr; } DT_CONTEXT; static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size"); diff --git a/src/coreclr/debug/inc/riscv64/primitives.h b/src/coreclr/debug/inc/riscv64/primitives.h index 0ce355aec5950c..39c505b8f28738 100644 --- a/src/coreclr/debug/inc/riscv64/primitives.h +++ b/src/coreclr/debug/inc/riscv64/primitives.h @@ -87,31 +87,31 @@ constexpr CorDebugRegister g_JITToCorDbgReg[] = inline void CORDbgSetIP(DT_CONTEXT *context, LPVOID ip) { LIMITED_METHOD_CONTRACT; - context->PC = (DWORD64)ip; + context->Pc = (DWORD64)ip; } inline LPVOID CORDbgGetSP(const DT_CONTEXT * context) { LIMITED_METHOD_CONTRACT; - return (LPVOID)(size_t)(context->SP); + return (LPVOID)(size_t)(context->Sp); } inline void CORDbgSetSP(DT_CONTEXT *context, LPVOID esp) { LIMITED_METHOD_CONTRACT; - context->SP = (DWORD64)esp; + context->Sp = (DWORD64)esp; } inline LPVOID CORDbgGetFP(const DT_CONTEXT * context) { LIMITED_METHOD_CONTRACT; - return (LPVOID)(size_t)(context->FP); + return (LPVOID)(size_t)(context->Fp); } inline void CORDbgSetFP(DT_CONTEXT *context, LPVOID fp) { LIMITED_METHOD_CONTRACT; - context->FP = (DWORD64)fp; + context->Fp = (DWORD64)fp; } @@ -121,9 +121,9 @@ inline BOOL CompareControlRegisters(const DT_CONTEXT * pCtx1, const DT_CONTEXT * // TODO-RISCV64: Sort out frame registers - if ((pCtx1->PC == pCtx2->PC) && - (pCtx1->SP == pCtx2->SP) && - (pCtx1->FP == pCtx2->FP)) + if ((pCtx1->Pc == pCtx2->Pc) && + (pCtx1->Sp == pCtx2->Sp) && + (pCtx1->Fp == pCtx2->Fp)) { return TRUE; } @@ -168,7 +168,7 @@ inline LPVOID CORDbgGetIP(DT_CONTEXT *context) { LIMITED_METHOD_CONTRACT; - return (LPVOID)(size_t)(context->PC); + return (LPVOID)(size_t)(context->Pc); } inline void CORDbgSetInstructionExImpl(CORDB_ADDRESS_TYPE* address, diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h index bc208595be770c..e03eac034b36f8 100644 --- a/src/coreclr/inc/crosscomp.h +++ b/src/coreclr/inc/crosscomp.h @@ -540,7 +540,6 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { // /* +0x000 */ DWORD ContextFlags; - /* +0x004 */ DWORD Fcsr; // // Integer registers @@ -584,6 +583,7 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { // //TODO-RISCV64: support the SIMD. ULONGLONG F[32]; + DWORD Fcsr; } T_CONTEXT, *PT_CONTEXT; // _IMAGE_RISCV64_RUNTIME_FUNCTION_ENTRY (see ExternalAPIs\Win9CoreSystem\inc\winnt.h) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 8f9a25ac848814..89a976d2360262 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1278,7 +1278,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { // A faster/smaller way to generate 0.0 // We will just zero out the entire vector register for both float and double - emit->emitIns_R_R(INS_fmv_d_x, EA_8BYTE, targetReg, REG_R0); + emit->emitIns_R_R(size == EA_4BYTE ? INS_fmv_w_x : INS_fmv_d_x, size, targetReg, REG_R0); } else { @@ -1589,11 +1589,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) } else // store into register (i.e move into register) { - if (dataReg != targetReg) - { - // Assign into targetReg when dataReg (from op1) is not the same register - inst_Mov(targetType, targetReg, dataReg, true, emitActualTypeSize(targetType)); - } + inst_Mov(targetType, targetReg, dataReg, true); genProduceReg(lclNode); } } @@ -1994,17 +1990,19 @@ void CodeGen::genCodeForNegNot(GenTree* tree) emitAttr attr = emitActualTypeSize(tree); if (tree->OperIs(GT_NEG)) { - if (attr == EA_4BYTE) + if (varTypeIsFloating(targetType)) { - GetEmitter()->emitIns_R_R_R(INS_subw, attr, targetReg, REG_R0, operandReg); + GetEmitter()->emitIns_R_R_R(targetType == TYP_DOUBLE ? INS_fsgnjn_d : INS_fsgnjn_s, attr, targetReg, + operandReg, operandReg); } else { - GetEmitter()->emitIns_R_R_R(INS_sub, attr, targetReg, REG_R0, operandReg); + GetEmitter()->emitIns_R_R_R(attr == EA_4BYTE ? INS_subw : INS_sub, attr, targetReg, REG_R0, operandReg); } } else if (tree->OperIs(GT_NOT)) { + assert(!varTypeIsFloating(targetType)); GetEmitter()->emitIns_R_R_I(INS_xori, attr, targetReg, operandReg, -1); } @@ -2109,6 +2107,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) // (AnyVal / 0) => DivideByZeroException // (MinInt / -1) => ArithmeticException // + bool checkDividend = true; // Do we have an immediate for the 'divisorOp'? @@ -3298,7 +3297,32 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // void CodeGen::genCkfinite(GenTree* treeNode) { - NYI_RISCV64("genCkfinite-----unimplemented/unused on RISCV64 yet----"); + assert(treeNode->OperGet() == GT_CKFINITE); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + var_types targetType = treeNode->TypeGet(); + int expMask = 0x381; // 0b1110000001; + + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + + // Extract exponent into a register. + regNumber intReg = treeNode->GetSingleTempReg(); + regNumber fpReg = genConsumeReg(op1); + + emit->emitIns_R_R(attr == EA_4BYTE ? INS_fclass_s : INS_fclass_d, attr, intReg, fpReg); + // Mask of exponent with all 1's and check if the exponent is all 1's + emit->emitIns_R_R_I(INS_andi, EA_PTRSIZE, intReg, intReg, expMask); + // If exponent is all 1's, throw ArithmeticException + genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_bne, intReg); + + // if it is a finite value copy it to targetReg + if (treeNode->GetRegNum() != fpReg) + { + inst_Mov(targetType, treeNode->GetRegNum(), fpReg, /* canSkip */ true); + } + + genProduceReg(treeNode); } //------------------------------------------------------------------------ @@ -3327,7 +3351,88 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) if (varTypeIsFloating(op1Type)) { assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - NYI_RISCV64("genCodeForCompare-----unimplemented on RISCV64 yet----"); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + regNumber regOp1 = op1->GetRegNum(); + regNumber regOp2 = op2->GetRegNum(); + + if (IsUnordered) + { + BasicBlock* skipLabel = nullptr; + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_fle_s : INS_fle_d, cmpSize, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_flt_s : INS_flt_d, cmpSize, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_EQ)) + { + skipLabel = genCreateTempLabel(); + emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, targetReg, regOp1); + emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, rsGetRsvdReg(), regOp2); + emit->emitIns_R_R_R(INS_or, EA_8BYTE, rsGetRsvdReg(), targetReg, rsGetRsvdReg()); + emit->emitIns_R_R_I(INS_andi, EA_8BYTE, rsGetRsvdReg(), rsGetRsvdReg(), 0x300); + emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, REG_R0, 1); + emit->emitIns_J(INS_bnez, skipLabel, rsGetRsvdReg()); + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); + genDefineTempLabel(skipLabel); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_fle_s : INS_fle_d, cmpSize, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_flt_s : INS_flt_d, cmpSize, targetReg, regOp1, regOp2); + } + if (skipLabel == nullptr) + { + emit->emitIns_R_R_R(INS_sub, EA_8BYTE, targetReg, REG_R0, targetReg); + emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, targetReg, 1); + } + } + else + { + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_flt_s : INS_flt_d, cmpSize, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_fle_s : INS_fle_d, cmpSize, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, targetReg, regOp1); + emit->emitIns_R_R(cmpSize == EA_4BYTE ? INS_fclass_s : INS_fclass_d, cmpSize, rsGetRsvdReg(), regOp2); + emit->emitIns_R_R_R(INS_or, EA_8BYTE, rsGetRsvdReg(), targetReg, rsGetRsvdReg()); + emit->emitIns_R_R_I(INS_andi, EA_8BYTE, rsGetRsvdReg(), rsGetRsvdReg(), 0x300); + emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, REG_R0, 0); + BasicBlock* skipLabel = genCreateTempLabel(); + emit->emitIns_J(INS_bnez, skipLabel, rsGetRsvdReg()); + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_feq_s : INS_feq_d, cmpSize, targetReg, regOp1, regOp2); + emit->emitIns_R_R_R(INS_sub, EA_8BYTE, targetReg, REG_R0, targetReg); + emit->emitIns_R_R_I(INS_addi, EA_8BYTE, targetReg, targetReg, 1); + genDefineTempLabel(skipLabel); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_flt_s : INS_flt_d, cmpSize, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_R(cmpSize == EA_4BYTE ? INS_fle_s : INS_fle_d, cmpSize, targetReg, regOp2, regOp1); + } + } } else { @@ -3572,6 +3677,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) assert(compiler->compCurBB->bbJumpKind == BBJ_COND); assert(tree->OperIs(GT_JCMP)); + assert(!varTypeIsFloating(tree)); assert(tree->TypeGet() == TYP_VOID); assert(tree->GetRegNum() == REG_NA); @@ -3597,11 +3703,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); regNumber regOp1 = op1->GetRegNum(); - if (varTypeIsFloating(op1Type)) - { - NYI_RISCV64("genCodeForJumpCompare floating-----unimplemented on RISCV64 yet----"); - } - else if (op2->isContainedIntOrIImmed()) + if (op2->isContainedIntOrIImmed()) { ssize_t imm = op2->AsIntCon()->gtIconVal; if (imm) @@ -4167,7 +4269,21 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) { - NYI_RISCV64("genStackPointerAdjustment-----unimplemented/unused on RISCV64 yet----"); + assert(spDelta < 0); + + // We assert that the SP change is less than one page. If it's greater, you should have called a + // function that does a probe, which will in turn call this function. + assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); + + if (emitter::isValidSimm12(spDelta)) + { + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); + } + else + { + GetEmitter()->emitIns_I_la(EA_PTRSIZE, regTmp, spDelta); + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp); + } } //------------------------------------------------------------------------ @@ -4185,7 +4301,8 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) { - NYI_RISCV64("genStackPointerConstantAdjustmentWithProbe-----unimplemented/unused on RISCV64 yet----"); + GetEmitter()->emitIns_R_R_I(INS_lw, EA_4BYTE, regTmp, REG_SP, 0); + genStackPointerConstantAdjustment(spDelta, regTmp); } //------------------------------------------------------------------------ @@ -4202,8 +4319,35 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNum // target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) { - NYI_RISCV64("genStackPointerConstantAdjustmentLoopWithProbe-----unimplemented/unused on RISCV64 yet----"); - return 0; + assert(spDelta < 0); + + const target_size_t pageSize = compiler->eeGetPageSize(); + + ssize_t spRemainingDelta = spDelta; + do + { + ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); + spRemainingDelta -= spOneDelta; + } while (spRemainingDelta < 0); + + // What offset from the final SP was the last probe? This depends on the fact that + // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". + target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; + if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) + { + // We haven't probed almost a complete page. If lastTouchDelta==0, then spDelta was an exact + // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed + // the page, but very far from the end. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + GetEmitter()->emitIns_R_R_I(INS_lw, EA_4BYTE, regTmp, REG_SP, 0); + lastTouchDelta = 0; + } + + return lastTouchDelta; } //------------------------------------------------------------------------ @@ -4829,9 +4973,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) addrNode = source->AsOp()->gtOp1; - // addrNode can either be a GT_LCL_ADDR or an address expression + // addrNode can either be a GT_LCL_ADDR<0> or an address expression // - if (addrNode->OperGet() == GT_LCL_ADDR) + if (addrNode->IsLclVarAddr()) { // We have a GT_BLK(GT_LCL_ADDR<0>) // @@ -5022,7 +5166,200 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) // void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) { - NYI_RISCV64("genPutArgSplit-----unimplemented/unused on RISCV64 yet----"); + assert(treeNode->OperIs(GT_PUTARG_SPLIT)); + + GenTree* source = treeNode->gtOp1; + emitter* emit = GetEmitter(); + unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; + unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + + if (source->OperGet() == GT_FIELD_LIST) + { + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + unsigned regIndex = 0; + unsigned firstOnStackOffs = UINT_MAX; + + for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) + { + GenTree* nextArgNode = use.GetNode(); + regNumber fieldReg = nextArgNode->GetRegNum(); + genConsumeReg(nextArgNode); + + if (regIndex >= treeNode->gtNumRegs) + { + if (firstOnStackOffs == UINT_MAX) + { + firstOnStackOffs = use.GetOffset(); + } + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; + // We can't write beyond the outgoing arg area + assert(offset + EA_SIZE_IN_BYTES(attr) <= argOffsetMax); + + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, offset); + } + else + { + var_types type = treeNode->GetRegType(regIndex); + regNumber argReg = treeNode->GetRegNumByIdx(regIndex); + + // If child node is not already in the register we need, move it + if (argReg != fieldReg) + { + inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); + } + regIndex++; + } + } + } + else + { + var_types targetType = source->TypeGet(); + assert(source->OperGet() == GT_BLK); + assert(varTypeIsStruct(targetType)); + + regNumber baseReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_ADDR<0> or an address expression + // + if (addrNode->IsLclVarAddr()) + { + // We have a GT_BLK(GT_LCL_ADDR<0>) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + // This is the varNum for our load operations, + // only used when we have a struct with a LclVar source + unsigned srcVarNum = BAD_VAR_NUM; + + if (varNode != nullptr) + { + assert(varNode->isContained()); + srcVarNum = varNode->GetLclNum(); + LclVarDsc* varDsc = compiler->lvaGetDesc(srcVarNum); + + // This struct also must live in the stack frame. + // And it can't live in a register. + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + } + else // addrNode is used + { + addrReg = genConsumeReg(addrNode); + + // If addrReg equal to baseReg, we use the last target register as alternative baseReg. + // Because the candidate mask for the internal baseReg does not include any of the target register, + // we can ensure that baseReg, addrReg, and the last target register are not all same. + assert(baseReg != addrReg); + } + + ClassLayout* layout = source->AsBlk()->GetLayout(); + + // Put on stack first + unsigned structOffset = treeNode->gtNumRegs * TARGET_POINTER_SIZE; + unsigned remainingSize = layout->GetSize() - structOffset; + unsigned argOffsetOut = treeNode->getArgOffset(); + + assert((remainingSize > 0) && (roundUp(remainingSize, TARGET_POINTER_SIZE) == treeNode->GetStackByteSize())); + while (remainingSize > 0) + { + var_types type; + if (remainingSize >= TARGET_POINTER_SIZE) + { + type = layout->GetGCPtrType(structOffset / TARGET_POINTER_SIZE); + } + else if (remainingSize >= 4) + { + type = TYP_INT; + } + else if (remainingSize >= 2) + { + type = TYP_USHORT; + } + else + { + assert(remainingSize == 1); + type = TYP_UBYTE; + } + + emitAttr attr = emitActualTypeSize(type); + unsigned moveSize = genTypeSize(type); + + instruction loadIns = ins_Load(type); + if (varNode != nullptr) + { + // Load from our local source + emit->emitIns_R_S(loadIns, attr, baseReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while we still need it + assert(baseReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(loadIns, attr, baseReg, addrReg, structOffset); + } + + // Emit the instruction to store the register into the outgoing argument area + emit->emitIns_S_R(ins_Store(type), attr, baseReg, varNumOut, argOffsetOut); + argOffsetOut += moveSize; + assert(argOffsetOut <= argOffsetMax); + + remainingSize -= moveSize; + structOffset += moveSize; + } + + // We set up the registers in order, so that we assign the last target register `baseReg` is no longer in use, + // in case we had to reuse the last target register for it. + structOffset = 0; + for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++) + { + regNumber targetReg = treeNode->GetRegNumByIdx(idx); + var_types type = treeNode->GetRegType(idx); + + if (varNode != nullptr) + { + // Load from our local source + emit->emitIns_R_S(ins_Load(type), emitTypeSize(type), targetReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while we still need it + if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1) + { + assert(targetReg != baseReg); + emit->emitIns_R_R_I(INS_ori, emitActualTypeSize(type), baseReg, addrReg, 0); + addrReg = baseReg; + } + + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(type), emitTypeSize(type), targetReg, addrReg, structOffset); + } + structOffset += TARGET_POINTER_SIZE; + } + } + genProduceReg(treeNode); } //------------------------------------------------------------------------ @@ -5164,12 +5501,12 @@ void CodeGen::genCodeForShift(GenTree* tree) GetEmitter()->emitIns_R_R_R(INS_sub, size, rsGetRsvdReg(), rsGetRsvdReg(), shiftBy->GetRegNum()); if (size == EA_8BYTE) { - GetEmitter()->emitIns_R_R_R(INS_srl, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_R(INS_srl, size, REG_RA, operand->GetRegNum(), shiftRight); GetEmitter()->emitIns_R_R_R(INS_sll, size, rsGetRsvdReg(), operand->GetRegNum(), shiftLeft); } else { - GetEmitter()->emitIns_R_R_R(INS_srlw, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_R(INS_srlw, size, REG_RA, operand->GetRegNum(), shiftRight); GetEmitter()->emitIns_R_R_R(INS_sllw, size, rsGetRsvdReg(), operand->GetRegNum(), shiftLeft); } } @@ -5184,16 +5521,16 @@ void CodeGen::genCodeForShift(GenTree* tree) unsigned shiftLeft = tree->OperIs(GT_ROR) ? immWidth - shiftByImm : shiftByImm; if ((shiftByImm >= 32 && shiftByImm < 64) || size == EA_8BYTE) { - GetEmitter()->emitIns_R_R_I(INS_srli, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_I(INS_srli, size, REG_RA, operand->GetRegNum(), shiftRight); GetEmitter()->emitIns_R_R_I(INS_slli, size, rsGetRsvdReg(), operand->GetRegNum(), shiftLeft); } else { - GetEmitter()->emitIns_R_R_I(INS_srliw, size, tree->GetRegNum(), operand->GetRegNum(), shiftRight); + GetEmitter()->emitIns_R_R_I(INS_srliw, size, REG_RA, operand->GetRegNum(), shiftRight); GetEmitter()->emitIns_R_R_I(INS_slliw, size, rsGetRsvdReg(), operand->GetRegNum(), shiftLeft); } } - GetEmitter()->emitIns_R_R_R(INS_or, size, tree->GetRegNum(), tree->GetRegNum(), rsGetRsvdReg()); + GetEmitter()->emitIns_R_R_R(INS_or, size, tree->GetRegNum(), REG_RA, rsGetRsvdReg()); } else { @@ -6328,8 +6665,8 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode) if (srcType != dstType) { - instruction ins = (srcType == TYP_FLOAT) ? INS_fcvt_d_w // convert Single to Double - : INS_fcvt_w_d; // convert Double to Single + instruction ins = (srcType == TYP_FLOAT) ? INS_fcvt_d_s // convert Single to Double + : INS_fcvt_s_d; // convert Double to Single GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum()); } @@ -6736,7 +7073,21 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) // void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) { - NYI_RISCV64("genProfilingLeaveCallback-----unimplemented/unused on RISCV64 yet----"); + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // Need to save on to the stack level, since the helper call will pop the argument + unsigned saveStackLvl2 = genStackLevel; + + /* Restore the stack level */ + SetStackLevel(saveStackLvl2); } /*----------------------------------------------------------------------------- @@ -7498,6 +7849,29 @@ void CodeGen::genFnPrologCalleeRegArgs() } } + if (regArgNum > 0) + { + for (int i = MAX_REG_ARG - 1; i >= 0; i--) + { + if (regArg[i] > 0 && (regArgInit[i] <= REG_S1 || regArgInit[i] > REG_A7)) + { + instruction ins; + if ((regArgMaskIsInt & (1 << regArg[i])) != 0) + { + ins = INS_slliw; + } + else + { + ins = INS_ori; + } + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); + regArgMaskLive &= ~genRegMask((regNumber)regArg[i]); + regArg[i] = 0; + regArgNum -= 1; + } + } + } + if (regArgNum > 0) { instruction ins; @@ -7524,7 +7898,7 @@ void CodeGen::genFnPrologCalleeRegArgs() GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); break; } - else if (regArgInit[i] > regArg[i] || (regArgInit[i] >= REG_T0 && regArgInit[i] <= REG_S1)) + else if (regArgInit[i] > regArg[i]) { GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); } @@ -7552,10 +7926,6 @@ void CodeGen::genFnPrologCalleeRegArgs() 0); regArgNum--; regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); - if (regArgNum == 0) - { - break; - } } else if (k == i) { @@ -7567,15 +7937,16 @@ void CodeGen::genFnPrologCalleeRegArgs() regArgNum--; regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); regArg[j] = 0; - if (regArgNum == 0) - { - break; - } } else { NYI_RISCV64("-----------CodeGen::genFnPrologCalleeRegArgs() error!--"); } + + if (regArgNum == 0) + { + break; + } } } } diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index bff4816e53a515..414b2cd68964db 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -319,9 +319,10 @@ void emitter::emitIns_S_R_R(instruction ins, emitAttr attr, regNumber reg1, regN id->idIns(ins); + assert(isGeneralRegister(reg2)); code_t code = emitInsCode(ins); code |= (code_t)(reg1 & 0x1f) << 20; - code |= (code_t)(reg2 & 0x1f) << 15; + code |= (code_t)reg2 << 15; code |= (((imm >> 5) & 0x7f) << 25) | ((imm & 0x1f) << 7); id->idAddr()->iiaSetInstrEncode(code); @@ -566,26 +567,37 @@ void emitter::emitIns_R_R( code |= reg1 << 7; code |= reg2 << 15; } - else if ((INS_fcvt_w_s <= ins && INS_fmv_x_w >= ins) || (INS_fclass_s == ins || INS_fclass_d == ins) || - (INS_fcvt_w_d == ins || INS_fcvt_wu_d == ins) || (INS_fcvt_l_s == ins || INS_fcvt_lu_s == ins) || - (INS_fmv_x_d == ins)) + else if (INS_fmv_x_d == ins || INS_fmv_x_w == ins || INS_fclass_s == ins || INS_fclass_d == ins) { - // TODO-RISCV64-CQ: Check rounding mode assert(isGeneralRegisterOrR0(reg1)); assert(isFloatReg(reg2)); - code |= (reg1 & 0x1f) << 7; + code |= reg1 << 7; code |= (reg2 & 0x1f) << 15; } - else if ((INS_fcvt_s_w <= ins && INS_fmv_w_x >= ins) || (INS_fcvt_d_w == ins || INS_fcvt_d_wu == ins) || - (INS_fcvt_s_l == ins || INS_fcvt_s_lu == ins) || (INS_fmv_d_x == ins) || - (INS_fcvt_d_l == ins || INS_fcvt_d_lu == ins)) - + else if (INS_fcvt_w_s == ins || INS_fcvt_wu_s == ins || INS_fcvt_w_d == ins || INS_fcvt_wu_d == ins || + INS_fcvt_l_s == ins || INS_fcvt_lu_s == ins || INS_fcvt_l_d == ins || INS_fcvt_lu_d == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isFloatReg(reg2)); + code |= reg1 << 7; + code |= (reg2 & 0x1f) << 15; + code |= 0x1 << 12; + } + else if (INS_fmv_w_x == ins || INS_fmv_d_x == ins) { - // TODO-RISCV64-CQ: Check rounding mode assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); code |= (reg1 & 0x1f) << 7; - code |= (reg2 & 0x1f) << 15; + code |= reg2 << 15; + } + else if (INS_fcvt_s_w == ins || INS_fcvt_s_wu == ins || INS_fcvt_d_w == ins || INS_fcvt_d_wu == ins || + INS_fcvt_s_l == ins || INS_fcvt_s_lu == ins || INS_fcvt_d_l == ins || INS_fcvt_d_lu == ins) + { + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= (reg1 & 0x1f) << 7; + code |= reg2 << 15; + code |= 0x7 << 12; } else if (INS_fcvt_s_d == ins || INS_fcvt_d_s == ins) { @@ -593,6 +605,7 @@ void emitter::emitIns_R_R( assert(isFloatReg(reg2)); code |= (reg1 & 0x1f) << 7; code |= (reg2 & 0x1f) << 15; + code |= 0x7 << 12; } else { @@ -623,20 +636,24 @@ void emitter::emitIns_R_R_I( (INS_lb <= ins && INS_lhu >= ins) || INS_ld == ins || INS_lw == ins || INS_jalr == ins || INS_fld == ins || INS_flw == ins) { - code |= (reg1 & 0x1f) << 7; // rd - code |= (reg2 & 0x1f) << 15; // rs1 - code |= imm << 20; // imm + assert(isGeneralRegister(reg2)); + code |= (reg1 & 0x1f) << 7; // rd + code |= reg2 << 15; // rs1 + code |= imm << 20; // imm } else if (INS_sd == ins || INS_sw == ins || INS_sh == ins || INS_sb == ins || INS_fsw == ins || INS_fsd == ins) { + assert(isGeneralRegister(reg2)); code |= (reg1 & 0x1f) << 20; // rs2 - code |= (reg2 & 0x1f) << 15; // rs1 + code |= reg2 << 15; // rs1 code |= (((imm >> 5) & 0x7f) << 25) | ((imm & 0x1f) << 7); // imm } else if (INS_beq <= ins && INS_bgeu >= ins) { - code |= (reg1 & 0x1f) << 15; - code |= (reg2 & 0x1f) << 20; + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + code |= reg1 << 15; + code |= reg2 << 20; code |= ((imm >> 11) & 0x1) << 7; code |= ((imm >> 1) & 0xf) << 8; code |= ((imm >> 5) & 0x3f) << 25; @@ -746,6 +763,10 @@ void emitter::emitIns_R_R_R( code |= ((reg1 & 0x1f) << 7); code |= ((reg2 & 0x1f) << 15); code |= ((reg3 & 0x1f) << 20); + if ((INS_fadd_s <= ins && INS_fsqrt_s >= ins) || (INS_fadd_d <= ins && INS_fsqrt_d >= ins)) + { + code |= 0x7 << 12; + } } else { @@ -1087,45 +1108,56 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) { assert(!EA_IS_RELOC(size)); assert(isGeneralRegister(reg)); - if (0 == ((imm + 0x800) >> 32)) + + // TODO-CQ-RISCV: at least for imm=-2*1024*1024*1024 (and similar ones) code can be simplified to "lui rd, 0x80000" + + if (0 == ((imm + 0x800) >> 31)) { if (((imm + 0x800) >> 12) != 0) { emitIns_R_I(INS_lui, size, reg, ((imm + 0x800) >> 12)); if ((imm & 0xFFF) != 0) { - emitIns_R_R_I(INS_addiw, size, reg, reg, imm & 0xFFF); + emitIns_R_R_I(size == EA_4BYTE ? INS_addiw : INS_addi, size, reg, reg, imm & 0xFFF); } } else { - emitIns_R_R_I(INS_addiw, size, reg, REG_R0, imm & 0xFFF); + emitIns_R_R_I(size == EA_4BYTE ? INS_addiw : INS_addi, size, reg, REG_R0, imm & 0xFFF); } } else { - UINT32 high = (imm >> 32) & 0xffffffff; + UINT32 high = (imm >> 33) & 0x7fffffff; + regNumber highReg = reg; if (((high + 0x800) >> 12) != 0) { - emitIns_R_I(INS_lui, size, reg, ((high + 0x800) >> 12)); + emitIns_R_I(INS_lui, size, highReg, ((high + 0x800) >> 12)); if ((high & 0xFFF) != 0) { - emitIns_R_R_I(INS_addi, size, reg, reg, high & 0xFFF); + emitIns_R_R_I(size == EA_4BYTE ? INS_addiw : INS_addi, size, highReg, highReg, high & 0xFFF); } } else if ((high & 0xFFF) != 0) { - emitIns_R_R_I(INS_addi, size, reg, REG_R0, high & 0xFFF); + emitIns_R_R_I(size == EA_4BYTE ? INS_addiw : INS_addi, size, highReg, REG_R0, high & 0xFFF); } - UINT32 low = imm & 0xffffffff; - emitIns_R_R_I(INS_slli, size, reg, reg, 11); - emitIns_R_R_I(INS_addi, size, reg, reg, (low >> 21) & 0x7FF); + else + { + highReg = REG_R0; + } + UINT64 low = imm & 0x1ffffffff; + if (highReg != REG_R0) + { + emitIns_R_R_I(size == EA_4BYTE ? INS_slliw : INS_slli, size, highReg, highReg, 11); + } + emitIns_R_R_I(size == EA_4BYTE ? INS_addiw : INS_addi, size, reg, highReg, (low >> 22) & 0x7FF); - emitIns_R_R_I(INS_slli, size, reg, reg, 11); - emitIns_R_R_I(INS_addi, size, reg, reg, (low >> 10) & 0x7FF); + emitIns_R_R_I(size == EA_4BYTE ? INS_slliw : INS_slli, size, reg, reg, 11); + emitIns_R_R_I(size == EA_4BYTE ? INS_addiw : INS_addi, size, reg, reg, (low >> 11) & 0x7FF); - emitIns_R_R_I(INS_slli, size, reg, reg, 10); - emitIns_R_R_I(INS_addi, size, reg, reg, low & 0x3FF); + emitIns_R_R_I(size == EA_4BYTE ? INS_slliw : INS_slli, size, reg, reg, 11); + emitIns_R_R_I(size == EA_4BYTE ? INS_addiw : INS_addi, size, reg, reg, low & 0x7FF); } } @@ -1478,7 +1510,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t dst += 4; code = emitInsCode(INS_jalr); - code |= (code_t)(reg2 & 0x1f) << 7; + code |= (code_t)reg2 << 7; code |= (code_t)REG_T2 << 15; code |= (low & 0x3ff) << 20; // the offset default is 0; @@ -2205,12 +2237,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert(code == 0x00000013); #endif code = 0x00000013 | (codeGen->rsGetRsvdReg() << 15); - *(code_t*)dstRW = code | ((code_t)(reg1 & 0x1f) << 7) | (((code_t)doff & 0xfff) << 20); + *(code_t*)dstRW = code | ((code_t)reg1 << 7) | (((code_t)doff & 0xfff) << 20); } else { code = emitInsCode(ins); - code |= (code_t)((reg1 & 0x1f) << 7); + code |= (code_t)(reg1 & 0x1f) << 7; code |= (code_t)codeGen->rsGetRsvdReg() << 15; code |= (code_t)(doff & 0xfff) << 20; *(code_t*)dstRW = code; @@ -2417,8 +2449,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert((INS_bne & 0xefff) == INS_beq); code = emitInsCode((instruction)((int)ins ^ 0x1000)); - code |= ((code_t)(reg1) /*& 0x1f */) << 15; /* rj */ - code |= ((code_t)(reg2) /*& 0x1f */) << 20; /* rd */ + code |= (code_t)reg1 << 15; /* rj */ + code |= (code_t)reg2 << 20; /* rd */ code |= 0x8 << 7; *(code_t*)dstRW = code; dstRW += 4; @@ -2440,8 +2472,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert((INS_bgeu & 0xefff) == INS_bltu); code = emitInsCode((instruction)((int)ins ^ 0x1000)); - code |= ((code_t)(reg1) /*& 0x1f */) << 15; /* rj */ - code |= ((code_t)(reg2) /*& 0x1f */) << 20; /* rd */ + code |= (code_t)reg1 << 15; /* rj */ + code |= (code_t)reg2 << 20; /* rd */ code |= 0x8 << 7; *(code_t*)dstRW = code; dstRW += 4; @@ -3225,7 +3257,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) case 0x70: // FMV.X.W & FCLASS.S if (opcode4 == 0) // FMV.X.W { - printf("fmv.x.w %s, %s\n", xd, xs1); + printf("fmv.x.w %s, %s\n", xd, fs1); } else if (opcode4 == 1) // FCLASS.S { @@ -3247,7 +3279,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) } else if (opcode4 == 2) // FEQ.S { - printf("feq.s %s, %s, %s\n", fd, xs1, fs2); + printf("feq.s %s, %s, %s\n", xd, fs1, fs2); } else { @@ -3371,7 +3403,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) { printf("fcvt.w.d %s, %s\n", xd, fs1); } - if (opcode3 == 1) // FCVT.WU.D + else if (opcode3 == 1) // FCVT.WU.D { printf("fcvt.wu.d %s, %s\n", xd, fs1); } @@ -3383,7 +3415,6 @@ void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) { printf("fcvt.lu.d %s, %s\n", xd, fs1); } - else { NYI_RISCV64("illegal ins within emitDisInsName!"); @@ -4051,6 +4082,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_R_R_I(INS_srli, EA_8BYTE, regOp2, regOp2, 32); } } + if (needCheckOv) { assert(!varTypeIsFloating(dst)); @@ -4060,6 +4092,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, if (dst->GetRegNum() == regOp1) { assert(codeGen->rsGetRsvdReg() != regOp1); + assert(REG_RA != regOp1); saveOperReg1 = codeGen->rsGetRsvdReg(); saveOperReg2 = regOp2; emitIns_R_R_I(INS_addi, attr, codeGen->rsGetRsvdReg(), regOp1, 0); @@ -4067,6 +4100,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, else if (dst->GetRegNum() == regOp2) { assert(codeGen->rsGetRsvdReg() != regOp2); + assert(REG_RA != regOp2); saveOperReg1 = regOp1; saveOperReg2 = codeGen->rsGetRsvdReg(); emitIns_R_R_I(INS_addi, attr, codeGen->rsGetRsvdReg(), regOp2, 0); @@ -4107,7 +4141,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, else { tempReg1 = REG_RA; // src1->GetSingleTempReg(); - tempReg2 = codeGen->rsGetRsvdReg(); + tempReg2 = REG_T5; // TODO-RISCV64-Bug?: Assign proper temp register assert(tempReg1 != tempReg2); assert(tempReg1 != saveOperReg1); assert(tempReg2 != saveOperReg2); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 42580242c408ac..a36807b426cf88 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1778,7 +1778,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? INS_movfr2gr_s : INS_movfr2gr_d; #elif defined(TARGET_RISCV64) assert(!varTypeIsSIMD(dstType)); - return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? INS_fcvt_w_d : INS_fcvt_l_d; + return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? INS_fmv_x_w : INS_fmv_x_d; #else NYI("ins_Copy"); #endif @@ -1834,15 +1834,16 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) } #elif defined(TARGET_RISCV64) assert(!varTypeIsSIMD(dstType)); + assert(!genIsValidFloatReg(srcReg)); if (dstType == TYP_DOUBLE) { - return INS_fcvt_d_l; + return INS_fmv_d_x; } else { assert(dstType == TYP_FLOAT); - return INS_fcvt_s_l; + return INS_fmv_w_x; } #else NYI("ins_Copy"); diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp index 897d39e1b8618e..46fe05f0efd199 100644 --- a/src/coreclr/jit/lowerriscv64.cpp +++ b/src/coreclr/jit/lowerriscv64.cpp @@ -138,7 +138,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) GenTree* cmpOp1; GenTree* cmpOp2; - if (op->OperIsCompare()) + if (op->OperIsCompare() && !varTypeIsFloating(op->gtGetOp1())) { // We do not expect any other relops on LA64 assert(op->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)); @@ -191,30 +191,6 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) // GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) { - if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) - { - GenTree* opNode = nullptr; - GenTree* notNode = nullptr; - if (binOp->gtGetOp1()->OperIs(GT_NOT)) - { - notNode = binOp->gtGetOp1(); - opNode = binOp->gtGetOp2(); - } - else if (binOp->gtGetOp2()->OperIs(GT_NOT)) - { - notNode = binOp->gtGetOp2(); - opNode = binOp->gtGetOp1(); - } - - if (notNode != nullptr) - { - binOp->gtOp1 = opNode; - binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); - binOp->ChangeOper(GT_AND_NOT); - BlockRange().Remove(notNode); - } - } - ContainCheckBinary(binOp); return binOp->gtNext; diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 35ad4397b568b3..cef952206acf4f 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1047,8 +1047,79 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) // int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) { - NYI_RISCV64("BuildPutArgSplit-----unimplemented on RISCV64 yet----"); - return 0; + int srcCount = 0; + assert(argNode->gtOper == GT_PUTARG_SPLIT); + + GenTree* putArgChild = argNode->gtGetOp1(); + + // Registers for split argument corresponds to source + int dstCount = argNode->gtNumRegs; + + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; + for (unsigned i = 0; i < argNode->gtNumRegs; i++) + { + regNumber thisArgReg = (regNumber)((unsigned)argReg + i); + argMask |= genRegMask(thisArgReg); + argNode->SetRegNumByIdx(thisArgReg, i); + } + + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + // Generated code: + // 1. Consume all of the items in the GT_FIELD_LIST (source) + // 2. Store to target slot and move to target registers (destination) from source + // + unsigned sourceRegCount = 0; + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. + + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + GenTree* node = use.GetNode(); + assert(!node->isContained()); + // The only multi-reg nodes we should see are OperIsMultiRegOp() + assert(!node->IsMultiRegNode()); + + // Consume all the registers, setting the appropriate register mask for the ones that + // go into registers. + regMaskTP sourceMask = RBM_NONE; + if (sourceRegCount < argNode->gtNumRegs) + { + sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + } + sourceRegCount++; + BuildUse(node, sourceMask, 0); + } + srcCount += sourceRegCount; + assert(putArgChild->isContained()); + } + else + { + assert(putArgChild->TypeGet() == TYP_STRUCT); + assert(putArgChild->OperGet() == GT_BLK); + + // We can use a ld/st sequence so we need an internal register + buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); + + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->IsLclVarAddr()) + { + // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_BLK and the GT_LCL_ADDR<0> + // as one contained operation + // + assert(objChild->isContained()); + } + else + { + srcCount = BuildIndirUses(putArgChild->AsIndir()); + } + assert(putArgChild->isContained()); + } + buildInternalRegisterUses(); + BuildDefs(argNode, dstCount, argMask); + return srcCount; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 1cdc9ec4980d2d..ad7e5be605199a 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -38,7 +38,7 @@ inline bool compMacOsArm64Abi() } inline bool compFeatureArgSplit() { - return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || + return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || TargetArchitecture::IsRiscv64 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); } inline bool compUnixX86Abi() diff --git a/src/coreclr/jit/unwindriscv64.cpp b/src/coreclr/jit/unwindriscv64.cpp index f151e7f408800a..bc59e66321f4c8 100644 --- a/src/coreclr/jit/unwindriscv64.cpp +++ b/src/coreclr/jit/unwindriscv64.cpp @@ -997,7 +997,22 @@ int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom) { - NYI_RISCV64("CopyFrom-----unimplemented on RISCV64 yet----"); + assert(uwiComp == pCopyFrom->uwiComp); + assert(upcMem == upcMemLocal); + assert(upcMemSize == UPC_LOCAL_COUNT); + assert(upcHeaderSlot == -1); + assert(upcEpilogSlot == -1); + + // Copy the codes + EnsureSize(pCopyFrom->upcMemSize); + assert(upcMemSize == pCopyFrom->upcMemSize); + memcpy_s(upcMem, upcMemSize, pCopyFrom->upcMem, pCopyFrom->upcMemSize); + + // Copy the other data + upcCodeSlot = pCopyFrom->upcCodeSlot; + upcHeaderSlot = pCopyFrom->upcHeaderSlot; + upcEpilogSlot = pCopyFrom->upcEpilogSlot; + upcUnwindBlockSlot = pCopyFrom->upcUnwindBlockSlot; } void UnwindPrologCodes::EnsureSize(int requiredSize) @@ -1230,7 +1245,8 @@ void UnwindFragmentInfo::AddEpilog() void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom) { - NYI_RISCV64("CopyPrologCodes-----unimplemented on RISCV64 yet----"); + ufiPrologCodes.CopyFrom(&pCopyFrom->ufiPrologCodes); + ufiPrologCodes.AddCode(UWC_END_C); } // Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent @@ -1240,7 +1256,42 @@ void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom) void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom) { - NYI_RISCV64("SplitEpilogCodes-----unimplemented on RISCV64 yet----"); + UnwindEpilogInfo* pEpiPrev; + UnwindEpilogInfo* pEpi; + + UNATIVE_OFFSET splitOffset = emitLoc->CodeOffset(uwiComp->GetEmitter()); + + for (pEpiPrev = NULL, pEpi = pSplitFrom->ufiEpilogList; pEpi != NULL; pEpiPrev = pEpi, pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); // Get the offset of the epilog from the emitter so we can compare it + if (pEpi->GetStartOffset() >= splitOffset) + { + // This epilog and all following epilogs, which must be in order of increasing offsets, + // get moved to this fragment. + + // Splice in the epilogs to this fragment. Set the head of the epilog + // list to this epilog. + ufiEpilogList = pEpi; // In this case, don't use 'ufiEpilogFirst' + ufiEpilogLast = pSplitFrom->ufiEpilogLast; + + // Splice out the tail of the list from the 'pSplitFrom' epilog list + pSplitFrom->ufiEpilogLast = pEpiPrev; + if (pSplitFrom->ufiEpilogLast == NULL) + { + pSplitFrom->ufiEpilogList = NULL; + } + else + { + pSplitFrom->ufiEpilogLast->epiNext = NULL; + } + + // No more codes should be added once we start splitting + pSplitFrom->ufiCurCodes = NULL; + ufiCurCodes = NULL; + + break; + } + } } // Is this epilog at the end of an unwind fragment? Ask the emitter. @@ -1794,7 +1845,8 @@ void UnwindInfo::Split() /*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc) { - NYI_RISCV64("EmitSplitCallback-----unimplemented on RISCV64 yet----"); + UnwindInfo* puwi = (UnwindInfo*)context; + puwi->AddFragment(emitLoc); } // Reserve space for the unwind info for all fragments @@ -1861,7 +1913,21 @@ void UnwindInfo::CaptureLocation() void UnwindInfo::AddFragment(emitLocation* emitLoc) { - NYI_RISCV64("AddFragment-----unimplemented on RISCV64 yet----"); + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + + UnwindFragmentInfo* newFrag = new (uwiComp, CMK_UnwindInfo) UnwindFragmentInfo(uwiComp, emitLoc, true); + +#ifdef DEBUG + newFrag->ufiNum = uwiFragmentLast->ufiNum + 1; +#endif // DEBUG + + newFrag->CopyPrologCodes(&uwiFragmentFirst); + newFrag->SplitEpilogCodes(emitLoc, uwiFragmentLast); + + // Link the new fragment in at the end of the fragment list + uwiFragmentLast->ufiNext = newFrag; + uwiFragmentLast = newFrag; } #ifdef DEBUG diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 92ffa6aad9d7a1..8e085e3cc15ab1 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -2284,7 +2284,6 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // /* +0x000 */ DWORD ContextFlags; - /* +0x004 */ DWORD Fcsr; // // Integer registers. @@ -2328,6 +2327,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // // TODO-RISCV64: support the SIMD. ULONGLONG F[32]; + DWORD Fcsr; } CONTEXT, *PCONTEXT, *LPCONTEXT; // diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index e61abe2ea57e7d..12aedda2db5f87 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -141,7 +141,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); #define SIZEOF__Frame 0x10 ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); -#define SIZEOF__CONTEXT 0x210 +#define SIZEOF__CONTEXT 0x220 ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT));