diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 0f28edbc39..84588a2ff7 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -154,6 +154,7 @@ const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +const unsigned kVecResRetStatusIndex = 1; /* hctdb_instrhelp.get_max_oload_dims()*/ // OLOAD_DIMS-TEXT:BEGIN diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index a66dfc68d4..253121346a 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -6438,7 +6438,7 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { - if (!Ty->isStructTy()) + if (!Ty || !Ty->isStructTy()) return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index db596a3821..9587897e22 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1573,9 +1573,15 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); + StructType *StrTy = dyn_cast(V->getType()); + unsigned ExtractIndex = EVI->getIndices()[0]; + // Ensure parameter is a single value that is extracted from the correct + // ResRet struct location. bool IsLegal = EVI->getNumIndices() == 1 && - EVI->getIndices()[0] == DXIL::kResRetStatusIndex && - ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); + (ExtractIndex == DXIL::kResRetStatusIndex || + ExtractIndex == DXIL::kVecResRetStatusIndex) && + ValCtx.DxilMod.GetOP()->IsResRetType(StrTy) && + ExtractIndex == StrTy->getNumElements() - 1; if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 7d5eb0edce..2033533327 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -3063,10 +3063,10 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, } void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, - hlsl::OP *hlslOp) { + hlsl::OP *hlslOp, + unsigned StatusIndex = DXIL::kResRetStatusIndex) { if (status && !isa(status)) { - Value *statusVal = - Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex); + Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex); Value *checkAccessOp = hlslOp->GetI32Const( static_cast(DXIL::OpCode::CheckAccessFullyMapped)); Function *checkAccessFn = hlslOp->GetOpFunc( @@ -4028,9 +4028,9 @@ struct ResLoadHelper { // Used for some subscript operators that feed the generic HL call inst // into a load op and by the matrixload call instruction. ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, - Value *Offset, Value *mip = nullptr) + Value *Offset, Value *status = nullptr, Value *mip = nullptr) : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), - addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + addr(idx), offset(Offset), status(status), mipLevel(mip) { opcode = LoadOpFromResKind(RK); Type *Ty = Inst->getType(); if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && @@ -4304,18 +4304,22 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Function *F = OP->GetOpFunc(opcode, EltTy); Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); + unsigned StatusIndex; // Extract elements from returned ResRet. // Native vector loads just have one vector element in the ResRet. // Others have up to four scalars that need to be individually extracted. - if (opcode == OP::OpCode::RawBufferVectorLoad) + if (opcode == OP::OpCode::RawBufferVectorLoad) { Elts[i++] = Builder.CreateExtractValue(Ld, 0); - else + StatusIndex = DXIL::kVecResRetStatusIndex; + } else { for (unsigned j = 0; j < chunkSize; j++, i++) Elts[i] = Builder.CreateExtractValue(Ld, j); + StatusIndex = DXIL::kResRetStatusIndex; + } // Update status. - UpdateStatus(Ld, helper.status, Builder, OP); + UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex); if (!FirstLd) FirstLd = Ld; @@ -8537,7 +8541,7 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { - ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset, status); #ifndef NDEBUG Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); Type *matType = ptr->getType()->getPointerElementType(); @@ -8864,7 +8868,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } } else if (LoadInst *LdInst = dyn_cast(user)) { // Load of scalar/vector within a struct or structured raw load. - ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status); TranslateBufLoad(helper, ResKind, Builder, OP, DL); LdInst->eraseFromParent(); @@ -9239,7 +9243,8 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { Value *Offset = UndefValue::get(Builder.getInt32Ty()); - ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, + /*status*/ nullptr, mipLevel); TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); ldInst->eraseFromParent(); } else { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index 9f7a487a05..572734d679 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -1,35 +1,35 @@ -// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s @@ -105,27 +105,27 @@ RWStructuredBuffer< TYPE SS > RwStBuf : register(u2); ConsumeStructuredBuffer< TYPE SS > CnStBuf : register(u4); AppendStructuredBuffer< TYPE SS > ApStBuf : register(u5); -TYPE Add(TYPE f1[COLS], TYPE f2[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; return ret; } template -T Add(T v1, T v2) { return v1 + v2; } +T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } -TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS], TYPE f5[COLS], TYPE f6[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i] + f5[i] + f6[i]; return ret; } template -T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } +T Add(T v1, T v2, T v3, T v4, T v5, T v6) { return v1 + v2 + v3 + v4 + v5 + v6; } -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -150,26 +150,55 @@ void main(uint ix[2] : IX) { // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // OFF: [[RIX0:%.*]] = add i32 [[IX0]], [[BOFF:[0-9]+]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: [[IX0p4:%.*]] = add i32 [[RIX0]], [[p4:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // MULTI: [[IX0p4:%.*]] = add i32 [[RIX0]], [[p4:[0-9]+]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 TYPE babElt1 SS = RwByBuf.Load< TYPE SS >(ix[0]); + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // CHECK-DAG: [[RIX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // OFF: [[RIX1:%.*]] = add i32 [[IX1]], [[BOFF]] + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX1]] + // MULTI: [[IX1p4:%.*]] = add i32 [[RIX1]], [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p4]] + // MULTI: [[IX1p8:%.*]] = add i32 [[RIX1]], [[p8]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + uint status1; + TYPE babElt3 SS = RwByBuf.Load< TYPE SS >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX0]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 SS = RoByBuf.Load< TYPE SS >(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX1]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2; + TYPE babElt4 SS = RoByBuf.Load< TYPE SS >(ix[1], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -177,48 +206,76 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, double 0.0 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] - RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2)); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2, babElt3, babElt4)); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 SS = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt2 SS = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 2 + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 SS = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 SS = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 SS = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 SS = RoStBuf.Load(ix[2], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -226,9 +283,13 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 0, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 16, double 0.0 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] - RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4, stbElt5, stbElt6); + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -236,8 +297,8 @@ void main(uint ix[2] : IX) { // OFF: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 // OFF: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 16 // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -253,7 +314,7 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 16 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] ApStBuf.Append(cnElt); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl index 5305ee495b..f71b29e83e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -26,7 +26,7 @@ AppendStructuredBuffer > ApStBuf : register(u5); // CHECK-LABEL: define void @main [shader("vertex")] -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -45,36 +45,73 @@ void main(uint ix[2] : IX) { // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt1 = RwByBuf.Load< vector >(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + uint status1; + vector babElt3 = RwByBuf.Load< vector >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt2 = RoByBuf.Load< vector >(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + uint status2; + vector babElt4 = RoByBuf.Load< vector >(ix[1], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt2 = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt5 = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt4 = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt6 = RoStBuf.Load(ix[2], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index 8dcf5ead1c..896f442c2c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -87,12 +87,36 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status1 = 0; + TYPE babElt3 = RwByBuf.Load< TYPE >(ix1, status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2 = 0; + TYPE babElt4 = RoByBuf.Load< TYPE >(ix1, status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -102,6 +126,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 = RwStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -116,6 +141,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 = RoStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -123,12 +149,34 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 = RoStBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 = RwStBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 = RoStBuf.Load(ix2[0], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -167,6 +215,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt1 = RwTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -183,6 +232,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt2 = RwTyBuf[ix1]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -200,6 +250,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt3 = RoTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -217,6 +268,44 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt4 = RoTyBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt5 = RwTyBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt6 = RoTyBuf.Load(ix2[0], status2); + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // I64: trunc i64 %{{.*}} to i32 @@ -229,8 +318,12 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 - // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + // CHECK: call void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 300 + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4 + typElt5 + typElt6; + RwByBuf.Store< uint > (300, status1 && status2); // Texture Tests // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] @@ -250,6 +343,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -285,6 +379,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -320,6 +415,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] // F64: call double @dx.op.makeDouble.f64(i32 101