From f5400671e589faceeaae28a0bb837bad10270b6a Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 5 Jun 2025 13:40:10 -0600 Subject: [PATCH 1/6] fix aggregate load status value propagation The status Value was not getting passed all the way down to the load instruction generation for aggregate types because the helper constructor would always set it to null. The status has to be passed explicitly because, by that point, the original call instruction it came from has been lost amidst the subsequent GEPs, bitcasts, and/or loads that aggregate types (arrays and structs) apply to the result of the original call instruction to get the exact element required. This changes the constructor to take an optional status parameter, allowing the call sites where a status might be set to pass it along. In other cases, it will be null and is appropriately ignored. Modified aggregate tests to verify this behavior. This required keeping track of the return value of the last load operation involved in a raw buffer load, which made arrays more complicated. Rather than give arrays their own CHECK prefix, I lumped them in with the large matrices that require three loads (the shared prefix is renamed from MAT to MULTI accordingly). This did require making all the array lengths 3 to match. The loss in test variability is worth the convenience, as there is no known distinction when it comes to array sizes over 1. --- lib/HLSL/HLOperationLower.cpp | 11 +- .../intrinsics/buffer-agg-load-stores.hlsl | 159 ++++++++++++------ .../hlsl/intrinsics/buffer-load-stores.hlsl | 90 +++++++++- 3 files changed, 200 insertions(+), 60 deletions(-) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 7d5eb0edce..a311afabce 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4028,9 +4028,9 @@ struct ResLoadHelper { // Used for some subscript operators that feed the generic HL call inst // into a load op and by the matrixload call instruction. 
ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, - Value *Offset, Value *mip = nullptr) + Value *Offset, Value *status = nullptr, Value *mip = nullptr) : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), - addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + addr(idx), offset(Offset), status(status), mipLevel(mip) { opcode = LoadOpFromResKind(RK); Type *Ty = Inst->getType(); if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && @@ -8537,7 +8537,7 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { - ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset, status); #ifndef NDEBUG Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); Type *matType = ptr->getType()->getPointerElementType(); @@ -8864,7 +8864,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } } else if (LoadInst *LdInst = dyn_cast(user)) { // Load of scalar/vector within a struct or structured raw load. 
- ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status); TranslateBufLoad(helper, ResKind, Builder, OP, DL); LdInst->eraseFromParent(); @@ -9239,7 +9239,8 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { Value *Offset = UndefValue::get(Builder.getInt32Ty()); - ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, + /*status*/ nullptr, mipLevel); TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); ldInst->eraseFromParent(); } else { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index 9f7a487a05..65d1010046 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -1,35 +1,35 @@ -// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 
-DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: 
%dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s @@ -105,27 +105,27 @@ RWStructuredBuffer< TYPE SS > RwStBuf : register(u2); ConsumeStructuredBuffer< TYPE SS > CnStBuf : register(u4); AppendStructuredBuffer< TYPE SS > ApStBuf : register(u5); -TYPE Add(TYPE f1[COLS], TYPE f2[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; return ret; } template -T 
Add(T v1, T v2) { return v1 + v2; } +T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } -TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS], TYPE f5[COLS], TYPE f6[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i] + f5[i] + f6[i]; return ret; } template -T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } +T Add(T v1, T v2, T v3, T v4, T v5, T v6) { return v1 + v2 + v3 + v4 + v5 + v6; } -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -150,26 +150,55 @@ void main(uint ix[2] : IX) { // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // OFF: [[RIX0:%.*]] = add i32 [[IX0]], [[BOFF:[0-9]+]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: [[IX0p4:%.*]] = add i32 [[RIX0]], [[p4:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // MULTI: [[IX0p4:%.*]] = add i32 [[RIX0]], [[p4:[0-9]+]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 TYPE 
babElt1 SS = RwByBuf.Load< TYPE SS >(ix[0]); + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // CHECK-DAG: [[RIX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // OFF: [[RIX1:%.*]] = add i32 [[IX1]], [[BOFF]] + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX1]] + // MULTI: [[IX1p4:%.*]] = add i32 [[RIX1]], [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p4]] + // MULTI: [[IX1p8:%.*]] = add i32 [[RIX1]], [[p8]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + uint status1; + TYPE babElt3 SS = RwByBuf.Load< TYPE SS >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX0]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 SS = RoByBuf.Load< TYPE SS 
>(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX1]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2; + TYPE babElt4 SS = RoByBuf.Load< TYPE SS >(ix[1], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -177,48 +206,73 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, double 0.0 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] - RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2)); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2, babElt3, babElt4)); + RwByBuf.Store< uint > (100, status1 + status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle 
[[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 SS = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt2 SS = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 2 + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 SS = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 SS = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 SS = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 SS = RoStBuf.Load(ix[2], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -226,9 +280,10 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 0, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 16, double 0.0 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] - RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, 
%dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4, stbElt5, stbElt6); + RwByBuf.Store< uint > (200, status1 + status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -236,8 +291,8 @@ void main(uint ix[2] : IX) { // OFF: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 // OFF: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 16 // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -253,7 +308,7 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 16 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] - // 
MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] ApStBuf.Append(cnElt); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index 8dcf5ead1c..e60a568ab3 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -87,12 +87,32 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status1 = 0; + TYPE babElt3 = RwByBuf.Load< TYPE >(ix1, status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2 = 0; + TYPE babElt4 = RoByBuf.Load< TYPE >(ix1, status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 
// CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2 + babElt3 + babElt4 + status1 + status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -102,6 +122,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 = RwStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -116,6 +137,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 = RoStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -123,12 +145,30 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 = RoStBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 = RwStBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: call i1 
@dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 = RoStBuf.Load(ix2[0], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6 + status1 + status2; // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -167,6 +207,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt1 = RwTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -183,6 +224,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt2 = RwTyBuf[ix1]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -200,6 +242,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt3 = RoTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double 
@dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -217,6 +260,44 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt4 = RoTyBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt5 = RwTyBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt6 = RoTyBuf.Load(ix2[0], status2); + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // I64: trunc i64 %{{.*}} to i32 @@ 
-230,7 +311,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4 + typElt5 + typElt6 + status1 + status2; // Texture Tests // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] @@ -250,6 +331,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -285,6 +367,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -320,6 +403,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 
[[IX31]], i32 [[IX32]] // F64: call double @dx.op.makeDouble.f64(i32 101 From 900ec4af8cf1bd113bab030d1818cd2525a91b19 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 4 Jun 2025 18:23:12 -0600 Subject: [PATCH 2/6] [SM6.9] Fix load status indexing The status retrieval from the resret returned by the raw buffer loads was using the wrong index for native vectors supported by shader model 6.9. Adjusting the index by the opcode ensures that the index will be correct. This also required a change to validation that allows checkAccessFullyMapped to operate on the second element extracted from a resret where applicable and some corresponding null tolerance in related code. Adds status retrieving overloads to the relevant load/store tests. Fixes #7508 --- include/dxc/DXIL/DxilConstants.h | 1 + lib/DXIL/DxilOperations.cpp | 2 +- lib/DxilValidation/DxilValidation.cpp | 10 ++++- lib/HLSL/HLOperationLower.cpp | 16 +++++--- .../intrinsics/buffer-load-stores-sm69.hlsl | 37 +++++++++++++++++-- 5 files changed, 53 insertions(+), 13 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 0f28edbc39..84588a2ff7 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -154,6 +154,7 @@ const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +const unsigned kVecResRetStatusIndex = 1; /* hctdb_instrhelp.get_max_oload_dims()*/ // OLOAD_DIMS-TEXT:BEGIN diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index a66dfc68d4..253121346a 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -6438,7 +6438,7 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { - if (!Ty->isStructTy()) + if (!Ty || !Ty->isStructTy()) return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) diff --git 
a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index db596a3821..77b348404f 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1573,9 +1573,15 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); + StructType *StrTy = dyn_cast<StructType>(V->getType()); + uint ExtractIndex = EVI->getIndices()[0]; + // Ensure parameter is a single value that is extracted from the correct + // ResRet struct location. bool IsLegal = EVI->getNumIndices() == 1 && - EVI->getIndices()[0] == DXIL::kResRetStatusIndex && - ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); + (ExtractIndex == DXIL::kResRetStatusIndex || + ExtractIndex == DXIL::kVecResRetStatusIndex) && + ValCtx.DxilMod.GetOP()->IsResRetType(StrTy) && + ExtractIndex == StrTy->getNumElements() - 1; if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index a311afabce..2033533327 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -3063,10 +3063,10 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, } void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, - hlsl::OP *hlslOp) { + hlsl::OP *hlslOp, + unsigned StatusIndex = DXIL::kResRetStatusIndex) { if (status && !isa(status)) { - Value *statusVal = - Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex); + Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex); Value *checkAccessOp = hlslOp->GetI32Const( static_cast(DXIL::OpCode::CheckAccessFullyMapped)); Function *checkAccessFn = hlslOp->GetOpFunc( @@ -4304,18 +4304,22 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Function *F = OP->GetOpFunc(opcode, EltTy); Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); 
+ unsigned StatusIndex; // Extract elements from returned ResRet. // Native vector loads just have one vector element in the ResRet. // Others have up to four scalars that need to be individually extracted. - if (opcode == OP::OpCode::RawBufferVectorLoad) + if (opcode == OP::OpCode::RawBufferVectorLoad) { Elts[i++] = Builder.CreateExtractValue(Ld, 0); - else + StatusIndex = DXIL::kVecResRetStatusIndex; + } else { for (unsigned j = 0; j < chunkSize; j++, i++) Elts[i] = Builder.CreateExtractValue(Ld, j); + StatusIndex = DXIL::kResRetStatusIndex; + } // Update status. - UpdateStatus(Ld, helper.status, Builder, OP); + UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex); if (!FirstLd) FirstLd = Ld; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl index 5305ee495b..b2315085d0 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -26,7 +26,7 @@ AppendStructuredBuffer > ApStBuf : register(u5); // CHECK-LABEL: define void @main [shader("vertex")] -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -45,36 +45,65 @@ void main(uint ix[2] : IX) { // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt1 = RwByBuf.Load< vector >(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, 
zeroinitializer + uint status1; + vector babElt3 = RwByBuf.Load< vector >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt2 = RoByBuf.Load< vector >(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + uint status2; + vector babElt4 = RoByBuf.Load< vector >(ix[1], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2 + babElt3 + babElt4 + status1 + status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt2 = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, 
%dx.types.Handle [[ANHDLRWST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt5 = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt4 = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt6 = RoStBuf.Load(ix[2], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6 + status1 + status2; // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] From 55d633caf7d1b3087edafc75cd0961858e64e266 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 16 Jun 2025 16:33:14 -0600 Subject: [PATCH 3/6] correct uint typo I guess only MSVC doesn't have uint alias? Unintended anyway. 
--- lib/DxilValidation/DxilValidation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 77b348404f..9587897e22 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1574,7 +1574,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, } else { Value *V = EVI->getOperand(0); StructType *StrTy = dyn_cast<StructType>(V->getType()); - uint ExtractIndex = EVI->getIndices()[0]; + unsigned ExtractIndex = EVI->getIndices()[0]; // Ensure parameter is a single value that is extracted from the correct // ResRet struct location. bool IsLegal = EVI->getNumIndices() == 1 && From ad0a00eb39c6b0accf42e506f06245f647966fa9 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 23 Jun 2025 16:26:57 -0600 Subject: [PATCH 4/6] add checks for separate status stores These might have confused the checks for multi stores where relevant. By adding the checks, the number of stores must be as expected and there will be no missed failures. 
--- .../CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index 65d1010046..88e3242bd2 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -208,6 +208,8 @@ void main(uint ix[3] : IX) { // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2, babElt3, babElt4)); RwByBuf.Store< uint > (100, status1 + status2); @@ -282,6 +284,8 @@ void main(uint ix[3] : IX) { // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4, stbElt5, stbElt6); RwByBuf.Store< uint > (200, status1 + status2); From fc4002f676174264d4fed17a8b9776861a9003a3 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 24 Jun 2025 16:07:53 
-0600 Subject: [PATCH 5/6] Add direct testing of status values to ensure they are getting RAUWed Changed the usage to logical && to skip the immediate conversion usage which wasn't that distinct. --- .../intrinsics/buffer-agg-load-stores.hlsl | 55 +++---------------- .../intrinsics/buffer-load-stores-sm69.hlsl | 20 +++++-- .../hlsl/intrinsics/buffer-load-stores.hlsl | 32 +++++++---- 3 files changed, 44 insertions(+), 63 deletions(-) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index 88e3242bd2..5b4d72ecc0 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -1,45 +1,4 @@ // RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI - -// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI - -// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI - -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 
-DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s - -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI - -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI - -// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=double -DCOLS=2 %s | FileCheck %s - -// RUN: %dxc -T vs_6_6 -DATY=OffVector -DETY=float -DCOLS=4 %s | FileCheck %s --check-prefixes=CHECK,OFF -// RUN: %dxc -T vs_6_6 -DATY=OffVector -DETY=bool -DCOLS=4 %s | FileCheck %s --check-prefixes=CHECK,OFF -// RUN: 
%dxc -T vs_6_6 -DATY=OffVector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s --check-prefixes=CHECK,OFF -// RUN: %dxc -T vs_6_6 -DATY=OffVector -DETY=double -DCOLS=2 %s | FileCheck %s --check-prefixes=CHECK,OFF /////////////////////////////////////////////////////////////////////// // Test codegen for various load and store operations and conversions @@ -169,7 +128,7 @@ void main(uint ix[3] : IX) { // MULTI: [[IX1p8:%.*]] = add i32 [[RIX1]], [[p8]] // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p8]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 @@ -191,7 +150,7 @@ void main(uint ix[3] : IX) { // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p4]] // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p8]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -208,10 +167,11 @@ void main(uint ix[3] : IX) { // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2, babElt3, babElt4)); - RwByBuf.Store< uint > (100, status1 + status2); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -238,7 +198,7 @@ void main(uint ix[3] : IX) { // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p4]] // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p8]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -268,7 +228,7 @@ void main(uint ix[3] : IX) { // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p4]] // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p8]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -284,10 +244,11 @@ void main(uint ix[3] : IX) { // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] // MULTI: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4, stbElt5, stbElt6); - RwByBuf.Store< uint > (200, status1 + status2); + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl index b2315085d0..f71b29e83e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -48,7 +48,7 @@ void main(uint ix[3] : IX) { // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer uint status1; vector babElt3 = RwByBuf.Load< vector >(ix[1], status1); @@ -60,14 +60,18 @@ void main(uint ix[3] : IX) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] // CHECK: [[STATUS:%.*]] = 
extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer uint status2; vector babElt4 = RoByBuf.Load< vector >(ix[1], status2); // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< vector >(ix[0], babElt1 + babElt2 + babElt3 + babElt4 + status1 + status2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -82,7 +86,7 @@ void main(uint ix[3] : IX) { // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt5 = RwStBuf.Load(ix[2], status1); @@ -97,13 +101,17 @@ void main(uint ix[3] : IX) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 - 
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt6 = RoStBuf.Load(ix[2], status2); // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6 + status1 + status2; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index e60a568ab3..896f442c2c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -89,7 +89,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -99,7 +99,7 @@ void 
main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -112,7 +112,11 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2 + babElt3 + babElt4 + status1 + status2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -147,7 +151,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX20]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -156,7 +160,7 @@ 
void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX20]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -168,7 +172,11 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6 + status1 + status2; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -262,7 +270,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX20]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double 
@dx.op.makeDouble.f64(i32 101 // I64: zext i32 %{{.*}} to i64 @@ -281,7 +289,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX20]] // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 - // CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 // I64: zext i32 %{{.*}} to i64 @@ -310,8 +318,12 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 - // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4 + typElt5 + typElt6 + status1 + status2; + // CHECK: call void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 300 + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4 + typElt5 + typElt6; + RwByBuf.Store< uint > (300, status1 && status2); // Texture Tests // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] From 9611fc7846bd13df7c2ea43e04ed3b9e5ad0d1d7 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 25 Jun 2025 10:07:21 -0600 Subject: [PATCH 6/6] restore mistakenly removed run lines --- .../intrinsics/buffer-agg-load-stores.hlsl | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index 5b4d72ecc0..572734d679 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -1,4 +1,45 @@ // RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI + +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI + +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI + +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s + +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool 
-DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI + +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI + +// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=double -DCOLS=2 %s | FileCheck %s + +// RUN: %dxc -T vs_6_6 -DATY=OffVector -DETY=float -DCOLS=4 %s | FileCheck %s --check-prefixes=CHECK,OFF +// RUN: %dxc -T vs_6_6 -DATY=OffVector -DETY=bool -DCOLS=4 %s | FileCheck %s --check-prefixes=CHECK,OFF +// RUN: %dxc -T vs_6_6 -DATY=OffVector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s --check-prefixes=CHECK,OFF +// RUN: %dxc -T vs_6_6 -DATY=OffVector -DETY=double -DCOLS=2 %s | FileCheck %s --check-prefixes=CHECK,OFF /////////////////////////////////////////////////////////////////////// // Test codegen for various load and store operations and conversions