From f6b133776ac978259cb7286c38bc32b2535d919b Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Sun, 4 Dec 2022 21:12:35 +0300 Subject: [PATCH 01/10] vulkan: update headers to 1.3.235 --- include/vulkan/vulkan_core.h | 389 ++++++++++++++++++++++++++++++++++- 1 file changed, 387 insertions(+), 2 deletions(-) diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h index da5c99dd..b9c5e254 100644 --- a/include/vulkan/vulkan_core.h +++ b/include/vulkan/vulkan_core.h @@ -72,7 +72,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 230 +#define VK_HEADER_VERSION 235 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -912,6 +912,19 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMPORT_METAL_SHARED_EVENT_INFO_EXT = 1000311011, VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV = 1000314008, VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV = 1000314009, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT = 1000316000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_DENSITY_MAP_PROPERTIES_EXT = 1000316001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT = 1000316002, + VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT = 1000316003, + VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT = 1000316004, + VK_STRUCTURE_TYPE_BUFFER_CAPTURE_DESCRIPTOR_DATA_INFO_EXT = 1000316005, + VK_STRUCTURE_TYPE_IMAGE_CAPTURE_DESCRIPTOR_DATA_INFO_EXT = 1000316006, + VK_STRUCTURE_TYPE_IMAGE_VIEW_CAPTURE_DESCRIPTOR_DATA_INFO_EXT = 1000316007, + VK_STRUCTURE_TYPE_SAMPLER_CAPTURE_DESCRIPTOR_DATA_INFO_EXT = 1000316008, + VK_STRUCTURE_TYPE_OPAQUE_CAPTURE_DESCRIPTOR_DATA_CREATE_INFO_EXT = 1000316010, + VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT = 1000316011, + 
VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_PUSH_DESCRIPTOR_BUFFER_HANDLE_EXT = 1000316012, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CAPTURE_DESCRIPTOR_DATA_INFO_EXT = 1000316009, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT = 1000320000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT = 1000320001, VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT = 1000320002, @@ -1011,6 +1024,10 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_FEATURES_QCOM = 1000425000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_PROPERTIES_QCOM = 1000425001, VK_STRUCTURE_TYPE_SUBPASS_FRAGMENT_DENSITY_MAP_OFFSET_END_INFO_QCOM = 1000425002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COPY_MEMORY_INDIRECT_FEATURES_NV = 1000426000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COPY_MEMORY_INDIRECT_PROPERTIES_NV = 1000426001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_DECOMPRESSION_FEATURES_NV = 1000427000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_DECOMPRESSION_PROPERTIES_NV = 1000427001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINEAR_COLOR_ATTACHMENT_FEATURES_NV = 1000430000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_COMPRESSION_CONTROL_SWAPCHAIN_FEATURES_EXT = 1000437000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_PROCESSING_FEATURES_QCOM = 1000440000, @@ -1040,8 +1057,12 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_TILE_PROPERTIES_QCOM = 1000484001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_AMIGO_PROFILING_FEATURES_SEC = 1000485000, VK_STRUCTURE_TYPE_AMIGO_PROFILING_SUBMIT_INFO_SEC = 1000485001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_FEATURES_NV = 1000490000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_PROPERTIES_NV = 1000490001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT = 1000351000, VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT = 1000351002, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_BUILTINS_FEATURES_ARM = 1000497000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_BUILTINS_PROPERTIES_ARM = 1000497001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES, VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, @@ -2222,6 +2243,7 @@ typedef enum VkImageCreateFlagBits { VK_IMAGE_CREATE_CORNER_SAMPLED_BIT_NV = 0x00002000, VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000, VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT = 0x00004000, + VK_IMAGE_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00010000, VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT = 0x00040000, VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT = 0x00020000, VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_QCOM = 0x00008000, @@ -2441,6 +2463,7 @@ typedef enum VkBufferCreateFlagBits { VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, VK_BUFFER_CREATE_PROTECTED_BIT = 0x00000008, VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT = 0x00000010, + VK_BUFFER_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00000020, VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT = VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT, VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR = VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT, VK_BUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -2476,6 +2499,9 @@ typedef enum VkBufferUsageFlagBits { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_BUFFER_USAGE_VIDEO_ENCODE_SRC_BIT_KHR = 0x00010000, #endif + VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT = 0x00200000, + VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT = 0x00400000, + VK_BUFFER_USAGE_PUSH_DESCRIPTORS_DESCRIPTOR_BUFFER_BIT_EXT = 0x04000000, 
VK_BUFFER_USAGE_MICROMAP_BUILD_INPUT_READ_ONLY_BIT_EXT = 0x00800000, VK_BUFFER_USAGE_MICROMAP_STORAGE_BIT_EXT = 0x01000000, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV = VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR, @@ -2488,6 +2514,7 @@ typedef VkFlags VkBufferViewCreateFlags; typedef enum VkImageViewCreateFlagBits { VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DYNAMIC_BIT_EXT = 0x00000001, + VK_IMAGE_VIEW_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00000004, VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DEFERRED_BIT_EXT = 0x00000002, VK_IMAGE_VIEW_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkImageViewCreateFlagBits; @@ -2532,6 +2559,7 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR = 0x00000080, VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV = 0x00040000, VK_PIPELINE_CREATE_LIBRARY_BIT_KHR = 0x00000800, + VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT = 0x20000000, VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT = 0x00800000, VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT = 0x00000400, VK_PIPELINE_CREATE_RAY_TRACING_ALLOW_MOTION_BIT_NV = 0x00100000, @@ -2631,6 +2659,7 @@ typedef VkFlags VkShaderStageFlags; typedef enum VkSamplerCreateFlagBits { VK_SAMPLER_CREATE_SUBSAMPLED_BIT_EXT = 0x00000001, VK_SAMPLER_CREATE_SUBSAMPLED_COARSE_RECONSTRUCTION_BIT_EXT = 0x00000002, + VK_SAMPLER_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00000008, VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT = 0x00000004, VK_SAMPLER_CREATE_IMAGE_PROCESSING_BIT_QCOM = 0x00000010, VK_SAMPLER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -2651,6 +2680,8 @@ typedef VkFlags VkDescriptorPoolResetFlags; typedef enum VkDescriptorSetLayoutCreateFlagBits { VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT = 0x00000002, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR = 0x00000001, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT = 0x00000010, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT = 
0x00000020, VK_DESCRIPTOR_SET_LAYOUT_CREATE_HOST_ONLY_POOL_BIT_EXT = 0x00000004, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT, VK_DESCRIPTOR_SET_LAYOUT_CREATE_HOST_ONLY_POOL_BIT_VALVE = VK_DESCRIPTOR_SET_LAYOUT_CREATE_HOST_ONLY_POOL_BIT_EXT, @@ -5703,6 +5734,7 @@ typedef enum VkDriverId { VK_DRIVER_ID_SAMSUNG_PROPRIETARY = 21, VK_DRIVER_ID_MESA_VENUS = 22, VK_DRIVER_ID_MESA_DOZEN = 23, + VK_DRIVER_ID_MESA_NVK = 24, VK_DRIVER_ID_AMD_PROPRIETARY_KHR = VK_DRIVER_ID_AMD_PROPRIETARY, VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR = VK_DRIVER_ID_AMD_OPEN_SOURCE, VK_DRIVER_ID_MESA_RADV_KHR = VK_DRIVER_ID_MESA_RADV, @@ -6613,6 +6645,7 @@ static const VkAccessFlagBits2 VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_NV = static const VkAccessFlagBits2 VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_NV = 0x00400000ULL; static const VkAccessFlagBits2 VK_ACCESS_2_FRAGMENT_DENSITY_MAP_READ_BIT_EXT = 0x01000000ULL; static const VkAccessFlagBits2 VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT = 0x00080000ULL; +static const VkAccessFlagBits2 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT = 0x20000000000ULL; static const VkAccessFlagBits2 VK_ACCESS_2_INVOCATION_MASK_READ_BIT_HUAWEI = 0x8000000000ULL; static const VkAccessFlagBits2 VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR = 0x10000000000ULL; static const VkAccessFlagBits2 VK_ACCESS_2_MICROMAP_READ_BIT_EXT = 0x100000000000ULL; @@ -13546,6 +13579,218 @@ typedef struct VkDeviceDiagnosticsConfigCreateInfoNV { #define VK_QCOM_RENDER_PASS_STORE_OPS_EXTENSION_NAME "VK_QCOM_render_pass_store_ops" +#define VK_EXT_descriptor_buffer 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureKHR) +#define VK_EXT_DESCRIPTOR_BUFFER_SPEC_VERSION 1 +#define VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME "VK_EXT_descriptor_buffer" +typedef struct VkPhysicalDeviceDescriptorBufferPropertiesEXT { + VkStructureType sType; + void* pNext; + VkBool32 combinedImageSamplerDescriptorSingleArray; + 
VkBool32 bufferlessPushDescriptors; + VkBool32 allowSamplerImageViewPostSubmitCreation; + VkDeviceSize descriptorBufferOffsetAlignment; + uint32_t maxDescriptorBufferBindings; + uint32_t maxResourceDescriptorBufferBindings; + uint32_t maxSamplerDescriptorBufferBindings; + uint32_t maxEmbeddedImmutableSamplerBindings; + uint32_t maxEmbeddedImmutableSamplers; + size_t bufferCaptureReplayDescriptorDataSize; + size_t imageCaptureReplayDescriptorDataSize; + size_t imageViewCaptureReplayDescriptorDataSize; + size_t samplerCaptureReplayDescriptorDataSize; + size_t accelerationStructureCaptureReplayDescriptorDataSize; + size_t samplerDescriptorSize; + size_t combinedImageSamplerDescriptorSize; + size_t sampledImageDescriptorSize; + size_t storageImageDescriptorSize; + size_t uniformTexelBufferDescriptorSize; + size_t robustUniformTexelBufferDescriptorSize; + size_t storageTexelBufferDescriptorSize; + size_t robustStorageTexelBufferDescriptorSize; + size_t uniformBufferDescriptorSize; + size_t robustUniformBufferDescriptorSize; + size_t storageBufferDescriptorSize; + size_t robustStorageBufferDescriptorSize; + size_t inputAttachmentDescriptorSize; + size_t accelerationStructureDescriptorSize; + VkDeviceSize maxSamplerDescriptorBufferRange; + VkDeviceSize maxResourceDescriptorBufferRange; + VkDeviceSize samplerDescriptorBufferAddressSpaceSize; + VkDeviceSize resourceDescriptorBufferAddressSpaceSize; + VkDeviceSize descriptorBufferAddressSpaceSize; +} VkPhysicalDeviceDescriptorBufferPropertiesEXT; + +typedef struct VkPhysicalDeviceDescriptorBufferDensityMapPropertiesEXT { + VkStructureType sType; + void* pNext; + size_t combinedImageSamplerDensityMapDescriptorSize; +} VkPhysicalDeviceDescriptorBufferDensityMapPropertiesEXT; + +typedef struct VkPhysicalDeviceDescriptorBufferFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 descriptorBuffer; + VkBool32 descriptorBufferCaptureReplay; + VkBool32 descriptorBufferImageLayoutIgnored; + VkBool32 
descriptorBufferPushDescriptors; +} VkPhysicalDeviceDescriptorBufferFeaturesEXT; + +typedef struct VkDescriptorAddressInfoEXT { + VkStructureType sType; + void* pNext; + VkDeviceAddress address; + VkDeviceSize range; + VkFormat format; +} VkDescriptorAddressInfoEXT; + +typedef struct VkDescriptorBufferBindingInfoEXT { + VkStructureType sType; + void* pNext; + VkDeviceAddress address; + VkBufferUsageFlags usage; +} VkDescriptorBufferBindingInfoEXT; + +typedef struct VkDescriptorBufferBindingPushDescriptorBufferHandleEXT { + VkStructureType sType; + void* pNext; + VkBuffer buffer; +} VkDescriptorBufferBindingPushDescriptorBufferHandleEXT; + +typedef union VkDescriptorDataEXT { + const VkSampler* pSampler; + const VkDescriptorImageInfo* pCombinedImageSampler; + const VkDescriptorImageInfo* pInputAttachmentImage; + const VkDescriptorImageInfo* pSampledImage; + const VkDescriptorImageInfo* pStorageImage; + const VkDescriptorAddressInfoEXT* pUniformTexelBuffer; + const VkDescriptorAddressInfoEXT* pStorageTexelBuffer; + const VkDescriptorAddressInfoEXT* pUniformBuffer; + const VkDescriptorAddressInfoEXT* pStorageBuffer; + VkDeviceAddress accelerationStructure; +} VkDescriptorDataEXT; + +typedef struct VkDescriptorGetInfoEXT { + VkStructureType sType; + const void* pNext; + VkDescriptorType type; + VkDescriptorDataEXT data; +} VkDescriptorGetInfoEXT; + +typedef struct VkBufferCaptureDescriptorDataInfoEXT { + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferCaptureDescriptorDataInfoEXT; + +typedef struct VkImageCaptureDescriptorDataInfoEXT { + VkStructureType sType; + const void* pNext; + VkImage image; +} VkImageCaptureDescriptorDataInfoEXT; + +typedef struct VkImageViewCaptureDescriptorDataInfoEXT { + VkStructureType sType; + const void* pNext; + VkImageView imageView; +} VkImageViewCaptureDescriptorDataInfoEXT; + +typedef struct VkSamplerCaptureDescriptorDataInfoEXT { + VkStructureType sType; + const void* pNext; + VkSampler sampler; +} 
VkSamplerCaptureDescriptorDataInfoEXT; + +typedef struct VkOpaqueCaptureDescriptorDataCreateInfoEXT { + VkStructureType sType; + const void* pNext; + const void* opaqueCaptureDescriptorData; +} VkOpaqueCaptureDescriptorDataCreateInfoEXT; + +typedef struct VkAccelerationStructureCaptureDescriptorDataInfoEXT { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureKHR accelerationStructure; + VkAccelerationStructureNV accelerationStructureNV; +} VkAccelerationStructureCaptureDescriptorDataInfoEXT; + +typedef void (VKAPI_PTR *PFN_vkGetDescriptorSetLayoutSizeEXT)(VkDevice device, VkDescriptorSetLayout layout, VkDeviceSize* pLayoutSizeInBytes); +typedef void (VKAPI_PTR *PFN_vkGetDescriptorSetLayoutBindingOffsetEXT)(VkDevice device, VkDescriptorSetLayout layout, uint32_t binding, VkDeviceSize* pOffset); +typedef void (VKAPI_PTR *PFN_vkGetDescriptorEXT)(VkDevice device, const VkDescriptorGetInfoEXT* pDescriptorInfo, size_t dataSize, void* pDescriptor); +typedef void (VKAPI_PTR *PFN_vkCmdBindDescriptorBuffersEXT)(VkCommandBuffer commandBuffer, uint32_t bufferCount, const VkDescriptorBufferBindingInfoEXT* pBindingInfos); +typedef void (VKAPI_PTR *PFN_vkCmdSetDescriptorBufferOffsetsEXT)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const uint32_t* pBufferIndices, const VkDeviceSize* pOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdBindDescriptorBufferEmbeddedSamplersEXT)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set); +typedef VkResult (VKAPI_PTR *PFN_vkGetBufferOpaqueCaptureDescriptorDataEXT)(VkDevice device, const VkBufferCaptureDescriptorDataInfoEXT* pInfo, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetImageOpaqueCaptureDescriptorDataEXT)(VkDevice device, const VkImageCaptureDescriptorDataInfoEXT* pInfo, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetImageViewOpaqueCaptureDescriptorDataEXT)(VkDevice 
device, const VkImageViewCaptureDescriptorDataInfoEXT* pInfo, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetSamplerOpaqueCaptureDescriptorDataEXT)(VkDevice device, const VkSamplerCaptureDescriptorDataInfoEXT* pInfo, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT)(VkDevice device, const VkAccelerationStructureCaptureDescriptorDataInfoEXT* pInfo, void* pData); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkGetDescriptorSetLayoutSizeEXT( + VkDevice device, + VkDescriptorSetLayout layout, + VkDeviceSize* pLayoutSizeInBytes); + +VKAPI_ATTR void VKAPI_CALL vkGetDescriptorSetLayoutBindingOffsetEXT( + VkDevice device, + VkDescriptorSetLayout layout, + uint32_t binding, + VkDeviceSize* pOffset); + +VKAPI_ATTR void VKAPI_CALL vkGetDescriptorEXT( + VkDevice device, + const VkDescriptorGetInfoEXT* pDescriptorInfo, + size_t dataSize, + void* pDescriptor); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t bufferCount, + const VkDescriptorBufferBindingInfoEXT* pBindingInfos); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDescriptorBufferOffsetsEXT( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t firstSet, + uint32_t setCount, + const uint32_t* pBufferIndices, + const VkDeviceSize* pOffsets); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorBufferEmbeddedSamplersEXT( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t set); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetBufferOpaqueCaptureDescriptorDataEXT( + VkDevice device, + const VkBufferCaptureDescriptorDataInfoEXT* pInfo, + void* pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetImageOpaqueCaptureDescriptorDataEXT( + VkDevice device, + const VkImageCaptureDescriptorDataInfoEXT* pInfo, + void* pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetImageViewOpaqueCaptureDescriptorDataEXT( + VkDevice 
device, + const VkImageViewCaptureDescriptorDataInfoEXT* pInfo, + void* pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetSamplerOpaqueCaptureDescriptorDataEXT( + VkDevice device, + const VkSamplerCaptureDescriptorDataInfoEXT* pInfo, + void* pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT( + VkDevice device, + const VkAccelerationStructureCaptureDescriptorDataInfoEXT* pInfo, + void* pData); +#endif + + #define VK_EXT_graphics_pipeline_library 1 #define VK_EXT_GRAPHICS_PIPELINE_LIBRARY_SPEC_VERSION 1 #define VK_EXT_GRAPHICS_PIPELINE_LIBRARY_EXTENSION_NAME "VK_EXT_graphics_pipeline_library" @@ -14857,6 +15102,104 @@ typedef struct VkSubpassFragmentDensityMapOffsetEndInfoQCOM { +#define VK_NV_copy_memory_indirect 1 +#define VK_NV_COPY_MEMORY_INDIRECT_SPEC_VERSION 1 +#define VK_NV_COPY_MEMORY_INDIRECT_EXTENSION_NAME "VK_NV_copy_memory_indirect" +typedef struct VkCopyMemoryIndirectCommandNV { + VkDeviceAddress srcAddress; + VkDeviceAddress dstAddress; + VkDeviceSize size; +} VkCopyMemoryIndirectCommandNV; + +typedef struct VkCopyMemoryToImageIndirectCommandNV { + VkDeviceAddress srcAddress; + uint32_t bufferRowLength; + uint32_t bufferImageHeight; + VkImageSubresourceLayers imageSubresource; + VkOffset3D imageOffset; + VkExtent3D imageExtent; +} VkCopyMemoryToImageIndirectCommandNV; + +typedef struct VkPhysicalDeviceCopyMemoryIndirectFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 indirectCopy; +} VkPhysicalDeviceCopyMemoryIndirectFeaturesNV; + +typedef struct VkPhysicalDeviceCopyMemoryIndirectPropertiesNV { + VkStructureType sType; + void* pNext; + VkQueueFlags supportedQueues; +} VkPhysicalDeviceCopyMemoryIndirectPropertiesNV; + +typedef void (VKAPI_PTR *PFN_vkCmdCopyMemoryIndirectNV)(VkCommandBuffer commandBuffer, VkDeviceAddress copyBufferAddress, uint32_t copyCount, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdCopyMemoryToImageIndirectNV)(VkCommandBuffer commandBuffer, VkDeviceAddress 
copyBufferAddress, uint32_t copyCount, uint32_t stride, VkImage dstImage, VkImageLayout dstImageLayout, const VkImageSubresourceLayers* pImageSubresources); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdCopyMemoryIndirectNV( + VkCommandBuffer commandBuffer, + VkDeviceAddress copyBufferAddress, + uint32_t copyCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyMemoryToImageIndirectNV( + VkCommandBuffer commandBuffer, + VkDeviceAddress copyBufferAddress, + uint32_t copyCount, + uint32_t stride, + VkImage dstImage, + VkImageLayout dstImageLayout, + const VkImageSubresourceLayers* pImageSubresources); +#endif + + +#define VK_NV_memory_decompression 1 +#define VK_NV_MEMORY_DECOMPRESSION_SPEC_VERSION 1 +#define VK_NV_MEMORY_DECOMPRESSION_EXTENSION_NAME "VK_NV_memory_decompression" + +// Flag bits for VkMemoryDecompressionMethodFlagBitsNV +typedef VkFlags64 VkMemoryDecompressionMethodFlagBitsNV; +static const VkMemoryDecompressionMethodFlagBitsNV VK_MEMORY_DECOMPRESSION_METHOD_GDEFLATE_1_0_BIT_NV = 0x00000001ULL; + +typedef VkFlags64 VkMemoryDecompressionMethodFlagsNV; +typedef struct VkDecompressMemoryRegionNV { + VkDeviceAddress srcAddress; + VkDeviceAddress dstAddress; + VkDeviceSize compressedSize; + VkDeviceSize decompressedSize; + VkMemoryDecompressionMethodFlagsNV decompressionMethod; +} VkDecompressMemoryRegionNV; + +typedef struct VkPhysicalDeviceMemoryDecompressionFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 memoryDecompression; +} VkPhysicalDeviceMemoryDecompressionFeaturesNV; + +typedef struct VkPhysicalDeviceMemoryDecompressionPropertiesNV { + VkStructureType sType; + void* pNext; + VkMemoryDecompressionMethodFlagsNV decompressionMethods; + uint64_t maxDecompressionIndirectCount; +} VkPhysicalDeviceMemoryDecompressionPropertiesNV; + +typedef void (VKAPI_PTR *PFN_vkCmdDecompressMemoryNV)(VkCommandBuffer commandBuffer, uint32_t decompressRegionCount, const VkDecompressMemoryRegionNV* pDecompressMemoryRegions); 
+typedef void (VKAPI_PTR *PFN_vkCmdDecompressMemoryIndirectCountNV)(VkCommandBuffer commandBuffer, VkDeviceAddress indirectCommandsAddress, VkDeviceAddress indirectCommandsCountAddress, uint32_t stride); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdDecompressMemoryNV( + VkCommandBuffer commandBuffer, + uint32_t decompressRegionCount, + const VkDecompressMemoryRegionNV* pDecompressMemoryRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdDecompressMemoryIndirectCountNV( + VkCommandBuffer commandBuffer, + VkDeviceAddress indirectCommandsAddress, + VkDeviceAddress indirectCommandsCountAddress, + uint32_t stride); +#endif + + #define VK_NV_linear_color_attachment 1 #define VK_NV_LINEAR_COLOR_ATTACHMENT_SPEC_VERSION 1 #define VK_NV_LINEAR_COLOR_ATTACHMENT_EXTENSION_NAME "VK_NV_linear_color_attachment" @@ -15493,13 +15836,54 @@ typedef struct VkAmigoProfilingSubmitInfoSEC { +#define VK_NV_ray_tracing_invocation_reorder 1 +#define VK_NV_RAY_TRACING_INVOCATION_REORDER_SPEC_VERSION 1 +#define VK_NV_RAY_TRACING_INVOCATION_REORDER_EXTENSION_NAME "VK_NV_ray_tracing_invocation_reorder" + +typedef enum VkRayTracingInvocationReorderModeNV { + VK_RAY_TRACING_INVOCATION_REORDER_MODE_NONE_NV = 0, + VK_RAY_TRACING_INVOCATION_REORDER_MODE_REORDER_NV = 1, + VK_RAY_TRACING_INVOCATION_REORDER_MODE_MAX_ENUM_NV = 0x7FFFFFFF +} VkRayTracingInvocationReorderModeNV; +typedef struct VkPhysicalDeviceRayTracingInvocationReorderPropertiesNV { + VkStructureType sType; + void* pNext; + VkRayTracingInvocationReorderModeNV rayTracingInvocationReorderReorderingHint; +} VkPhysicalDeviceRayTracingInvocationReorderPropertiesNV; + +typedef struct VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 rayTracingInvocationReorder; +} VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV; + + + #define VK_EXT_mutable_descriptor_type 1 #define VK_EXT_MUTABLE_DESCRIPTOR_TYPE_SPEC_VERSION 1 #define VK_EXT_MUTABLE_DESCRIPTOR_TYPE_EXTENSION_NAME 
"VK_EXT_mutable_descriptor_type" +#define VK_ARM_shader_core_builtins 1 +#define VK_ARM_SHADER_CORE_BUILTINS_SPEC_VERSION 2 +#define VK_ARM_SHADER_CORE_BUILTINS_EXTENSION_NAME "VK_ARM_shader_core_builtins" +typedef struct VkPhysicalDeviceShaderCoreBuiltinsFeaturesARM { + VkStructureType sType; + void* pNext; + VkBool32 shaderCoreBuiltins; +} VkPhysicalDeviceShaderCoreBuiltinsFeaturesARM; + +typedef struct VkPhysicalDeviceShaderCoreBuiltinsPropertiesARM { + VkStructureType sType; + void* pNext; + uint64_t shaderCoreMask; + uint32_t shaderCoreCount; + uint32_t shaderWarpsPerCore; +} VkPhysicalDeviceShaderCoreBuiltinsPropertiesARM; + + + #define VK_KHR_acceleration_structure 1 -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureKHR) #define VK_KHR_ACCELERATION_STRUCTURE_SPEC_VERSION 13 #define VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME "VK_KHR_acceleration_structure" @@ -15511,6 +15895,7 @@ typedef enum VkBuildAccelerationStructureModeKHR { typedef enum VkAccelerationStructureCreateFlagBitsKHR { VK_ACCELERATION_STRUCTURE_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR = 0x00000001, + VK_ACCELERATION_STRUCTURE_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00000008, VK_ACCELERATION_STRUCTURE_CREATE_MOTION_BIT_NV = 0x00000004, VK_ACCELERATION_STRUCTURE_CREATE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkAccelerationStructureCreateFlagBitsKHR; From 8e290ec9956b89d8657bc53c928ba2215cd135fc Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Fri, 8 Sep 2023 19:32:17 +0300 Subject: [PATCH 02/10] amdilc: fix structured uav/srv types to outer resource type --- src/amdilc/amdilc_compiler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index 812d19a7..181866a7 100644 --- a/src/amdilc/amdilc_compiler.c +++ b/src/amdilc/amdilc_compiler.c @@ -1430,7 +1430,7 @@ static void emitUav( const IlcResource resource = { .resType = RES_TYPE_GENERIC, .id = 
resourceId, - .typeId = arrayId, + .typeId = structId, .texelTypeId = compiler->floatId, .ilId = id, .ilType = IL_USAGE_PIXTEX_UNKNOWN, @@ -1526,7 +1526,7 @@ static void emitSrv( const IlcResource resource = { .resType = RES_TYPE_GENERIC, .id = resourceId, - .typeId = arrayId, + .typeId = structId, .texelTypeId = compiler->floatId, .ilId = id, .ilType = IL_USAGE_PIXTEX_UNKNOWN, From c9a8e33b662f535d8c88a095b0f4f1ea92f35bf0 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:22:58 +0300 Subject: [PATCH 03/10] amdilc, mantle: rework descriptor sets usage Reworked descriptor sets implementation, so descriptor sets are being used directly and not during command buffer building. Requires VK_EXT_mutable_descriptor_type extension. --- src/amdilc/amdilc.h | 22 +- src/amdilc/amdilc_binding_patcher.c | 42 ++ src/amdilc/amdilc_compiler.c | 341 +++++++++---- src/amdilc/amdilc_spirv.c | 16 + src/amdilc/amdilc_spirv.h | 5 + src/amdilc/meson.build | 1 + src/mantle/mantle_cmd_buf.c | 269 +++++----- src/mantle/mantle_cmd_buf_man.c | 12 - src/mantle/mantle_descriptor_set.c | 173 ++++++- src/mantle/mantle_image_view.c | 1 + src/mantle/mantle_init_device.c | 116 ++++- src/mantle/mantle_internal.h | 5 + src/mantle/mantle_object.h | 38 +- src/mantle/mantle_object_man.c | 25 +- src/mantle/mantle_shader_pipeline.c | 727 ++++++++++++++++------------ src/mantle/util.c | 22 + src/mantle/vulkan_loader.c | 4 + src/mantle/vulkan_loader.h | 4 + 18 files changed, 1216 insertions(+), 607 deletions(-) create mode 100644 src/amdilc/amdilc_binding_patcher.c diff --git a/src/amdilc/amdilc.h b/src/amdilc/amdilc.h index 8fed7302..3b899078 100644 --- a/src/amdilc/amdilc.h +++ b/src/amdilc/amdilc.h @@ -6,11 +6,16 @@ #define VK_NO_PROTOTYPES #include "vulkan/vulkan.h" -#define DESCRIPTOR_SET_ID (0) #define ATOMIC_COUNTER_SET_ID (1) +#define DYNAMIC_MEMORY_VIEW_BINDING_ID (0) +#define DYNAMIC_MEMORY_VIEW_DESCRIPTOR_SET_ID (0) +#define 
DESCRIPTOR_SET_ID (2) #define ILC_MAX_STRIDE_CONSTANTS (8) +#define DESCRIPTOR_CONST_OFFSETS_OFFSET (sizeof(uint32_t) * ILC_MAX_STRIDE_CONSTANTS) +#define DESCRIPTOR_OFFSET_COUNT (32) + typedef enum _IlcBindingType { ILC_BINDING_SAMPLER, ILC_BINDING_RESOURCE, @@ -18,6 +23,9 @@ typedef enum _IlcBindingType { typedef struct _IlcBinding { IlcBindingType type; + uint32_t id; + uint32_t offsetSpecId; + uint32_t descriptorSetIndexSpecId; uint32_t ilIndex; uint32_t vkIndex; // Unique across shader stages VkDescriptorType descriptorType; @@ -39,6 +47,12 @@ typedef struct _IlcShader { char* name; } IlcShader; +typedef struct _IlcBindingPatchEntry { + uint32_t id; + uint32_t bindingIndex; + uint32_t descriptorSetIndex; +} IlcBindingPatchEntry; + IlcShader ilcCompileShader( const void* code, unsigned size); @@ -52,4 +66,10 @@ void ilcDisassembleShader( const void* code, unsigned size); +void patchShaderBindings( + void* code, + uint32_t codeSize, + const IlcBindingPatchEntry* entries, + uint32_t entryCount); + #endif // AMDILC_H_ diff --git a/src/amdilc/amdilc_binding_patcher.c b/src/amdilc/amdilc_binding_patcher.c new file mode 100644 index 00000000..9368127d --- /dev/null +++ b/src/amdilc/amdilc_binding_patcher.c @@ -0,0 +1,42 @@ +#include "amdilc_spirv.h" +#include "amdilc_internal.h" + +const IlcBindingPatchEntry* findEntryById( + IlcSpvId id, + const IlcBindingPatchEntry* entries, + uint32_t entryCount) +{ + for (uint32_t i = 0; i < entryCount; i++) { + if (id == entries[i].id) { + return &entries[i]; + } + } + return NULL; +} + +void patchShaderBindings( + void* code, + uint32_t codeSize, + const IlcBindingPatchEntry* entries, + uint32_t entryCount) +{ + uint32_t wordCount = codeSize / sizeof(uint32_t); + IlcSpvWord* spirvWords = (IlcSpvWord*)code; + for (uint32_t i = 5; i < wordCount;) { + SpvOp opCode = spirvWords[i] & SpvOpCodeMask; + unsigned instrWordCount = spirvWords[i] >> SpvWordCountShift; + + if (opCode == SpvOpDecorate) { + IlcSpvWord id = spirvWords[i + 1]; 
+ IlcSpvWord decoration = spirvWords[i + 2]; + if (decoration == SpvDecorationDescriptorSet || decoration == SpvDecorationBinding) { + const IlcBindingPatchEntry* entry = findEntryById(id, entries, entryCount); + if (entry != NULL) { + spirvWords[i + 3] = (decoration == SpvDecorationDescriptorSet) ? entry->descriptorSetIndex : entry->bindingIndex; + } + } + } + + i += instrWordCount; + } +} diff --git a/src/amdilc/amdilc_compiler.c b/src/amdilc/amdilc_compiler.c index 181866a7..70cec54f 100644 --- a/src/amdilc/amdilc_compiler.c +++ b/src/amdilc/amdilc_compiler.c @@ -48,16 +48,21 @@ typedef struct { typedef struct { IlcResourceType resType; IlcSpvId id; + IlcSpvId interfaceId; IlcSpvId typeId; IlcSpvId texelTypeId; uint32_t ilId; uint8_t ilType; IlcSpvId strideId; + IlcSpvId specOffsetId; + IlcSpvId specDescriptorSlotId; } IlcResource; typedef struct { - IlcSpvId id; + IlcSpvId interfaceId; uint32_t ilId; + IlcSpvId specOffsetId; + IlcSpvId specDescriptorSlotId; } IlcSampler; typedef struct { @@ -333,48 +338,30 @@ static void emitBinding( IlcCompiler* compiler, IlcBindingType bindingType, IlcSpvId bindingId, + IlcSpvId descriptorOffsetId, + IlcSpvId descriptorSetIndexId, IlcSpvWord ilId, VkDescriptorType vkDescriptorType, int strideIndex) { - unsigned vkIndex = 0; - - // We want the Vulkan binding index to be unique across shader stages to use a single - // descriptor set, but the shaders are compiled in advance. Interleave the index based on the - // shader stage so there's no collision. 
- switch (compiler->kernel->shaderType) { - case IL_SHADER_VERTEX: - vkIndex = compiler->bindingCount * 5 + 0; - break; - case IL_SHADER_HULL: - vkIndex = compiler->bindingCount * 5 + 1; - break; - case IL_SHADER_DOMAIN: - vkIndex = compiler->bindingCount * 5 + 2; - break; - case IL_SHADER_GEOMETRY: - vkIndex = compiler->bindingCount * 5 + 3; - break; - case IL_SHADER_PIXEL: - vkIndex = compiler->bindingCount * 5 + 4; - break; - case IL_SHADER_COMPUTE: - vkIndex = compiler->bindingCount; - break; - default: - assert(false); - } - IlcSpvWord set = DESCRIPTOR_SET_ID; ilcSpvPutDecoration(compiler->module, bindingId, SpvDecorationDescriptorSet, 1, &set); - ilcSpvPutDecoration(compiler->module, bindingId, SpvDecorationBinding, 1, &vkIndex); + IlcSpvWord binding = 0; + ilcSpvPutDecoration(compiler->module, bindingId, SpvDecorationBinding, 1, &binding); + IlcSpvWord offsetSpecId = ilId * 2 + ((bindingType == ILC_BINDING_SAMPLER) ? 0 : 32); + ilcSpvPutDecoration(compiler->module, descriptorOffsetId, SpvDecorationSpecId, 1, &offsetSpecId); + IlcSpvWord descriptorSetIndexSpecId = ilId * 2 + 1 + ((bindingType == ILC_BINDING_SAMPLER) ? 
0 : 32); + ilcSpvPutDecoration(compiler->module, descriptorSetIndexId, SpvDecorationSpecId, 1, &descriptorSetIndexSpecId); compiler->bindingCount++; compiler->bindings = realloc(compiler->bindings, compiler->bindingCount * sizeof(IlcBinding)); compiler->bindings[compiler->bindingCount - 1] = (IlcBinding) { .type = bindingType, + .id = bindingId, + .offsetSpecId = offsetSpecId, + .descriptorSetIndexSpecId = descriptorSetIndexSpecId, .ilIndex = ilId, - .vkIndex = vkIndex, + .vkIndex = binding, .descriptorType = vkDescriptorType, .strideIndex = strideIndex, }; @@ -456,6 +443,8 @@ static const IlcResource* findResource( return NULL; } +static const IlcResource* emitPushConstant(IlcCompiler* compiler); + static const IlcResource* addResource( IlcCompiler* compiler, const IlcResource* resource) @@ -465,10 +454,25 @@ static const IlcResource* addResource( assert(false); } + if (resource->resType == RES_TYPE_GENERIC) { + const IlcResource* pcResource = findResource(compiler, RES_TYPE_PUSH_CONSTANTS, 0); + + if (!pcResource) { + // HACK: had to create push constants in here, since other places require storing resource pointer + pcResource = emitPushConstant(compiler); + } + } // TODO use emitName char name[32]; - snprintf(name, sizeof(name), "resource%u.%u", resource->resType, resource->ilId); - ilcSpvPutName(compiler->module, resource->id, name); + if (resource->id) { + snprintf(name, sizeof(name), "resource%u.%u", resource->resType, resource->ilId); + ilcSpvPutName(compiler->module, resource->id, name); + } + + if (resource->interfaceId != 0 && resource->id != resource->interfaceId) { + snprintf(name, sizeof(name), "resource_array%u.%u", resource->resType, resource->ilId); + ilcSpvPutName(compiler->module, resource->interfaceId, name); + } compiler->resourceCount++; compiler->resources = realloc(compiler->resources, @@ -478,6 +482,110 @@ static const IlcResource* addResource( return &compiler->resources[compiler->resourceCount - 1]; } +static const IlcResource* 
emitPushConstant( + IlcCompiler* compiler) +{ + IlcSpvId strideLengthId = ilcSpvPutConstant(compiler->module, compiler->intId, + ILC_MAX_STRIDE_CONSTANTS); + IlcSpvId strideArrayId = ilcSpvPutArrayType(compiler->module, compiler->intId, strideLengthId); + IlcSpvId descriptorCountId = ilcSpvPutConstant(compiler->module, compiler->intId, + DESCRIPTOR_OFFSET_COUNT); + IlcSpvId descriptorOffsetArrayId = ilcSpvPutArrayType(compiler->module, compiler->intId, descriptorCountId); + + IlcSpvId structFields[] = { strideArrayId, descriptorOffsetArrayId }; + IlcSpvId structId = ilcSpvPutStructType(compiler->module, 2, structFields); + IlcSpvId pcId = emitVariable(compiler, structId, SpvStorageClassPushConstant); + + ilcSpvPutDecoration(compiler->module, structId, SpvDecorationBlock, 0, NULL); + + IlcSpvWord strideArrayStride = sizeof(uint32_t); + IlcSpvWord strideMemberOffset = 0; + ilcSpvPutDecoration(compiler->module, strideArrayId, SpvDecorationArrayStride, + 1, &strideArrayStride); + ilcSpvPutMemberDecoration(compiler->module, structId, 0, SpvDecorationOffset, + 1, &strideMemberOffset); + + IlcSpvWord descriptorOffsetArrayStride = sizeof(uint32_t); + IlcSpvWord descriptorOffsetMemberOffset = DESCRIPTOR_CONST_OFFSETS_OFFSET; + ilcSpvPutDecoration(compiler->module, descriptorOffsetArrayId, SpvDecorationArrayStride, + 1, &descriptorOffsetArrayStride); + ilcSpvPutMemberDecoration(compiler->module, structId, 1, SpvDecorationOffset, + 1, &descriptorOffsetMemberOffset); + + const IlcResource pushConstantsResource = { + .resType = RES_TYPE_PUSH_CONSTANTS, + .id = pcId, + .interfaceId = pcId, + .typeId = 0, + .texelTypeId = 0, + .ilId = 0, + .ilType = IL_USAGE_PIXTEX_UNKNOWN, + .strideId = 0, + .specOffsetId = 0, + .specDescriptorSlotId = 0, + }; + + return addResource(compiler, &pushConstantsResource); +} + +static IlcSpvId emitDescriptorAccess( + IlcCompiler* compiler, + IlcSpvId specOffsetId, + IlcSpvId descriptorIndexId) +{ + const IlcResource* pcResource = 
findResource(compiler, RES_TYPE_PUSH_CONSTANTS, 0); + + if (!pcResource) { + LOGE("push constants are not initialized\n"); + assert(false); + // Lazily create push constants resource + pcResource = emitPushConstant(compiler); + } + + IlcSpvId indexesId[] = { + ilcSpvPutConstant(compiler->module, compiler->intId, 1), + descriptorIndexId, + }; + IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassPushConstant, + compiler->intId); + IlcSpvId descriptorSetOffsetPtrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, pcResource->id, 2, indexesId); + IlcSpvId descriptorSetOffsetId = ilcSpvPutLoad(compiler->module, compiler->intId, descriptorSetOffsetPtrId); + + return ilcSpvPutOp2(compiler->module, SpvOpIAdd, compiler->intId, descriptorSetOffsetId, specOffsetId); +} + + +static IlcSpvId emitResourceLoad( + IlcCompiler* compiler, + const IlcResource* resource, + SpvStorageClass storageClass) +{ + assert(resource->resType == RES_TYPE_GENERIC); + assert(!resource->id && resource->interfaceId); + + IlcSpvId pointerId = ilcSpvPutPointerType(compiler->module, storageClass, + resource->typeId); + IlcSpvId descriptorOffsetId = emitDescriptorAccess( + compiler, + resource->specOffsetId, resource->specDescriptorSlotId); + + return ilcSpvPutAccessChain(compiler->module, pointerId, resource->interfaceId, 1, &descriptorOffsetId); +} + +static IlcSpvId emitSamplerLoad( + IlcCompiler* compiler, + const IlcSampler* resource) +{ + IlcSpvId samplerTypeId = ilcSpvPutSamplerType(compiler->module); + IlcSpvId pointerId = ilcSpvPutPointerType(compiler->module, SpvStorageClassUniformConstant, + samplerTypeId); + IlcSpvId descriptorOffsetId = emitDescriptorAccess( + compiler, + resource->specOffsetId, resource->specDescriptorSlotId); + + return ilcSpvPutAccessChain(compiler->module, pointerId, resource->interfaceId, 1, &descriptorOffsetId); +} + static const IlcSampler* findSampler( IlcCompiler* compiler, uint32_t ilId) @@ -502,7 +610,7 @@ static const IlcSampler* 
addSampler( assert(false); } - emitName(compiler, sampler->id, "sampler", sampler->ilId); + emitName(compiler, sampler->interfaceId, "sampler_array", sampler->ilId); compiler->samplerCount++; compiler->samplers = realloc(compiler->samplers, sizeof(IlcSampler) * compiler->samplerCount); @@ -520,17 +628,23 @@ static const IlcSampler* findOrCreateSampler( if (sampler == NULL) { // Create new sampler IlcSpvId samplerTypeId = ilcSpvPutSamplerType(compiler->module); - IlcSpvId pointerId = ilcSpvPutPointerType(compiler->module, SpvStorageClassUniformConstant, - samplerTypeId); - IlcSpvId samplerId = ilcSpvPutVariable(compiler->module, pointerId, - SpvStorageClassUniformConstant); + IlcSpvId samplerArrayTypeId = ilcSpvPutRuntimeArrayType(compiler->module, samplerTypeId, true); + IlcSpvId arrayPointerId = ilcSpvPutPointerType(compiler->module, SpvStorageClassUniformConstant, + samplerArrayTypeId); + IlcSpvId samplerArrayId = ilcSpvPutVariable(compiler->module, arrayPointerId, + SpvStorageClassUniformConstant); + + IlcSpvId specOffsetId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + IlcSpvId specDescriptorSlotId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); - emitBinding(compiler, ILC_BINDING_SAMPLER, samplerId, ilId, VK_DESCRIPTOR_TYPE_SAMPLER, + emitBinding(compiler, ILC_BINDING_SAMPLER, samplerArrayId, specOffsetId, specDescriptorSlotId, ilId, VK_DESCRIPTOR_TYPE_SAMPLER, NO_STRIDE_INDEX); const IlcSampler newSampler = { - .id = samplerId, + .interfaceId = samplerArrayId, .ilId = ilId, + .specOffsetId = specOffsetId, + .specDescriptorSlotId = specDescriptorSlotId, }; sampler = addSampler(compiler, &newSampler); @@ -1304,24 +1418,31 @@ static void emitResource( IlcSpvId imageId = ilcSpvPutImageType(compiler->module, sampledTypeId, spvDim, 0, isArrayed(type), isMultisampled(type), 1, spvImageFormat); - IlcSpvId pImageId = ilcSpvPutPointerType(compiler->module, SpvStorageClassUniformConstant, - imageId); - IlcSpvId resourceId = 
ilcSpvPutVariable(compiler->module, pImageId, + IlcSpvId imageArrayId = ilcSpvPutRuntimeArrayType(compiler->module, imageId, true); + IlcSpvId pImageArrayId = ilcSpvPutPointerType(compiler->module, SpvStorageClassUniformConstant, + imageArrayId); + IlcSpvId arrayResourceId = ilcSpvPutVariable(compiler->module, pImageArrayId, SpvStorageClassUniformConstant); - emitBinding(compiler, ILC_BINDING_RESOURCE, resourceId, id, + IlcSpvId specOffsetId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + IlcSpvId specDescriptorSlotId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + + emitBinding(compiler, ILC_BINDING_RESOURCE, arrayResourceId, specOffsetId, specDescriptorSlotId, id, spvDim == SpvDimBuffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, NO_STRIDE_INDEX); const IlcResource resource = { .resType = RES_TYPE_GENERIC, - .id = resourceId, + .id = 0, + .interfaceId = arrayResourceId, .typeId = imageId, .texelTypeId = texelTypeId, .ilId = id, .ilType = type, .strideId = 0, + .specOffsetId = specOffsetId, + .specDescriptorSlotId = specDescriptorSlotId, }; addResource(compiler, &resource); @@ -1371,25 +1492,33 @@ static void emitTypedUav( IlcSpvId imageId = ilcSpvPutImageType(compiler->module, sampledTypeId, spvDim, 0, isArrayed(type), isMultisampled(type), 2, spvImageFormat); - IlcSpvId pImageId = ilcSpvPutPointerType(compiler->module, SpvStorageClassUniformConstant, - imageId); - IlcSpvId resourceId = ilcSpvPutVariable(compiler->module, pImageId, + IlcSpvId imageArrayId = ilcSpvPutRuntimeArrayType(compiler->module, imageId, true); + IlcSpvId pImageArrayId = ilcSpvPutPointerType(compiler->module, SpvStorageClassUniformConstant, + imageArrayId); + IlcSpvId arrayResourceId = ilcSpvPutVariable(compiler->module, pImageArrayId, SpvStorageClassUniformConstant); + + IlcSpvId specOffsetId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + IlcSpvId specDescriptorSlotId = 
ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + ilcSpvPutName(compiler->module, imageId, "typedUav"); - emitBinding(compiler, ILC_BINDING_RESOURCE, resourceId, id, + emitBinding(compiler, ILC_BINDING_RESOURCE, arrayResourceId, specOffsetId, specDescriptorSlotId, id, spvDim == SpvDimBuffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, NO_STRIDE_INDEX); const IlcResource resource = { .resType = RES_TYPE_GENERIC, - .id = resourceId, + .id = 0, + .interfaceId = arrayResourceId, .typeId = imageId, .texelTypeId = sampledTypeId, .ilId = id, .ilType = type, .strideId = 0, + .specOffsetId = specOffsetId, + .specDescriptorSlotId = specDescriptorSlotId, }; addResource(compiler, &resource); @@ -1404,11 +1533,16 @@ static void emitUav( IlcSpvId arrayId = ilcSpvPutRuntimeArrayType(compiler->module, compiler->floatId, true); IlcSpvId structId = ilcSpvPutStructType(compiler->module, 1, &arrayId); - IlcSpvId pointerId = ilcSpvPutPointerType(compiler->module, SpvStorageClassStorageBuffer, - structId); - IlcSpvId resourceId = ilcSpvPutVariable(compiler->module, pointerId, + IlcSpvId structArrayId = ilcSpvPutRuntimeArrayType(compiler->module, structId, true); + IlcSpvId arrayPointerId = ilcSpvPutPointerType(compiler->module, SpvStorageClassStorageBuffer, + structArrayId); + IlcSpvId arrayResourceId = ilcSpvPutVariable(compiler->module, arrayPointerId, SpvStorageClassStorageBuffer); + + IlcSpvId specOffsetId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + IlcSpvId specDescriptorSlotId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + IlcSpvWord arrayStride = sizeof(float); IlcSpvWord memberOffset = 0; ilcSpvPutDecoration(compiler->module, arrayId, SpvDecorationArrayStride, 1, &arrayStride); @@ -1416,7 +1550,7 @@ static void emitUav( ilcSpvPutMemberDecoration(compiler->module, structId, 0, SpvDecorationOffset, 1, &memberOffset); ilcSpvPutName(compiler->module, arrayId, isStructured ? 
"structUav" : "rawUav"); - emitBinding(compiler, ILC_BINDING_RESOURCE, resourceId, id, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + emitBinding(compiler, ILC_BINDING_RESOURCE, arrayResourceId, specOffsetId, specDescriptorSlotId, id, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, NO_STRIDE_INDEX); IlcSpvId strideId = 0; @@ -1429,12 +1563,15 @@ static void emitUav( const IlcResource resource = { .resType = RES_TYPE_GENERIC, - .id = resourceId, + .id = 0, + .interfaceId = arrayResourceId, .typeId = structId, .texelTypeId = compiler->floatId, .ilId = id, .ilType = IL_USAGE_PIXTEX_UNKNOWN, .strideId = strideId, + .specOffsetId = specOffsetId, + .specDescriptorSlotId = specDescriptorSlotId, }; addResource(compiler, &resource); @@ -1449,20 +1586,24 @@ static void emitSrv( IlcSpvId arrayId = ilcSpvPutRuntimeArrayType(compiler->module, compiler->floatId, true); IlcSpvId structId = ilcSpvPutStructType(compiler->module, 1, &arrayId); - IlcSpvId pointerId = ilcSpvPutPointerType(compiler->module, SpvStorageClassStorageBuffer, - structId); - IlcSpvId resourceId = ilcSpvPutVariable(compiler->module, pointerId, + IlcSpvId structArrayId = ilcSpvPutRuntimeArrayType(compiler->module, structId, true); + IlcSpvId arrayPointerId = ilcSpvPutPointerType(compiler->module, SpvStorageClassStorageBuffer, + structArrayId); + IlcSpvId arrayResourceId = ilcSpvPutVariable(compiler->module, arrayPointerId, SpvStorageClassStorageBuffer); + IlcSpvId specOffsetId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + IlcSpvId specDescriptorSlotId = ilcSpvPutSpecConstant(compiler->module, compiler->intId, 0); + IlcSpvWord arrayStride = sizeof(float); IlcSpvWord memberOffset = 0; ilcSpvPutDecoration(compiler->module, arrayId, SpvDecorationArrayStride, 1, &arrayStride); ilcSpvPutDecoration(compiler->module, structId, SpvDecorationBlock, 0, NULL); ilcSpvPutMemberDecoration(compiler->module, structId, 0, SpvDecorationOffset, 1, &memberOffset); - ilcSpvPutDecoration(compiler->module, resourceId, 
SpvDecorationNonWritable, 0, NULL); + ilcSpvPutDecoration(compiler->module, arrayResourceId, SpvDecorationNonWritable, 0, NULL); ilcSpvPutName(compiler->module, arrayId, isStructured ? "structSrv" : "rawSrv"); - emitBinding(compiler, ILC_BINDING_RESOURCE, resourceId, id, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + emitBinding(compiler, ILC_BINDING_RESOURCE, arrayResourceId, specOffsetId, specDescriptorSlotId, id, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, isStructured ? NO_STRIDE_INDEX : compiler->currentStrideIndex); IlcSpvId strideId = 0; @@ -1483,31 +1624,7 @@ static void emitSrv( if (pcResource == NULL) { // Lazily create push constants resource - IlcSpvId lengthId = ilcSpvPutConstant(compiler->module, compiler->intId, - ILC_MAX_STRIDE_CONSTANTS); - IlcSpvId arrayId = ilcSpvPutArrayType(compiler->module, compiler->intId, lengthId); - IlcSpvId structId = ilcSpvPutStructType(compiler->module, 1, &arrayId); - IlcSpvId pcId = emitVariable(compiler, structId, SpvStorageClassPushConstant); - - IlcSpvWord arrayStride = sizeof(uint32_t); - IlcSpvWord memberOffset = 0; - ilcSpvPutDecoration(compiler->module, arrayId, SpvDecorationArrayStride, - 1, &arrayStride); - ilcSpvPutDecoration(compiler->module, structId, SpvDecorationBlock, 0, NULL); - ilcSpvPutMemberDecoration(compiler->module, structId, 0, SpvDecorationOffset, - 1, &memberOffset); - - const IlcResource pushConstantsResource = { - .resType = RES_TYPE_PUSH_CONSTANTS, - .id = pcId, - .typeId = 0, - .texelTypeId = 0, - .ilId = 0, - .ilType = IL_USAGE_PIXTEX_UNKNOWN, - .strideId = 0, - }; - - pcResource = addResource(compiler, &pushConstantsResource); + pcResource = emitPushConstant(compiler); } IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassPushConstant, @@ -1525,12 +1642,15 @@ static void emitSrv( const IlcResource resource = { .resType = RES_TYPE_GENERIC, - .id = resourceId, + .id = 0, + .interfaceId = arrayResourceId, .typeId = structId, .texelTypeId = compiler->floatId, .ilId = id, .ilType = 
IL_USAGE_PIXTEX_UNKNOWN, .strideId = strideId, + .specOffsetId = specOffsetId, + .specDescriptorSlotId = specDescriptorSlotId, }; addResource(compiler, &resource); @@ -1555,11 +1675,14 @@ static void emitLds( const IlcResource resource = { .resType = RES_TYPE_LDS, .id = resourceId, + .interfaceId = resourceId, .typeId = arrayId, .texelTypeId = compiler->uintId, .ilId = id, .ilType = IL_USAGE_PIXTEX_UNKNOWN, .strideId = isStructured ? ilcSpvPutConstant(compiler->module, compiler->intId, stride) : 0, + .specOffsetId = 0, + .specDescriptorSlotId = 0, }; addResource(compiler, &resource); @@ -2503,7 +2626,7 @@ static void emitLoad( operandIdCount++; } - IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); + IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, emitResourceLoad(compiler, resource, SpvStorageClassUniformConstant)); IlcSpvId fetchId = ilcSpvPutImageFetch(compiler->module, resource->texelTypeId, resourceId, srcId, operandsMask, operandIdCount, operandIds); storeDestination(compiler, dst, fetchId, resource->texelTypeId); @@ -2528,7 +2651,7 @@ static void emitResinfo( IlcSpvId vecTypeId = dimCount == 1 ? 
compiler->intId : ilcSpvPutVectorType(compiler->module, compiler->intId, dimCount); - IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); + IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, emitResourceLoad(compiler, resource, SpvStorageClassUniformConstant)); IlcSpvId srcId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId lodId = emitVectorTrim(compiler, srcId, compiler->int4Id, COMP_INDEX_X, 1); ilcSpvPutCapability(compiler->module, SpvCapabilityImageQuery); @@ -2646,9 +2769,9 @@ static void emitSample( operandIdCount++; } - IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); + IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, emitResourceLoad(compiler, resource, SpvStorageClassUniformConstant)); IlcSpvId samplerTypeId = ilcSpvPutSamplerType(compiler->module); - IlcSpvId samplerId = ilcSpvPutLoad(compiler->module, samplerTypeId, sampler->id); + IlcSpvId samplerId = ilcSpvPutLoad(compiler->module, samplerTypeId, emitSamplerLoad(compiler, sampler)); IlcSpvId sampledImageTypeId = ilcSpvPutSampledImageType(compiler->module, resource->typeId); IlcSpvId sampledImageId = ilcSpvPutSampledImage(compiler->module, sampledImageTypeId, resourceId, samplerId); @@ -2736,9 +2859,9 @@ static void emitFetch4( operandIdCount++; } - IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); + IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, emitResourceLoad(compiler, resource, SpvStorageClassUniformConstant)); IlcSpvId samplerTypeId = ilcSpvPutSamplerType(compiler->module); - IlcSpvId samplerId = ilcSpvPutLoad(compiler->module, samplerTypeId, sampler->id); + IlcSpvId samplerId = ilcSpvPutLoad(compiler->module, samplerTypeId, emitSamplerLoad(compiler, sampler)); IlcSpvId sampledImageTypeId = ilcSpvPutSampledImageType(compiler->module, resource->typeId); IlcSpvId sampledImageId = 
ilcSpvPutSampledImage(compiler->module, sampledImageTypeId, resourceId, samplerId); @@ -2850,7 +2973,7 @@ static void emitUavLoad( // Vulkan spec: "The Result Type operand of OpImageRead must be a vector of four components." IlcSpvId texel4TypeId = ilcSpvPutVectorType(compiler->module, resource->texelTypeId, 4); - IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); + IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, emitResourceLoad(compiler, resource, SpvStorageClassUniformConstant)); IlcSpvId addressId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId readId = ilcSpvPutImageRead(compiler->module, texel4TypeId, resourceId, addressId); storeDestination(compiler, dst, readId, texel4TypeId); @@ -2882,6 +3005,7 @@ static void emitUavStructLoad( IlcSpvId fZeroId = ilcSpvPutConstant(compiler->module, compiler->floatId, ZERO_LITERAL); IlcSpvId constituentIds[] = { fZeroId, fZeroId, fZeroId, fZeroId }; + IlcSpvId resourceId = emitResourceLoad(compiler, resource, SpvStorageClassStorageBuffer); for (unsigned i = 0; i < 4; i++) { IlcSpvId addrId; @@ -2898,7 +3022,7 @@ static void emitUavStructLoad( } const IlcSpvId indexIds[] = { zeroId, addrId }; - IlcSpvId ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, resource->id, + IlcSpvId ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, resourceId, 2, indexIds); constituentIds[i] = ilcSpvPutLoad(compiler->module, resource->texelTypeId, ptrId); } @@ -2921,7 +3045,7 @@ static void emitUavStore( return; } - IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, resource->id); + IlcSpvId resourceId = ilcSpvPutLoad(compiler->module, resource->typeId, emitResourceLoad(compiler, resource, SpvStorageClassUniformConstant)); IlcSpvId addressId = loadSource(compiler, &instr->srcs[0], COMP_MASK_XYZW, compiler->int4Id); IlcSpvId elementTypeId = ilcSpvPutVectorType(compiler->module, resource->texelTypeId, 4); IlcSpvId 
elementId = loadSource(compiler, &instr->srcs[1], COMP_MASK_XYZW, elementTypeId); @@ -2960,6 +3084,8 @@ static void emitUavRawStructStore( IlcSpvId ptrTypeId = ilcSpvPutPointerType(compiler->module, SpvStorageClassStorageBuffer, resource->texelTypeId); + IlcSpvId resourceId = emitResourceLoad(compiler, resource, SpvStorageClassStorageBuffer); + // Write up to four components based on the destination mask for (unsigned i = 0; i < 4; i++) { if (dst->component[i] == IL_MODCOMP_NOWRITE) { @@ -2973,7 +3099,7 @@ static void emitUavRawStructStore( } IlcSpvId indexIds[] = { zeroId, wordAddrId }; - IlcSpvId ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, resource->id, + IlcSpvId ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, resourceId, 2, indexIds); IlcSpvId componentId = emitVectorTrim(compiler, dataId, compiler->float4Id, i, 1); ilcSpvPutStore(compiler->module, ptrId, componentId); @@ -3042,7 +3168,8 @@ static void emitUavAtomicOp( IlcSpvId trimAddressId = emitVectorTrim(compiler, addressId, compiler->int4Id, COMP_INDEX_X, getResourceDimensionCount(resource->ilType)); IlcSpvId zeroId = ilcSpvPutConstant(compiler->module, compiler->intId, ZERO_LITERAL); - IlcSpvId texelPtrId = ilcSpvPutImageTexelPointer(compiler->module, pointerTypeId, resource->id, + IlcSpvId texelPtrId = ilcSpvPutImageTexelPointer(compiler->module, pointerTypeId, + emitResourceLoad(compiler, resource, SpvStorageClassUniformConstant), trimAddressId, zeroId); IlcSpvId readId = 0; @@ -3100,11 +3227,14 @@ static void emitAppendBufOp( const IlcResource atomicCounterResource = { .resType = RES_TYPE_ATOMIC_COUNTER, .id = resourceId, + .interfaceId = resourceId, .typeId = 0, .texelTypeId = 0, .ilId = 0, .ilType = IL_USAGE_PIXTEX_UNKNOWN, .strideId = 0, + .specOffsetId = 0, + .specDescriptorSlotId = 0, }; resource = addResource(compiler, &atomicCounterResource); @@ -3188,6 +3318,8 @@ static void emitStructuredSrvLoad( IlcSpvId fZeroId = ilcSpvPutConstant(compiler->module, compiler->floatId, 
ZERO_LITERAL); IlcSpvId fWordIds[] = { fZeroId, fZeroId, fZeroId, fZeroId }; + IlcSpvId resourceId = emitResourceLoad(compiler, resource, SpvStorageClassStorageBuffer); + for (unsigned i = 0; i < wordCount; i++) { IlcSpvId addrId; @@ -3205,7 +3337,7 @@ static void emitStructuredSrvLoad( } const IlcSpvId indexIds[] = { zeroId, addrId }; - IlcSpvId ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, resource->id, + IlcSpvId ptrId = ilcSpvPutAccessChain(compiler->module, ptrTypeId, resourceId, 2, indexIds); fWordIds[i] = ilcSpvPutLoad(compiler->module, resource->texelTypeId, ptrId); } @@ -3678,6 +3810,15 @@ static void emitEntryPoint( break; } + if (compiler->resourceCount > 0) { + ilcSpvPutExtension(compiler->module, "SPV_EXT_descriptor_indexing"); + ilcSpvPutCapability(compiler->module, SpvCapabilityRuntimeDescriptorArrayEXT); + ilcSpvPutCapability(compiler->module, SpvCapabilitySampledImageArrayDynamicIndexing); + ilcSpvPutCapability(compiler->module, SpvCapabilityStorageImageArrayDynamicIndexing); + ilcSpvPutCapability(compiler->module, SpvCapabilityUniformTexelBufferArrayDynamicIndexing); + ilcSpvPutCapability(compiler->module, SpvCapabilityStorageTexelBufferArrayDynamicIndexing); + ilcSpvPutCapability(compiler->module, SpvCapabilityStorageBufferArrayDynamicIndexing); + } unsigned interfaceCount = compiler->regCount + compiler->resourceCount + compiler->samplerCount; @@ -3693,13 +3834,13 @@ static void emitEntryPoint( for (int i = 0; i < compiler->resourceCount; i++) { const IlcResource* resource = &compiler->resources[i]; - interfaces[interfaceIndex] = resource->id; + interfaces[interfaceIndex] = resource->interfaceId; interfaceIndex++; } for (int i = 0; i < compiler->samplerCount; i++) { const IlcSampler* sampler = &compiler->samplers[i]; - interfaces[interfaceIndex] = sampler->id; + interfaces[interfaceIndex] = sampler->interfaceId; interfaceIndex++; } diff --git a/src/amdilc/amdilc_spirv.c b/src/amdilc/amdilc_spirv.c index 1592ba35..22645c5a 100644 --- 
a/src/amdilc/amdilc_spirv.c +++ b/src/amdilc/amdilc_spirv.c @@ -506,6 +506,22 @@ IlcSpvId ilcSpvPutConstantComposite( consistuentCount, consistuents); } +IlcSpvId ilcSpvPutSpecConstant( + IlcSpvModule* module, + IlcSpvId resultTypeId, + IlcSpvWord literal) +{ + IlcSpvBuffer* buffer = &module->buffer[ID_CONSTANTS]; + + IlcSpvId id = ilcSpvAllocId(module); + putInstr(buffer, SpvOpSpecConstant, 4); + putWord(buffer, resultTypeId); + putWord(buffer, id); + putWord(buffer, literal); + + return id; +} + void ilcSpvPutFunction( IlcSpvModule* module, IlcSpvId resultTypeId, diff --git a/src/amdilc/amdilc_spirv.h b/src/amdilc/amdilc_spirv.h index 8906f1c8..97323263 100644 --- a/src/amdilc/amdilc_spirv.h +++ b/src/amdilc/amdilc_spirv.h @@ -166,6 +166,11 @@ IlcSpvId ilcSpvPutConstantComposite( unsigned consistuentCount, const IlcSpvId* consistuents); +IlcSpvId ilcSpvPutSpecConstant( + IlcSpvModule* module, + IlcSpvId resultTypeId, + IlcSpvWord literal); + void ilcSpvPutFunction( IlcSpvModule* module, IlcSpvId resultType, diff --git a/src/amdilc/meson.build b/src/amdilc/meson.build index 1a35cb93..b4abfaa4 100644 --- a/src/amdilc/meson.build +++ b/src/amdilc/meson.build @@ -4,6 +4,7 @@ amdilc_src = [ 'amdilc_decoder.c', 'amdilc_dump.c', 'amdilc_rect_gs_compiler.c', + 'amdilc_binding_patcher.c', 'amdilc_spirv.c', ] diff --git a/src/mantle/mantle_cmd_buf.c b/src/mantle/mantle_cmd_buf.c index ff8a6380..b7f7c16a 100644 --- a/src/mantle/mantle_cmd_buf.c +++ b/src/mantle/mantle_cmd_buf.c @@ -7,81 +7,10 @@ typedef enum _DirtyFlags { FLAG_DIRTY_DESCRIPTOR_SET = 1u << 0, FLAG_DIRTY_RENDER_PASS = 1u << 1, FLAG_DIRTY_PIPELINE = 1u << 2, - FLAG_DIRTY_DYNAMIC_OFFSET = 1u << 3, + FLAG_DIRTY_DYNAMIC_MAPPING = 1u << 3, + FLAG_DIRTY_DYNAMIC_STRIDE = 1u << 4, } DirtyFlags; -static VkDescriptorPool getVkDescriptorPool( - const GrDevice* grDevice) -{ - VkDescriptorPool descriptorPool = VK_NULL_HANDLE; - - // TODO rebalance - const VkDescriptorPoolSize poolSizes[] = { - { 
VK_DESCRIPTOR_TYPE_SAMPLER, SETS_PER_POOL }, - { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, SETS_PER_POOL }, - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, SETS_PER_POOL }, - { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, SETS_PER_POOL }, - { VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, SETS_PER_POOL }, - { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, SETS_PER_POOL }, - { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, SETS_PER_POOL }, - }; - - const VkDescriptorPoolCreateInfo createInfo = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .maxSets = SETS_PER_POOL, - .poolSizeCount = COUNT_OF(poolSizes), - .pPoolSizes = poolSizes, - }; - - VkResult res = VKD.vkCreateDescriptorPool(grDevice->device, &createInfo, NULL, &descriptorPool); - if (res != VK_SUCCESS) { - LOGE("vkCreateDescriptorPool failed (%d)\n", res); - assert(false); - } - - return descriptorPool; -} - -static void updateVkDescriptorSet( - const GrDevice* grDevice, - const GrCmdBuffer* grCmdBuffer, - const BindPoint* bindPoint, - const GrDescriptorSet* grDescriptorSet, - unsigned slotOffset, - unsigned updateTemplateSlotCount, - const UpdateTemplateSlot* updateTemplateSlots, - VkPipelineLayout pipelineLayout) -{ - for (unsigned i = 0; i < updateTemplateSlotCount; i++) { - const UpdateTemplateSlot* templateSlot = &updateTemplateSlots[i]; - const DescriptorSetSlot* slot; - - if (templateSlot->isDynamic) { - slot = &bindPoint->dynamicMemoryView; - } else { - slot = &grDescriptorSet->slots[slotOffset]; - - for (unsigned j = 0; j < templateSlot->pathDepth; j++) { - slot = &slot[templateSlot->path[j]]; - slot = &slot->nested.nextSet->slots[slot->nested.slotOffset]; - } - } - - VKD.vkUpdateDescriptorSetWithTemplate(grDevice->device, bindPoint->descriptorSet, - templateSlot->updateTemplate, (void*)slot); - - // Pass buffer strides down to the shader - for (unsigned j = 0; j < templateSlot->strideCount; j++) { - VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, pipelineLayout, - VK_SHADER_STAGE_VERTEX_BIT, - 
templateSlot->strideOffsets[j], sizeof(uint32_t), - &slot[templateSlot->strideSlotIndexes[j]].buffer.stride); - } - } -} - static void grCmdBufferBeginRenderPass( GrCmdBuffer* grCmdBuffer) { @@ -124,64 +53,134 @@ void grCmdBufferEndRenderPass( grCmdBuffer->isRendering = false; } -static void grCmdBufferUpdateDescriptorSet( +static void setupDescriptorSets( + const GrDevice* grDevice, + const GrCmdBuffer* grCmdBuffer, + const BindPoint* bindPoint, + const GrDescriptorSet* grDescriptorSet, + unsigned slotOffset, + unsigned pipelineDescriptorSetCount, + const PipelineDescriptorSlot* pipelineDescriptorSlots, + VkPipelineLayout pipelineLayout, + VkDescriptorSet* pDescriptorSets, + unsigned* pOffsets) +{ + for (unsigned i = 0; i < pipelineDescriptorSetCount; i++) { + const PipelineDescriptorSlot* descriptorSlot = &pipelineDescriptorSlots[i]; + const DescriptorSetSlot* slot; + unsigned descriptorSlotOffset = slotOffset; + const GrDescriptorSet* currentSet = grDescriptorSet; + + slot = &currentSet->slots[descriptorSlotOffset]; + + for (unsigned j = 0; j < descriptorSlot->pathDepth; j++) { + slot = &slot[descriptorSlot->path[j]]; + descriptorSlotOffset = slot->nested.slotOffset; + currentSet = slot->nested.nextSet; + slot = &currentSet->slots[descriptorSlotOffset]; + } + + pDescriptorSets[i] = currentSet->descriptorSet; + pOffsets[i] = descriptorSlotOffset * DESCRIPTORS_PER_SLOT; + // Pass buffer strides down to the shader + for (unsigned j = 0; j < descriptorSlot->strideCount; j++) { + VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, pipelineLayout, + VK_SHADER_STAGE_ALL_GRAPHICS, + descriptorSlot->strideOffsets[j], sizeof(uint32_t), + &slot[descriptorSlot->strideSlotIndexes[j]].buffer.stride); + } + } +} + +static void grCmdBufferBindVkDescriptorSets( GrCmdBuffer* grCmdBuffer, VkPipelineBindPoint vkBindPoint) { const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer); BindPoint* bindPoint = &grCmdBuffer->bindPoints[vkBindPoint]; GrPipeline* grPipeline = bindPoint->grPipeline; - 
VkResult vkRes; - for (unsigned i = 0; i < 2; i++) { - if (grCmdBuffer->descriptorPoolIndex < grCmdBuffer->descriptorPoolCount) { - const VkDescriptorSetAllocateInfo descSetAllocateInfo = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = NULL, - .descriptorPool = grCmdBuffer->descriptorPools[grCmdBuffer->descriptorPoolIndex], - .descriptorSetCount = 1, - .pSetLayouts = &grPipeline->descriptorSetLayout, - }; + bindPoint->boundDescriptorSetCount = 0; - vkRes = VKD.vkAllocateDescriptorSets(grDevice->device, &descSetAllocateInfo, - &bindPoint->descriptorSet); - if (vkRes == VK_SUCCESS) { - break; - } else if (vkRes != VK_ERROR_OUT_OF_POOL_MEMORY) { - LOGE("vkAllocateDescriptorSets failed (%d)\n", vkRes); - break; - } else if (i > 0) { - LOGE("descriptor set allocation failed with a new pool\n"); - assert(false); - } else { - // Use the next pool - grCmdBuffer->descriptorPoolIndex++; - } - } + for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { + assert((bindPoint->boundDescriptorSetCount + grPipeline->descriptorSetCounts[i]) < COUNT_OF(bindPoint->descriptorSets)); + setupDescriptorSets(grDevice, grCmdBuffer, bindPoint, + bindPoint->grDescriptorSets[i], bindPoint->slotOffsets[i], + grPipeline->descriptorSetCounts[i], + grPipeline->descriptorSlots[i], + grPipeline->pipelineLayout, + &bindPoint->descriptorSets[bindPoint->boundDescriptorSetCount], + &bindPoint->descriptorArrayOffsets[bindPoint->boundDescriptorSetCount]); + bindPoint->boundDescriptorSetCount += grPipeline->descriptorSetCounts[i]; + } - if (grCmdBuffer->descriptorPoolIndex == grCmdBuffer->descriptorPoolCount) { - // Need to allocate a new pool - VkDescriptorPool descriptorPool = getVkDescriptorPool(grDevice); + uint32_t descriptorOffsets[] = { 0, 0 }; + VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, grPipeline->pipelineLayout, + vkBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? 
VK_SHADER_STAGE_ALL_GRAPHICS : VK_SHADER_STAGE_COMPUTE_BIT, + DESCRIPTOR_CONST_OFFSETS_OFFSET, sizeof(descriptorOffsets), + descriptorOffsets); + if (bindPoint->boundDescriptorSetCount > 0) { + VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, grPipeline->pipelineLayout, + vkBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? VK_SHADER_STAGE_ALL_GRAPHICS : VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(uint32_t) * 2 + DESCRIPTOR_CONST_OFFSETS_OFFSET, sizeof(uint32_t) * bindPoint->boundDescriptorSetCount, + bindPoint->descriptorArrayOffsets); + VKD.vkCmdBindDescriptorSets(grCmdBuffer->commandBuffer, vkBindPoint, grPipeline->pipelineLayout, + DESCRIPTOR_SET_ID, bindPoint->boundDescriptorSetCount, bindPoint->descriptorSets, + 0, NULL); + } +} - // Track descriptor pool - grCmdBuffer->descriptorPoolCount++; - grCmdBuffer->descriptorPools = realloc(grCmdBuffer->descriptorPools, - grCmdBuffer->descriptorPoolCount * - sizeof(VkDescriptorPool)); - grCmdBuffer->descriptorPools[grCmdBuffer->descriptorPoolCount - 1] = descriptorPool; - } +static void grCmdBufferSetupDynamicBufferStride( + GrCmdBuffer* grCmdBuffer, + VkPipelineBindPoint vkBindPoint) +{ + const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer); + BindPoint* bindPoint = &grCmdBuffer->bindPoints[vkBindPoint]; + GrPipeline* grPipeline = bindPoint->grPipeline; + + if (bindPoint->dynamicMemoryView.buffer.bufferInfo.buffer == VK_NULL_HANDLE || !grPipeline->dynamicMappingUsed) { + return; } + const PipelineDescriptorSlot* dynamicDescriptorSlot = &grPipeline->dynamicDescriptorSlot; - for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { - updateVkDescriptorSet(grDevice, grCmdBuffer, bindPoint, - bindPoint->grDescriptorSets[i], bindPoint->slotOffsets[i], - grPipeline->updateTemplateSlotCounts[i], - grPipeline->updateTemplateSlots[i], - grPipeline->pipelineLayout); + for (unsigned j = 0; j < dynamicDescriptorSlot->strideCount; j++) { + VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, grPipeline->pipelineLayout, + 
VK_SHADER_STAGE_ALL_GRAPHICS, + dynamicDescriptorSlot->strideOffsets[j], sizeof(uint32_t), + &bindPoint->dynamicMemoryView.buffer.stride); + } +} + +static void grCmdBufferBindDynamicDescriptorSet( + GrCmdBuffer* grCmdBuffer, + VkPipelineBindPoint vkBindPoint) +{ + const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer); + BindPoint* bindPoint = &grCmdBuffer->bindPoints[vkBindPoint]; + GrPipeline* grPipeline = bindPoint->grPipeline; + + if (bindPoint->dynamicMemoryView.buffer.bufferInfo.buffer == VK_NULL_HANDLE) { + return; } + + VkWriteDescriptorSet dynamicBufferWrite = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .dstArrayElement = 0, + .dstBinding = DYNAMIC_MEMORY_VIEW_BINDING_ID, + .dstSet = 0,// ignored + .pBufferInfo = &bindPoint->dynamicMemoryView.buffer.bufferInfo, + }; + VKD.vkCmdPushDescriptorSetKHR( + grCmdBuffer->commandBuffer, + vkBindPoint, + grPipeline->pipelineLayout, + DYNAMIC_MEMORY_VIEW_DESCRIPTOR_SET_ID, + 1, &dynamicBufferWrite); } -static void grCmdBufferBindDescriptorSet( +static void grCmdBufferBindAtomicDescriptorSet( GrCmdBuffer* grCmdBuffer, VkPipelineBindPoint vkBindPoint) { @@ -190,19 +189,12 @@ static void grCmdBufferBindDescriptorSet( const GrPipeline* grPipeline = bindPoint->grPipeline; const VkDescriptorSet descriptorSets[] = { - bindPoint->descriptorSet, grCmdBuffer->atomicCounterSet, }; - uint32_t dynamicOffsets[MAX_STAGE_COUNT]; - - for (unsigned i = 0; i < grPipeline->dynamicOffsetCount; i++) { - dynamicOffsets[i] = bindPoint->dynamicOffset; - } - VKD.vkCmdBindDescriptorSets(grCmdBuffer->commandBuffer, vkBindPoint, grPipeline->pipelineLayout, - 0, COUNT_OF(descriptorSets), descriptorSets, - grPipeline->dynamicOffsetCount, dynamicOffsets); + ATOMIC_COUNTER_SET_ID, COUNT_OF(descriptorSets), descriptorSets, + 0, NULL); } static void grCmdBufferUpdateResources( @@ -215,11 +207,19 @@ static void grCmdBufferUpdateResources( uint32_t dirtyFlags = 
bindPoint->dirtyFlags; if (dirtyFlags & FLAG_DIRTY_DESCRIPTOR_SET) { - grCmdBufferUpdateDescriptorSet(grCmdBuffer, vkBindPoint); + grCmdBufferBindVkDescriptorSets(grCmdBuffer, vkBindPoint); + } + + if (dirtyFlags & FLAG_DIRTY_DESCRIPTOR_SET) { + grCmdBufferBindAtomicDescriptorSet(grCmdBuffer, vkBindPoint); } - if (dirtyFlags & (FLAG_DIRTY_DESCRIPTOR_SET | FLAG_DIRTY_DYNAMIC_OFFSET)) { - grCmdBufferBindDescriptorSet(grCmdBuffer, vkBindPoint); + if (dirtyFlags & FLAG_DIRTY_DYNAMIC_MAPPING) { + grCmdBufferBindDynamicDescriptorSet(grCmdBuffer, vkBindPoint); + } + + if (dirtyFlags & FLAG_DIRTY_DYNAMIC_STRIDE) { + grCmdBufferSetupDynamicBufferStride(grCmdBuffer, vkBindPoint); } if (dirtyFlags & FLAG_DIRTY_RENDER_PASS) { @@ -261,7 +261,7 @@ GR_VOID GR_STDCALL grCmdBindPipeline( bindPoint->grPipeline = grPipeline; if (vkBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { - bindPoint->dirtyFlags |= FLAG_DIRTY_DESCRIPTOR_SET | FLAG_DIRTY_PIPELINE; + bindPoint->dirtyFlags |= FLAG_DIRTY_DESCRIPTOR_SET | FLAG_DIRTY_DYNAMIC_STRIDE | FLAG_DIRTY_PIPELINE; } else { // Pipeline creation isn't deferred for compute, bind now VKD.vkCmdBindPipeline(grCmdBuffer->commandBuffer, vkBindPoint, grPipeline->pipeline); @@ -416,14 +416,9 @@ GR_VOID GR_STDCALL grCmdBindDynamicMemoryView( // FIXME what is pMemView->state for? 
- if (pMemView->offset != bindPoint->dynamicOffset) { - bindPoint->dynamicOffset = pMemView->offset; - - bindPoint->dirtyFlags |= FLAG_DIRTY_DYNAMIC_OFFSET; - } - if (grGpuMemory->buffer != bindPoint->dynamicMemoryView.buffer.bufferInfo.buffer || pMemView->range != bindPoint->dynamicMemoryView.buffer.bufferInfo.range || + pMemView->offset != bindPoint->dynamicMemoryView.buffer.bufferInfo.offset || pMemView->stride != bindPoint->dynamicMemoryView.buffer.stride) { bindPoint->dynamicMemoryView = (DescriptorSetSlot) { .type = SLOT_TYPE_BUFFER, @@ -431,14 +426,14 @@ GR_VOID GR_STDCALL grCmdBindDynamicMemoryView( .bufferView = VK_NULL_HANDLE, .bufferInfo = { .buffer = grGpuMemory->buffer, - .offset = 0, + .offset = pMemView->offset, .range = pMemView->range, }, .stride = pMemView->stride, }, }; - bindPoint->dirtyFlags |= FLAG_DIRTY_DESCRIPTOR_SET; + bindPoint->dirtyFlags |= FLAG_DIRTY_DYNAMIC_MAPPING | FLAG_DIRTY_DYNAMIC_STRIDE; } } diff --git a/src/mantle/mantle_cmd_buf_man.c b/src/mantle/mantle_cmd_buf_man.c index 21c628ab..8eb0b5b1 100644 --- a/src/mantle/mantle_cmd_buf_man.c +++ b/src/mantle/mantle_cmd_buf_man.c @@ -3,15 +3,6 @@ void grCmdBufferResetState( GrCmdBuffer* grCmdBuffer) { - GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer); - - // Reset descriptor pools - unsigned resetCount = MIN(grCmdBuffer->descriptorPoolIndex + 1, - grCmdBuffer->descriptorPoolCount); - for (unsigned i = 0; i < resetCount; i++) { - VKD.vkResetDescriptorPool(grDevice->device, grCmdBuffer->descriptorPools[i], 0); - } - // Clear state unsigned stateOffset = OFFSET_OF(GrCmdBuffer, isBuilding); memset(&((uint8_t*)grCmdBuffer)[stateOffset], 0, sizeof(GrCmdBuffer) - stateOffset); @@ -104,9 +95,6 @@ GR_RESULT GR_STDCALL grCreateCommandBuffer( .timestampQueryPool = vkQueryPool, .atomicCounterBuffer = atomicCounterBuffer, .atomicCounterSet = atomicCounterSet, - .descriptorPoolCount = 0, - .descriptorPools = NULL, - .descriptorPoolIndex = 0, }; grCmdBufferResetState(grCmdBuffer); diff --git 
a/src/mantle/mantle_descriptor_set.c b/src/mantle/mantle_descriptor_set.c index bb6a309b..a32ac8ce 100644 --- a/src/mantle/mantle_descriptor_set.c +++ b/src/mantle/mantle_descriptor_set.c @@ -4,7 +4,7 @@ inline static void releaseSlot( const GrDevice* grDevice, DescriptorSetSlot* slot) { - if (slot->type == SLOT_TYPE_BUFFER) { + if (slot->type == SLOT_TYPE_BUFFER && slot->buffer.bufferView != VK_NULL_HANDLE) { VKD.vkDestroyBufferView(grDevice->device, slot->buffer.bufferView, NULL); } } @@ -27,15 +27,87 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( return GR_ERROR_INVALID_POINTER; } + VkDescriptorPool descriptorPool = VK_NULL_HANDLE; + VkDescriptorSet descriptorSet = VK_NULL_HANDLE; + + VkResult vkRes = VK_SUCCESS; + const VkDescriptorType descriptorTypes[] = { + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + VK_DESCRIPTOR_TYPE_SAMPLER + }; + const VkMutableDescriptorTypeListEXT mutableTypeList = { + .descriptorTypeCount = COUNT_OF(descriptorTypes), + .pDescriptorTypes = descriptorTypes, + }; + const VkMutableDescriptorTypeCreateInfoEXT mutableTypeInfo = { + .sType = VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT, + .pNext = NULL, + .mutableDescriptorTypeListCount = 1, + .pMutableDescriptorTypeLists = &mutableTypeList, + }; + const VkDescriptorPoolSize poolSize = { + .type = VK_DESCRIPTOR_TYPE_MUTABLE_EXT, + .descriptorCount = DESCRIPTORS_PER_SLOT * pCreateInfo->slots, + }; + + const VkDescriptorPoolCreateInfo poolCreateInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = &mutableTypeInfo, + .flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = &poolSize, + }; + + vkRes = VKD.vkCreateDescriptorPool(grDevice->device, &poolCreateInfo, NULL, &descriptorPool); + if (vkRes != VK_SUCCESS) { + LOGE("vkCreateDescriptorPool failed 
(%d)\n", vkRes); + goto bail; + } + + const VkDescriptorSetVariableDescriptorCountAllocateInfo descriptorCountInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO, + .pNext = NULL, + .descriptorSetCount = 1, + .pDescriptorCounts = &poolSize.descriptorCount, + }; + const VkDescriptorSetAllocateInfo allocateInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = &descriptorCountInfo, + .descriptorPool = descriptorPool, + .descriptorSetCount = 1, + .pSetLayouts = &grDevice->defaultDescriptorSetLayout, + }; + + vkRes = VKD.vkAllocateDescriptorSets(grDevice->device, &allocateInfo, &descriptorSet); + if (vkRes != VK_SUCCESS) { + LOGE("vkAllocateDescriptorSets failed (%d)\n", vkRes); + goto bail; + } + GrDescriptorSet* grDescriptorSet = malloc(sizeof(GrDescriptorSet)); + if (grDescriptorSet == NULL) { + return GR_ERROR_OUT_OF_MEMORY; + } + *grDescriptorSet = (GrDescriptorSet) { .grObj = { GR_OBJ_TYPE_DESCRIPTOR_SET, grDevice }, .slotCount = pCreateInfo->slots, .slots = calloc(pCreateInfo->slots, sizeof(DescriptorSetSlot)), + .descriptorPool = descriptorPool, + .descriptorSet = descriptorSet, }; *pDescriptorSet = (GR_DESCRIPTOR_SET)grDescriptorSet; return GR_SUCCESS; + +bail: + VKD.vkDestroyDescriptorPool(grDevice->device, descriptorPool, NULL); + return getGrResult(vkRes); } GR_VOID GR_STDCALL grBeginDescriptorSetUpdate( @@ -64,6 +136,9 @@ GR_VOID GR_STDCALL grAttachSamplerDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); + STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount); + unsigned descriptorWriteCount = 0; + for (unsigned i = 0; i < slotCount; i++) { DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; const GrSampler* grSampler = (GrSampler*)pSamplers[i]; @@ -80,7 +155,24 @@ GR_VOID GR_STDCALL grAttachSamplerDescriptors( }, }, }; + + writeDescriptors[descriptorWriteCount++] = 
(VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLER), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .pImageInfo = &slot->image.imageInfo, + .pBufferInfo = NULL, + .pTexelBufferView = NULL, + }; } + + VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); + + STACK_ARRAY_FINISH(writeDescriptors); } GR_VOID GR_STDCALL grAttachImageViewDescriptors( @@ -93,6 +185,9 @@ GR_VOID GR_STDCALL grAttachImageViewDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); + STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount * 2); + unsigned descriptorWriteCount = 0; + for (unsigned i = 0; i < slotCount; i++) { DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; const GR_IMAGE_VIEW_ATTACH_INFO* info = &pImageViews[i]; @@ -110,7 +205,38 @@ GR_VOID GR_STDCALL grAttachImageViewDescriptors( }, }, }; + + if (grImageView->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &slot->image.imageInfo, + .pBufferInfo = NULL, + .pTexelBufferView = NULL, + }; + } + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT 
+ getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &slot->image.imageInfo, + .pBufferInfo = NULL, + .pTexelBufferView = NULL, + }; } + + VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); + + STACK_ARRAY_FINISH(writeDescriptors); } GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( @@ -124,6 +250,9 @@ GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); VkResult vkRes; + STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount * 3); + unsigned descriptorWriteCount = 0; + for (unsigned i = 0; i < slotCount; i++) { DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; const GR_MEMORY_VIEW_ATTACH_INFO* info = &pMemViews[i]; @@ -149,8 +278,46 @@ GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( if (vkRes != VK_SUCCESS) { LOGE("vkCreateBufferView failed (%d)\n", vkRes); } + + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pImageInfo = NULL, + .pBufferInfo = NULL, + .pTexelBufferView = &slot->buffer.bufferView, + }; + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pImageInfo = NULL, + .pBufferInfo = NULL, + .pTexelBufferView = 
&slot->buffer.bufferView, + }; } + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pImageInfo = NULL, + .pBufferInfo = &slot->buffer.bufferInfo, + .pTexelBufferView = NULL, + }; + *slot = (DescriptorSetSlot) { .type = SLOT_TYPE_BUFFER, .buffer = { @@ -164,6 +331,10 @@ GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( }, }; } + + VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); + + STACK_ARRAY_FINISH(writeDescriptors); } GR_VOID GR_STDCALL grAttachNestedDescriptors( diff --git a/src/mantle/mantle_image_view.c b/src/mantle/mantle_image_view.c index 8a99de96..a0e939d3 100644 --- a/src/mantle/mantle_image_view.c +++ b/src/mantle/mantle_image_view.c @@ -112,6 +112,7 @@ GR_RESULT GR_STDCALL grCreateImageView( .grObj = { GR_OBJ_TYPE_IMAGE_VIEW, grDevice }, .imageView = vkImageView, .format = createInfo.format, + .usage = grImage->usage, }; *pView = (GR_IMAGE_VIEW)grImageView; diff --git a/src/mantle/mantle_init_device.c b/src/mantle/mantle_init_device.c index fc0c0ace..5f71b66b 100644 --- a/src/mantle/mantle_init_device.c +++ b/src/mantle/mantle_init_device.c @@ -50,6 +50,94 @@ static VkDescriptorSetLayout getAtomicCounterDescriptorSetLayout( return layout; } +static VkDescriptorSetLayout getDynamicMemoryDescriptorSetLayout( + const GrDevice* grDevice) +{ + VkDescriptorSetLayout layout = VK_NULL_HANDLE; + + const VkDescriptorSetLayoutBinding binding = { + .binding = DYNAMIC_MEMORY_VIEW_BINDING_ID, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = NULL, + }; + + const 
VkDescriptorSetLayoutCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = NULL, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = &binding, + }; + + VkResult res = VKD.vkCreateDescriptorSetLayout(grDevice->device, &createInfo, NULL, &layout); + if (res != VK_SUCCESS) { + LOGE("vkCreateDescriptorSetLayout failed (%d)\n", res); + assert(false); + } + + return layout; +} + +static VkDescriptorSetLayout getDefaultDescriptorSetLayout( + const GrDevice* grDevice) +{ + VkDescriptorSetLayout layout = VK_NULL_HANDLE; + + const VkDescriptorType descriptorTypes[] = { + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + VK_DESCRIPTOR_TYPE_SAMPLER + }; + const VkMutableDescriptorTypeListEXT mutableTypeList = { + .descriptorTypeCount = COUNT_OF(descriptorTypes), + .pDescriptorTypes = descriptorTypes, + }; + const VkMutableDescriptorTypeCreateInfoEXT mutableTypeInfo = { + .sType = VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT, + .pNext = NULL, + .mutableDescriptorTypeListCount = 1, + .pMutableDescriptorTypeLists = &mutableTypeList, + }; + const VkDescriptorBindingFlags bindingFlags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT | + VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | + VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | + VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT; + const VkDescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsCreateInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, + .pNext = &mutableTypeInfo, + .bindingCount = 1, + .pBindingFlags = &bindingFlags, + }; + const VkDescriptorSetLayoutBinding binding = { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_MUTABLE_EXT, + .descriptorCount = 0xFFFFFFF, + .stageFlags = VK_SHADER_STAGE_ALL, + 
.pImmutableSamplers = NULL, + }; + + const VkDescriptorSetLayoutCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = &bindingFlagsCreateInfo, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT, + .bindingCount = 1, + .pBindings = &binding, + }; + + VkResult res = VKD.vkCreateDescriptorSetLayout(grDevice->device, &createInfo, NULL, &layout); + if (res != VK_SUCCESS) { + LOGE("vkCreateDescriptorSetLayout failed (%d)\n", res); + assert(false); + } + + return layout; +} + static VkDeviceMemory getAtomicCounterMemory( const GrDevice* grDevice, unsigned slotCount) @@ -659,11 +747,23 @@ GR_RESULT GR_STDCALL grCreateDevice( .synchronization2 = VK_TRUE, .dynamicRendering = VK_TRUE, }; + VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT mutableDescriptorFeaturesEXT = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT, + .pNext = &vulkan13DeviceFeatures, + .mutableDescriptorType = VK_TRUE, + }; VkPhysicalDeviceVulkan12Features vulkan12DeviceFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, - .pNext = &vulkan13DeviceFeatures, + .pNext = &mutableDescriptorFeaturesEXT, + .runtimeDescriptorArray = VK_TRUE, + .bufferDeviceAddress = VK_TRUE, + .descriptorBindingVariableDescriptorCount = VK_TRUE, + .descriptorBindingPartiallyBound = VK_TRUE, + .descriptorBindingUpdateUnusedWhilePending = VK_TRUE, .samplerMirrorClampToEdge = VK_TRUE, .separateDepthStencilLayouts = VK_TRUE, + .shaderUniformTexelBufferArrayDynamicIndexing = VK_TRUE, + .shaderStorageTexelBufferArrayDynamicIndexing = VK_TRUE, }; VkPhysicalDeviceFeatures2 deviceFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, @@ -685,6 +785,10 @@ GR_RESULT GR_STDCALL grCreateDevice( .fragmentStoresAndAtomics = VK_TRUE, .shaderStorageImageReadWithoutFormat = VK_TRUE, .shaderStorageImageWriteWithoutFormat = VK_TRUE, + .shaderUniformBufferArrayDynamicIndexing = VK_TRUE, + 
.shaderSampledImageArrayDynamicIndexing = VK_TRUE, + .shaderStorageBufferArrayDynamicIndexing = VK_TRUE, + .shaderStorageImageArrayDynamicIndexing = VK_TRUE, .shaderClipDistance = VK_TRUE, }, }; @@ -694,6 +798,9 @@ GR_RESULT GR_STDCALL grCreateDevice( VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME, VK_KHR_SWAPCHAIN_EXTENSION_NAME, + VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, + VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + VK_EXT_MUTABLE_DESCRIPTOR_TYPE_EXTENSION_NAME, }; const VkDeviceCreateInfo createInfo = { @@ -761,6 +868,8 @@ GR_RESULT GR_STDCALL grCreateDevice( .memoryHeapCount = memoryHeapCount, .memoryHeapMap = { 0 }, // Initialized below .atomicCounterSetLayout = VK_NULL_HANDLE, // Initialized below + .dynamicMemorySetLayout = VK_NULL_HANDLE, // Initialized below + .defaultDescriptorSetLayout = VK_NULL_HANDLE, // Initialized below .grUniversalQueue = NULL, // Initialized below .grComputeQueue = NULL, // Initialized below .grDmaQueue = NULL, // Initialized below @@ -773,6 +882,8 @@ GR_RESULT GR_STDCALL grCreateDevice( memcpy(grDevice->memoryHeapMap, memoryHeapMap, memoryHeapCount * sizeof(uint32_t)); grDevice->atomicCounterSetLayout = getAtomicCounterDescriptorSetLayout(grDevice); + grDevice->dynamicMemorySetLayout = getDynamicMemoryDescriptorSetLayout(grDevice); + grDevice->defaultDescriptorSetLayout = getDefaultDescriptorSetLayout(grDevice); if (universalQueueFamilyIndex != INVALID_QUEUE_INDEX) { grDevice->grUniversalQueue = @@ -831,6 +942,9 @@ GR_RESULT GR_STDCALL grDestroyDevice( } VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->atomicCounterSetLayout, NULL); + VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->dynamicMemorySetLayout, NULL); + VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->defaultDescriptorSetLayout, NULL); + if (grDevice->grUniversalQueue) { free(grDevice->grUniversalQueue->globalMemRefs); VKD.vkDestroyCommandPool(grDevice->device, 
grDevice->grUniversalQueue->commandPool, NULL); diff --git a/src/mantle/mantle_internal.h b/src/mantle/mantle_internal.h index ee65a5d8..6d83335b 100644 --- a/src/mantle/mantle_internal.h +++ b/src/mantle/mantle_internal.h @@ -48,6 +48,8 @@ #define STACK_ARRAY_FINISH(name) \ if (name != _stack_##name) free(name) +#define DESCRIPTORS_PER_SLOT (3) + GR_PHYSICAL_GPU_TYPE getGrPhysicalGpuType( VkPhysicalDeviceType type); @@ -171,6 +173,9 @@ VkImageSubresourceRange getVkImageSubresourceRange( GR_IMAGE_SUBRESOURCE_RANGE subresourceRange, bool multiplyCubeLayers); +unsigned getDescriptorOffset( + VkDescriptorType vkDescriptorType); + void grQueueAddInitialImage( GrImage* grImage); diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index b40d60e9..a8a2f0c0 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -95,10 +95,11 @@ typedef struct _BindPoint uint32_t dirtyFlags; GrPipeline* grPipeline; GrDescriptorSet* grDescriptorSets[GR_MAX_DESCRIPTOR_SETS]; + VkDescriptorSet descriptorSets[30]; + unsigned descriptorArrayOffsets[30]; + unsigned boundDescriptorSetCount; unsigned slotOffsets[GR_MAX_DESCRIPTOR_SETS]; DescriptorSetSlot dynamicMemoryView; - uint32_t dynamicOffset; - VkDescriptorSet descriptorSet; } BindPoint; typedef struct _PipelineCreateInfo @@ -106,6 +107,9 @@ typedef struct _PipelineCreateInfo VkPipelineCreateFlags createFlags; unsigned stageCount; VkPipelineShaderStageCreateInfo stageCreateInfos[MAX_STAGE_COUNT]; + VkSpecializationInfo specInfos[MAX_STAGE_COUNT]; + void* specData[MAX_STAGE_COUNT]; + VkSpecializationMapEntry* mapEntries[MAX_STAGE_COUNT]; VkPrimitiveTopology topology; uint32_t patchControlPoints; bool depthClipEnable; @@ -118,15 +122,14 @@ typedef struct _PipelineCreateInfo VkFormat stencilFormat; } PipelineCreateInfo; -typedef struct _UpdateTemplateSlot { - VkDescriptorUpdateTemplate updateTemplate; - bool isDynamic; +typedef struct _PipelineDescriptorSlot { unsigned pathDepth; unsigned 
path[MAX_PATH_DEPTH]; unsigned strideCount; unsigned strideOffsets[MAX_STRIDES]; unsigned strideSlotIndexes[MAX_STRIDES]; -} UpdateTemplateSlot; + unsigned descriptorCount; +} PipelineDescriptorSlot; // Base object typedef struct _GrBaseObject { @@ -152,10 +155,8 @@ typedef struct _GrCmdBuffer { VkCommandBuffer commandBuffer; VkQueryPool timestampQueryPool; VkBuffer atomicCounterBuffer; + VkDeviceSize atomicCounterBufferSize; VkDescriptorSet atomicCounterSet; - // Resource tracking - unsigned descriptorPoolCount; - VkDescriptorPool* descriptorPools; // NOTE: grCmdBufferResetState resets everything past that point bool isBuilding; bool isRendering; @@ -221,6 +222,8 @@ typedef struct _GrDescriptorSet { GrObject grObj; unsigned slotCount; DescriptorSetSlot* slots; + VkDescriptorPool descriptorPool; + VkDescriptorSet descriptorSet; } GrDescriptorSet; typedef struct _GrDevice { @@ -232,6 +235,8 @@ typedef struct _GrDevice { unsigned memoryHeapCount; uint32_t memoryHeapMap[GR_MAX_MEMORY_HEAPS]; VkDescriptorSetLayout atomicCounterSetLayout; + VkDescriptorSetLayout dynamicMemorySetLayout; + VkDescriptorSetLayout defaultDescriptorSetLayout; GrQueue* grUniversalQueue; GrQueue* grComputeQueue; GrQueue* grDmaQueue; @@ -282,6 +287,7 @@ typedef struct _GrImageView { GrObject grObj; VkImageView imageView; VkFormat format; + VkImageUsageFlags usage; } GrImageView; typedef struct _GrMsaaStateObject { @@ -298,16 +304,16 @@ typedef struct _GrPhysicalGpu { typedef struct _GrPipeline { GrObject grObj; - GrShader* grShaderRefs[MAX_STAGE_COUNT]; + VkShaderModule shaderModules[MAX_STAGE_COUNT]; PipelineCreateInfo* createInfo; bool hasTessellation; VkPipeline pipeline; VkPipelineLayout pipelineLayout; unsigned stageCount; - VkDescriptorSetLayout descriptorSetLayout; - unsigned dynamicOffsetCount; - unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS]; - UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS]; + bool dynamicMappingUsed; + PipelineDescriptorSlot 
dynamicDescriptorSlot; + unsigned descriptorSetCounts[GR_MAX_DESCRIPTOR_SETS]; + PipelineDescriptorSlot* descriptorSlots[GR_MAX_DESCRIPTOR_SETS]; } GrPipeline; typedef struct _GrQueueSemaphore { @@ -333,13 +339,13 @@ typedef struct _GrSampler { typedef struct _GrShader { GrObject grObj; - unsigned refCount; - VkShaderModule shaderModule; unsigned bindingCount; IlcBinding* bindings; unsigned inputCount; IlcInput* inputs; char* name; + unsigned codeSize; + void* code; } GrShader; typedef struct _GrQueryPool { diff --git a/src/mantle/mantle_object_man.c b/src/mantle/mantle_object_man.c index 80432b6d..dc9e64f4 100644 --- a/src/mantle/mantle_object_man.c +++ b/src/mantle/mantle_object_man.c @@ -20,10 +20,6 @@ GR_RESULT GR_STDCALL grDestroyObject( VKD.vkDestroyCommandPool(grDevice->device, grCmdBuffer->commandPool, NULL); VKD.vkDestroyQueryPool(grDevice->device, grCmdBuffer->timestampQueryPool, NULL); - for (unsigned i = 0; i < grCmdBuffer->descriptorPoolCount; i++) { - VKD.vkDestroyDescriptorPool(grDevice->device, grCmdBuffer->descriptorPools[i], NULL); - } - free(grCmdBuffer->descriptorPools); } break; case GR_OBJ_TYPE_COLOR_BLEND_STATE_OBJECT: // Nothing to do @@ -46,6 +42,7 @@ GR_RESULT GR_STDCALL grDestroyObject( grClearDescriptorSetSlots(grDescriptorSet, 0, grDescriptorSet->slotCount); free(grDescriptorSet->slots); + VKD.vkDestroyDescriptorPool(grDevice->device, grDescriptorSet->descriptorPool, NULL); } break; case GR_OBJ_TYPE_EVENT: { GrEvent* grEvent = (GrEvent*)grObject; @@ -77,21 +74,19 @@ GR_RESULT GR_STDCALL grDestroyObject( GrPipeline* grPipeline = (GrPipeline*)grObject; for (unsigned i = 0; i < MAX_STAGE_COUNT; i++) { - if (grPipeline->grShaderRefs[i] != NULL) { - grDestroyObject((GR_OBJECT)grPipeline->grShaderRefs[i]); + if (grPipeline->createInfo != NULL) { + free(grPipeline->createInfo->specData[i]); + free(grPipeline->createInfo->mapEntries[i]); } + VKD.vkDestroyShaderModule(grDevice->device, grPipeline->shaderModules[i], NULL); } 
free(grPipeline->createInfo); VKD.vkDestroyPipeline(grDevice->device, grPipeline->pipeline, NULL); VKD.vkDestroyPipelineLayout(grDevice->device, grPipeline->pipelineLayout, NULL); - VKD.vkDestroyDescriptorSetLayout(grDevice->device, grPipeline->descriptorSetLayout, NULL); + for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { - for (unsigned j = 0; j < grPipeline->updateTemplateSlotCounts[i]; j++) { - UpdateTemplateSlot* slot = &grPipeline->updateTemplateSlots[i][j]; - VKD.vkDestroyDescriptorUpdateTemplate(grDevice->device, slot->updateTemplate, NULL); - } - free(grPipeline->updateTemplateSlots[i]); + free(grPipeline->descriptorSlots[i]); } } break; case GR_OBJ_TYPE_QUEUE_SEMAPHORE: { @@ -110,14 +105,10 @@ GR_RESULT GR_STDCALL grDestroyObject( case GR_OBJ_TYPE_SHADER: { GrShader* grShader = (GrShader*)grObject; - if (--grShader->refCount > 0) { - return GR_SUCCESS; - } - - VKD.vkDestroyShaderModule(grDevice->device, grShader->shaderModule, NULL); free(grShader->bindings); free(grShader->inputs); free(grShader->name); + free(grShader->code); } break; case GR_OBJ_TYPE_QUERY_POOL: { GrQueryPool* grQueryPool = (GrQueryPool*)grObject; diff --git a/src/mantle/mantle_shader_pipeline.c b/src/mantle/mantle_shader_pipeline.c index 73cb65eb..e49d4925 100644 --- a/src/mantle/mantle_shader_pipeline.c +++ b/src/mantle/mantle_shader_pipeline.c @@ -6,44 +6,14 @@ typedef struct _Stage { const VkShaderStageFlagBits flags; } Stage; -static VkDescriptorUpdateTemplate getVkDescriptorUpdateTemplate( - const GrDevice* grDevice, - unsigned descriptorUpdateEntryCount, - const VkDescriptorUpdateTemplateEntry* descriptorUpdateEntries, - VkDescriptorSetLayout descriptorSetLayout) -{ - VkDescriptorUpdateTemplate descriptorUpdateTemplate = VK_NULL_HANDLE; - VkResult res; - - const VkDescriptorUpdateTemplateCreateInfo createInfo = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .descriptorUpdateEntryCount = descriptorUpdateEntryCount, - 
.pDescriptorUpdateEntries = descriptorUpdateEntries, - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - .descriptorSetLayout = descriptorSetLayout, - .pipelineBindPoint = 0, // Ignored - .pipelineLayout = VK_NULL_HANDLE, // Ignored - .set = 0, // Ignored - }; - - res = VKD.vkCreateDescriptorUpdateTemplate(grDevice->device, &createInfo, NULL, - &descriptorUpdateTemplate); - if (res != VK_SUCCESS) { - LOGE("vkCreateDescriptorUpdateTemplate failed (%d)\n", res); - assert(false); - } - - return descriptorUpdateTemplate; -} - -static void addDynamicUpdateTemplateSlots( - unsigned* updateTemplateSlotCount, - UpdateTemplateSlot** updateTemplateSlots, +static bool handleDynamicDescriptorSlots( + PipelineDescriptorSlot* descriptorSlot, const GR_DYNAMIC_MEMORY_VIEW_SLOT_INFO* dynamicMapping, unsigned bindingCount, - const IlcBinding* bindings) + const IlcBinding* bindings, + uint32_t* offsets, + uint32_t* descriptorSetIndices, + IlcBindingPatchEntry* patchEntries) { for (unsigned i = 0; i < bindingCount; i++) { const IlcBinding* binding = &bindings[i]; @@ -51,25 +21,7 @@ static void addDynamicUpdateTemplateSlots( if (dynamicMapping->slotObjectType != GR_SLOT_UNUSED && binding->ilIndex == dynamicMapping->shaderEntityIndex && binding->type == ILC_BINDING_RESOURCE) { - // Found a dynamic memory view descriptor - VkDescriptorUpdateTemplateEntry* entry = - malloc(sizeof(VkDescriptorUpdateTemplateEntry)); - *entry = (VkDescriptorUpdateTemplateEntry) { - .dstBinding = binding->vkIndex, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, - .offset = OFFSET_OF_UNION(DescriptorSetSlot, buffer, bufferInfo), - .stride = 0, - }; - - (*updateTemplateSlotCount)++; - *updateTemplateSlots = realloc(*updateTemplateSlots, - *updateTemplateSlotCount * - sizeof(UpdateTemplateSlot)); - (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { - .updateTemplate = (VkDescriptorUpdateTemplate)entry, // 
Stuff the entry here - .isDynamic = true, + *descriptorSlot = (PipelineDescriptorSlot) { .pathDepth = 0, .path = { 0 }, .strideCount = 0, // Initialized below @@ -79,21 +31,35 @@ static void addDynamicUpdateTemplateSlots( if (binding->strideIndex >= 0) { unsigned strideOffset = binding->strideIndex * sizeof(uint32_t); - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideCount = 1; - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets[0] = - strideOffset; - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes[0] = 0; + descriptorSlot->strideCount++; + descriptorSlot->strideOffsets[descriptorSlot->strideCount - 1] = strideOffset; + descriptorSlot->strideSlotIndexes[descriptorSlot->strideCount - 1] = 0; } + + offsets[i] = 0; + unsigned int descriptorSetIndex = DYNAMIC_MEMORY_VIEW_DESCRIPTOR_SET_ID; + descriptorSetIndices[i] = descriptorSetIndex; + patchEntries[i] = (IlcBindingPatchEntry) { + .id = binding->id, + .bindingIndex = DYNAMIC_MEMORY_VIEW_BINDING_ID, + .descriptorSetIndex = descriptorSetIndex, + }; + + return true; } } + + return false; } -static void addUpdateTemplateSlotsFromMapping( - unsigned* updateTemplateSlotCount, - UpdateTemplateSlot** updateTemplateSlots, +static void getDescriptorSlotsFromMapping( + unsigned* pDescriptorSlotCount, + PipelineDescriptorSlot** pDescriptorSlots, const GR_DESCRIPTOR_SET_MAPPING* mapping, unsigned bindingCount, const IlcBinding* bindings, + uint32_t* offsets, + IlcBindingPatchEntry* patchEntries, unsigned pathDepth, unsigned* path) { @@ -113,9 +79,10 @@ static void addUpdateTemplateSlotsFromMapping( path[pathDepth] = i; // Add slots from the nested set - addUpdateTemplateSlotsFromMapping(updateTemplateSlotCount, updateTemplateSlots, - slotInfo->pNextLevelSet, bindingCount, bindings, - pathDepth + 1, path); + getDescriptorSlotsFromMapping(pDescriptorSlotCount, pDescriptorSlots, + slotInfo->pNextLevelSet, bindingCount, bindings, + offsets, patchEntries, + pathDepth + 1, path); 
continue; } @@ -128,49 +95,23 @@ static void addUpdateTemplateSlotsFromMapping( (slotInfo->slotObjectType == GR_SLOT_SHADER_RESOURCE || slotInfo->slotObjectType == GR_SLOT_SHADER_UAV)))) { binding = &bindings[j]; + + uint32_t descriptorTypeOffset = getDescriptorOffset(bindings[j].descriptorType); + offsets[j] = i * DESCRIPTORS_PER_SLOT + descriptorTypeOffset; + break; } } + if (binding == NULL) { // Unused mapping slot, skip continue; } - unsigned slotDataOffset = 0; - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - slotDataOffset = OFFSET_OF_UNION(DescriptorSetSlot, image, imageInfo); - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - slotDataOffset = OFFSET_OF_UNION(DescriptorSetSlot, buffer, bufferView); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - slotDataOffset = OFFSET_OF_UNION(DescriptorSetSlot, buffer, bufferInfo); - break; - default: - LOGE("unhandled descriptor type %d\n", binding->descriptorType); - assert(false); - } - - VkDescriptorUpdateTemplateEntry* entry = malloc(sizeof(VkDescriptorUpdateTemplateEntry)); - *entry = (VkDescriptorUpdateTemplateEntry) { - .dstBinding = binding->vkIndex, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = binding->descriptorType, - .offset = i * sizeof(DescriptorSetSlot) + slotDataOffset, - .stride = 0, - }; - - (*updateTemplateSlotCount)++; - *updateTemplateSlots = realloc(*updateTemplateSlots, - *updateTemplateSlotCount * sizeof(UpdateTemplateSlot)); - (*updateTemplateSlots)[*updateTemplateSlotCount - 1] = (UpdateTemplateSlot) { - .updateTemplate = (VkDescriptorUpdateTemplate)entry, // Stuff the entry here - .isDynamic = false, + (*pDescriptorSlotCount)++; + *pDescriptorSlots = realloc(*pDescriptorSlots, + *pDescriptorSlotCount * sizeof(PipelineDescriptorSlot)); + (*pDescriptorSlots)[*pDescriptorSlotCount - 1] = 
(PipelineDescriptorSlot) { .pathDepth = pathDepth, .path = { 0 }, // Initialized below .strideCount = 0, // Initialized below @@ -178,83 +119,59 @@ static void addUpdateTemplateSlotsFromMapping( .strideSlotIndexes = { 0 }, // Initialized below }; - memcpy((*updateTemplateSlots)[*updateTemplateSlotCount - 1].path, + memcpy((*pDescriptorSlots)[*pDescriptorSlotCount - 1].path, path, pathDepth * sizeof(unsigned)); if (binding->strideIndex >= 0) { unsigned strideOffset = binding->strideIndex * sizeof(uint32_t); - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideCount = 1; - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideOffsets[0] = strideOffset; - (*updateTemplateSlots)[*updateTemplateSlotCount - 1].strideSlotIndexes[0] = i; + (*pDescriptorSlots)[*pDescriptorSlotCount - 1].strideCount = 1; + (*pDescriptorSlots)[*pDescriptorSlotCount - 1].strideOffsets[0] = strideOffset; + (*pDescriptorSlots)[*pDescriptorSlotCount - 1].strideSlotIndexes[0] = i; } } } -static int compareUpdateTemplateSlots( +static int compareDescriptorSlots( const void* a, const void* b) { - const UpdateTemplateSlot* slotA = a; - const UpdateTemplateSlot* slotB = b; + const PipelineDescriptorSlot* slotA = a; + const PipelineDescriptorSlot* slotB = b; - // Make slots with the same path adjacent - if (slotA->isDynamic != slotB->isDynamic) { - return (int)slotA->isDynamic - (int)slotB->isDynamic; - } - if (slotA->pathDepth != slotB->pathDepth) { - return (int)slotA->pathDepth - (int)slotB->pathDepth; - } return memcmp(slotA->path, slotB->path, slotA->pathDepth * sizeof(slotA->path[0])); } -static void mergeUpdateTemplateSlots( - unsigned* updateTemplateSlotCount, - UpdateTemplateSlot** updateTemplateSlots, - const GrDevice* grDevice, - VkDescriptorSetLayout descriptorSetLayout) + +static void mergeDescriptorSlots( + unsigned* descriptorSlotCount, + PipelineDescriptorSlot** descriptorSlots) { // Group slots by path - qsort(*updateTemplateSlots, *updateTemplateSlotCount, 
sizeof(UpdateTemplateSlot), - compareUpdateTemplateSlots); - - unsigned descriptorUpdateEntryCount = 0; - VkDescriptorUpdateTemplateEntry* descriptorUpdateEntries = NULL; + qsort(*descriptorSlots, *descriptorSlotCount, sizeof(PipelineDescriptorSlot), + compareDescriptorSlots); - for (unsigned i = 0; i < *updateTemplateSlotCount; i++) { - bool isLastSlot = (i + 1) == *updateTemplateSlotCount; - UpdateTemplateSlot* slot = &(*updateTemplateSlots)[i]; - UpdateTemplateSlot* nextSlot = &(*updateTemplateSlots)[i + 1]; + unsigned mergingDescriptorCount = 0; - // Add new entry - VkDescriptorUpdateTemplateEntry* entry = - (VkDescriptorUpdateTemplateEntry*)slot->updateTemplate; + for (unsigned i = 0; i < *descriptorSlotCount; i++) { + bool isLastSlot = (i + 1) == *descriptorSlotCount; + PipelineDescriptorSlot* slot = &(*descriptorSlots)[i]; + PipelineDescriptorSlot* nextSlot = &(*descriptorSlots)[i + 1]; - descriptorUpdateEntryCount++; - descriptorUpdateEntries = realloc(descriptorUpdateEntries, - descriptorUpdateEntryCount * - sizeof(VkDescriptorUpdateTemplateEntry)); - descriptorUpdateEntries[descriptorUpdateEntryCount - 1] = *entry; - free(entry); + mergingDescriptorCount++; if (!isLastSlot && - slot->isDynamic == nextSlot->isDynamic && slot->pathDepth == nextSlot->pathDepth && memcmp(slot->path, nextSlot->path, slot->pathDepth * sizeof(slot->path[0])) == 0) { // Can't merge yet continue; } - unsigned mergedIdx = i - descriptorUpdateEntryCount + 1; - UpdateTemplateSlot* mergedSlot = &(*updateTemplateSlots)[mergedIdx]; - - mergedSlot->updateTemplate = - getVkDescriptorUpdateTemplate(grDevice, descriptorUpdateEntryCount, - descriptorUpdateEntries, descriptorSetLayout); - free(descriptorUpdateEntries); + unsigned mergedIdx = i - mergingDescriptorCount + 1; + PipelineDescriptorSlot* mergedSlot = &(*descriptorSlots)[mergedIdx]; // TODO deduplicate strides for (unsigned j = mergedIdx + 1; j <= i; j++) { - UpdateTemplateSlot* slotToMerge = &(*updateTemplateSlots)[j]; + 
PipelineDescriptorSlot* slotToMerge = &(*descriptorSlots)[j]; if (slotToMerge->strideCount == 1) { if (mergedSlot->strideCount >= MAX_STRIDES) { @@ -272,26 +189,91 @@ static void mergeUpdateTemplateSlots( // Drop temporary slots memmove(mergedSlot + 1, nextSlot, - (*updateTemplateSlotCount - i - 1) * sizeof(UpdateTemplateSlot)); - *updateTemplateSlotCount -= descriptorUpdateEntryCount - 1; - *updateTemplateSlots = realloc(*updateTemplateSlots, - *updateTemplateSlotCount * sizeof(UpdateTemplateSlot)); + (*descriptorSlotCount - i - 1) * sizeof(PipelineDescriptorSlot)); + *descriptorSlotCount -= mergingDescriptorCount - 1; + *descriptorSlots = realloc(*descriptorSlots, + *descriptorSlotCount * sizeof(PipelineDescriptorSlot)); // Update state i = mergedIdx; - descriptorUpdateEntryCount = 0; - descriptorUpdateEntries = NULL; + mergingDescriptorCount = 0; } } -static void getUpdateTemplateSlots( - unsigned* updateTemplateSlotCount, - UpdateTemplateSlot** updateTemplateSlots, +static void setupDescriptorSetIndices( + unsigned descriptorSetCount, + const PipelineDescriptorSlot* descriptorSlots, + const GR_DESCRIPTOR_SET_MAPPING* mapping, + unsigned bindingCount, + const IlcBinding* bindings, + IlcBindingPatchEntry* patchEntries, + unsigned* descriptorSetIndices, + unsigned descriptorSetIndexOffset, + unsigned pathDepth, + unsigned* path) +{ + unsigned descriptorSetIndex = 0xFFFFFFFF; + for (unsigned i = 0; i < descriptorSetCount; ++i) { + const PipelineDescriptorSlot* slot = (const PipelineDescriptorSlot*)(&descriptorSlots[i]); + if (slot->pathDepth == pathDepth && memcmp(slot->path, path, pathDepth * sizeof(path[0])) == 0) { + descriptorSetIndex = i; + break; + } + } + for (unsigned i = 0; i < mapping->descriptorCount; i++) { + const GR_DESCRIPTOR_SLOT_INFO* slotInfo = &mapping->pDescriptorInfo[i]; + + if (slotInfo->slotObjectType == GR_SLOT_UNUSED) { + continue; + } else if (slotInfo->slotObjectType == GR_SLOT_NEXT_DESCRIPTOR_SET) { + if (pathDepth >= MAX_PATH_DEPTH) { + 
LOGE("exceeded max path depth of %d\n", MAX_PATH_DEPTH); + assert(false); + } + + // Mark path + path[pathDepth] = i; + + // Add slots from the nested set + setupDescriptorSetIndices(descriptorSetCount, descriptorSlots, + slotInfo->pNextLevelSet, bindingCount, bindings, + patchEntries, descriptorSetIndices, + descriptorSetIndexOffset, + pathDepth + 1, path); + continue; + } + // Find matching binding + for (unsigned j = 0; j < bindingCount; j++) { + if (bindings[j].ilIndex == slotInfo->shaderEntityIndex && + ((bindings[j].type == ILC_BINDING_SAMPLER && + slotInfo->slotObjectType == GR_SLOT_SHADER_SAMPLER) || + (bindings[j].type == ILC_BINDING_RESOURCE && + (slotInfo->slotObjectType == GR_SLOT_SHADER_RESOURCE || + slotInfo->slotObjectType == GR_SLOT_SHADER_UAV)))) { + unsigned computedDescriptorSetIndex = descriptorSetIndexOffset + descriptorSetIndex; + descriptorSetIndices[j] = computedDescriptorSetIndex; + patchEntries[j] = (IlcBindingPatchEntry) { + .id = bindings[j].id, + .bindingIndex = 0, + .descriptorSetIndex = computedDescriptorSetIndex, + }; + break; + } + } + } +} + +static void getDescriptorSlotMappings( + unsigned* descriptorSlotCount, + PipelineDescriptorSlot** descriptorSlots, const GrDevice* grDevice, unsigned stageCount, const Stage* stages, + IlcBindingPatchEntry** patchEntries, + uint32_t** specOffsets, + uint32_t** specDescriptorIndices, unsigned mappingIndex, - VkDescriptorSetLayout descriptorSetLayout) + unsigned descriptorSetIndexOffset) { for (unsigned i = 0; i < stageCount; i++) { const Stage* stage = &stages[i]; @@ -303,108 +285,66 @@ static void getUpdateTemplateSlots( continue; } - addDynamicUpdateTemplateSlots(updateTemplateSlotCount, updateTemplateSlots, - &shader->dynamicMemoryViewMapping, - grShader->bindingCount, grShader->bindings); - addUpdateTemplateSlotsFromMapping(updateTemplateSlotCount, updateTemplateSlots, - &shader->descriptorSetMapping[mappingIndex], - grShader->bindingCount, grShader->bindings, 0, path); + 
getDescriptorSlotsFromMapping(descriptorSlotCount, descriptorSlots, + &shader->descriptorSetMapping[mappingIndex], + grShader->bindingCount, grShader->bindings, + specOffsets[i], patchEntries[i], + 0, path); } - mergeUpdateTemplateSlots(updateTemplateSlotCount, updateTemplateSlots, grDevice, - descriptorSetLayout); -} - -static VkDescriptorSetLayout getVkDescriptorSetLayout( - unsigned* dynamicOffsetCount, - const GrDevice* grDevice, - unsigned stageCount, - const Stage* stages) -{ - VkDescriptorSetLayout layout = VK_NULL_HANDLE; - unsigned bindingCount = 0; - VkDescriptorSetLayoutBinding* bindings = NULL; - + mergeDescriptorSlots(descriptorSlotCount, descriptorSlots); for (unsigned i = 0; i < stageCount; i++) { const Stage* stage = &stages[i]; const GR_PIPELINE_SHADER* shader = stage->shader; - const GR_DYNAMIC_MEMORY_VIEW_SLOT_INFO* dynamicSlotInfo = &shader->dynamicMemoryViewMapping; const GrShader* grShader = shader->shader; + unsigned path[MAX_PATH_DEPTH]; if (grShader == NULL) { continue; } - for (unsigned j = 0; j < grShader->bindingCount; j++) { - const IlcBinding* binding = &grShader->bindings[j]; - bool isDynamic = false; - - if (dynamicSlotInfo->slotObjectType != GR_SLOT_UNUSED && - binding->ilIndex == dynamicSlotInfo->shaderEntityIndex && - binding->type == ILC_BINDING_RESOURCE) { - // Use dynamic offsets for dynamic memory views to avoid invalidating - // descriptor sets each time the buffer offset changes - isDynamic = true; - (*dynamicOffsetCount)++; - } - - // Add new binding - bindingCount++; - bindings = realloc(bindings, bindingCount * sizeof(VkDescriptorSetLayoutBinding)); - bindings[bindingCount - 1] = (VkDescriptorSetLayoutBinding) { - .binding = binding->vkIndex, - .descriptorType = isDynamic ? 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC - : binding->descriptorType, - .descriptorCount = 1, - .stageFlags = stage->flags, - .pImmutableSamplers = NULL, - }; - } + setupDescriptorSetIndices(*descriptorSlotCount, *descriptorSlots, + &shader->descriptorSetMapping[mappingIndex], + grShader->bindingCount, grShader->bindings, + patchEntries[i], + specDescriptorIndices[i], + descriptorSetIndexOffset, + 0, path); } - - const VkDescriptorSetLayoutCreateInfo createInfo = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .bindingCount = bindingCount, - .pBindings = bindings, - }; - - VkResult res = VKD.vkCreateDescriptorSetLayout(grDevice->device, &createInfo, NULL, &layout); - if (res != VK_SUCCESS) { - LOGE("vkCreateDescriptorSetLayout failed (%d)\n", res); - } - - free(bindings); - return layout; } static VkPipelineLayout getVkPipelineLayout( const GrDevice* grDevice, - VkDescriptorSetLayout descriptorSetLayout, - VkDescriptorSetLayout atomicSetLayout) + unsigned descriptorSetCount, + VkPipelineBindPoint vkBindPoint) { VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; - const VkDescriptorSetLayout setLayouts[] = { - descriptorSetLayout, - atomicSetLayout, + VkDescriptorSetLayout setLayouts[32] = { + grDevice->dynamicMemorySetLayout, + grDevice->atomicCounterSetLayout, }; - const VkPushConstantRange pushConstantRange = { - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .offset = 0, - .size = ILC_MAX_STRIDE_CONSTANTS * sizeof(uint32_t), + assert((descriptorSetCount + 2) <= COUNT_OF(setLayouts)); + for (unsigned i = 0; i < descriptorSetCount; ++i) { + setLayouts[i + 2] = grDevice->defaultDescriptorSetLayout; + } + const VkPushConstantRange pushConstantRanges[] = { + { + .stageFlags = (vkBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) ? 
VK_SHADER_STAGE_ALL_GRAPHICS : VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = DESCRIPTOR_OFFSET_COUNT * sizeof(uint32_t) + ILC_MAX_STRIDE_CONSTANTS * sizeof(uint32_t), + } }; const VkPipelineLayoutCreateInfo createInfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = NULL, .flags = 0, - .setLayoutCount = COUNT_OF(setLayouts), + .setLayoutCount = descriptorSetCount + 2, .pSetLayouts = setLayouts, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &pushConstantRange, + .pushConstantRangeCount = COUNT_OF(pushConstantRanges), + .pPushConstantRanges = pushConstantRanges, }; VkResult res = VKD.vkCreatePipelineLayout(grDevice->device, &createInfo, NULL, &pipelineLayout); @@ -627,42 +567,21 @@ GR_RESULT GR_STDCALL grCreateShader( { LOGT("%p %p %p\n", device, pCreateInfo, pShader); GrDevice* grDevice = (GrDevice*)device; - VkShaderModule vkShaderModule = VK_NULL_HANDLE; // ALLOW_RE_Z flag doesn't have a Vulkan equivalent. RADV determines it automatically. IlcShader ilcShader = ilcCompileShader(pCreateInfo->pCode, pCreateInfo->codeSize); - const VkShaderModuleCreateInfo createInfo = { - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = NULL, - .flags = 0, - .codeSize = ilcShader.codeSize, - .pCode = ilcShader.code, - }; - - VkResult res = VKD.vkCreateShaderModule(grDevice->device, &createInfo, NULL, &vkShaderModule); - if (res != VK_SUCCESS) { - LOGE("vkCreateShaderModule failed (%d)\n", res); - free(ilcShader.code); - free(ilcShader.bindings); - free(ilcShader.inputs); - free(ilcShader.name); - return getGrResult(res); - } - - free(ilcShader.code); - GrShader* grShader = malloc(sizeof(GrShader)); *grShader = (GrShader) { .grObj = { GR_OBJ_TYPE_SHADER, grDevice }, - .refCount = 1, - .shaderModule = vkShaderModule, .bindingCount = ilcShader.bindingCount, .bindings = ilcShader.bindings, .inputCount = ilcShader.inputCount, .inputs = ilcShader.inputs, .name = ilcShader.name, + .codeSize = ilcShader.codeSize, + .code = 
ilcShader.code, }; *pShader = (GR_SHADER)grShader; @@ -678,13 +597,19 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( GrDevice* grDevice = (GrDevice*)device; GR_RESULT res = GR_SUCCESS; bool hasTessellation = false; - VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE; VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; - VkShaderModule rectangleShaderModule = VK_NULL_HANDLE; - unsigned dynamicOffsetCount = 0; - unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; - UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS] = { NULL }; - GrShader* grShaderRefs[MAX_STAGE_COUNT] = { NULL }; + + VkShaderModule shaderModules[MAX_STAGE_COUNT] = { 0 }; + IlcBindingPatchEntry* patchEntries[MAX_STAGE_COUNT] = { NULL }; + uint32_t* specData[MAX_STAGE_COUNT] = { NULL }; + uint32_t* descriptorSetIndices[MAX_STAGE_COUNT] = { NULL }; + VkSpecializationMapEntry* mapEntries[MAX_STAGE_COUNT] = { NULL }; + VkSpecializationInfo specInfos[MAX_STAGE_COUNT] = { { 0 } }; + + PipelineDescriptorSlot dynamicDescriptorSlot = { 0 }; + unsigned descriptorSetCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; + PipelineDescriptorSlot* pipelineDescriptorSlots[GR_MAX_DESCRIPTOR_SETS] = { NULL }; + VkResult vkRes; // TODO validate parameters @@ -705,6 +630,55 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( unsigned stageCount = 0; VkPipelineShaderStageCreateInfo shaderStageCreateInfo[COUNT_OF(stages)]; + bool dynamicMappingUsed = false; + for (int i = 0; i < COUNT_OF(stages); i++) { + Stage* stage = &stages[i]; + + if (stage->shader->shader == GR_NULL_HANDLE) { + continue; + } + + GrShader* grShader = (GrShader*)stage->shader->shader; + + patchEntries[i] = malloc(sizeof(IlcBindingPatchEntry) * grShader->bindingCount); + mapEntries[i] = malloc(grShader->bindingCount * 2 * sizeof(VkSpecializationMapEntry)); + specData[i] = malloc(sizeof(uint32_t) * 2 * grShader->bindingCount); + specInfos[i] = (VkSpecializationInfo) { + .pData = specData[i], + .pMapEntries = mapEntries[i], + 
.dataSize = sizeof(uint32_t) * grShader->bindingCount * 2, + .mapEntryCount = grShader->bindingCount * 2, + }; + descriptorSetIndices[i] = &specData[i][grShader->bindingCount]; + for (unsigned j = 0; j < grShader->bindingCount; ++j) { + mapEntries[i][j * 2] = (VkSpecializationMapEntry) { + .constantID = grShader->bindings[j].offsetSpecId, + .offset = j * sizeof(uint32_t), + .size = sizeof(uint32_t), + }; + mapEntries[i][j * 2 + 1] = (VkSpecializationMapEntry) { + .constantID = grShader->bindings[j].descriptorSetIndexSpecId, + .offset = (j + grShader->bindingCount) * sizeof(uint32_t), + .size = sizeof(uint32_t), + }; + } + + dynamicMappingUsed |= handleDynamicDescriptorSlots( + &dynamicDescriptorSlot, + &stage->shader->dynamicMemoryViewMapping, + grShader->bindingCount, grShader->bindings, + specData[i], + &specData[i][grShader->bindingCount], + patchEntries[i]); + } + + unsigned descriptorSetCount = 0; + for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { + getDescriptorSlotMappings(&descriptorSetCounts[i], &pipelineDescriptorSlots[i], + grDevice, COUNT_OF(stages), stages, patchEntries, specData, descriptorSetIndices, i, descriptorSetCount + DESCRIPTOR_SET_ID); + descriptorSetCount += descriptorSetCounts[i]; + } + for (int i = 0; i < COUNT_OF(stages); i++) { Stage* stage = &stages[i]; @@ -719,15 +693,47 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( GrShader* grShader = (GrShader*)stage->shader->shader; - grShaderRefs[i] = grShader; - grShader->refCount++; + void* code = malloc(grShader->codeSize); + memcpy(code, grShader->code, grShader->codeSize); + + const VkShaderModuleCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .codeSize = grShader->codeSize, + .pCode = code,//grShader->code, + }; + + patchShaderBindings( + code, + grShader->codeSize, + patchEntries[i], + grShader->bindingCount); + + vkRes = VKD.vkCreateShaderModule(grDevice->device, &createInfo, NULL, &shaderModules[stageCount]); + 
free(code); + if (vkRes != VK_SUCCESS) { + res = getGrResult(vkRes); + goto bail; + } + + if (stageCount != i) { + patchEntries[stageCount] = patchEntries[i]; + mapEntries[stageCount] = mapEntries[i]; + specData[stageCount] = specData[i]; + + patchEntries[i] = NULL; + mapEntries[i] = NULL; + specData[i] = NULL; + memcpy(&specInfos[stageCount], &specInfos[i], sizeof(VkSpecializationInfo)); + } shaderStageCreateInfo[stageCount] = (VkPipelineShaderStageCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = NULL, .flags = 0, .stage = stage->flags, - .module = grShader->shaderModule, + .module = shaderModules[stageCount], .pName = "main", .pSpecializationInfo = NULL, }; @@ -763,7 +769,7 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( }; vkRes = VKD.vkCreateShaderModule(grDevice->device, &rectangleShaderModuleCreateInfo, NULL, - &rectangleShaderModule); + &shaderModules[stageCount]); free(rectangleShader.code); if (vkRes != VK_SUCCESS) { @@ -776,7 +782,7 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( .pNext = NULL, .flags = 0, .stage = VK_SHADER_STAGE_GEOMETRY_BIT, - .module = rectangleShaderModule, + .module = shaderModules[stageCount], .pName = "main", .pSpecializationInfo = NULL, }; @@ -800,6 +806,9 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT : 0, .stageCount = stageCount, .stageCreateInfos = { { 0 } }, // Initialized below + .specInfos = { { 0 } }, // Initialized below + .specData = { NULL }, // Initialized below + .mapEntries = { NULL }, // Initialized below .topology = getVkPrimitiveTopology(pCreateInfo->iaState.topology), .patchControlPoints = pCreateInfo->tessState.patchControlPoints, .depthClipEnable = !!pCreateInfo->rsState.depthClipEnable, @@ -814,59 +823,66 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( memcpy(pipelineCreateInfo->stageCreateInfos, shaderStageCreateInfo, stageCount * sizeof(VkPipelineShaderStageCreateInfo)); + memcpy(pipelineCreateInfo->specInfos, 
specInfos, sizeof(specInfos)); + memcpy(pipelineCreateInfo->specData, specData, sizeof(specData)); + memcpy(pipelineCreateInfo->mapEntries, mapEntries, sizeof(mapEntries)); memcpy(pipelineCreateInfo->colorFormats, colorFormats, GR_MAX_COLOR_TARGETS * sizeof(VkFormat)); memcpy(pipelineCreateInfo->colorWriteMasks, colorWriteMasks, GR_MAX_COLOR_TARGETS * sizeof(VkColorComponentFlags)); - descriptorSetLayout = getVkDescriptorSetLayout(&dynamicOffsetCount, grDevice, - COUNT_OF(stages), stages); - if (descriptorSetLayout == VK_NULL_HANDLE) { - res = GR_ERROR_OUT_OF_MEMORY; - goto bail; + for (unsigned i = 0; i < MAX_STAGE_COUNT; i++) { + pipelineCreateInfo->stageCreateInfos[i].pSpecializationInfo = &pipelineCreateInfo->specInfos[i]; } - pipelineLayout = getVkPipelineLayout(grDevice, descriptorSetLayout, - grDevice->atomicCounterSetLayout); + descriptorSetCount = 0; + for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { + descriptorSetCount += descriptorSetCounts[i]; + } + pipelineLayout = getVkPipelineLayout(grDevice, descriptorSetCount, VK_PIPELINE_BIND_POINT_GRAPHICS); if (pipelineLayout == VK_NULL_HANDLE) { res = GR_ERROR_OUT_OF_MEMORY; goto bail; } - - for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { - getUpdateTemplateSlots(&updateTemplateSlotCounts[i], &updateTemplateSlots[i], - grDevice, COUNT_OF(stages), stages, i, descriptorSetLayout); - } - // TODO keep track of rectangle shader module GrPipeline* grPipeline = malloc(sizeof(GrPipeline)); *grPipeline = (GrPipeline) { .grObj = { GR_OBJ_TYPE_PIPELINE, grDevice }, - .grShaderRefs = { NULL }, // Initialized below + .shaderModules = { VK_NULL_HANDLE }, .createInfo = pipelineCreateInfo, .hasTessellation = hasTessellation, .pipeline = VK_NULL_HANDLE, // We don't know the attachment formats yet (Frostbite bug) .pipelineLayout = pipelineLayout, .stageCount = COUNT_OF(stages), - .descriptorSetLayout = descriptorSetLayout, - .dynamicOffsetCount = dynamicOffsetCount, - .updateTemplateSlotCounts = { 0 }, // 
Initialized below - .updateTemplateSlots = { NULL }, // Initialized below + .dynamicMappingUsed = dynamicMappingUsed, + .dynamicDescriptorSlot = dynamicDescriptorSlot, + .descriptorSetCounts = { 0 }, // Initialized below + .descriptorSlots = { NULL }, // Initialized below }; - memcpy(grPipeline->grShaderRefs, grShaderRefs, sizeof(grPipeline->grShaderRefs)); - memcpy(grPipeline->updateTemplateSlotCounts, updateTemplateSlotCounts, - sizeof(grPipeline->updateTemplateSlotCounts)); - memcpy(grPipeline->updateTemplateSlots, updateTemplateSlots, - sizeof(grPipeline->updateTemplateSlots)); + memcpy(grPipeline->shaderModules, shaderModules, sizeof(grPipeline->shaderModules)); + + memcpy(grPipeline->descriptorSetCounts, descriptorSetCounts, + sizeof(grPipeline->descriptorSetCounts)); + memcpy(grPipeline->descriptorSlots, pipelineDescriptorSlots, + sizeof(grPipeline->descriptorSlots)); + + for (uint32_t i = 0; i < MAX_STAGE_COUNT; i++) { + free(patchEntries[i]); + } *pPipeline = (GR_PIPELINE)grPipeline; + return GR_SUCCESS; bail: - VKD.vkDestroyDescriptorSetLayout(grDevice->device, descriptorSetLayout, NULL); VKD.vkDestroyPipelineLayout(grDevice->device, pipelineLayout, NULL); - VKD.vkDestroyShaderModule(grDevice->device, rectangleShaderModule, NULL); + for (uint32_t i = 0; i < MAX_STAGE_COUNT; i++) { + VKD.vkDestroyShaderModule(grDevice->device, shaderModules[i], NULL); + free(patchEntries[i]); + free(specData[i]); + free(mapEntries[i]); + } return res; } @@ -879,12 +895,20 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( GrDevice* grDevice = (GrDevice*)device; GR_RESULT res = GR_SUCCESS; VkResult vkRes; - VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE; VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; - VkPipeline pipeline = VK_NULL_HANDLE; - unsigned dynamicOffsetCount = 0; - unsigned updateTemplateSlotCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; - UpdateTemplateSlot* updateTemplateSlots[GR_MAX_DESCRIPTOR_SETS] = { NULL }; + VkShaderModule shaderModule = 
VK_NULL_HANDLE; + VkPipeline vkPipeline = VK_NULL_HANDLE; + + uint32_t* specData = NULL; + VkSpecializationMapEntry* mapEntries = NULL; + VkSpecializationInfo specInfo = { 0 }; + IlcBindingPatchEntry* patchEntries = NULL; + uint32_t* descriptorOffsets = NULL; + uint32_t* descriptorSetIndices = NULL; + + PipelineDescriptorSlot dynamicDescriptorSlot = { 0 }; + unsigned descriptorSetCounts[GR_MAX_DESCRIPTOR_SETS] = { 0 }; + PipelineDescriptorSlot* pipelineDescriptorSlots[GR_MAX_DESCRIPTOR_SETS] = { NULL }; // TODO validate parameters @@ -894,39 +918,91 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( // TODO implement LOGW("link-time constant buffers are not implemented\n"); } - GrShader* grShader = (GrShader*)stage.shader->shader; - grShader->refCount++; + patchEntries = malloc(sizeof(IlcBindingPatchEntry) * grShader->bindingCount); + mapEntries = malloc(grShader->bindingCount * 2 * sizeof(VkSpecializationMapEntry)); + specData = malloc(sizeof(uint32_t) * 2 * grShader->bindingCount); + specInfo = (VkSpecializationInfo) { + .pData = specData, + .pMapEntries = mapEntries, + .dataSize = sizeof(uint32_t) * grShader->bindingCount * 2, + .mapEntryCount = grShader->bindingCount * 2, + }; + descriptorOffsets = specData; + descriptorSetIndices = &specData[grShader->bindingCount]; + + for (unsigned j = 0; j < grShader->bindingCount; ++j) { + mapEntries[j * 2] = (VkSpecializationMapEntry) { + .constantID = grShader->bindings[j].offsetSpecId, + .offset = j * sizeof(uint32_t), + .size = sizeof(uint32_t) + }; + mapEntries[j * 2 + 1] = (VkSpecializationMapEntry) { + .constantID = grShader->bindings[j].descriptorSetIndexSpecId, + .offset = (j + grShader->bindingCount) * sizeof(uint32_t), + .size = sizeof(uint32_t) + }; + } + bool dynamicMappingUsed = handleDynamicDescriptorSlots( + &dynamicDescriptorSlot, + &stage.shader->dynamicMemoryViewMapping, + grShader->bindingCount, grShader->bindings, + specData, + &specData[grShader->bindingCount], + patchEntries); + + unsigned 
descriptorSetCount = 0; + for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { + getDescriptorSlotMappings(&descriptorSetCounts[i], &pipelineDescriptorSlots[i], + grDevice, 1, &stage, &patchEntries, &descriptorOffsets, &descriptorSetIndices, i, descriptorSetCount + DESCRIPTOR_SET_ID); + descriptorSetCount += descriptorSetCounts[i]; + } + + void* code = malloc(grShader->codeSize); + memcpy(code, grShader->code, grShader->codeSize); + + const VkShaderModuleCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .codeSize = grShader->codeSize, + .pCode = code,//grShader->code, + }; + + patchShaderBindings( + code, + grShader->codeSize, + patchEntries, + grShader->bindingCount); + + vkRes = VKD.vkCreateShaderModule(grDevice->device, &createInfo, NULL, &shaderModule); + free(code); + if (vkRes != VK_SUCCESS) { + res = getGrResult(vkRes); + goto bail; + } const VkPipelineShaderStageCreateInfo shaderStageCreateInfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = NULL, .flags = 0, .stage = stage.flags, - .module = grShader->shaderModule, + .module = shaderModule, .pName = "main", - .pSpecializationInfo = NULL, + .pSpecializationInfo = &specInfo, }; - descriptorSetLayout = getVkDescriptorSetLayout(&dynamicOffsetCount, grDevice, 1, &stage); - if (descriptorSetLayout == VK_NULL_HANDLE) { - res = GR_ERROR_OUT_OF_MEMORY; - goto bail; + descriptorSetCount = 0; + for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { + descriptorSetCount += descriptorSetCounts[i]; } - - pipelineLayout = getVkPipelineLayout(grDevice, descriptorSetLayout, - grDevice->atomicCounterSetLayout); + pipelineLayout = getVkPipelineLayout(grDevice, descriptorSetCount, VK_PIPELINE_BIND_POINT_COMPUTE); if (pipelineLayout == VK_NULL_HANDLE) { res = GR_ERROR_OUT_OF_MEMORY; goto bail; } - for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { - getUpdateTemplateSlots(&updateTemplateSlotCounts[i], &updateTemplateSlots[i], - 
grDevice, 1, &stage, i, descriptorSetLayout); - } - const VkComputePipelineCreateInfo pipelineCreateInfo = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = NULL, @@ -939,7 +1015,7 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( }; vkRes = VKD.vkCreateComputePipelines(grDevice->device, VK_NULL_HANDLE, 1, &pipelineCreateInfo, - NULL, &pipeline); + NULL, &vkPipeline); if (vkRes != VK_SUCCESS) { LOGE("vkCreateComputePipelines failed (%d)\n", vkRes); res = getGrResult(vkRes); @@ -949,29 +1025,36 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( GrPipeline* grPipeline = malloc(sizeof(GrPipeline)); *grPipeline = (GrPipeline) { .grObj = { GR_OBJ_TYPE_PIPELINE, grDevice }, - .grShaderRefs = { grShader }, + .shaderModules = { shaderModule }, .createInfo = NULL, .hasTessellation = false, - .pipeline = pipeline, + .pipeline = vkPipeline, .pipelineLayout = pipelineLayout, .stageCount = 1, - .descriptorSetLayout = descriptorSetLayout, - .dynamicOffsetCount = dynamicOffsetCount, - .updateTemplateSlotCounts = { 0 }, // Initialized below - .updateTemplateSlots = { NULL }, // Initialized below + .dynamicMappingUsed = dynamicMappingUsed, + .dynamicDescriptorSlot = dynamicDescriptorSlot, + .descriptorSetCounts = { 0 }, // Initialized below + .descriptorSlots = { NULL }, // Initialized below }; - memcpy(grPipeline->updateTemplateSlotCounts, updateTemplateSlotCounts, - sizeof(grPipeline->updateTemplateSlotCounts)); - memcpy(grPipeline->updateTemplateSlots, updateTemplateSlots, - sizeof(grPipeline->updateTemplateSlots)); + memcpy(grPipeline->descriptorSetCounts, descriptorSetCounts, + sizeof(grPipeline->descriptorSetCounts)); + memcpy(grPipeline->descriptorSlots, pipelineDescriptorSlots, + sizeof(grPipeline->descriptorSlots)); + + free(specData); + free(mapEntries); + free(patchEntries); *pPipeline = (GR_PIPELINE)grPipeline; return GR_SUCCESS; bail: - VKD.vkDestroyDescriptorSetLayout(grDevice->device, descriptorSetLayout, NULL); + free(patchEntries); + free(specData); 
+ free(mapEntries); VKD.vkDestroyPipelineLayout(grDevice->device, pipelineLayout, NULL); + VKD.vkDestroyShaderModule(grDevice->device, shaderModule, NULL); return res; } diff --git a/src/mantle/util.c b/src/mantle/util.c index 49cb9770..a9731527 100644 --- a/src/mantle/util.c +++ b/src/mantle/util.c @@ -1134,3 +1134,25 @@ VkImageSubresourceRange getVkImageSubresourceRange( VK_REMAINING_ARRAY_LAYERS : subresourceRange.arraySize * layerFactor, }; } + +unsigned getDescriptorOffset( + VkDescriptorType vkDescriptorType) +{ + switch (vkDescriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + return 0; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + return 1; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + return 0; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return 1; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + return 2; + case VK_DESCRIPTOR_TYPE_SAMPLER: + return 0; + default: + LOGE("unsupported descriptor type for offset 0x%X\n", vkDescriptorType); + return 0; + } +} diff --git a/src/mantle/vulkan_loader.c b/src/mantle/vulkan_loader.c index 3abbbbc4..3e700703 100644 --- a/src/mantle/vulkan_loader.c +++ b/src/mantle/vulkan_loader.c @@ -235,4 +235,8 @@ void vulkanLoaderDeviceInit( LOAD_VULKAN_DEV_FN(vkd, device, vkCmdSetRasterizationSamplesEXT); LOAD_VULKAN_DEV_FN(vkd, device, vkCmdSetSampleMaskEXT); #endif + +#ifdef VK_KHR_push_descriptor + LOAD_VULKAN_DEV_FN(vkd, device, vkCmdPushDescriptorSetKHR); +#endif } diff --git a/src/mantle/vulkan_loader.h b/src/mantle/vulkan_loader.h index bcf712fb..dbffc29a 100644 --- a/src/mantle/vulkan_loader.h +++ b/src/mantle/vulkan_loader.h @@ -217,6 +217,10 @@ typedef struct _VULKAN_DEVICE { VULKAN_FN(vkCmdSetRasterizationSamplesEXT); VULKAN_FN(vkCmdSetSampleMaskEXT); #endif + +#ifdef VK_KHR_push_descriptor + VULKAN_FN(vkCmdPushDescriptorSetKHR); +#endif } VULKAN_DEVICE; extern VULKAN_LIBRARY vkl; From 939f35519e4815e7b2afd9ac0976bec47fad698e Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> 
Date: Sun, 4 Dec 2022 22:33:35 +0300 Subject: [PATCH 04/10] mantle: replace VkPhysicalDeviceProperties to VkPhysicalDeviceProperties2 --- src/mantle/mantle_init_device.c | 47 ++++++++++++++++++--------------- src/mantle/mantle_object.h | 2 +- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/mantle/mantle_init_device.c b/src/mantle/mantle_init_device.c index 5f71b66b..fea0e837 100644 --- a/src/mantle/mantle_init_device.c +++ b/src/mantle/mantle_init_device.c @@ -373,7 +373,10 @@ GR_RESULT GR_STDCALL grInitAndEnumerateGpus( .physicalDeviceProps = { 0 }, // Initialized below }; - vki.vkGetPhysicalDeviceProperties(physicalDevices[i], &grPhysicalGpu->physicalDeviceProps); + grPhysicalGpu->physicalDeviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + grPhysicalGpu->physicalDeviceProps.pNext = NULL; + + vki.vkGetPhysicalDeviceProperties2(physicalDevices[i], &grPhysicalGpu->physicalDeviceProps); gpus[i] = (GR_PHYSICAL_GPU)grPhysicalGpu; } @@ -398,7 +401,7 @@ GR_RESULT GR_STDCALL grGetGpuInfo( return GR_ERROR_INVALID_POINTER; } - const VkPhysicalDeviceProperties* props = &grPhysicalGpu->physicalDeviceProps; + const VkPhysicalDeviceProperties2* props = &grPhysicalGpu->physicalDeviceProps; unsigned expectedSize = 0; switch (infoType) { @@ -417,19 +420,19 @@ GR_RESULT GR_STDCALL grGetGpuInfo( *gpuProps = (GR_PHYSICAL_GPU_PROPERTIES) { .apiVersion = 0x19000, // 19.4.3 .driverVersion = 0x49C00000, // 19.4.3 - .vendorId = props->vendorID, - .deviceId = props->deviceID, - .gpuType = getGrPhysicalGpuType(props->deviceType), + .vendorId = props->properties.vendorID, + .deviceId = props->properties.deviceID, + .gpuType = getGrPhysicalGpuType(props->properties.deviceType), .gpuName = "", // Initialized below .maxMemRefsPerSubmission = 16384, // 19.4.3 .reserved = 4200043, // 19.4.3 .maxInlineMemoryUpdateSize = 32768, // 19.4.3 .maxBoundDescriptorSets = 2, // 19.4.3 - .maxThreadGroupSize = props->limits.maxComputeWorkGroupSize[0], - 
.timestampFrequency = 1000000000.f / props->limits.timestampPeriod, + .maxThreadGroupSize = props->properties.limits.maxComputeWorkGroupSize[0], + .timestampFrequency = 1000000000.f / props->properties.limits.timestampPeriod, .multiColorTargetClears = true, // 19.4.3 }; - strncpy(gpuProps->gpuName, props->deviceName, GR_MAX_PHYSICAL_GPU_NAME); + strncpy(gpuProps->gpuName, props->properties.deviceName, GR_MAX_PHYSICAL_GPU_NAME); break; case GR_INFO_TYPE_PHYSICAL_GPU_PERFORMANCE: expectedSize = sizeof(GR_PHYSICAL_GPU_PERFORMANCE); @@ -521,10 +524,10 @@ GR_RESULT GR_STDCALL grGetGpuInfo( } *(GR_PHYSICAL_GPU_IMAGE_PROPERTIES*)pData = (GR_PHYSICAL_GPU_IMAGE_PROPERTIES) { - .maxSliceWidth = props->limits.maxImageDimension1D, - .maxSliceHeight = props->limits.maxImageDimension2D, - .maxDepth = props->limits.maxImageDimension3D, - .maxArraySlices = props->limits.maxImageArrayLayers, + .maxSliceWidth = props->properties.limits.maxImageDimension1D, + .maxSliceHeight = props->properties.limits.maxImageDimension2D, + .maxDepth = props->properties.limits.maxImageDimension3D, + .maxArraySlices = props->properties.limits.maxImageArrayLayers, .reserved1 = 0, // 19.4.3 .reserved2 = 0, // 19.4.3 .maxMemoryAlignment = 262144, // 19.4.3 @@ -587,23 +590,23 @@ GR_RESULT GR_STDCALL grCreateDevice( uint32_t dmaQueueIndex = 0; uint32_t driverVersion; - const VkPhysicalDeviceProperties* props = &grPhysicalGpu->physicalDeviceProps; + const VkPhysicalDeviceProperties2* props = &grPhysicalGpu->physicalDeviceProps; - if (props->vendorID == NVIDIA_VENDOR_ID) { + if (props->properties.vendorID == NVIDIA_VENDOR_ID) { // Fix up driver version driverVersion = VK_MAKE_VERSION( - VK_VERSION_MAJOR(props->driverVersion), - VK_VERSION_MINOR(props->driverVersion >> 0) >> 2, - VK_VERSION_PATCH(props->driverVersion >> 2) >> 4); + VK_VERSION_MAJOR(props->properties.driverVersion), + VK_VERSION_MINOR(props->properties.driverVersion >> 0) >> 2, + VK_VERSION_PATCH(props->properties.driverVersion >> 2) >> 4); } 
else { - driverVersion = props->driverVersion; + driverVersion = props->properties.driverVersion; } LOGI("%04X:%04X \"%s\" (Vulkan %d.%d.%d, driver %d.%d.%d)\n", - props->vendorID, props->deviceID, props->deviceName, - VK_VERSION_MAJOR(props->apiVersion), - VK_VERSION_MINOR(props->apiVersion), - VK_VERSION_PATCH(props->apiVersion), + props->properties.vendorID, props->properties.deviceID, props->properties.deviceName, + VK_VERSION_MAJOR(props->properties.apiVersion), + VK_VERSION_MINOR(props->properties.apiVersion), + VK_VERSION_PATCH(props->properties.apiVersion), VK_VERSION_MAJOR(driverVersion), VK_VERSION_MINOR(driverVersion), VK_VERSION_PATCH(driverVersion)); diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index a8a2f0c0..490025e5 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -299,7 +299,7 @@ typedef struct _GrMsaaStateObject { typedef struct _GrPhysicalGpu { GrBaseObject grBaseObj; VkPhysicalDevice physicalDevice; - VkPhysicalDeviceProperties physicalDeviceProps; + VkPhysicalDeviceProperties2 physicalDeviceProps; } GrPhysicalGpu; typedef struct _GrPipeline { From 241106a15398c0663f0d65043d078d27da28bfd1 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:34:30 +0300 Subject: [PATCH 05/10] mantle: fix device check for destruction --- src/mantle/mantle_init_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mantle/mantle_init_device.c b/src/mantle/mantle_init_device.c index fea0e837..de4769f4 100644 --- a/src/mantle/mantle_init_device.c +++ b/src/mantle/mantle_init_device.c @@ -925,7 +925,7 @@ GR_RESULT GR_STDCALL grCreateDevice( *pDevice = (GR_DEVICE)grDevice; bail: - if (res != GR_SUCCESS) { + if (res != GR_SUCCESS && vkDevice != VK_NULL_HANDLE) { vkd.vkDestroyDevice(vkDevice, NULL); } From 7526a418ab5e9aac64e622a1c233894c2dbe9697 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: 
Sun, 4 Dec 2022 22:36:46 +0300 Subject: [PATCH 06/10] mantle: load descriptor buffer and buffer address functions --- src/mantle/vulkan_loader.c | 10 ++++++++++ src/mantle/vulkan_loader.h | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/src/mantle/vulkan_loader.c b/src/mantle/vulkan_loader.c index 3e700703..18457f9e 100644 --- a/src/mantle/vulkan_loader.c +++ b/src/mantle/vulkan_loader.c @@ -67,6 +67,7 @@ void vulkanLoaderInstanceInit( LOAD_VULKAN_FN(vki, instance, vkCreateWin32SurfaceKHR); LOAD_VULKAN_FN(vki, instance, vkGetPhysicalDeviceWin32PresentationSupportKHR); #endif + } void vulkanLoaderDeviceInit( @@ -173,6 +174,7 @@ void vulkanLoaderDeviceInit( LOAD_VULKAN_DEV_FN(vkd, device, vkFreeMemory); LOAD_VULKAN_DEV_FN(vkd, device, vkGetBufferMemoryRequirements); LOAD_VULKAN_DEV_FN(vkd, device, vkGetBufferMemoryRequirements2); + LOAD_VULKAN_DEV_FN(vkd, device, vkGetBufferDeviceAddress); LOAD_VULKAN_DEV_FN(vkd, device, vkGetDeviceMemoryCommitment); LOAD_VULKAN_DEV_FN(vkd, device, vkGetDeviceQueue); LOAD_VULKAN_DEV_FN(vkd, device, vkGetEventStatus); @@ -239,4 +241,12 @@ void vulkanLoaderDeviceInit( #ifdef VK_KHR_push_descriptor LOAD_VULKAN_DEV_FN(vkd, device, vkCmdPushDescriptorSetKHR); #endif + +#ifdef VK_EXT_descriptor_buffer + LOAD_VULKAN_DEV_FN(vkd, device, vkCmdBindDescriptorBuffersEXT); + LOAD_VULKAN_DEV_FN(vkd, device, vkCmdSetDescriptorBufferOffsetsEXT); + LOAD_VULKAN_DEV_FN(vkd, device, vkGetDescriptorEXT); + LOAD_VULKAN_DEV_FN(vkd, device, vkGetDescriptorSetLayoutBindingOffsetEXT); + LOAD_VULKAN_DEV_FN(vkd, device, vkGetDescriptorSetLayoutSizeEXT); +#endif } diff --git a/src/mantle/vulkan_loader.h b/src/mantle/vulkan_loader.h index dbffc29a..86aacf18 100644 --- a/src/mantle/vulkan_loader.h +++ b/src/mantle/vulkan_loader.h @@ -155,6 +155,7 @@ typedef struct _VULKAN_DEVICE { VULKAN_FN(vkFreeMemory); VULKAN_FN(vkGetBufferMemoryRequirements); VULKAN_FN(vkGetBufferMemoryRequirements2); + VULKAN_FN(vkGetBufferDeviceAddress); 
VULKAN_FN(vkGetDeviceMemoryCommitment); VULKAN_FN(vkGetDeviceQueue); VULKAN_FN(vkGetEventStatus); @@ -221,6 +222,14 @@ typedef struct _VULKAN_DEVICE { #ifdef VK_KHR_push_descriptor VULKAN_FN(vkCmdPushDescriptorSetKHR); #endif + +#ifdef VK_EXT_descriptor_buffer + VULKAN_FN(vkCmdBindDescriptorBuffersEXT); + VULKAN_FN(vkCmdSetDescriptorBufferOffsetsEXT); + VULKAN_FN(vkGetDescriptorEXT); + VULKAN_FN(vkGetDescriptorSetLayoutBindingOffsetEXT); + VULKAN_FN(vkGetDescriptorSetLayoutSizeEXT); +#endif } VULKAN_DEVICE; extern VULKAN_LIBRARY vkl; From 5e73a84b5de289412c02629b558e616ed6f1b5b7 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:40:46 +0300 Subject: [PATCH 07/10] mantle: add descriptor implementation based on VK_EXT_descriptor_buffer --- src/amdilc/amdilc.h | 5 + src/mantle/mantle_cmd_buf.c | 219 ++++++++++- src/mantle/mantle_cmd_buf_man.c | 4 + src/mantle/mantle_descriptor_set.c | 556 ++++++++++++++++++---------- src/mantle/mantle_init_device.c | 155 ++++++-- src/mantle/mantle_memory_man.c | 53 ++- src/mantle/mantle_object.h | 41 +- src/mantle/mantle_object_man.c | 65 +++- src/mantle/mantle_shader_pipeline.c | 30 +- 9 files changed, 884 insertions(+), 244 deletions(-) diff --git a/src/amdilc/amdilc.h b/src/amdilc/amdilc.h index 3b899078..3b6c8937 100644 --- a/src/amdilc/amdilc.h +++ b/src/amdilc/amdilc.h @@ -11,6 +11,11 @@ #define DYNAMIC_MEMORY_VIEW_DESCRIPTOR_SET_ID (0) #define DESCRIPTOR_SET_ID (2) +#define DESCRIPTOR_BUFFERS_ATOMIC_BINDING_ID (0) +#define DESCRIPTOR_BUFFERS_DYNAMIC_MAPPING_BINDING_ID (1) +#define DESCRIPTOR_BUFFERS_BASE_DESCRIPTOR_SET_ID (2) +#define DESCRIPTOR_BUFFERS_PUSH_DESCRIPTOR_SET_ID (1) + #define ILC_MAX_STRIDE_CONSTANTS (8) #define DESCRIPTOR_CONST_OFFSETS_OFFSET (sizeof(uint32_t) * ILC_MAX_STRIDE_CONSTANTS) diff --git a/src/mantle/mantle_cmd_buf.c b/src/mantle/mantle_cmd_buf.c index b7f7c16a..bff0322d 100644 --- a/src/mantle/mantle_cmd_buf.c +++ 
b/src/mantle/mantle_cmd_buf.c @@ -130,6 +130,156 @@ static void grCmdBufferBindVkDescriptorSets( } } +static void setupDescriptorBuffers( + const GrDevice* grDevice, + const GrCmdBuffer* grCmdBuffer, + const BindPoint* bindPoint, + const GrDescriptorSet* grDescriptorSet, + unsigned slotOffset, + unsigned pipelineDescriptorSetCount, + const PipelineDescriptorSlot* pipelineDescriptorSlots, + VkPipelineLayout pipelineLayout, + VkDeviceAddress* pBufferAddresses, + VkDeviceSize* pOffsets) +{ + for (unsigned i = 0; i < pipelineDescriptorSetCount; i++) { + const PipelineDescriptorSlot* descriptorSlot = &pipelineDescriptorSlots[i]; + const DescriptorSetSlot* slot; + unsigned descriptorSlotOffset = slotOffset; + const GrDescriptorSet* currentSet = grDescriptorSet; + + slot = ¤tSet->slots[descriptorSlotOffset]; + + for (unsigned j = 0; j < descriptorSlot->pathDepth; j++) { + slot = &slot[descriptorSlot->path[j]]; + descriptorSlotOffset = slot->nested.slotOffset; + currentSet = slot->nested.nextSet; + slot = ¤tSet->slots[descriptorSlotOffset]; + } + + pBufferAddresses[i] = currentSet->descriptorBufferAddress; + pOffsets[i] = descriptorSlotOffset * DESCRIPTORS_PER_SLOT * grDevice->maxMutableDescriptorSize; + // Pass buffer strides down to the shader + for (unsigned j = 0; j < descriptorSlot->strideCount; j++) { + VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, pipelineLayout, + VK_SHADER_STAGE_ALL_GRAPHICS, + descriptorSlot->strideOffsets[j], sizeof(uint32_t), + &slot[descriptorSlot->strideSlotIndexes[j]].buffer.stride); + } + } +} + +static void grCmdBufferBindDescriptorBuffers( + GrCmdBuffer* grCmdBuffer, + VkPipelineBindPoint vkBindPoint) +{ + const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer); + BindPoint* bindPoint = &grCmdBuffer->bindPoints[vkBindPoint]; + GrPipeline* grPipeline = bindPoint->grPipeline; + + bindPoint->boundDescriptorSetCount = 0; + + for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { + assert((bindPoint->boundDescriptorSetCount + 
grPipeline->descriptorSetCounts[i]) < COUNT_OF(bindPoint->descriptorBufferAddresses)); + setupDescriptorBuffers(grDevice, grCmdBuffer, bindPoint, + bindPoint->grDescriptorSets[i], bindPoint->slotOffsets[i], + grPipeline->descriptorSetCounts[i], + grPipeline->descriptorSlots[i], + grPipeline->pipelineLayout, + &bindPoint->descriptorBufferAddresses[bindPoint->boundDescriptorSetCount], + &bindPoint->descriptorOffsets[bindPoint->boundDescriptorSetCount]); + bindPoint->boundDescriptorSetCount += grPipeline->descriptorSetCounts[i]; + } + + // check if descriptor buffer state is dirty + bool dirtyBufferState = false; + + uint32_t setIndices[COUNT_OF(bindPoint->descriptorOffsets) * COUNT_OF(grCmdBuffer->bindPoints)]; + for (unsigned i = 0; i < bindPoint->boundDescriptorSetCount; ++i) { + unsigned bufferIndex = 0xFFFFFFFFu; + for (unsigned j = 0; j < grCmdBuffer->descriptorBufferCount; j++) { + if (grCmdBuffer->bufferAddresses[j] == bindPoint->descriptorBufferAddresses[i]) { + bufferIndex = j; + break; + } + } + if (bufferIndex >= grCmdBuffer->descriptorBufferCount) { + dirtyBufferState = true; + break; + } else { + setIndices[i] = bufferIndex; + } + } + + if (!bindPoint->descriptorSetOffsetsPushed) { + bindPoint->descriptorSetOffsetsPushed = true; + uint32_t descriptorOffsets[DESCRIPTOR_OFFSET_COUNT] = { 0 }; + VKD.vkCmdPushConstants(grCmdBuffer->commandBuffer, grPipeline->pipelineLayout, + vkBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS ? 
VK_SHADER_STAGE_ALL_GRAPHICS : VK_SHADER_STAGE_COMPUTE_BIT, + DESCRIPTOR_CONST_OFFSETS_OFFSET, sizeof(descriptorOffsets), + descriptorOffsets); + } + + if (dirtyBufferState) { + VkDescriptorBufferBindingInfoEXT bufferBindingInfos[COUNT_OF(grCmdBuffer->bufferAddresses)]; + grCmdBuffer->descriptorBufferCount = 0; + // reinitialize descriptor buffer state and then bind descriptor sets for all bind points + for (unsigned i = 0; i < COUNT_OF(grCmdBuffer->bindPoints); i++) { + for (unsigned j = 0; j < grCmdBuffer->bindPoints[i].boundDescriptorSetCount; j++) { + unsigned descriptorBufferIndex = 0xFFFFFFFF; + for (unsigned k = 0; k < grCmdBuffer->descriptorBufferCount; k++) { + if (grCmdBuffer->bindPoints[i].descriptorBufferAddresses[j] == grCmdBuffer->bufferAddresses[k]) { + descriptorBufferIndex = k; + break; + } + } + if (descriptorBufferIndex >= grCmdBuffer->descriptorBufferCount) { + if (grCmdBuffer->descriptorBufferCount >= COUNT_OF(grCmdBuffer->bufferAddresses)) { + LOGE("descriptor buffer overflow\n"); + assert(false); + } + grCmdBuffer->descriptorBufferCount++; + descriptorBufferIndex = grCmdBuffer->descriptorBufferCount - 1; + grCmdBuffer->bufferAddresses[descriptorBufferIndex] = grCmdBuffer->bindPoints[i].descriptorBufferAddresses[j]; + bufferBindingInfos[descriptorBufferIndex] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .pNext = NULL, + .address = grCmdBuffer->bindPoints[i].descriptorBufferAddresses[j], + .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT + | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + }; + } + setIndices[i * COUNT_OF(bindPoint->descriptorOffsets) + j] = descriptorBufferIndex; + } + } + + VKD.vkCmdBindDescriptorBuffersEXT(grCmdBuffer->commandBuffer, grCmdBuffer->descriptorBufferCount, bufferBindingInfos); + // now set up the offsets + for (unsigned i = 0; i < COUNT_OF(grCmdBuffer->bindPoints); i++) { + BindPoint* 
descriptorBindPoint = &grCmdBuffer->bindPoints[(VkPipelineBindPoint)i]; + GrPipeline* rebindPipeline = descriptorBindPoint->grPipeline; + if (rebindPipeline == NULL || descriptorBindPoint->boundDescriptorSetCount == 0) { + continue; + } + VKD.vkCmdSetDescriptorBufferOffsetsEXT( + grCmdBuffer->commandBuffer, + (VkPipelineBindPoint)i, rebindPipeline->pipelineLayout, + DESCRIPTOR_BUFFERS_BASE_DESCRIPTOR_SET_ID, + descriptorBindPoint->boundDescriptorSetCount, + &setIndices[i * COUNT_OF(descriptorBindPoint->descriptorOffsets)], + descriptorBindPoint->descriptorOffsets); + } + } else if (bindPoint->boundDescriptorSetCount > 0) { + // just associate the offsets for the bind point + VKD.vkCmdSetDescriptorBufferOffsetsEXT( + grCmdBuffer->commandBuffer, + vkBindPoint, grPipeline->pipelineLayout, + DESCRIPTOR_BUFFERS_BASE_DESCRIPTOR_SET_ID, + bindPoint->boundDescriptorSetCount, + setIndices, bindPoint->descriptorOffsets); + } +} + static void grCmdBufferSetupDynamicBufferStride( GrCmdBuffer* grCmdBuffer, VkPipelineBindPoint vkBindPoint) @@ -197,6 +347,50 @@ static void grCmdBufferBindAtomicDescriptorSet( 0, NULL); } +static void grCmdBufferDescriptorBufferPushDescriptorSet( + GrCmdBuffer* grCmdBuffer, + VkPipelineBindPoint vkBindPoint) +{ + const GrDevice* grDevice = GET_OBJ_DEVICE(grCmdBuffer); + const BindPoint* bindPoint = &grCmdBuffer->bindPoints[vkBindPoint]; + GrPipeline* grPipeline = bindPoint->grPipeline; + + VkDescriptorBufferInfo atomicBufferInfo = { + .buffer = grCmdBuffer->atomicCounterBuffer, + .offset = 0, + .range = grCmdBuffer->atomicCounterBufferSize, + }; + bool hasDynamic = (bindPoint->dynamicMemoryView.buffer.bufferInfo.buffer != VK_NULL_HANDLE); + unsigned descriptorCount = hasDynamic ? 
2 : 1; + + VkWriteDescriptorSet bufferWrites[] = { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .dstArrayElement = 0, + .dstBinding = DESCRIPTOR_BUFFERS_ATOMIC_BINDING_ID, + .dstSet = 0,// ignored + .pBufferInfo = &atomicBufferInfo, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .dstArrayElement = 0, + .dstBinding = DESCRIPTOR_BUFFERS_DYNAMIC_MAPPING_BINDING_ID, + .dstSet = 0,// ignored + .pBufferInfo = &bindPoint->dynamicMemoryView.buffer.bufferInfo, + } + }; + VKD.vkCmdPushDescriptorSetKHR( + grCmdBuffer->commandBuffer, + vkBindPoint, + grPipeline->pipelineLayout, + DESCRIPTOR_BUFFERS_PUSH_DESCRIPTOR_SET_ID, + descriptorCount, bufferWrites); +} + static void grCmdBufferUpdateResources( GrCmdBuffer* grCmdBuffer, VkPipelineBindPoint vkBindPoint) @@ -206,16 +400,25 @@ static void grCmdBufferUpdateResources( GrPipeline* grPipeline = bindPoint->grPipeline; uint32_t dirtyFlags = bindPoint->dirtyFlags; - if (dirtyFlags & FLAG_DIRTY_DESCRIPTOR_SET) { - grCmdBufferBindVkDescriptorSets(grCmdBuffer, vkBindPoint); - } + if (grDevice->descriptorBufferSupported) { + if (dirtyFlags & FLAG_DIRTY_DESCRIPTOR_SET) { + grCmdBufferBindDescriptorBuffers(grCmdBuffer, vkBindPoint); + } + if (dirtyFlags & (FLAG_DIRTY_DESCRIPTOR_SET | FLAG_DIRTY_DYNAMIC_MAPPING)) { + grCmdBufferDescriptorBufferPushDescriptorSet(grCmdBuffer, vkBindPoint); + } + } else { + if (dirtyFlags & FLAG_DIRTY_DESCRIPTOR_SET) { + grCmdBufferBindVkDescriptorSets(grCmdBuffer, vkBindPoint); + } - if (dirtyFlags & FLAG_DIRTY_DESCRIPTOR_SET) { - grCmdBufferBindAtomicDescriptorSet(grCmdBuffer, vkBindPoint); - } + if (dirtyFlags & FLAG_DIRTY_DESCRIPTOR_SET) { + grCmdBufferBindAtomicDescriptorSet(grCmdBuffer, vkBindPoint); + } - if (dirtyFlags & FLAG_DIRTY_DYNAMIC_MAPPING) { - grCmdBufferBindDynamicDescriptorSet(grCmdBuffer, vkBindPoint); + if 
(dirtyFlags & FLAG_DIRTY_DYNAMIC_MAPPING) { + grCmdBufferBindDynamicDescriptorSet(grCmdBuffer, vkBindPoint); + } } if (dirtyFlags & FLAG_DIRTY_DYNAMIC_STRIDE) { diff --git a/src/mantle/mantle_cmd_buf_man.c b/src/mantle/mantle_cmd_buf_man.c index 8eb0b5b1..7ddd8acb 100644 --- a/src/mantle/mantle_cmd_buf_man.c +++ b/src/mantle/mantle_cmd_buf_man.c @@ -79,11 +79,14 @@ GR_RESULT GR_STDCALL grCreateCommandBuffer( VkBuffer atomicCounterBuffer = VK_NULL_HANDLE; VkDescriptorSet atomicCounterSet = VK_NULL_HANDLE; + VkDeviceSize atomicCounterBufferSize = 0ull; if (pCreateInfo->queueType == GR_QUEUE_UNIVERSAL) { atomicCounterBuffer = grDevice->universalAtomicCounterBuffer; + atomicCounterBufferSize = grDevice->universalAtomicCounterBufferSize; atomicCounterSet = grDevice->universalAtomicCounterSet; } else if (pCreateInfo->queueType == GR_QUEUE_COMPUTE) { atomicCounterBuffer = grDevice->computeAtomicCounterBuffer; + atomicCounterBufferSize = grDevice->computeAtomicCounterBufferSize; atomicCounterSet = grDevice->computeAtomicCounterSet; } @@ -94,6 +97,7 @@ GR_RESULT GR_STDCALL grCreateCommandBuffer( .commandBuffer = vkCommandBuffer, .timestampQueryPool = vkQueryPool, .atomicCounterBuffer = atomicCounterBuffer, + .atomicCounterBufferSize = atomicCounterBufferSize, .atomicCounterSet = atomicCounterSet, }; diff --git a/src/mantle/mantle_descriptor_set.c b/src/mantle/mantle_descriptor_set.c index a32ac8ce..5c051eca 100644 --- a/src/mantle/mantle_descriptor_set.c +++ b/src/mantle/mantle_descriptor_set.c @@ -27,66 +27,98 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( return GR_ERROR_INVALID_POINTER; } + VkBuffer vkBuffer = VK_NULL_HANDLE; + VkDeviceSize bufferSize = 0; VkDescriptorPool descriptorPool = VK_NULL_HANDLE; VkDescriptorSet descriptorSet = VK_NULL_HANDLE; VkResult vkRes = VK_SUCCESS; - const VkDescriptorType descriptorTypes[] = { - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - VK_DESCRIPTOR_TYPE_SAMPLER - }; - const VkMutableDescriptorTypeListEXT mutableTypeList = { - .descriptorTypeCount = COUNT_OF(descriptorTypes), - .pDescriptorTypes = descriptorTypes, - }; - const VkMutableDescriptorTypeCreateInfoEXT mutableTypeInfo = { - .sType = VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT, - .pNext = NULL, - .mutableDescriptorTypeListCount = 1, - .pMutableDescriptorTypeLists = &mutableTypeList, - }; - const VkDescriptorPoolSize poolSize = { - .type = VK_DESCRIPTOR_TYPE_MUTABLE_EXT, - .descriptorCount = DESCRIPTORS_PER_SLOT * pCreateInfo->slots, - }; + if (grDevice->descriptorBufferSupported) { + bufferSize = grDevice->maxMutableDescriptorSize * pCreateInfo->slots * DESCRIPTORS_PER_SLOT; + uint32_t queueFamilyIndices[2]; + uint32_t queueFamilyIndexCount = 0; + if (grDevice->grUniversalQueue) { + queueFamilyIndexCount++; + queueFamilyIndices[0] = grDevice->grUniversalQueue->queueFamilyIndex; + } + if (grDevice->grComputeQueue) { + queueFamilyIndexCount++; + queueFamilyIndices[queueFamilyIndexCount - 1] = grDevice->grComputeQueue->queueFamilyIndex; + } + const VkBufferCreateInfo bufferCreateInfo = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .size = bufferSize, + .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + .sharingMode = queueFamilyIndexCount <= 1 ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT, + .queueFamilyIndexCount = queueFamilyIndexCount <= 1 ? 0 : queueFamilyIndexCount, + .pQueueFamilyIndices = queueFamilyIndexCount <= 1 ? 
NULL : queueFamilyIndices, + }; - const VkDescriptorPoolCreateInfo poolCreateInfo = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = &mutableTypeInfo, - .flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT, - .maxSets = 1, - .poolSizeCount = 1, - .pPoolSizes = &poolSize, - }; + vkRes = VKD.vkCreateBuffer(grDevice->device, &bufferCreateInfo, NULL, &vkBuffer); + if (vkRes != VK_SUCCESS) { + LOGE("vkCreateBuffer failed (%d)\n", vkRes); + return getGrResult(vkRes); + } + } else { + const VkDescriptorType descriptorTypes[] = { + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + VK_DESCRIPTOR_TYPE_SAMPLER + }; + const VkMutableDescriptorTypeListEXT mutableTypeList = { + .descriptorTypeCount = COUNT_OF(descriptorTypes), + .pDescriptorTypes = descriptorTypes, + }; + const VkMutableDescriptorTypeCreateInfoEXT mutableTypeInfo = { + .sType = VK_STRUCTURE_TYPE_MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT, + .pNext = NULL, + .mutableDescriptorTypeListCount = 1, + .pMutableDescriptorTypeLists = &mutableTypeList, + }; + const VkDescriptorPoolSize poolSize = { + .type = VK_DESCRIPTOR_TYPE_MUTABLE_EXT, + .descriptorCount = DESCRIPTORS_PER_SLOT * pCreateInfo->slots, + }; - vkRes = VKD.vkCreateDescriptorPool(grDevice->device, &poolCreateInfo, NULL, &descriptorPool); - if (vkRes != VK_SUCCESS) { - LOGE("vkCreateDescriptorPool failed (%d)\n", vkRes); - goto bail; - } + const VkDescriptorPoolCreateInfo poolCreateInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = &mutableTypeInfo, + .flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = &poolSize, + }; - const VkDescriptorSetVariableDescriptorCountAllocateInfo descriptorCountInfo = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO, - .pNext = NULL, - 
.descriptorSetCount = 1, - .pDescriptorCounts = &poolSize.descriptorCount, - }; - const VkDescriptorSetAllocateInfo allocateInfo = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = &descriptorCountInfo, - .descriptorPool = descriptorPool, - .descriptorSetCount = 1, - .pSetLayouts = &grDevice->defaultDescriptorSetLayout, - }; + vkRes = VKD.vkCreateDescriptorPool(grDevice->device, &poolCreateInfo, NULL, &descriptorPool); + if (vkRes != VK_SUCCESS) { + LOGE("vkCreateDescriptorPool failed (%d)\n", vkRes); + goto bail; + } - vkRes = VKD.vkAllocateDescriptorSets(grDevice->device, &allocateInfo, &descriptorSet); - if (vkRes != VK_SUCCESS) { - LOGE("vkAllocateDescriptorSets failed (%d)\n", vkRes); - goto bail; + const VkDescriptorSetVariableDescriptorCountAllocateInfo descriptorCountInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO, + .pNext = NULL, + .descriptorSetCount = 1, + .pDescriptorCounts = &poolSize.descriptorCount, + }; + const VkDescriptorSetAllocateInfo allocateInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = &descriptorCountInfo, + .descriptorPool = descriptorPool, + .descriptorSetCount = 1, + .pSetLayouts = &grDevice->defaultDescriptorSetLayout, + }; + + vkRes = VKD.vkAllocateDescriptorSets(grDevice->device, &allocateInfo, &descriptorSet); + if (vkRes != VK_SUCCESS) { + LOGE("vkAllocateDescriptorSets failed (%d)\n", vkRes); + goto bail; + } } GrDescriptorSet* grDescriptorSet = malloc(sizeof(GrDescriptorSet)); @@ -100,6 +132,11 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( .slots = calloc(pCreateInfo->slots, sizeof(DescriptorSetSlot)), .descriptorPool = descriptorPool, .descriptorSet = descriptorSet, + .descriptorBufferPtr = NULL, + .descriptorBuffer = vkBuffer, + .descriptorBufferSize = bufferSize, + .descriptorBufferMemoryOffset = 0ull, + .descriptorBufferAddress = 0ull, }; *pDescriptorSet = (GR_DESCRIPTOR_SET)grDescriptorSet; @@ -107,6 +144,7 @@ GR_RESULT 
GR_STDCALL grCreateDescriptorSet( bail: VKD.vkDestroyDescriptorPool(grDevice->device, descriptorPool, NULL); + VKD.vkDestroyBuffer(grDevice->device, vkBuffer, NULL); return getGrResult(vkRes); } @@ -115,7 +153,12 @@ GR_VOID GR_STDCALL grBeginDescriptorSetUpdate( { LOGT("%p\n", descriptorSet); - // No-op + GrDevice* grDevice = GET_OBJ_DEVICE(descriptorSet); + GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; + + if (grDevice->descriptorBufferSupported && grDescriptorSet->descriptorBufferPtr == NULL) { + LOGE("memory is not mapped for descriptor buffer"); + } } GR_VOID GR_STDCALL grEndDescriptorSetUpdate( @@ -136,43 +179,64 @@ GR_VOID GR_STDCALL grAttachSamplerDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); - STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount); - unsigned descriptorWriteCount = 0; + if (grDevice->descriptorBufferSupported) { + for (unsigned i = 0; i < slotCount; i++) { + const GrSampler* grSampler = (GrSampler*)pSamplers[i]; - for (unsigned i = 0; i < slotCount; i++) { - DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; - const GrSampler* grSampler = (GrSampler*)pSamplers[i]; - - releaseSlot(grDevice, slot); + VkDescriptorGetInfoEXT descriptorInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .pNext = NULL, + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .data = { + .pSampler = &grSampler->sampler + } + }; - *slot = (DescriptorSetSlot) { - .type = SLOT_TYPE_IMAGE, - .image = { - .imageInfo = { - .sampler = grSampler->sampler, - .imageView = VK_NULL_HANDLE, - .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.samplerDescriptorSize, + grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLER)) * grDevice->maxMutableDescriptorSize); + } + } else { + 
STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount); + unsigned descriptorWriteCount = 0; + + for (unsigned i = 0; i < slotCount; i++) { + DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; + const GrSampler* grSampler = (GrSampler*)pSamplers[i]; + + releaseSlot(grDevice, slot); + + *slot = (DescriptorSetSlot) { + .type = SLOT_TYPE_IMAGE, + .image = { + .imageInfo = { + .sampler = grSampler->sampler, + .imageView = VK_NULL_HANDLE, + .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }, }, - }, - }; + }; - writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = NULL, - .dstSet = grDescriptorSet->descriptorSet, - .dstBinding = 0, - .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLER), - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .pImageInfo = &slot->image.imageInfo, - .pBufferInfo = NULL, - .pTexelBufferView = NULL, - }; - } + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLER), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .pImageInfo = &slot->image.imageInfo, + .pBufferInfo = NULL, + .pTexelBufferView = NULL, + }; + } - VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); + VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); + STACK_ARRAY_FINISH(writeDescriptors); + } - STACK_ARRAY_FINISH(writeDescriptors); } GR_VOID GR_STDCALL grAttachImageViewDescriptors( @@ -185,58 +249,95 @@ GR_VOID GR_STDCALL grAttachImageViewDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = 
GET_OBJ_DEVICE(grDescriptorSet); - STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount * 2); - unsigned descriptorWriteCount = 0; - - for (unsigned i = 0; i < slotCount; i++) { - DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; - const GR_IMAGE_VIEW_ATTACH_INFO* info = &pImageViews[i]; - const GrImageView* grImageView = (GrImageView*)info->view; + if (grDevice->descriptorBufferSupported) { + for (unsigned i = 0; i < slotCount; i++) { + const GR_IMAGE_VIEW_ATTACH_INFO* info = &pImageViews[i]; + const GrImageView* grImageView = (GrImageView*)info->view; - releaseSlot(grDevice, slot); + VkDescriptorImageInfo imageInfo = { + .sampler = VK_NULL_HANDLE, + .imageView = grImageView->imageView, + .imageLayout = getVkImageLayout(info->state), + }; - *slot = (DescriptorSetSlot) { - .type = SLOT_TYPE_IMAGE, - .image = { - .imageInfo = { - .sampler = VK_NULL_HANDLE, - .imageView = grImageView->imageView, - .imageLayout = getVkImageLayout(info->state), - }, - }, - }; + VkDescriptorGetInfoEXT descriptorInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .pNext = NULL, + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .data = { + .pSampledImage = &imageInfo, + } + }; + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.sampledImageDescriptorSize, + grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) * grDevice->maxMutableDescriptorSize); + + if (grImageView->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + descriptorInfo.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + descriptorInfo.data.pStorageImage = &imageInfo; + + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.storageImageDescriptorSize, + grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)) * grDevice->maxMutableDescriptorSize); + } + } + } 
else { + STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount * 2); + unsigned descriptorWriteCount = 0; + + for (unsigned i = 0; i < slotCount; i++) { + DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; + const GR_IMAGE_VIEW_ATTACH_INFO* info = &pImageViews[i]; + const GrImageView* grImageView = (GrImageView*)info->view; + + releaseSlot(grDevice, slot); + + *slot = (DescriptorSetSlot) { + .type = SLOT_TYPE_IMAGE, + .image = { + .imageInfo = { + .sampler = VK_NULL_HANDLE, + .imageView = grImageView->imageView, + .imageLayout = getVkImageLayout(info->state), + }, + }, + }; - if (grImageView->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (grImageView->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &slot->image.imageInfo, + .pBufferInfo = NULL, + .pTexelBufferView = NULL, + }; + } writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .pNext = NULL, .dstSet = grDescriptorSet->descriptorSet, .dstBinding = 0, - .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE), + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .pImageInfo = &slot->image.imageInfo, .pBufferInfo = NULL, .pTexelBufferView = NULL, }; } - writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = 
NULL, - .dstSet = grDescriptorSet->descriptorSet, - .dstBinding = 0, - .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE), - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = &slot->image.imageInfo, - .pBufferInfo = NULL, - .pTexelBufferView = NULL, - }; - } - VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); - - STACK_ARRAY_FINISH(writeDescriptors); + VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); + STACK_ARRAY_FINISH(writeDescriptors); + } } GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( @@ -250,91 +351,161 @@ GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); VkResult vkRes; - STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount * 3); - unsigned descriptorWriteCount = 0; - - for (unsigned i = 0; i < slotCount; i++) { - DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; - const GR_MEMORY_VIEW_ATTACH_INFO* info = &pMemViews[i]; - GrGpuMemory* grGpuMemory = (GrGpuMemory*)info->mem; - VkFormat vkFormat = getVkFormat(info->format); - VkBufferView vkBufferView = VK_NULL_HANDLE; - - releaseSlot(grDevice, slot); + if (grDevice->descriptorBufferSupported) { + for (unsigned i = 0; i < slotCount; i++) { + DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; + const GR_MEMORY_VIEW_ATTACH_INFO* info = &pMemViews[i]; + GrGpuMemory* grGpuMemory = (GrGpuMemory*)info->mem; + VkFormat vkFormat = getVkFormat(info->format); + VkBufferView vkBufferView = VK_NULL_HANDLE; - if (vkFormat != VK_FORMAT_UNDEFINED) { - // Create buffer view for typed buffers - const VkBufferViewCreateInfo createInfo = { - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + VkDescriptorAddressInfoEXT bufferInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, .pNext = NULL, - .flags = 0, - 
.buffer = grGpuMemory->buffer, - .format = vkFormat, - .offset = info->offset, + .address = grGpuMemory->address + info->offset, .range = info->range, + .format = vkFormat }; - vkRes = VKD.vkCreateBufferView(grDevice->device, &createInfo, NULL, &vkBufferView); - if (vkRes != VK_SUCCESS) { - LOGE("vkCreateBufferView failed (%d)\n", vkRes); + if (vkFormat != VK_FORMAT_UNDEFINED) { + VkDescriptorGetInfoEXT descriptorInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .pNext = NULL, + .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .data = { + .pUniformTexelBuffer = &bufferInfo + } + }; + + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.uniformTexelBufferDescriptorSize, + grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)) * grDevice->maxMutableDescriptorSize); + + descriptorInfo.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + descriptorInfo.data.pStorageTexelBuffer = &bufferInfo; + + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.storageTexelBufferDescriptorSize, + grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)) * grDevice->maxMutableDescriptorSize); + } - writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + VkDescriptorGetInfoEXT descriptorInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, .pNext = NULL, - .dstSet = grDescriptorSet->descriptorSet, - .dstBinding = 0, - .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, - .pImageInfo = NULL, - .pBufferInfo = NULL, - .pTexelBufferView = &slot->buffer.bufferView, + .type = 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .data = { + .pStorageBuffer = &bufferInfo + } + }; + + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.storageBufferDescriptorSize, + grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)) * grDevice->maxMutableDescriptorSize); + + *slot = (DescriptorSetSlot) { + .type = SLOT_TYPE_BUFFER, + .buffer = { + .bufferView = vkBufferView, + .bufferInfo = { + .buffer = grGpuMemory->buffer, + .offset = info->offset, + .range = info->range, + }, + .stride = info->stride, + }, }; + } + } else { + STACK_ARRAY(VkWriteDescriptorSet, writeDescriptors, 128, slotCount * 3); + unsigned descriptorWriteCount = 0; + + for (unsigned i = 0; i < slotCount; i++) { + DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; + const GR_MEMORY_VIEW_ATTACH_INFO* info = &pMemViews[i]; + GrGpuMemory* grGpuMemory = (GrGpuMemory*)info->mem; + VkFormat vkFormat = getVkFormat(info->format); + VkBufferView vkBufferView = VK_NULL_HANDLE; + + releaseSlot(grDevice, slot); + + if (vkFormat != VK_FORMAT_UNDEFINED) { + // Create buffer view for typed buffers + const VkBufferViewCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .buffer = grGpuMemory->buffer, + .format = vkFormat, + .offset = info->offset, + .range = info->range, + }; + + vkRes = VKD.vkCreateBufferView(grDevice->device, &createInfo, NULL, &vkBufferView); + if (vkRes != VK_SUCCESS) { + LOGE("vkCreateBufferView failed (%d)\n", vkRes); + } + + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER), + .descriptorCount = 1, + .descriptorType = 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + .pImageInfo = NULL, + .pBufferInfo = NULL, + .pTexelBufferView = &slot->buffer.bufferView, + }; + writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = NULL, + .dstSet = grDescriptorSet->descriptorSet, + .dstBinding = 0, + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .pImageInfo = NULL, + .pBufferInfo = NULL, + .pTexelBufferView = &slot->buffer.bufferView, + }; + } writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .pNext = NULL, .dstSet = grDescriptorSet->descriptorSet, .dstBinding = 0, - .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER), + .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .pImageInfo = NULL, - .pBufferInfo = NULL, - .pTexelBufferView = &slot->buffer.bufferView, + .pBufferInfo = &slot->buffer.bufferInfo, + .pTexelBufferView = NULL, + }; + *slot = (DescriptorSetSlot) { + .type = SLOT_TYPE_BUFFER, + .buffer = { + .bufferView = vkBufferView, + .bufferInfo = { + .buffer = grGpuMemory->buffer, + .offset = info->offset, + .range = info->range, + }, + .stride = info->stride, + }, }; } - writeDescriptors[descriptorWriteCount++] = (VkWriteDescriptorSet) { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = NULL, - .dstSet = grDescriptorSet->descriptorSet, - .dstBinding = 0, - .dstArrayElement = (startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER), - .descriptorCount = 1, - .descriptorType = 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pImageInfo = NULL, - .pBufferInfo = &slot->buffer.bufferInfo, - .pTexelBufferView = NULL, - }; - - *slot = (DescriptorSetSlot) { - .type = SLOT_TYPE_BUFFER, - .buffer = { - .bufferView = vkBufferView, - .bufferInfo = { - .buffer = grGpuMemory->buffer, - .offset = info->offset, - .range = info->range, - }, - .stride = info->stride, - }, - }; + VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); + STACK_ARRAY_FINISH(writeDescriptors); } - - VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); - - STACK_ARRAY_FINISH(writeDescriptors); } GR_VOID GR_STDCALL grAttachNestedDescriptors( @@ -372,11 +543,16 @@ GR_VOID GR_STDCALL grClearDescriptorSetSlots( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); - for (unsigned i = 0; i < slotCount; i++) { - DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; + if (grDevice->descriptorBufferSupported) { + memset(grDescriptorSet->descriptorBufferPtr + (startSlot * DESCRIPTORS_PER_SLOT * grDevice->maxMutableDescriptorSize), 0, grDevice->maxMutableDescriptorSize * slotCount * DESCRIPTORS_PER_SLOT); + memset(&grDescriptorSet->slots[startSlot], 0, sizeof(DescriptorSetSlot) * slotCount); + } else { + for (unsigned i = 0; i < slotCount; i++) { + DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; - releaseSlot(grDevice, slot); + releaseSlot(grDevice, slot); - slot->type = SLOT_TYPE_NONE; + slot->type = SLOT_TYPE_NONE; + } } } diff --git a/src/mantle/mantle_init_device.c b/src/mantle/mantle_init_device.c index de4769f4..13942d1a 100644 --- a/src/mantle/mantle_init_device.c +++ b/src/mantle/mantle_init_device.c @@ -80,6 +80,45 @@ static VkDescriptorSetLayout getDynamicMemoryDescriptorSetLayout( return layout; } +static VkDescriptorSetLayout getBufferPushDescriptorSetLayout( + const GrDevice* grDevice) +{ + 
VkDescriptorSetLayout layout = VK_NULL_HANDLE; + + const VkDescriptorSetLayoutBinding bindings[] = { + { + .binding = DESCRIPTOR_BUFFERS_ATOMIC_BINDING_ID, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = NULL, + }, + { + .binding = DESCRIPTOR_BUFFERS_DYNAMIC_MAPPING_BINDING_ID, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = NULL, + }, + }; + + const VkDescriptorSetLayoutCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = NULL, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR | VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + .bindingCount = COUNT_OF(bindings), + .pBindings = bindings, + }; + + VkResult res = VKD.vkCreateDescriptorSetLayout(grDevice->device, &createInfo, NULL, &layout); + if (res != VK_SUCCESS) { + LOGE("vkCreateDescriptorSetLayout failed (%d)\n", res); + assert(false); + } + + return layout; +} + static VkDescriptorSetLayout getDefaultDescriptorSetLayout( const GrDevice* grDevice) { @@ -106,7 +145,7 @@ static VkDescriptorSetLayout getDefaultDescriptorSetLayout( const VkDescriptorBindingFlags bindingFlags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT | - VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT; + (!grDevice->descriptorBufferSupported ? 
VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT : 0); const VkDescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsCreateInfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, .pNext = &mutableTypeInfo, @@ -124,7 +163,7 @@ static VkDescriptorSetLayout getDefaultDescriptorSetLayout( const VkDescriptorSetLayoutCreateInfo createInfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = &bindingFlagsCreateInfo, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT, + .flags = grDevice->descriptorBufferSupported ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT, .bindingCount = 1, .pBindings = &binding, }; @@ -370,11 +409,13 @@ GR_RESULT GR_STDCALL grInitAndEnumerateGpus( *grPhysicalGpu = (GrPhysicalGpu) { .grBaseObj = { GR_OBJ_TYPE_PHYSICAL_GPU }, .physicalDevice = physicalDevices[i], + .descriptorBufferProps = { 0 }, // Initialized below .physicalDeviceProps = { 0 }, // Initialized below }; grPhysicalGpu->physicalDeviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - grPhysicalGpu->physicalDeviceProps.pNext = NULL; + grPhysicalGpu->physicalDeviceProps.pNext = &grPhysicalGpu->descriptorBufferProps; + grPhysicalGpu->descriptorBufferProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT; vki.vkGetPhysicalDeviceProperties2(physicalDevices[i], &grPhysicalGpu->physicalDeviceProps); @@ -755,9 +796,14 @@ GR_RESULT GR_STDCALL grCreateDevice( .pNext = &vulkan13DeviceFeatures, .mutableDescriptorType = VK_TRUE, }; + VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptorBufferFeatures = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, + .pNext = &mutableDescriptorFeaturesEXT, + .descriptorBuffer = VK_TRUE, + }; VkPhysicalDeviceVulkan12Features vulkan12DeviceFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, - .pNext = 
&mutableDescriptorFeaturesEXT, + .pNext = &descriptorBufferFeatures, .runtimeDescriptorArray = VK_TRUE, .bufferDeviceAddress = VK_TRUE, .descriptorBindingVariableDescriptorCount = VK_TRUE, @@ -796,16 +842,48 @@ GR_RESULT GR_STDCALL grCreateDevice( }, }; + unsigned supportedExtensionCount = 0; + if (vki.vkEnumerateDeviceExtensionProperties(grPhysicalGpu->physicalDevice, NULL, &supportedExtensionCount, NULL) != VK_SUCCESS) { + LOGE("vkEnumerateDeviceExtensionProperties failed\n"); + res = GR_ERROR_INITIALIZATION_FAILED; + goto bail; + } + + STACK_ARRAY(VkExtensionProperties, extensionProperties, 180, supportedExtensionCount); + + if (vki.vkEnumerateDeviceExtensionProperties(grPhysicalGpu->physicalDevice, NULL, &supportedExtensionCount, extensionProperties) != VK_SUCCESS) { + STACK_ARRAY_FINISH(extensionProperties); + LOGE("vkEnumerateDeviceExtensionProperties failed\n"); + res = GR_ERROR_INITIALIZATION_FAILED; + goto bail; + } + const char *deviceExtensions[] = { VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME, VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, + // TODO: make descriptor buffer optional VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, VK_EXT_MUTABLE_DESCRIPTOR_TYPE_EXTENSION_NAME, + VK_VALVE_MUTABLE_DESCRIPTOR_TYPE_EXTENSION_NAME, + NULL, }; + unsigned deviceExtensionCount = COUNT_OF(deviceExtensions) - 1; + bool descriptorBufferSupported = false; + + for (unsigned i = 0; i < supportedExtensionCount; i++) { + if (strcmp(extensionProperties[i].extensionName, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME) == 0) { + descriptorBufferSupported = true; // TODO: also check the extension properties + deviceExtensions[deviceExtensionCount++] = VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME; + break; + } + } + + STACK_ARRAY_FINISH(extensionProperties); + const VkDeviceCreateInfo createInfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = &deviceFeatures, @@ 
-814,7 +892,7 @@ GR_RESULT GR_STDCALL grCreateDevice( .pQueueCreateInfos = queueCreateInfos, .enabledLayerCount = 0, .ppEnabledLayerNames = NULL, - .enabledExtensionCount = COUNT_OF(deviceExtensions), + .enabledExtensionCount = deviceExtensionCount, .ppEnabledExtensionNames = deviceExtensions, .pEnabledFeatures = NULL, }; @@ -868,6 +946,7 @@ GR_RESULT GR_STDCALL grCreateDevice( .device = vkDevice, .physicalDevice = grPhysicalGpu->physicalDevice, .memoryProperties = memoryProperties, + .descriptorBufferProps = grPhysicalGpu->descriptorBufferProps, .memoryHeapCount = memoryHeapCount, .memoryHeapMap = { 0 }, // Initialized below .atomicCounterSetLayout = VK_NULL_HANDLE, // Initialized below @@ -878,16 +957,37 @@ GR_RESULT GR_STDCALL grCreateDevice( .grDmaQueue = NULL, // Initialized below .universalAtomicCounterBuffer = VK_NULL_HANDLE, // Initialized below .universalAtomicCounterSet = VK_NULL_HANDLE, // Initialized below + .descriptorPushSetLayout = VK_NULL_HANDLE, .computeAtomicCounterBuffer = VK_NULL_HANDLE, // Initialized below .computeAtomicCounterSet = VK_NULL_HANDLE, // Initialized below .grBorderColorPalette = NULL, + .descriptorBufferSupported = descriptorBufferSupported, + .maxMutableUniformDescriptorSize = 0, // Initialized below + .maxMutableStorageDescriptorSize = 0, // Initialized below + .maxMutableDescriptorSize = 0, // Initialized below }; + if (grDevice->descriptorBufferSupported) { + // TODO: handle descriptors in the case if mutable descriptors aren't supported + uint32_t maxUniformDescriptorSize = grPhysicalGpu->descriptorBufferProps.sampledImageDescriptorSize; + maxUniformDescriptorSize = MAX(maxUniformDescriptorSize, grPhysicalGpu->descriptorBufferProps.uniformTexelBufferDescriptorSize); + maxUniformDescriptorSize = MAX(maxUniformDescriptorSize, grPhysicalGpu->descriptorBufferProps.samplerDescriptorSize); + grDevice->maxMutableUniformDescriptorSize = maxUniformDescriptorSize; + uint32_t maxStorageDescriptorSize = 
grPhysicalGpu->descriptorBufferProps.storageImageDescriptorSize; + maxStorageDescriptorSize = MAX(maxStorageDescriptorSize, grPhysicalGpu->descriptorBufferProps.storageTexelBufferDescriptorSize); + maxStorageDescriptorSize = MAX(maxStorageDescriptorSize, grPhysicalGpu->descriptorBufferProps.storageBufferDescriptorSize); + grDevice->maxMutableStorageDescriptorSize = maxStorageDescriptorSize; + grDevice->maxMutableDescriptorSize = MAX(maxStorageDescriptorSize, maxUniformDescriptorSize); + } + memcpy(grDevice->memoryHeapMap, memoryHeapMap, memoryHeapCount * sizeof(uint32_t)); - grDevice->atomicCounterSetLayout = getAtomicCounterDescriptorSetLayout(grDevice); - grDevice->dynamicMemorySetLayout = getDynamicMemoryDescriptorSetLayout(grDevice); + if (grDevice->descriptorBufferSupported) { + grDevice->descriptorPushSetLayout = getBufferPushDescriptorSetLayout(grDevice); + } else { + grDevice->atomicCounterSetLayout = getAtomicCounterDescriptorSetLayout(grDevice); + grDevice->dynamicMemorySetLayout = getDynamicMemoryDescriptorSetLayout(grDevice); + } grDevice->defaultDescriptorSetLayout = getDefaultDescriptorSetLayout(grDevice); - if (universalQueueFamilyIndex != INVALID_QUEUE_INDEX) { grDevice->grUniversalQueue = grQueueCreate(grDevice, universalQueueFamilyIndex, universalQueueIndex); @@ -896,12 +996,15 @@ GR_RESULT GR_STDCALL grCreateDevice( grDevice->universalAtomicCounterBuffer = getAtomicCounterBuffer(grDevice, grDevice->universalAtomicCounterMemory, UNIVERSAL_ATOMIC_COUNTERS_COUNT); - grDevice->universalAtomicCounterPool = - getAtomicCounterDescriptorPool(grDevice); - grDevice->universalAtomicCounterSet = - getAtomicCounterDescriptorSet(grDevice, grDevice->atomicCounterSetLayout, - grDevice->universalAtomicCounterPool, - grDevice->universalAtomicCounterBuffer); + grDevice->universalAtomicCounterBufferSize = UNIVERSAL_ATOMIC_COUNTERS_COUNT * sizeof(uint32_t); + if (!grDevice->descriptorBufferSupported) { + grDevice->universalAtomicCounterPool = + 
getAtomicCounterDescriptorPool(grDevice); + grDevice->universalAtomicCounterSet = + getAtomicCounterDescriptorSet(grDevice, grDevice->atomicCounterSetLayout, + grDevice->universalAtomicCounterPool, + grDevice->universalAtomicCounterBuffer); + } } if (computeQueueFamilyIndex != INVALID_QUEUE_INDEX) { grDevice->grComputeQueue = @@ -911,12 +1014,15 @@ GR_RESULT GR_STDCALL grCreateDevice( grDevice->computeAtomicCounterBuffer = getAtomicCounterBuffer(grDevice, grDevice->computeAtomicCounterMemory, COMPUTE_ATOMIC_COUNTERS_COUNT); - grDevice->computeAtomicCounterPool = - getAtomicCounterDescriptorPool(grDevice); - grDevice->computeAtomicCounterSet = - getAtomicCounterDescriptorSet(grDevice, grDevice->atomicCounterSetLayout, - grDevice->computeAtomicCounterPool, - grDevice->computeAtomicCounterBuffer); + grDevice->computeAtomicCounterBufferSize = COMPUTE_ATOMIC_COUNTERS_COUNT * sizeof(uint32_t); + if (!grDevice->descriptorBufferSupported) { + grDevice->computeAtomicCounterPool = + getAtomicCounterDescriptorPool(grDevice); + grDevice->computeAtomicCounterSet = + getAtomicCounterDescriptorSet(grDevice, grDevice->atomicCounterSetLayout, + grDevice->computeAtomicCounterPool, + grDevice->computeAtomicCounterBuffer); + } } if (dmaQueueFamilyIndex != INVALID_QUEUE_INDEX) { grDevice->grDmaQueue = grQueueCreate(grDevice, dmaQueueFamilyIndex, dmaQueueIndex); @@ -944,8 +1050,13 @@ GR_RESULT GR_STDCALL grDestroyDevice( return GR_ERROR_INVALID_OBJECT_TYPE; } - VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->atomicCounterSetLayout, NULL); - VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->dynamicMemorySetLayout, NULL); + if (grDevice->descriptorBufferSupported) { + VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->descriptorPushSetLayout, NULL); + } else { + VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->atomicCounterSetLayout, NULL); + VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->dynamicMemorySetLayout, NULL); + } + 
VKD.vkDestroyDescriptorSetLayout(grDevice->device, grDevice->defaultDescriptorSetLayout, NULL); if (grDevice->grUniversalQueue) { diff --git a/src/mantle/mantle_memory_man.c b/src/mantle/mantle_memory_man.c index 5f931b2b..e8982e36 100644 --- a/src/mantle/mantle_memory_man.c +++ b/src/mantle/mantle_memory_man.c @@ -122,20 +122,37 @@ GR_RESULT GR_STDCALL grAllocMemory( // Try to allocate from the best heap vkRes = VK_ERROR_UNKNOWN; + unsigned selectedMemoryTypeIndex = ~0u; + unsigned memoryTypeBits = 0; for (int i = 0; i < pAllocInfo->heapCount; i++) { if (pAllocInfo->heaps[i] >= grDevice->memoryHeapCount) { return GR_ERROR_INVALID_ORDINAL; } + memoryTypeBits |= (1 << grDevice->memoryHeapMap[pAllocInfo->heaps[i]]); + } + for (int i = 0; i < pAllocInfo->heapCount; i++) { + if (pAllocInfo->heaps[i] >= grDevice->memoryHeapCount) { + return GR_ERROR_INVALID_ORDINAL; + } + + + const VkMemoryAllocateFlagsInfo flagsInfo = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .pNext = NULL, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + .deviceMask = 0, + }; const VkMemoryAllocateInfo allocateInfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = NULL, + .pNext = &flagsInfo, .allocationSize = pAllocInfo->size, .memoryTypeIndex = grDevice->memoryHeapMap[pAllocInfo->heaps[i]], }; vkRes = VKD.vkAllocateMemory(grDevice->device, &allocateInfo, NULL, &vkMemory); if (vkRes == VK_SUCCESS) { + selectedMemoryTypeIndex = allocateInfo.memoryTypeIndex; break; } else if (vkRes == VK_ERROR_OUT_OF_DEVICE_MEMORY) { continue; @@ -163,7 +180,8 @@ GR_RESULT GR_STDCALL grAllocMemory( VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, + VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = NULL, @@ -182,12 +200,22 @@ GR_RESULT GR_STDCALL 
grAllocMemory( return getGrResult(vkRes); } + VkBufferDeviceAddressInfo vkBufferAddressInfo = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .pNext = NULL, + .buffer = vkBuffer, + }; + VkDeviceAddress addr = VKD.vkGetBufferDeviceAddress(grDevice->device, &vkBufferAddressInfo); GrGpuMemory* grGpuMemory = malloc(sizeof(GrGpuMemory)); *grGpuMemory = (GrGpuMemory) { .grObj = { GR_OBJ_TYPE_GPU_MEMORY, grDevice }, .deviceMemory = vkMemory, .deviceSize = pAllocInfo->size, + .memoryTypeIndex = selectedMemoryTypeIndex, .buffer = vkBuffer, + .address = addr, + .userPtr = NULL, + .forceMapping = false, }; *pMem = (GR_GPU_MEMORY)grGpuMemory; @@ -235,10 +263,18 @@ GR_RESULT GR_STDCALL grMapMemory( GrDevice* grDevice = GET_OBJ_DEVICE(grGpuMemory); - VkResult vkRes = VKD.vkMapMemory(grDevice->device, grGpuMemory->deviceMemory, - 0, VK_WHOLE_SIZE, 0, ppData); - if (vkRes != VK_SUCCESS) { - LOGE("vkMapMemory failed (%d)\n", vkRes); + VkResult vkRes; + if (grGpuMemory->userPtr) { + *ppData = grGpuMemory->userPtr; + vkRes = VK_SUCCESS; + } else { + vkRes = VKD.vkMapMemory(grDevice->device, grGpuMemory->deviceMemory, + 0, VK_WHOLE_SIZE, 0, ppData); + if (vkRes != VK_SUCCESS) { + LOGE("vkMapMemory failed (%d)\n", vkRes); + } else { + grGpuMemory->userPtr = *ppData; + } } return getGrResult(vkRes); @@ -258,7 +294,10 @@ GR_RESULT GR_STDCALL grUnmapMemory( GrDevice* grDevice = GET_OBJ_DEVICE(grGpuMemory); - VKD.vkUnmapMemory(grDevice->device, grGpuMemory->deviceMemory); + if (!grGpuMemory->forceMapping) { + VKD.vkUnmapMemory(grDevice->device, grGpuMemory->deviceMemory); + grGpuMemory->userPtr = NULL; + } return GR_SUCCESS; } diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 490025e5..675df51a 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -95,11 +95,20 @@ typedef struct _BindPoint uint32_t dirtyFlags; GrPipeline* grPipeline; GrDescriptorSet* grDescriptorSets[GR_MAX_DESCRIPTOR_SETS]; - VkDescriptorSet descriptorSets[30]; - 
unsigned descriptorArrayOffsets[30]; + union { + struct { + VkDeviceAddress descriptorBufferAddresses[30]; + VkDeviceSize descriptorOffsets[30]; + }; + struct { + VkDescriptorSet descriptorSets[30]; + unsigned descriptorArrayOffsets[30]; + }; + }; unsigned boundDescriptorSetCount; unsigned slotOffsets[GR_MAX_DESCRIPTOR_SETS]; DescriptorSetSlot dynamicMemoryView; + bool descriptorSetOffsetsPushed; } BindPoint; typedef struct _PipelineCreateInfo @@ -172,6 +181,8 @@ typedef struct _GrCmdBuffer { GrColorBlendStateObject* grColorBlendState; // Render pass VkRenderingAttachmentInfo colorAttachments[GR_MAX_COLOR_TARGETS]; + VkDeviceAddress bufferAddresses[32]; + unsigned descriptorBufferCount; bool hasDepth; bool hasStencil; VkRenderingAttachmentInfo depthAttachment; @@ -224,6 +235,11 @@ typedef struct _GrDescriptorSet { DescriptorSetSlot* slots; VkDescriptorPool descriptorPool; VkDescriptorSet descriptorSet; + void* descriptorBufferPtr; + VkBuffer descriptorBuffer; + VkDeviceSize descriptorBufferSize; + VkDeviceSize descriptorBufferMemoryOffset; + VkDeviceAddress descriptorBufferAddress; } GrDescriptorSet; typedef struct _GrDevice { @@ -232,23 +248,35 @@ typedef struct _GrDevice { VkDevice device; VkPhysicalDevice physicalDevice; VkPhysicalDeviceMemoryProperties memoryProperties; + VkPhysicalDeviceDescriptorBufferPropertiesEXT descriptorBufferProps; unsigned memoryHeapCount; uint32_t memoryHeapMap[GR_MAX_MEMORY_HEAPS]; - VkDescriptorSetLayout atomicCounterSetLayout; - VkDescriptorSetLayout dynamicMemorySetLayout; + union { + struct { + VkDescriptorSetLayout atomicCounterSetLayout; + VkDescriptorSetLayout dynamicMemorySetLayout; + }; + VkDescriptorSetLayout descriptorPushSetLayout; + }; VkDescriptorSetLayout defaultDescriptorSetLayout; GrQueue* grUniversalQueue; GrQueue* grComputeQueue; GrQueue* grDmaQueue; VkDeviceMemory universalAtomicCounterMemory; VkBuffer universalAtomicCounterBuffer; + VkDeviceSize universalAtomicCounterBufferSize; VkDescriptorPool 
universalAtomicCounterPool; VkDescriptorSet universalAtomicCounterSet; VkDeviceMemory computeAtomicCounterMemory; VkBuffer computeAtomicCounterBuffer; + VkDeviceSize computeAtomicCounterBufferSize; VkDescriptorPool computeAtomicCounterPool; VkDescriptorSet computeAtomicCounterSet; GrBorderColorPalette* grBorderColorPalette; + bool descriptorBufferSupported; + uint32_t maxMutableUniformDescriptorSize; + uint32_t maxMutableStorageDescriptorSize; + uint32_t maxMutableDescriptorSize; } GrDevice; typedef struct _GrEvent { @@ -266,7 +294,11 @@ typedef struct _GrGpuMemory { GrObject grObj; // FIXME base object? VkDeviceMemory deviceMemory; VkDeviceSize deviceSize; + unsigned memoryTypeIndex; VkBuffer buffer; + VkDeviceAddress address; + void* userPtr; + bool forceMapping; } GrGpuMemory; typedef struct _GrImage { @@ -299,6 +331,7 @@ typedef struct _GrMsaaStateObject { typedef struct _GrPhysicalGpu { GrBaseObject grBaseObj; VkPhysicalDevice physicalDevice; + VkPhysicalDeviceDescriptorBufferPropertiesEXT descriptorBufferProps; VkPhysicalDeviceProperties2 physicalDeviceProps; } GrPhysicalGpu; diff --git a/src/mantle/mantle_object_man.c b/src/mantle/mantle_object_man.c index dc9e64f4..c9cb222e 100644 --- a/src/mantle/mantle_object_man.c +++ b/src/mantle/mantle_object_man.c @@ -42,6 +42,7 @@ GR_RESULT GR_STDCALL grDestroyObject( grClearDescriptorSetSlots(grDescriptorSet, 0, grDescriptorSet->slotCount); free(grDescriptorSet->slots); + VKD.vkDestroyBuffer(grDevice->device, grDescriptorSet->descriptorBuffer, NULL); VKD.vkDestroyDescriptorPool(grDevice->device, grDescriptorSet->descriptorPool, NULL); } break; case GR_OBJ_TYPE_EVENT: { @@ -174,10 +175,36 @@ GR_RESULT GR_STDCALL grGetObjectInfo( VKD.vkGetImageMemoryRequirements(grDevice->device, grImage->image, &memReqs); *grMemReqs = getGrMemoryRequirements(grDevice, memReqs); } break; + case GR_OBJ_TYPE_DESCRIPTOR_SET: { + GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)grBaseObject; + GrDevice* grDevice = 
GET_OBJ_DEVICE(grBaseObject); + if (grDevice->descriptorBufferSupported) { + VKD.vkGetBufferMemoryRequirements(grDevice->device, grDescriptorSet->descriptorBuffer, &memReqs); + + // exclude host non-visible memory types + for (unsigned i = 0; i < grDevice->memoryProperties.memoryTypeCount; ++i) { + if ((memReqs.memoryTypeBits & (1 << i)) && + !(grDevice->memoryProperties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + memReqs.memoryTypeBits &= ~(1 << i); + } + } + + *grMemReqs = getGrMemoryRequirements(grDevice, memReqs); + } else { + // Mantle spec: "Not all objects have memory requirements, in which case it is valid + // for the requirements structure to return zero size and alignment, and no heaps." + *grMemReqs = (GR_MEMORY_REQUIREMENTS) { + // No actual allocation will be done with a heap count of 0. See grAllocMemory. + .size = quirkHas(QUIRK_NON_ZERO_MEM_REQ) ? 4096 : 0, + .alignment = quirkHas(QUIRK_NON_ZERO_MEM_REQ) ? 4 : 0, + .heapCount = 0, + }; + } + break; + } case GR_OBJ_TYPE_BORDER_COLOR_PALETTE: case GR_OBJ_TYPE_COLOR_TARGET_VIEW: case GR_OBJ_TYPE_DEPTH_STENCIL_VIEW: - case GR_OBJ_TYPE_DESCRIPTOR_SET: case GR_OBJ_TYPE_EVENT: case GR_OBJ_TYPE_FENCE: case GR_OBJ_TYPE_IMAGE_VIEW: @@ -292,10 +319,44 @@ GR_RESULT GR_STDCALL grBindObjectMemory( vkRes = VKD.vkBindImageMemory(grDevice->device, grImage->image, grGpuMemory->deviceMemory, offset); } break; + case GR_OBJ_TYPE_DESCRIPTOR_SET: { + GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)grObject; + GrDevice* grDevice = GET_OBJ_DEVICE(grObject); + + if (grDevice->descriptorBufferSupported) { + vkRes = VKD.vkBindBufferMemory(grDevice->device, grDescriptorSet->descriptorBuffer, grGpuMemory->deviceMemory, offset); + if (vkRes == VK_SUCCESS) { + VkBufferDeviceAddressInfo vkBufferAddressInfo = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .pNext = NULL, + .buffer = grDescriptorSet->descriptorBuffer, + }; + grDescriptorSet->descriptorBufferMemoryOffset = offset; + 
grDescriptorSet->descriptorBufferAddress = VKD.vkGetBufferDeviceAddress(grDevice->device, &vkBufferAddressInfo); + + grGpuMemory->forceMapping = true; + void* memoryPtr = NULL; + if (grGpuMemory->userPtr) { + memoryPtr = grGpuMemory->userPtr; + vkRes = VK_SUCCESS; + } else { + vkRes = VKD.vkMapMemory(grDevice->device, grGpuMemory->deviceMemory, + 0, VK_WHOLE_SIZE, 0, &memoryPtr); + if (vkRes != VK_SUCCESS) { + LOGE("vkMapMemory failed (%d)\n", vkRes); + } else { + grGpuMemory->userPtr = memoryPtr; + } + } + + memset(grGpuMemory->userPtr + offset, 0, grDescriptorSet->descriptorBufferSize); + grDescriptorSet->descriptorBufferPtr = grGpuMemory->userPtr + offset; + } + } + } break; case GR_OBJ_TYPE_BORDER_COLOR_PALETTE: case GR_OBJ_TYPE_COLOR_TARGET_VIEW: case GR_OBJ_TYPE_DEPTH_STENCIL_VIEW: - case GR_OBJ_TYPE_DESCRIPTOR_SET: case GR_OBJ_TYPE_EVENT: case GR_OBJ_TYPE_FENCE: case GR_OBJ_TYPE_IMAGE_VIEW: diff --git a/src/mantle/mantle_shader_pipeline.c b/src/mantle/mantle_shader_pipeline.c index e49d4925..3ab975f2 100644 --- a/src/mantle/mantle_shader_pipeline.c +++ b/src/mantle/mantle_shader_pipeline.c @@ -9,6 +9,7 @@ typedef struct _Stage { static bool handleDynamicDescriptorSlots( PipelineDescriptorSlot* descriptorSlot, const GR_DYNAMIC_MEMORY_VIEW_SLOT_INFO* dynamicMapping, + bool descriptorBufferUsed, unsigned bindingCount, const IlcBinding* bindings, uint32_t* offsets, @@ -37,11 +38,11 @@ static bool handleDynamicDescriptorSlots( } offsets[i] = 0; - unsigned int descriptorSetIndex = DYNAMIC_MEMORY_VIEW_DESCRIPTOR_SET_ID; + unsigned int descriptorSetIndex = descriptorBufferUsed ? DESCRIPTOR_BUFFERS_PUSH_DESCRIPTOR_SET_ID : DYNAMIC_MEMORY_VIEW_DESCRIPTOR_SET_ID; descriptorSetIndices[i] = descriptorSetIndex; patchEntries[i] = (IlcBindingPatchEntry) { .id = binding->id, - .bindingIndex = DYNAMIC_MEMORY_VIEW_BINDING_ID, + .bindingIndex = descriptorBufferUsed ? 
DESCRIPTOR_BUFFERS_DYNAMIC_MAPPING_BINDING_ID : DYNAMIC_MEMORY_VIEW_BINDING_ID, .descriptorSetIndex = descriptorSetIndex, }; @@ -321,8 +322,8 @@ static VkPipelineLayout getVkPipelineLayout( VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; VkDescriptorSetLayout setLayouts[32] = { - grDevice->dynamicMemorySetLayout, - grDevice->atomicCounterSetLayout, + grDevice->descriptorBufferSupported ? grDevice->defaultDescriptorSetLayout : grDevice->dynamicMemorySetLayout, + grDevice->descriptorBufferSupported ? grDevice->descriptorPushSetLayout : grDevice->atomicCounterSetLayout, }; assert((descriptorSetCount + 2) <= COUNT_OF(setLayouts)); @@ -362,7 +363,7 @@ VkPipeline grPipelineGetVkPipeline( VkFormat depthFormat, VkFormat stencilFormat) { - const GrDevice* grDevice = GET_OBJ_DEVICE(grPipeline); + GrDevice* grDevice = GET_OBJ_DEVICE(grPipeline); const PipelineCreateInfo* createInfo = grPipeline->createInfo; VkPipeline vkPipeline = VK_NULL_HANDLE; VkResult vkRes; @@ -666,6 +667,7 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( dynamicMappingUsed |= handleDynamicDescriptorSlots( &dynamicDescriptorSlot, &stage->shader->dynamicMemoryViewMapping, + grDevice->descriptorBufferSupported, grShader->bindingCount, grShader->bindings, specData[i], &specData[i][grShader->bindingCount], @@ -675,7 +677,8 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( unsigned descriptorSetCount = 0; for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { getDescriptorSlotMappings(&descriptorSetCounts[i], &pipelineDescriptorSlots[i], - grDevice, COUNT_OF(stages), stages, patchEntries, specData, descriptorSetIndices, i, descriptorSetCount + DESCRIPTOR_SET_ID); + grDevice, COUNT_OF(stages), stages, patchEntries, specData, descriptorSetIndices, i, + descriptorSetCount + (grDevice->descriptorBufferSupported ? 
DESCRIPTOR_BUFFERS_BASE_DESCRIPTOR_SET_ID : DESCRIPTOR_SET_ID)); descriptorSetCount += descriptorSetCounts[i]; } @@ -802,8 +805,10 @@ GR_RESULT GR_STDCALL grCreateGraphicsPipeline( PipelineCreateInfo* pipelineCreateInfo = malloc(sizeof(PipelineCreateInfo)); *pipelineCreateInfo = (PipelineCreateInfo) { - .createFlags = (pCreateInfo->flags & GR_PIPELINE_CREATE_DISABLE_OPTIMIZATION) != 0 ? - VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT : 0, + .createFlags = + ((pCreateInfo->flags & GR_PIPELINE_CREATE_DISABLE_OPTIMIZATION) != 0 ? + VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT : 0) | + (grDevice->descriptorBufferSupported ? VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0), .stageCount = stageCount, .stageCreateInfos = { { 0 } }, // Initialized below .specInfos = { { 0 } }, // Initialized below @@ -947,6 +952,7 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( bool dynamicMappingUsed = handleDynamicDescriptorSlots( &dynamicDescriptorSlot, &stage.shader->dynamicMemoryViewMapping, + grDevice->descriptorBufferSupported, grShader->bindingCount, grShader->bindings, specData, &specData[grShader->bindingCount], @@ -955,7 +961,8 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( unsigned descriptorSetCount = 0; for (unsigned i = 0; i < GR_MAX_DESCRIPTOR_SETS; i++) { getDescriptorSlotMappings(&descriptorSetCounts[i], &pipelineDescriptorSlots[i], - grDevice, 1, &stage, &patchEntries, &descriptorOffsets, &descriptorSetIndices, i, descriptorSetCount + DESCRIPTOR_SET_ID); + grDevice, 1, &stage, &patchEntries, &descriptorOffsets, &descriptorSetIndices, i, + descriptorSetCount + (grDevice->descriptorBufferSupported ? 
DESCRIPTOR_BUFFERS_BASE_DESCRIPTOR_SET_ID : DESCRIPTOR_SET_ID)); descriptorSetCount += descriptorSetCounts[i]; } @@ -1006,8 +1013,9 @@ GR_RESULT GR_STDCALL grCreateComputePipeline( const VkComputePipelineCreateInfo pipelineCreateInfo = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = NULL, - .flags = (pCreateInfo->flags & GR_PIPELINE_CREATE_DISABLE_OPTIMIZATION) != 0 ? - VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT : 0, + .flags = ((pCreateInfo->flags & GR_PIPELINE_CREATE_DISABLE_OPTIMIZATION) != 0 ? + VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT : 0) | + (grDevice->descriptorBufferSupported ? VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0), .stage = shaderStageCreateInfo, .layout = pipelineLayout, .basePipelineHandle = VK_NULL_HANDLE, From 23d4016a2c4243429a0448a8ca231f60aa0e844c Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Mon, 5 Dec 2022 18:53:21 +0300 Subject: [PATCH 08/10] mantle: added quirk to allocate memory specifically for descriptor buffers --- src/mantle/mantle_descriptor_set.c | 73 ++++++++++++++++++++++++++++-- src/mantle/mantle_object.h | 1 + src/mantle/mantle_object_man.c | 5 +- src/mantle/quirk.c | 3 +- src/mantle/quirk.h | 3 ++ 5 files changed, 79 insertions(+), 6 deletions(-) diff --git a/src/mantle/mantle_descriptor_set.c b/src/mantle/mantle_descriptor_set.c index 5c051eca..7dc8bb53 100644 --- a/src/mantle/mantle_descriptor_set.c +++ b/src/mantle/mantle_descriptor_set.c @@ -28,7 +28,10 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( } VkBuffer vkBuffer = VK_NULL_HANDLE; + VkDeviceMemory bufferMemory = VK_NULL_HANDLE; VkDeviceSize bufferSize = 0; + VkDeviceAddress bufferAddress = 0ull; + void* descriptorBufferPtr = NULL; VkDescriptorPool descriptorPool = VK_NULL_HANDLE; VkDescriptorSet descriptorSet = VK_NULL_HANDLE; @@ -59,7 +62,69 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( vkRes = VKD.vkCreateBuffer(grDevice->device, &bufferCreateInfo, NULL, &vkBuffer); if (vkRes != 
VK_SUCCESS) { LOGE("vkCreateBuffer failed (%d)\n", vkRes); - return getGrResult(vkRes); + goto bail; + } + if (quirkHas(QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION)) { + VkMemoryRequirements memReqs; + VKD.vkGetBufferMemoryRequirements(grDevice->device, vkBuffer, &memReqs); + + const VkMemoryAllocateFlagsInfo flagsInfo = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .pNext = NULL, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + .deviceMask = 0, + }; + VkMemoryAllocateInfo allocateInfo = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &flagsInfo, + .allocationSize = memReqs.size, + .memoryTypeIndex = 0xFFFF, + }; + // exclude host non-visible memory types + for (unsigned i = 0; i < grDevice->memoryProperties.memoryTypeCount; ++i) { + if (!(memReqs.memoryTypeBits & (1 << i))) { + continue; + } + if (!(grDevice->memoryProperties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + continue; + } + + allocateInfo.memoryTypeIndex = i; + vkRes = VKD.vkAllocateMemory(grDevice->device, &allocateInfo, NULL, &bufferMemory); + if (vkRes == VK_SUCCESS) { + break; + } + } + + if (vkRes != VK_SUCCESS) { + LOGE("failed to allocate memory for descriptor buffer (%d)\n", vkRes); + goto bail; + } else if (bufferMemory == VK_NULL_HANDLE) { + LOGE("failed to select memory memory for descriptor buffer\n"); + goto bail; + } + + vkRes = VKD.vkBindBufferMemory(grDevice->device, vkBuffer, bufferMemory, 0); + if (vkRes != VK_SUCCESS) { + LOGE("Buffer binding failed (%d)\n", vkRes); + goto bail; + } + VkBufferDeviceAddressInfo vkBufferAddressInfo = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .pNext = NULL, + .buffer = vkBuffer, + }; + + bufferAddress = VKD.vkGetBufferDeviceAddress(grDevice->device, &vkBufferAddressInfo); + + vkRes = VKD.vkMapMemory(grDevice->device, bufferMemory, + 0, VK_WHOLE_SIZE, 0, &descriptorBufferPtr); + if (vkRes != VK_SUCCESS) { + LOGE("vkMapMemory failed (%d)\n", vkRes); + goto bail; + } + + 
memset(descriptorBufferPtr, 0, bufferSize); } } else { const VkDescriptorType descriptorTypes[] = { @@ -132,11 +197,12 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( .slots = calloc(pCreateInfo->slots, sizeof(DescriptorSetSlot)), .descriptorPool = descriptorPool, .descriptorSet = descriptorSet, - .descriptorBufferPtr = NULL, + .descriptorBufferPtr = descriptorBufferPtr, .descriptorBuffer = vkBuffer, + .descriptorBufferMemory = bufferMemory, .descriptorBufferSize = bufferSize, .descriptorBufferMemoryOffset = 0ull, - .descriptorBufferAddress = 0ull, + .descriptorBufferAddress = bufferAddress, }; *pDescriptorSet = (GR_DESCRIPTOR_SET)grDescriptorSet; @@ -145,6 +211,7 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( bail: VKD.vkDestroyDescriptorPool(grDevice->device, descriptorPool, NULL); VKD.vkDestroyBuffer(grDevice->device, vkBuffer, NULL); + VKD.vkFreeMemory(grDevice->device, bufferMemory, NULL); return getGrResult(vkRes); } diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 675df51a..1c2e25db 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -237,6 +237,7 @@ typedef struct _GrDescriptorSet { VkDescriptorSet descriptorSet; void* descriptorBufferPtr; VkBuffer descriptorBuffer; + VkDeviceMemory descriptorBufferMemory; VkDeviceSize descriptorBufferSize; VkDeviceSize descriptorBufferMemoryOffset; VkDeviceAddress descriptorBufferAddress; diff --git a/src/mantle/mantle_object_man.c b/src/mantle/mantle_object_man.c index c9cb222e..8fa2e0a9 100644 --- a/src/mantle/mantle_object_man.c +++ b/src/mantle/mantle_object_man.c @@ -43,6 +43,7 @@ GR_RESULT GR_STDCALL grDestroyObject( grClearDescriptorSetSlots(grDescriptorSet, 0, grDescriptorSet->slotCount); free(grDescriptorSet->slots); VKD.vkDestroyBuffer(grDevice->device, grDescriptorSet->descriptorBuffer, NULL); + VKD.vkFreeMemory(grDevice->device, grDescriptorSet->descriptorBufferMemory, NULL); VKD.vkDestroyDescriptorPool(grDevice->device, grDescriptorSet->descriptorPool, NULL); } 
break; case GR_OBJ_TYPE_EVENT: { @@ -178,7 +179,7 @@ GR_RESULT GR_STDCALL grGetObjectInfo( case GR_OBJ_TYPE_DESCRIPTOR_SET: { GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)grBaseObject; GrDevice* grDevice = GET_OBJ_DEVICE(grBaseObject); - if (grDevice->descriptorBufferSupported) { + if (grDevice->descriptorBufferSupported && !quirkHas(QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION)) { VKD.vkGetBufferMemoryRequirements(grDevice->device, grDescriptorSet->descriptorBuffer, &memReqs); // exclude host non-visible memory types @@ -323,7 +324,7 @@ GR_RESULT GR_STDCALL grBindObjectMemory( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)grObject; GrDevice* grDevice = GET_OBJ_DEVICE(grObject); - if (grDevice->descriptorBufferSupported) { + if (grDevice->descriptorBufferSupported && !quirkHas(QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION)) { vkRes = VKD.vkBindBufferMemory(grDevice->device, grDescriptorSet->descriptorBuffer, grGpuMemory->deviceMemory, offset); if (vkRes == VK_SUCCESS) { VkBufferDeviceAddressInfo vkBufferAddressInfo = { diff --git a/src/mantle/quirk.c b/src/mantle/quirk.c index 9a465279..4c35fcd8 100644 --- a/src/mantle/quirk.c +++ b/src/mantle/quirk.c @@ -16,7 +16,8 @@ void quirkInit( QUIRK_COMPRESSED_IMAGE_COPY_IN_TEXELS | QUIRK_INVALID_CMD_BUFFER_RESET | QUIRK_CUBEMAP_LAYER_DIV_6 | - QUIRK_SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS; + QUIRK_SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS | + QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION; } else if (!strcmp(appInfo->pEngineName, "CivTech")) { mQuirks = QUIRK_NON_ZERO_MEM_REQ | QUIRK_READ_ONLY_IMAGE_STATE_MISMATCH | diff --git a/src/mantle/quirk.h b/src/mantle/quirk.h index 448c882f..1f1f1336 100644 --- a/src/mantle/quirk.h +++ b/src/mantle/quirk.h @@ -28,6 +28,9 @@ typedef enum { // RADV doesn't support linear transfer-only images used by Star Swarm, but it has no effect // on rendering. Silence it. 
QUIRK_SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS = 1 << 7, + + // Star Swarm uses completely incompatible memory types for descriptor buffers + QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION = 1 << 8, } QUIRK_FLAGS; void quirkInit( From 0c680e3bfa2d03ea0eee04d38ff2176991754f0c Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Sun, 23 Apr 2023 17:39:04 +0300 Subject: [PATCH 09/10] mantle: optimize image view and sampler descriptor generation Allow generating image view and sampler descriptors at the moment they are created. This will not work on Qualcomm GPUs, as they use 64-byte descriptors everywhere. --- src/mantle/mantle_descriptor_set.c | 27 ++++++++++++++++++++++++-- src/mantle/mantle_image_sample.c | 16 +++++++++++++++ src/mantle/mantle_image_view.c | 31 ++++++++++++++++++++++++++++++ src/mantle/mantle_init_device.c | 15 +++++++++++++++ src/mantle/mantle_internal.h | 3 +++ src/mantle/mantle_object.h | 5 +++++ 6 files changed, 95 insertions(+), 2 deletions(-) diff --git a/src/mantle/mantle_descriptor_set.c b/src/mantle/mantle_descriptor_set.c index 7dc8bb53..39358e1b 100644 --- a/src/mantle/mantle_descriptor_set.c +++ b/src/mantle/mantle_descriptor_set.c @@ -246,7 +246,14 @@ GR_VOID GR_STDCALL grAttachSamplerDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); - if (grDevice->descriptorBufferSupported) { + if (grDevice->descriptorBufferSupported && grDevice->descriptorBufferAllowPreparedSampler) { + for (unsigned i = 0; i < slotCount; i++) { + const GrSampler* grSampler = (GrSampler*)pSamplers[i]; + memcpy(grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLER)) * grDevice->maxMutableDescriptorSize, + &grSampler->descriptor, + grDevice->descriptorBufferProps.samplerDescriptorSize); + } + } else if (grDevice->descriptorBufferSupported) { for (unsigned i = 
0; i < slotCount; i++) { const GrSampler* grSampler = (GrSampler*)pSamplers[i]; @@ -316,7 +323,23 @@ GR_VOID GR_STDCALL grAttachImageViewDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); - if (grDevice->descriptorBufferSupported) { + if (grDevice->descriptorBufferSupported && grDevice->descriptorBufferAllowPreparedImageView) { + for (unsigned i = 0; i < slotCount; i++) { + const GR_IMAGE_VIEW_ATTACH_INFO* info = &pImageViews[i]; + const GrImageView* grImageView = (GrImageView*)info->view; + memcpy(grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)) * grDevice->maxMutableDescriptorSize, + &grImageView->sampledDescriptor, + grDevice->descriptorBufferProps.sampledImageDescriptorSize + ); + + if (grImageView->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + memcpy( + grDescriptorSet->descriptorBufferPtr + ((startSlot + i) * DESCRIPTORS_PER_SLOT + getDescriptorOffset(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)) * grDevice->maxMutableDescriptorSize, + &grImageView->storageDescriptor, + grDevice->descriptorBufferProps.storageImageDescriptorSize); + } + } + } else if (grDevice->descriptorBufferSupported) { for (unsigned i = 0; i < slotCount; i++) { const GR_IMAGE_VIEW_ATTACH_INFO* info = &pImageViews[i]; const GrImageView* grImageView = (GrImageView*)info->view; diff --git a/src/mantle/mantle_image_sample.c b/src/mantle/mantle_image_sample.c index a2bbd0ca..2b450b9d 100644 --- a/src/mantle/mantle_image_sample.c +++ b/src/mantle/mantle_image_sample.c @@ -338,6 +338,22 @@ GR_RESULT GR_STDCALL grCreateSampler( .sampler = vkSampler, }; + if (grDevice->descriptorBufferAllowPreparedSampler) { + VkDescriptorGetInfoEXT descriptorInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .pNext = NULL, + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .data = { + .pSampler = &vkSampler + } + }; + + VKD.vkGetDescriptorEXT( + 
grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.samplerDescriptorSize, + &grSampler->descriptor); + } *pSampler = (GR_SAMPLER)grSampler; return GR_SUCCESS; } diff --git a/src/mantle/mantle_image_view.c b/src/mantle/mantle_image_view.c index a0e939d3..a9c7431f 100644 --- a/src/mantle/mantle_image_view.c +++ b/src/mantle/mantle_image_view.c @@ -115,6 +115,37 @@ GR_RESULT GR_STDCALL grCreateImageView( .usage = grImage->usage, }; + if (grDevice->descriptorBufferAllowPreparedImageView) { + VkDescriptorImageInfo imageInfo = { + .sampler = VK_NULL_HANDLE, + .imageView = vkImageView, + .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; + + VkDescriptorGetInfoEXT descriptorInfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .pNext = NULL, + .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .data = { + .pSampledImage = &imageInfo, + } + }; + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.sampledImageDescriptorSize, + &grImageView->sampledDescriptor); + if (grImageView->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + descriptorInfo.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + descriptorInfo.data.pStorageImage = &imageInfo; + + VKD.vkGetDescriptorEXT( + grDevice->device, + &descriptorInfo, + grDevice->descriptorBufferProps.storageImageDescriptorSize, + &grImageView->storageDescriptor); + } + } *pView = (GR_IMAGE_VIEW)grImageView; return GR_SUCCESS; } diff --git a/src/mantle/mantle_init_device.c b/src/mantle/mantle_init_device.c index 13942d1a..4efe9b93 100644 --- a/src/mantle/mantle_init_device.c +++ b/src/mantle/mantle_init_device.c @@ -764,6 +764,18 @@ GR_RESULT GR_STDCALL grCreateDevice( goto bail; } + VkPhysicalDeviceDescriptorBufferFeaturesEXT queriedDescriptorBufferFeatures = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, + .pNext = NULL, + }; + + VkPhysicalDeviceFeatures2 queriedDeviceFeatures = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = 
&queriedDescriptorBufferFeatures, + }; + + vki.vkGetPhysicalDeviceFeatures2(grPhysicalGpu->physicalDevice, &queriedDeviceFeatures); + VkPhysicalDeviceCustomBorderColorFeaturesEXT customBorderColor = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, .pNext = NULL, @@ -800,6 +812,7 @@ GR_RESULT GR_STDCALL grCreateDevice( .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, .pNext = &mutableDescriptorFeaturesEXT, .descriptorBuffer = VK_TRUE, + .descriptorBufferImageLayoutIgnored = queriedDescriptorBufferFeatures.descriptorBufferImageLayoutIgnored, }; VkPhysicalDeviceVulkan12Features vulkan12DeviceFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, @@ -962,6 +975,8 @@ GR_RESULT GR_STDCALL grCreateDevice( .computeAtomicCounterSet = VK_NULL_HANDLE, // Initialized below .grBorderColorPalette = NULL, .descriptorBufferSupported = descriptorBufferSupported, + .descriptorBufferAllowPreparedImageView = descriptorBufferSupported && grPhysicalGpu->descriptorBufferProps.storageImageDescriptorSize <= MEMBER_SIZEOF(GrImageView, storageDescriptor) && grPhysicalGpu->descriptorBufferProps.sampledImageDescriptorSize <= MEMBER_SIZEOF(GrImageView, sampledDescriptor) && queriedDescriptorBufferFeatures.descriptorBufferImageLayoutIgnored, + .descriptorBufferAllowPreparedSampler = descriptorBufferSupported && grPhysicalGpu->descriptorBufferProps.samplerDescriptorSize <= MEMBER_SIZEOF(GrSampler, descriptor), .maxMutableUniformDescriptorSize = 0, // Initialized below .maxMutableStorageDescriptorSize = 0, // Initialized below .maxMutableDescriptorSize = 0, // Initialized below diff --git a/src/mantle/mantle_internal.h b/src/mantle/mantle_internal.h index 6d83335b..60a8785d 100644 --- a/src/mantle/mantle_internal.h +++ b/src/mantle/mantle_internal.h @@ -38,6 +38,9 @@ #define OFFSET_OF(struct, member) \ (size_t)(&((struct*)0)->member) +#define MEMBER_SIZEOF(struct, member) \ + sizeof(((struct *)0)->member) + #define 
OFFSET_OF_UNION(struct, unionMember, member) \ (size_t)(&((struct*)0)->unionMember.member) diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 1c2e25db..315f1936 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -275,6 +275,8 @@ typedef struct _GrDevice { VkDescriptorSet computeAtomicCounterSet; GrBorderColorPalette* grBorderColorPalette; bool descriptorBufferSupported; + bool descriptorBufferAllowPreparedImageView; + bool descriptorBufferAllowPreparedSampler; uint32_t maxMutableUniformDescriptorSize; uint32_t maxMutableStorageDescriptorSize; uint32_t maxMutableDescriptorSize; @@ -321,6 +323,8 @@ typedef struct _GrImageView { VkImageView imageView; VkFormat format; VkImageUsageFlags usage; + uint8_t storageDescriptor[32]; + uint8_t sampledDescriptor[64]; } GrImageView; typedef struct _GrMsaaStateObject { @@ -369,6 +373,7 @@ typedef struct _GrRasterStateObject { typedef struct _GrSampler { GrObject grObj; VkSampler sampler; + uint8_t descriptor[32]; } GrSampler; typedef struct _GrShader { From b09e9110caae4c84a5fb96a2032ad71079ee1521 Mon Sep 17 00:00:00 2001 From: Cherser-s <29800876+Cherser-s@users.noreply.github.com> Date: Thu, 8 Dec 2022 20:55:44 +0300 Subject: [PATCH 10/10] mantle: added quirk for internal synchronization of descriptor set access Star Swarm doesn't care about synchronizing access to descriptor sets, so sometimes it crashes with normal descriptor sets and crashes almost immediately with descriptor buffers. 
--- src/mantle/mantle_descriptor_set.c | 39 ++++++++++++++++++++++++++++++ src/mantle/mantle_object.h | 1 + src/mantle/quirk.c | 3 ++- src/mantle/quirk.h | 3 +++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/mantle/mantle_descriptor_set.c b/src/mantle/mantle_descriptor_set.c index 39358e1b..26288c74 100644 --- a/src/mantle/mantle_descriptor_set.c +++ b/src/mantle/mantle_descriptor_set.c @@ -195,6 +195,7 @@ GR_RESULT GR_STDCALL grCreateDescriptorSet( .grObj = { GR_OBJ_TYPE_DESCRIPTOR_SET, grDevice }, .slotCount = pCreateInfo->slots, .slots = calloc(pCreateInfo->slots, sizeof(DescriptorSetSlot)), + .descriptorLock = SRWLOCK_INIT, .descriptorPool = descriptorPool, .descriptorSet = descriptorSet, .descriptorBufferPtr = descriptorBufferPtr, @@ -246,6 +247,10 @@ GR_VOID GR_STDCALL grAttachSamplerDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + AcquireSRWLockExclusive(&grDescriptorSet->descriptorLock); + } + if (grDevice->descriptorBufferSupported && grDevice->descriptorBufferAllowPreparedSampler) { for (unsigned i = 0; i < slotCount; i++) { const GrSampler* grSampler = (GrSampler*)pSamplers[i]; @@ -311,6 +316,9 @@ GR_VOID GR_STDCALL grAttachSamplerDescriptors( STACK_ARRAY_FINISH(writeDescriptors); } + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + ReleaseSRWLockExclusive(&grDescriptorSet->descriptorLock); + } } GR_VOID GR_STDCALL grAttachImageViewDescriptors( @@ -323,6 +331,10 @@ GR_VOID GR_STDCALL grAttachImageViewDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + AcquireSRWLockExclusive(&grDescriptorSet->descriptorLock); + } + if (grDevice->descriptorBufferSupported && grDevice->descriptorBufferAllowPreparedImageView) { for 
(unsigned i = 0; i < slotCount; i++) { const GR_IMAGE_VIEW_ATTACH_INFO* info = &pImageViews[i]; @@ -428,6 +440,10 @@ GR_VOID GR_STDCALL grAttachImageViewDescriptors( VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); STACK_ARRAY_FINISH(writeDescriptors); } + + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + ReleaseSRWLockExclusive(&grDescriptorSet->descriptorLock); + } } GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( @@ -441,6 +457,10 @@ GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); VkResult vkRes; + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + AcquireSRWLockExclusive(&grDescriptorSet->descriptorLock); + } + if (grDevice->descriptorBufferSupported) { for (unsigned i = 0; i < slotCount; i++) { DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; @@ -596,6 +616,10 @@ GR_VOID GR_STDCALL grAttachMemoryViewDescriptors( VKD.vkUpdateDescriptorSets(grDevice->device, descriptorWriteCount, writeDescriptors, 0, NULL); STACK_ARRAY_FINISH(writeDescriptors); } + + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + ReleaseSRWLockExclusive(&grDescriptorSet->descriptorLock); + } } GR_VOID GR_STDCALL grAttachNestedDescriptors( @@ -608,6 +632,10 @@ GR_VOID GR_STDCALL grAttachNestedDescriptors( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + AcquireSRWLockExclusive(&grDescriptorSet->descriptorLock); + } + for (unsigned i = 0; i < slotCount; i++) { DescriptorSetSlot* slot = &grDescriptorSet->slots[startSlot + i]; const GR_DESCRIPTOR_SET_ATTACH_INFO* info = &pNestedDescriptorSets[i]; @@ -622,6 +650,9 @@ GR_VOID GR_STDCALL grAttachNestedDescriptors( }, }; } + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + ReleaseSRWLockExclusive(&grDescriptorSet->descriptorLock); + } } 
GR_VOID GR_STDCALL grClearDescriptorSetSlots( @@ -633,6 +664,10 @@ GR_VOID GR_STDCALL grClearDescriptorSetSlots( GrDescriptorSet* grDescriptorSet = (GrDescriptorSet*)descriptorSet; const GrDevice* grDevice = GET_OBJ_DEVICE(grDescriptorSet); + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + AcquireSRWLockExclusive(&grDescriptorSet->descriptorLock); + } + if (grDevice->descriptorBufferSupported) { memset(grDescriptorSet->descriptorBufferPtr + (startSlot * DESCRIPTORS_PER_SLOT * grDevice->maxMutableDescriptorSize), 0, grDevice->maxMutableDescriptorSize * slotCount * DESCRIPTORS_PER_SLOT); memset(&grDescriptorSet->slots[startSlot], 0, sizeof(DescriptorSetSlot) * slotCount); @@ -645,4 +680,8 @@ GR_VOID GR_STDCALL grClearDescriptorSetSlots( slot->type = SLOT_TYPE_NONE; } } + + if (quirkHas(QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED)) { + ReleaseSRWLockExclusive(&grDescriptorSet->descriptorLock); + } } diff --git a/src/mantle/mantle_object.h b/src/mantle/mantle_object.h index 315f1936..85f5176c 100644 --- a/src/mantle/mantle_object.h +++ b/src/mantle/mantle_object.h @@ -233,6 +233,7 @@ typedef struct _GrDescriptorSet { GrObject grObj; unsigned slotCount; DescriptorSetSlot* slots; + SRWLOCK descriptorLock; VkDescriptorPool descriptorPool; VkDescriptorSet descriptorSet; void* descriptorBufferPtr; diff --git a/src/mantle/quirk.c b/src/mantle/quirk.c index 4c35fcd8..236318fc 100644 --- a/src/mantle/quirk.c +++ b/src/mantle/quirk.c @@ -17,7 +17,8 @@ void quirkInit( QUIRK_INVALID_CMD_BUFFER_RESET | QUIRK_CUBEMAP_LAYER_DIV_6 | QUIRK_SILENCE_TRANSFER_ONLY_LINEAR_IMAGE_WARNINGS | - QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION; + QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION | + QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED; } else if (!strcmp(appInfo->pEngineName, "CivTech")) { mQuirks = QUIRK_NON_ZERO_MEM_REQ | QUIRK_READ_ONLY_IMAGE_STATE_MISMATCH | diff --git a/src/mantle/quirk.h b/src/mantle/quirk.h index 1f1f1336..6fa32de5 100644 --- a/src/mantle/quirk.h +++ 
b/src/mantle/quirk.h @@ -31,6 +31,9 @@ typedef enum { // Star Swarm uses completely incompatible memory types for descriptor buffers QUIRK_DESCRIPTOR_SET_USE_DEDICATED_ALLOCATION = 1 << 8, + + // Star Swarm also doesn't externally synchronize descriptor set updates, so they are guarded by an internal lock + QUIRK_DESCRIPTOR_SET_INTERNAL_SYNCHRONIZED = 1 << 9, } QUIRK_FLAGS; void quirkInit(