diff --git a/.github/workflows/build-nightly.yaml b/.github/workflows/build-nightly.yaml index 67fa890e801..285d648da5f 100644 --- a/.github/workflows/build-nightly.yaml +++ b/.github/workflows/build-nightly.yaml @@ -143,12 +143,6 @@ jobs: # arch: win64_msvc2017_64 # cached: ${{ steps.cache-qt-win.outputs.cache-hit }} # aqtversion: ==0.8 - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Configure CMake env: CONFIGURATION: ${{ matrix.configuration }} @@ -158,20 +152,11 @@ jobs: run: | mkdir build cd build - - if [ "$ARCHITECTURE" = "Win32" ]; then - cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ - -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ - -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ - -DFSO_INSTALL_DEBUG_FILES="ON" -DFSO_BUILD_WITH_VULKAN="OFF" -A "$ARCHITECTURE" \ - -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - else - cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ - -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ - -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ - -DFSO_INSTALL_DEBUG_FILES="ON" -A "$ARCHITECTURE" \ - -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - fi + cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ + -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ + -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ + -DFSO_INSTALL_DEBUG_FILES="ON" -A "$ARCHITECTURE" \ + -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. 
- name: Compile working-directory: ./build env: @@ -269,12 +254,6 @@ jobs: - name: Set workspace as safe # This appears to be broken in current actions, so do it manually run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Set up test version shell: bash run: | diff --git a/.github/workflows/build-release.yaml b/.github/workflows/build-release.yaml index 8f803214011..2b0f4d5d4b7 100644 --- a/.github/workflows/build-release.yaml +++ b/.github/workflows/build-release.yaml @@ -199,12 +199,6 @@ jobs: # arch: win64_msvc2017_64 # cached: ${{ steps.cache-qt-win.outputs.cache-hit }} # aqtversion: ==0.8 - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Configure CMake env: CONFIGURATION: ${{ matrix.configuration }} @@ -214,20 +208,11 @@ jobs: run: | mkdir build cd build - - if [ "$ARCHITECTURE" = "Win32" ]; then - cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ - -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ - -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ - -DFSO_INSTALL_DEBUG_FILES="ON" -DFSO_BUILD_WITH_VULKAN="OFF" -A "$ARCHITECTURE" \ - -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - else - cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ - -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ - -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ - -DFSO_INSTALL_DEBUG_FILES="ON" -A "$ARCHITECTURE" \ - -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. 
- fi + cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ + -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ + -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ + -DFSO_INSTALL_DEBUG_FILES="ON" -A "$ARCHITECTURE" \ + -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - name: Compile working-directory: ./build env: @@ -345,12 +330,6 @@ jobs: - name: Set workspace as safe # This appears to be broken in current actions, so do it manually run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Configure CMake env: CONFIGURATION: ${{ matrix.configuration }} diff --git a/.github/workflows/build-test.yaml b/.github/workflows/build-test.yaml index 0e60cc89f6c..6ebd2471412 100644 --- a/.github/workflows/build-test.yaml +++ b/.github/workflows/build-test.yaml @@ -141,12 +141,6 @@ jobs: # arch: win64_msvc2017_64 # cached: ${{ steps.cache-qt-win.outputs.cache-hit }} # aqtversion: ==0.8 - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Configure CMake env: CONFIGURATION: ${{ matrix.configuration }} @@ -156,20 +150,11 @@ jobs: run: | mkdir build cd build - - if [ "$ARCHITECTURE" = "Win32" ]; then - cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ - -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ - -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ - -DFSO_INSTALL_DEBUG_FILES="ON" -DFSO_BUILD_WITH_VULKAN="OFF" -A "$ARCHITECTURE" \ - -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. 
- else - cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ - -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ - -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ - -DFSO_INSTALL_DEBUG_FILES="ON" -A "$ARCHITECTURE" \ - -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - fi + cmake -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DFSO_USE_SPEECH="ON" \ + -DFSO_USE_VOICEREC="ON" -DFORCED_SIMD_INSTRUCTIONS="$SIMD" \ + -DFSO_BUILD_QTFRED=OFF -DFSO_BUILD_TESTS=ON \ + -DFSO_INSTALL_DEBUG_FILES="ON" -A "$ARCHITECTURE" \ + -G "Visual Studio 17 2022" -T "v143" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - name: Compile working-directory: ./build env: @@ -268,12 +253,6 @@ jobs: - name: Set workspace as safe # This appears to be broken in current actions, so do it manually run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Set up test version shell: bash run: | diff --git a/.github/workflows/test-pull_request.yaml b/.github/workflows/test-pull_request.yaml index 14383e589ff..966cda9caa0 100644 --- a/.github/workflows/test-pull_request.yaml +++ b/.github/workflows/test-pull_request.yaml @@ -120,12 +120,6 @@ jobs: # arch: win64_msvc2017_64 # cached: ${{ steps.cache-qt-win.outputs.cache-hit }} # aqtversion: ==0.8 - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Configure CMake env: CONFIGURATION: ${{ matrix.configuration }} @@ -137,23 +131,12 @@ jobs: cd build if [ "$COMPILER" = "MinGW" ]; then - if [ "$ARCHITECTURE" = "Win32" ]; then - cmake -DFSO_USE_SPEECH="OFF" -DFSO_FATAL_WARNINGS="ON" -DFSO_USE_VOICEREC="OFF" -DFSO_BUILD_TESTS="ON" \ - -DFSO_BUILD_FRED2="OFF" 
-DFSO_BUILD_WITH_VULKAN="OFF" -DCMAKE_BUILD_TYPE=$CONFIGURATION -G "Ninja" .. - else - cmake -DFSO_USE_SPEECH="OFF" -DFSO_FATAL_WARNINGS="ON" -DFSO_USE_VOICEREC="OFF" -DFSO_BUILD_TESTS="ON" \ - -DFSO_BUILD_FRED2="OFF" -DCMAKE_BUILD_TYPE=$CONFIGURATION -G "Ninja" .. - fi + cmake -DFSO_USE_SPEECH="OFF" -DFSO_FATAL_WARNINGS="ON" -DFSO_USE_VOICEREC="OFF" -DFSO_BUILD_TESTS="ON" \ + -DFSO_BUILD_FRED2="OFF" -DCMAKE_BUILD_TYPE=$CONFIGURATION -G "Ninja" .. else - if [ "$ARCHITECTURE" = "Win32" ]; then - cmake -DFSO_USE_SPEECH="ON" -DFSO_FATAL_WARNINGS="ON" -DFSO_USE_VOICEREC="OFF" -DFSO_BUILD_TESTS="ON" \ - -DFORCED_SIMD_INSTRUCTIONS=SSE2 -DFSO_BUILD_FRED2="ON" -DFSO_BUILD_WITH_VULKAN="OFF" -G "Visual Studio 17 2022" \ - -DFSO_BUILD_QTFRED=OFF -T "v143" -A "$ARCHITECTURE" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - else - cmake -DFSO_USE_SPEECH="ON" -DFSO_FATAL_WARNINGS="ON" -DFSO_USE_VOICEREC="OFF" -DFSO_BUILD_TESTS="ON" \ - -DFORCED_SIMD_INSTRUCTIONS=SSE2 -DFSO_BUILD_FRED2="ON" -G "Visual Studio 17 2022" \ - -DFSO_BUILD_QTFRED=OFF -T "v143" -A "$ARCHITECTURE" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. - fi + cmake -DFSO_USE_SPEECH="ON" -DFSO_FATAL_WARNINGS="ON" -DFSO_USE_VOICEREC="OFF" -DFSO_BUILD_TESTS="ON" \ + -DFORCED_SIMD_INSTRUCTIONS=SSE2 -DFSO_BUILD_FRED2="ON" -G "Visual Studio 17 2022" \ + -DFSO_BUILD_QTFRED=OFF -T "v143" -A "$ARCHITECTURE" -DCMAKE_BUILD_TYPE=$CONFIGURATION .. 
fi - name: Compile working-directory: ./build @@ -218,12 +201,6 @@ jobs: with: key: ${{ runner.os }}-${{ matrix.configuration }}-${{ matrix.compiler }}-${{ matrix.arch }} save: false # Caches are created by a separate job and only restored for PRs - - name: Prepare Vulkan SDK - uses: humbletim/setup-vulkan-sdk@v1.2.1 - with: - vulkan-query-version: 1.4.304.1 - vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true - name: Configure CMake env: CONFIGURATION: ${{ matrix.configuration }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 27279b799f8..84f77d04ffa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,7 +129,7 @@ OPTION(FSO_BUILD_WITH_OPENGL "Enable compilation of the OpenGL renderer" ON) OPTION(FSO_BUILD_WITH_OPENGL_DEBUG "Enables debug option for OpenGL" OFF) OPTION(FSO_BUILD_WITH_OPENGL_ES "When building OpenGL, use OpenGL ES compatibility layer (External shaders only!)" OFF) -OPTION(FSO_BUILD_WITH_VULKAN "Enable compilation of the Vulkan renderer" OFF) +OPTION(FSO_BUILD_WITH_VULKAN "Enable compilation of the Vulkan renderer" ON) if(NOT FSO_BUILD_WITH_OPENGL AND FSO_BUILD_WITH_OPENGL_ES) # Disable GLES if not building with OpenGL diff --git a/ci/linux/clang_tidy.sh b/ci/linux/clang_tidy.sh index 8e514ef40fe..2a0eec709cb 100755 --- a/ci/linux/clang_tidy.sh +++ b/ci/linux/clang_tidy.sh @@ -18,13 +18,8 @@ fi # branch BASE_COMMIT=$(git merge-base $1 $2) -# Note: Manually passing in the Vulkan flags that are normally provided by cmake (but are not so, here), to ensure -# that the source files are checked with the actual configuration used. 
echo "Running clang-tidy on changed files" git diff -U0 --no-color "$BASE_COMMIT..$2" | \ $HERE/clang-tidy-diff.py -path "$(pwd)/build" -p1 \ - -extra-arg="-DWITH_VULKAN" \ - -extra-arg="-DVULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1" \ - -extra-arg="-DVK_NO_PROTOTYPES" \ -regex '(code(?!((\/graphics\/shaders\/compiled)|(\/globalincs\/windebug)|(\/def_files\/data)))|freespace2|qtfred|test\/src|build|tools)\/.*\.(cpp|h)' \ -clang-tidy-binary /usr/bin/clang-tidy-16 -j$(nproc) -export-fixes "$(pwd)/clang-fixes.yaml" diff --git a/ci/linux/configure_cmake.sh b/ci/linux/configure_cmake.sh index bb0decb22be..3a9b7b0e14d 100755 --- a/ci/linux/configure_cmake.sh +++ b/ci/linux/configure_cmake.sh @@ -17,8 +17,6 @@ LD_LIBRARY_PATH=$Qt5_DIR/lib:$LD_LIBRARY_PATH if [ "$RUNNER_OS" = "macOS" ]; then CXXFLAGS="-mtune=generic -pipe -Wno-unknown-pragmas" CFLAGS="-mtune=generic -pipe -Wno-unknown-pragmas" - # TODO: Vulkan support is disabled on MacOS due to issues with the test suite not linking correctly - PLATFORM_CMAKE_OPTIONS="-DFSO_BUILD_WITH_VULKAN=OFF" export CMAKE_OSX_ARCHITECTURES="$ARCHITECTURE" else PLATFORM_CMAKE_OPTIONS="-DFSO_BUILD_APPIMAGE=ON -DFORCED_SIMD_INSTRUCTIONS=SSE2 -DUSE_STATIC_LIBCXX=ON" @@ -26,8 +24,8 @@ fi CMAKE_OPTIONS="$JOB_CMAKE_OPTIONS" if [[ "$COMPILER" =~ ^clang.*$ ]]; then + # Force clang to silently allow -static-libstdc++ flag CMAKE_OPTIONS="$CMAKE_OPTIONS -DCLANG_USE_LIBCXX=ON" - # force clang to silently allow -static-libstdc++ flag fi if [ ! "$CCACHE_PATH" = "" ]; then @@ -52,4 +50,4 @@ fi cmake -G Ninja -DFSO_FATAL_WARNINGS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON $CMAKE_OPTIONS $PLATFORM_CMAKE_OPTIONS \ -DCMAKE_INSTALL_PREFIX="$(pwd)/install" -DCMAKE_BUILD_TYPE=$CONFIGURATION \ -DFFMPEG_USE_PRECOMPILED=ON -DFSO_BUILD_TESTS=ON -DFSO_BUILD_INCLUDED_LIBS=ON -DFSO_BUILD_QTFRED=${ENABLE_QTFRED:-OFF} \ - -DSHADERS_ENABLE_COMPILATION=ON -DCMAKE_JOB_POOLS=link=1 -DCMAKE_JOB_POOL_LINK=link .. + -DCMAKE_JOB_POOLS=link=1 -DCMAKE_JOB_POOL_LINK=link .. 
diff --git a/cmake/util.cmake b/cmake/util.cmake index d162e032fe3..bfce4de182d 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -66,33 +66,37 @@ ENDIF(EXISTS \"${CMAKE_CURRENT_BINARY_DIR}/${TARGET}/${FILE}\") SET(${OUTVAR} "${CMAKE_COMMAND}" -P ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}/${NAME} PARENT_SCOPE) ENDFUNCTION(EP_CHECK_FILE_EXISTS) +# Use rsync if possible for COPY_FILE_TO_TARGET macro in order to preserve +# symlinks as copy_if_different follows them which results in multiple copies +if(PLATFORM_UNIX AND NOT DEFINED RSYNC_BIN) + find_program(RSYNC_BIN rsync) +endif() + MACRO(COPY_FILE_TO_TARGET _target _file) - if (IS_DIRECTORY "${_file}") - get_filename_component(_dirName "${_file}" NAME) - if (PLATFORM_MAC AND ("${_file}" MATCHES ".framework$")) - # This is stupid, but it preserves symlinks, unlike copy_directory_if_different. - # Otherwise we end up creating duplicate files in the copied framework. + if(RSYNC_BIN) + ADD_CUSTOM_COMMAND( + TARGET ${_target} POST_BUILD + COMMAND ${RSYNC_BIN} -rlq "${_file}" "$<TARGET_FILE_DIR:${_target}>/${LIBRAY_DESTINATION}" + COMMENT "copying '${_file}'..." + VERBATIM + ) + else() + if (IS_DIRECTORY "${_file}") + get_filename_component(_dirName "${_file}" NAME) ADD_CUSTOM_COMMAND( TARGET ${_target} POST_BUILD - COMMAND rsync -rlq "${_file}" "$<TARGET_FILE_DIR:${_target}>/${LIBRAY_DESTINATION}" + COMMAND ${CMAKE_COMMAND} -E copy_directory_if_different "${_file}" "$<TARGET_FILE_DIR:${_target}>/${LIBRAY_DESTINATION}/${_dirName}" COMMENT "copying '${_file}'..." VERBATIM ) else() ADD_CUSTOM_COMMAND( TARGET ${_target} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory_if_different "${_file}" "$<TARGET_FILE_DIR:${_target}>/${LIBRAY_DESTINATION}/${_dirName}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_file}" "$<TARGET_FILE_DIR:${_target}>/${LIBRAY_DESTINATION}/" COMMENT "copying '${_file}'..." VERBATIM ) endif() - else() - ADD_CUSTOM_COMMAND( - TARGET ${_target} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_file}" "$<TARGET_FILE_DIR:${_target}>/${LIBRAY_DESTINATION}/" - COMMENT "copying '${_file}'..." 
- VERBATIM - ) endif() endmacro(COPY_FILE_TO_TARGET) diff --git a/code/CMakeLists.txt b/code/CMakeLists.txt index b622564e7ed..4a20ff99314 100644 --- a/code/CMakeLists.txt +++ b/code/CMakeLists.txt @@ -113,16 +113,9 @@ if (FSO_BUILD_WITH_OPENGL) target_compile_definitions(code PUBLIC WITH_OPENGL) endif() if (FSO_BUILD_WITH_VULKAN) - find_package(Vulkan REQUIRED) - if (Vulkan_FOUND) - target_compile_definitions(code PUBLIC WITH_VULKAN) - target_link_libraries(code PRIVATE Vulkan::Vulkan) - - target_compile_definitions(code PUBLIC WITH_VULKAN VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1 VK_NO_PROTOTYPES) - else() - message(WARNING "FSO_BUILD_WITH_VULKAN was set, but the package was unable to be found. Forcing OFF.") - set(FSO_BUILD_WITH_VULKAN OFF CACHE BOOL "Enable compilation of the Vulkan renderer" FORCE) - endif() + target_compile_definitions(code PUBLIC WITH_VULKAN VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1 VK_NO_PROTOTYPES) + target_include_directories(code PUBLIC Vulkan::Headers) + target_link_libraries(code PUBLIC VulkanMemoryAllocator) endif() include(shaders.cmake) diff --git a/code/ddsutils/ddsutils.cpp b/code/ddsutils/ddsutils.cpp index a843a874495..84fd1bdcb94 100644 --- a/code/ddsutils/ddsutils.cpp +++ b/code/ddsutils/ddsutils.cpp @@ -1,18 +1,12 @@ #include "ddsutils/ddsutils.h" #include "cfile/cfile.h" +#include "graphics/2d.h" #include "osapi/osregistry.h" #ifdef USE_OPENGL_ES #include "graphics/opengl/es_compatibility.h" #endif -#ifdef WITH_OPENGL -#include <glad/glad.h> -#else -static constexpr int GLAD_GL_EXT_texture_compression_s3tc = 0; -static constexpr int GLAD_GL_ARB_texture_compression_bptc = 0; -#endif - #define BCDEC_IMPLEMENTATION 1 PUSH_SUPPRESS_WARNINGS #include "ddsutils/bcdec.h" @@ -60,11 +54,11 @@ static bool conversion_needed(const DDS_HEADER &dds_header) case FOURCC_DXT1: case FOURCC_DXT3: case FOURCC_DXT5: - return !GLAD_GL_EXT_texture_compression_s3tc; + return !gr_is_capable(gr_capability::CAPABILITY_S3TC); case FOURCC_DX10: // anything other than BC7 will 
end up invalid - return !GLAD_GL_ARB_texture_compression_bptc; + return !gr_is_capable(gr_capability::CAPABILITY_BPTC); default: break; @@ -226,7 +220,7 @@ static size_t compute_dds_size(const DDS_HEADER &dds_header, bool converting = f if (dds_header.ddspf.dwFlags & DDPF_FOURCC) { // size of data block (4x4) - d_size += ((d_width + 3) / 4) * ((d_height + 3) / 4) * d_depth * ((dds_header.ddspf.dwFourCC == FOURCC_DXT1) ? 8 : 16); + d_size += dds_compressed_mip_size(d_width, d_height, (dds_header.ddspf.dwFourCC == FOURCC_DXT1) ? 8 : 16) * d_depth; } else { d_size += d_width * d_height * d_depth * (dds_header.ddspf.dwRGBBitCount / 8); } diff --git a/code/ddsutils/ddsutils.h b/code/ddsutils/ddsutils.h index 0d6a6ca6392..e262204a397 100644 --- a/code/ddsutils/ddsutils.h +++ b/code/ddsutils/ddsutils.h @@ -273,6 +273,27 @@ typedef struct { } DDS_HEADER_DXT10; #pragma pack() +// Block size in bytes for a 4x4 texel block of a compressed DDS format. +// comp_type is one of the DDS_DXT*/DDS_CUBEMAP_DXT* constants. +inline int dds_block_size(int comp_type) { + switch (comp_type) { + case DDS_DXT1: + case DDS_CUBEMAP_DXT1: + return 8; + case DDS_DXT3: case DDS_CUBEMAP_DXT3: + case DDS_DXT5: case DDS_CUBEMAP_DXT5: + case DDS_BC7: + return 16; + default: + return 0; + } +} + +// Size in bytes of one mip level of a block-compressed texture. 
+inline size_t dds_compressed_mip_size(int w, int h, int block_size) { + return static_cast<size_t>(((w + 3) / 4) * ((h + 3) / 4) * block_size); +} + #define DDS_OFFSET 4+sizeof(DDS_HEADER) //place where the data starts -- should be 128 #define DX10_OFFSET DDS_OFFSET+sizeof(DDS_HEADER_DXT10) // Unless a DX10 header is present diff --git a/code/def_files/data/effects/batched-f.sdr b/code/def_files/data/effects/batched-f.sdr index 82239181efa..eced516efa2 100644 --- a/code/def_files/data/effects/batched-f.sdr +++ b/code/def_files/data/effects/batched-f.sdr @@ -1,6 +1,31 @@ #include "gamma.sdr" +#ifdef VULKAN +layout (location = 0) in vec4 fragTexCoord; +layout (location = 1) in vec4 fragColor; + +layout (location = 0) out vec4 fragOut0; + +layout (set = 1, binding = 1) uniform sampler2DArray baseMap; + +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 modelMatrix; + + vec4 color; + + vec4 clipEquation; + + int baseMapIndex; + int alphaTexture; + int noTexturing; + int srgb; + + float intensity; + float alphaThreshold; + uint clipEnabled; +}; +#else in vec4 fragTexCoord; in vec4 fragColor; @@ -12,6 +37,7 @@ layout (std140) uniform genericData { vec4 color; float intensity; }; +#endif void main() { diff --git a/code/def_files/data/effects/batched-v.sdr b/code/def_files/data/effects/batched-v.sdr index 499ed2a6a0f..8efb153c3a7 100644 --- a/code/def_files/data/effects/batched-v.sdr +++ b/code/def_files/data/effects/batched-v.sdr @@ -1,3 +1,33 @@ +#ifdef VULKAN +layout (location = 0) in vec4 vertPosition; +layout (location = 1) in vec4 vertColor; +layout (location = 2) in vec4 vertTexCoord; + +layout (location = 0) out vec4 fragTexCoord; +layout (location = 1) out vec4 fragColor; + +layout (set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 modelMatrix; + + vec4 color; + + vec4 clipEquation; + + int baseMapIndex; + int alphaTexture; + int 
noTexturing; + int srgb; + + float intensity; + float alphaThreshold; + uint clipEnabled; +}; +#else in vec4 vertPosition; in vec4 vertTexCoord; in vec4 vertColor; @@ -14,6 +44,7 @@ layout (std140) uniform genericData { vec4 color; float intensity; }; +#endif void main() { diff --git a/code/def_files/data/effects/bloom-comp-f.sdr b/code/def_files/data/effects/bloom-comp-f.sdr index bc502b31313..20d8859a643 100644 --- a/code/def_files/data/effects/bloom-comp-f.sdr +++ b/code/def_files/data/effects/bloom-comp-f.sdr @@ -1,11 +1,20 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D bloomed; +#else in vec4 fragTexCoord; - out vec4 fragOut0; - uniform sampler2D bloomed; +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout(std140) +#endif +uniform genericData { float bloom_intensity; int levels; }; diff --git a/code/def_files/data/effects/blur-f.sdr b/code/def_files/data/effects/blur-f.sdr index b170964a627..8063b8e4e03 100644 --- a/code/def_files/data/effects/blur-f.sdr +++ b/code/def_files/data/effects/blur-f.sdr @@ -1,16 +1,26 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D tex; +#else out vec4 fragOut0; - in vec4 fragTexCoord; - uniform sampler2D tex; +#endif -const float TapSize = 1.0; - -layout (std140) uniform genericData { +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { float texSize; int level; + int pad[2]; }; +const float TapSize = 1.0; + // Gaussian Blur // 2 passes required void main() diff --git a/code/def_files/data/effects/brightpass-f.sdr b/code/def_files/data/effects/brightpass-f.sdr index d92f74e5f9f..e1632a23d13 100644 --- a/code/def_files/data/effects/brightpass-f.sdr +++ b/code/def_files/data/effects/brightpass-f.sdr @@ -1,6 +1,12 
@@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D tex; +#else in vec4 fragTexCoord; out vec4 fragOut0; uniform sampler2D tex; +#endif const float Luminance = 0.08; const float fMiddleGray = 0.2; const float fWhiteCutoff = 0.4; diff --git a/code/def_files/data/effects/copy-f.sdr b/code/def_files/data/effects/copy-f.sdr index 6c0b8d74781..45f5082c027 100644 --- a/code/def_files/data/effects/copy-f.sdr +++ b/code/def_files/data/effects/copy-f.sdr @@ -1,15 +1,20 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D tex; +#else in vec4 fragTexCoord; out vec4 fragOut0; - #ifdef COPY_ARRAY uniform sampler2DArray tex; #else uniform sampler2D tex; #endif +#endif void main() { -#ifdef COPY_ARRAY +#if defined(COPY_ARRAY) && !defined(VULKAN) fragOut0 = texture(tex, vec3(fragTexCoord.xy, 0)); #else fragOut0 = texture(tex, fragTexCoord.xy); diff --git a/code/def_files/data/effects/decal-f.sdr b/code/def_files/data/effects/decal-f.sdr index 138caae0ec5..34d65a67ba8 100644 --- a/code/def_files/data/effects/decal-f.sdr +++ b/code/def_files/data/effects/decal-f.sdr @@ -6,6 +6,29 @@ #include "normals.sdr" #include "gamma.sdr" +#ifdef VULKAN +// G-buffer: 6 attachments, 1/3/5 are write-masked by pipeline blend state +layout (location = 0) out vec4 fragOut0; // Color/Diffuse +layout (location = 1) out vec4 fragOut1; // Position (masked) +layout (location = 2) out vec4 fragOut2; // Normal +layout (location = 3) out vec4 fragOut3; // Specular (masked) +layout (location = 4) out vec4 fragOut4; // Emissive +layout (location = 5) out vec4 fragOut5; // Composite (masked) + +layout (location = 0) flat in mat4 invModelMatrix; // locations 0-3 +layout (location = 4) flat in vec3 decalDirection; +layout (location = 5) flat in float normal_angle_cutoff; +layout (location = 6) flat in float 
angle_fade_start; +layout (location = 7) flat in float alpha_scale; + +layout (set = 1, binding = 1) uniform sampler2DArray decalTextures; +#define diffuseMap decalTextures +#define glowMap decalTextures +#define normalMap decalTextures + +layout (set = 1, binding = 4) uniform sampler2D gDepthBuffer; +layout (set = 1, binding = 6) uniform sampler2D gNormalBuffer; +#else out vec4 fragOut0; // Diffuse buffer out vec4 fragOut1; // Normal buffer out vec4 fragOut2; // Emissive buffer @@ -22,8 +45,14 @@ uniform sampler2D gNormalBuffer; uniform sampler2DArray diffuseMap; uniform sampler2DArray glowMap; uniform sampler2DArray normalMap; +#endif -layout (std140) uniform decalGlobalData { +#ifdef VULKAN +layout (set = 1, binding = 2, std140) +#else +layout (std140) +#endif +uniform decalGlobalData { mat4 viewMatrix; mat4 projMatrix; mat4 invViewMatrix; @@ -32,7 +61,12 @@ layout (std140) uniform decalGlobalData { vec2 viewportSize; }; -layout (std140) uniform decalInfoData { +#ifdef VULKAN +layout (set = 2, binding = 3, std140) +#else +layout (std140) +#endif +uniform decalInfoData { int diffuse_index; int glow_index; int normal_index; @@ -45,8 +79,16 @@ vec3 computeViewPosition(vec2 textureCoord) { vec4 clipSpaceLocation; vec2 normalizedCoord = textureCoord / viewportSize; +#ifdef VULKAN + clipSpaceLocation.x = normalizedCoord.x * 2.0 - 1.0; + // Vulkan negative viewport inverts Y mapping + clipSpaceLocation.y = 1.0 - normalizedCoord.y * 2.0; + // Vulkan depth is [0,1] — use directly + clipSpaceLocation.z = texelFetch(gDepthBuffer, ivec2(textureCoord), 0).r; +#else clipSpaceLocation.xy = normalizedCoord * 2.0f - 1.0f; clipSpaceLocation.z = texelFetch(gDepthBuffer, ivec2(textureCoord), 0).r * 2.0f - 1.0f; +#endif clipSpaceLocation.w = 1.0f; vec4 homogenousLocation = invProjMatrix * clipSpaceLocation; @@ -57,24 +99,24 @@ vec3 computeViewPosition(vec2 textureCoord) { vec3 getPixelNormal(vec3 frag_position, vec2 tex_coord, inout float alpha, out vec3 binormal, out vec3 tangent) 
{ #ifdef USE_NORMAL_MAP // If we can then we just use the existing normal buffer - vec3 normal = texelFetch(gNormalBuffer, ivec2(tex_coord), 0).xyz; + vec3 normal = texelFetch(gNormalBuffer, ivec2(tex_coord), 0).xyz; // If we use the normal map then we don't really need these values so we don't need to compute them here - binormal = vec3(0.0); - tangent = vec3(0.0); + binormal = vec3(0.0); + tangent = vec3(0.0); #else // Use some fancy screen-space derivates to determine the normal of the current pixel by looking at the surrounding pixels vec3 pos_dx = dFdx(frag_position); vec3 pos_dy = dFdy(frag_position); - vec3 normal = normalize(cross(pos_dx, pos_dy)); + vec3 normal = normalize(cross(pos_dx, pos_dy)); - binormal = normalize(pos_dx); - tangent = normalize(pos_dy); + binormal = normalize(pos_dx); + tangent = normalize(pos_dy); #endif //Calculate angle between surface normal and decal direction - float angle = acos(dot(normal, decalDirection)); + float angle = acos(clamp(dot(normal, decalDirection), -1.0, 1.0)); if (angle > normal_angle_cutoff) { // The angle between surface normal and decal direction is too big @@ -162,7 +204,18 @@ void main() { normal_out = tangentToView * decalNormal * alpha; } +#ifdef VULKAN + // 6-attachment G-buffer; attachments 1/3/5 are write-masked + fragOut0 = diffuse_out; + fragOut1 = vec4(0.0); + fragOut2 = vec4(normal_out, 0.0); + fragOut3 = vec4(0.0); + fragOut4 = emissive_out; + fragOut5 = vec4(0.0); +#else + // OpenGL: 3 draw buffers mapped to attachments 0/2/4 fragOut0 = diffuse_out; fragOut1 = vec4(normal_out, 0.0); fragOut2 = emissive_out; +#endif } diff --git a/code/def_files/data/effects/decal-v.sdr b/code/def_files/data/effects/decal-v.sdr index bd3511e0c7e..42d99146327 100644 --- a/code/def_files/data/effects/decal-v.sdr +++ b/code/def_files/data/effects/decal-v.sdr @@ -1,4 +1,20 @@ +#ifdef VULKAN +// Binding 0: box vertex positions +layout (location = 0) in vec4 vertPosition; + +// Binding 1: per-instance model matrix (mat4 
= 4 vec4s at locations 8-11) +layout (location = 8) in vec4 vertModelMatrix0; +layout (location = 9) in vec4 vertModelMatrix1; +layout (location = 10) in vec4 vertModelMatrix2; +layout (location = 11) in vec4 vertModelMatrix3; + +layout (location = 0) flat out mat4 invModelMatrix; // locations 0-3 +layout (location = 4) flat out vec3 decalDirection; +layout (location = 5) flat out float normal_angle_cutoff; +layout (location = 6) flat out float angle_fade_start; +layout (location = 7) flat out float alpha_scale; +#else in vec4 vertPosition; in mat4 vertModelMatrix; @@ -7,8 +23,14 @@ flat out vec3 decalDirection; flat out float normal_angle_cutoff; flat out float angle_fade_start; flat out float alpha_scale; +#endif -layout (std140) uniform decalGlobalData { +#ifdef VULKAN +layout (set = 1, binding = 2, std140) +#else +layout (std140) +#endif +uniform decalGlobalData { mat4 viewMatrix; mat4 projMatrix; mat4 invViewMatrix; @@ -17,7 +39,12 @@ layout (std140) uniform decalGlobalData { vec2 viewportSize; }; -layout (std140) uniform decalInfoData { +#ifdef VULKAN +layout (set = 2, binding = 3, std140) +#else +layout (std140) +#endif +uniform decalInfoData { int diffuse_index; int glow_index; int normal_index; @@ -27,6 +54,11 @@ layout (std140) uniform decalInfoData { }; void main() { +#ifdef VULKAN + // Reconstruct per-instance model matrix from 4 vec4 columns + mat4 vertModelMatrix = mat4(vertModelMatrix0, vertModelMatrix1, vertModelMatrix2, vertModelMatrix3); +#endif + normal_angle_cutoff = vertModelMatrix[0][3]; angle_fade_start = vertModelMatrix[1][3]; alpha_scale = vertModelMatrix[2][3]; diff --git a/code/graphics/shaders/default-material.frag b/code/def_files/data/effects/default-material-f.sdr similarity index 60% rename from code/graphics/shaders/default-material.frag rename to code/def_files/data/effects/default-material-f.sdr index 8ea9de01f4e..cec7df3a766 100644 --- a/code/graphics/shaders/default-material.frag +++ 
b/code/def_files/data/effects/default-material-f.sdr @@ -1,14 +1,9 @@ -#version 450 -#extension GL_ARB_separate_shader_objects : enable - -#include "gamma.sdr" - -layout (location = 0) in vec4 fragTexCoord; -layout (location = 1) in vec4 fragColor; - -layout (location = 0) out vec4 fragOut0; - -layout (binding = 1, std140) uniform genericData { +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { mat4 modelMatrix; vec4 color; @@ -22,10 +17,29 @@ layout (binding = 1, std140) uniform genericData { float intensity; float alphaThreshold; - bool clipEnabled; + uint clipEnabled; }; -layout(binding = 2) uniform sampler2DArray baseMap; +#ifdef VULKAN + +layout(set = 1, binding = 1) uniform sampler2DArray baseMap; + +layout(location = 0) in vec4 fragTexCoord; +layout(location = 1) in vec4 fragColor; + +layout(location = 0) out vec4 fragOut0; + +#else + +uniform sampler2DArray baseMap; + +in vec4 fragTexCoord; +in vec4 fragColor; +out vec4 fragOut0; + +#endif + +#include "gamma.sdr" void main() { diff --git a/code/def_files/data/effects/default-material-v.sdr b/code/def_files/data/effects/default-material-v.sdr new file mode 100644 index 00000000000..902240860ea --- /dev/null +++ b/code/def_files/data/effects/default-material-v.sdr @@ -0,0 +1,70 @@ +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { + mat4 modelMatrix; + + vec4 color; + + vec4 clipEquation; + + int baseMapIndex; + int alphaTexture; + int noTexturing; + int srgb; + + float intensity; + float alphaThreshold; + uint clipEnabled; +}; + +#ifdef VULKAN +layout(set = 2, binding = 1, std140) +#else +layout(std140) +#endif +uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +#ifdef VULKAN + +layout(location = 0) in vec4 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec4 vertTexCoord; + +layout(location = 0) out vec4 fragTexCoord; +layout(location = 1) out 
vec4 fragColor; + +#else + +out float gl_ClipDistance[1]; + +out vec4 fragTexCoord; +in vec4 vertTexCoord; +out vec4 fragColor; +in vec4 vertColor; +in vec4 vertPosition; + +#endif + +void main() +{ + fragTexCoord = vertTexCoord; + fragColor = vertColor * color; + gl_Position = projMatrix * modelViewMatrix * vertPosition; + + if (clipEnabled != 0u) { + gl_ClipDistance[0] = dot(clipEquation, modelMatrix * vertPosition); + } else { +#ifdef VULKAN + // Vulkan has no glEnable(GL_CLIP_DISTANCE0) equivalent — clip distances are + // always evaluated when declared in the shader. Must write a positive value + // when clipping is disabled to prevent undefined-value vertex culling. + gl_ClipDistance[0] = 1.0; +#endif + } +} diff --git a/code/def_files/data/effects/deferred-f.sdr b/code/def_files/data/effects/deferred-f.sdr index 748432a009b..034619aec2c 100644 --- a/code/def_files/data/effects/deferred-f.sdr +++ b/code/def_files/data/effects/deferred-f.sdr @@ -4,8 +4,26 @@ #include "shadows.sdr" //! #include "shadows.sdr" #include "z-compress.sdr" //! 
#include "z-compress.sdr" +#ifdef VULKAN +layout(location = 0) out vec4 fragOut0; +#else out vec4 fragOut0; +#endif + +#ifdef VULKAN +layout(set = 1, binding = 1) uniform sampler2D sTextures[16]; +#define ColorBuffer sTextures[0] +#define NormalBuffer sTextures[1] +#define PositionBuffer sTextures[2] +#define SpecBuffer sTextures[3] + +layout(set = 0, binding = 2) uniform sampler2DArray shadow_map; +#ifdef ENV_MAP +layout(set = 0, binding = 3) uniform samplerCube sEnvmap; +layout(set = 0, binding = 4) uniform samplerCube sIrrmap; +#endif +#else uniform sampler2D ColorBuffer; uniform sampler2D NormalBuffer; uniform sampler2D PositionBuffer; @@ -16,8 +34,14 @@ uniform sampler2DArray shadow_map; uniform samplerCube sEnvmap; uniform samplerCube sIrrmap; #endif +#endif -layout (std140) uniform globalDeferredData { +#ifdef VULKAN +layout(set = 0, binding = 1, std140) +#else +layout(std140) +#endif +uniform globalDeferredData { mat4 shadow_mv_matrix; mat4 shadow_proj_matrix[4]; @@ -34,12 +58,22 @@ layout (std140) uniform globalDeferredData { float nearPlane; }; -layout (std140) uniform matrixData { +#ifdef VULKAN +layout(set = 2, binding = 1, std140) +#else +layout(std140) +#endif +uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; -layout (std140) uniform lightData { +#ifdef VULKAN +layout(set = 0, binding = 0, std140) +#else +layout(std140) +#endif +uniform lightData { vec3 diffuseLightColor; float coneAngle; @@ -47,18 +81,18 @@ layout (std140) uniform lightData { float coneInnerAngle; vec3 coneDir; - bool dualCone; + int dualCone; vec3 scale; float lightRadius; int lightType; - bool enable_shadows; + int enable_shadows; float sourceRadius; }; -// Nearest point sphere and tube light calculations taken from +// Nearest point sphere and tube light calculations taken from // "Real Shading in Unreal Engine 4" by Brian Karis, Epic Games // Part of SIGGRAPH 2013 Course: Physically Based Shading in Theory and Practice @@ -66,7 +100,7 @@ vec3 ExpandLightSize(in vec3 
lightDir, in vec3 reflectDir) { // There's an extra max(...,sourceRadius) call here vs the version in the paper. // This prevents the centerToRay calculation from choosing a point behind // the reflection ray's origin (i.e. underneath the surface). - // this is necessary for situations where the fragment being shaded lies inside + // this is necessary for situations where the fragment being shaded lies inside // the sourceRadius of the light. // Instead, we choose a point suffciently far away from the reflection origin (hence max(...,sourceRadius)) // so that we have a gradual transition as the shaded fragments along the surface @@ -152,7 +186,7 @@ void GetLightInfo(vec3 position, in float alpha, in vec3 reflectDir, out vec3 li attenuation = 1.0 - clamp(sqrt(dist / lightRadius), 0.0, 1.0); area_normalisation = 1.0; - if(dualCone) { + if(dualCone != 0) { if(abs(coneDot) < coneAngle) { discard; } else { @@ -257,7 +291,7 @@ void main() float area_normalisation; GetLightInfo(position, alpha, reflectDir, lightDir, attenuation, area_normalisation); - if (enable_shadows) { + if (enable_shadows != 0) { vec4 fragShadowPos = shadow_mv_matrix * inv_view_matrix * vec4(position, 1.0); vec4 fragShadowUV[4]; fragShadowUV[0] = transformToShadowMap(shadow_proj_matrix[0], 0, fragShadowPos); diff --git a/code/def_files/data/effects/deferred-v.sdr b/code/def_files/data/effects/deferred-v.sdr index f1633569875..2c9d4b02c66 100644 --- a/code/def_files/data/effects/deferred-v.sdr +++ b/code/def_files/data/effects/deferred-v.sdr @@ -1,14 +1,28 @@ #include "lighting.sdr" +#ifdef VULKAN +layout(location = 0) in vec4 vertPosition; +#else in vec4 vertPosition; - -layout (std140) uniform matrixData { +#endif + +#ifdef VULKAN +layout(set = 2, binding = 1, std140) +#else +layout(std140) +#endif +uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; -layout (std140) uniform lightData { +#ifdef VULKAN +layout(set = 0, binding = 0, std140) +#else +layout(std140) +#endif +uniform lightData 
{ vec3 diffuseLightColor; float coneAngle; @@ -16,13 +30,13 @@ layout (std140) uniform lightData { float coneInnerAngle; vec3 coneDir; - bool dualCone; + int dualCone; vec3 scale; float lightRadius; int lightType; - bool enable_shadows; + int enable_shadows; float sourceRadius; }; @@ -30,7 +44,13 @@ layout (std140) uniform lightData { void main() { if (lightType == LT_DIRECTIONAL || lightType == LT_AMBIENT) { +#ifdef VULKAN + // Fullscreen triangle from gl_VertexIndex (no vertex buffer) + vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); +#else gl_Position = vec4(vertPosition.xyz, 1.0); +#endif } else { gl_Position = projMatrix * modelViewMatrix * vec4(vertPosition.xyz * scale, 1.0); } diff --git a/code/def_files/data/effects/effect-distort-f.sdr b/code/def_files/data/effects/effect-distort-f.sdr index dc030e63067..b43907a39da 100644 --- a/code/def_files/data/effects/effect-distort-f.sdr +++ b/code/def_files/data/effects/effect-distort-f.sdr @@ -1,3 +1,15 @@ +#ifdef VULKAN +layout(location = 0) in vec4 fragTexCoord; +layout(location = 1) in vec4 fragColor; +layout(location = 2) in float fragOffset; + +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray baseMap; +layout(set = 1, binding = 4) uniform sampler2D depthMap; +layout(set = 1, binding = 5) uniform sampler2D frameBuffer; +layout(set = 1, binding = 6) uniform sampler2D distMap; +#else in vec4 fragTexCoord; in vec4 fragColor; in float fragOffset; @@ -8,8 +20,14 @@ uniform sampler2DArray baseMap; uniform sampler2D depthMap; uniform sampler2D distMap; uniform sampler2D frameBuffer; +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout (std140) +#endif +uniform genericData { float window_width; float window_height; float use_offset; diff --git a/code/def_files/data/effects/effect-distort-v.sdr b/code/def_files/data/effects/effect-distort-v.sdr index 
02c4790b8ab..5673b1baf3e 100644 --- a/code/def_files/data/effects/effect-distort-v.sdr +++ b/code/def_files/data/effects/effect-distort-v.sdr @@ -1,3 +1,13 @@ +#ifdef VULKAN +layout(location = 0) in vec4 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec4 vertTexCoord; +layout(location = 6) in float vertRadius; + +layout(location = 0) out vec4 fragTexCoord; +layout(location = 1) out vec4 fragColor; +layout(location = 2) out float fragOffset; +#else in vec4 vertPosition; in vec4 vertTexCoord; in vec4 vertColor; @@ -5,13 +15,24 @@ in float vertRadius; out vec4 fragTexCoord; out vec4 fragColor; out float fragOffset; +#endif -layout (std140) uniform matrixData { +#ifdef VULKAN +layout(std140, set = 2, binding = 1) +#else +layout (std140) +#endif +uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; -layout (std140) uniform genericData { +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout (std140) +#endif +uniform genericData { float window_width; float window_height; float use_offset; diff --git a/code/def_files/data/effects/effect-f.sdr b/code/def_files/data/effects/effect-f.sdr index 40ac90bce92..f46147830e5 100644 --- a/code/def_files/data/effects/effect-f.sdr +++ b/code/def_files/data/effects/effect-f.sdr @@ -2,6 +2,17 @@ #include "gamma.sdr" #include "z-compress.sdr" +#ifdef VULKAN +layout(location = 0) in vec4 fragTexCoord; +layout(location = 1) in vec4 fragColor; +layout(location = 2) in float fragRadius; +layout(location = 3) in vec4 fragPosition; + +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray baseMap; +layout(set = 1, binding = 4) uniform sampler2D depthMap; +#else in float fragRadius; in vec4 fragPosition; in vec4 fragTexCoord; @@ -11,8 +22,14 @@ out vec4 fragOut0; uniform sampler2DArray baseMap; uniform sampler2D depthMap; +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout(std140) 
+#endif +uniform genericData { float window_width; float window_height; float nearZ; @@ -39,7 +56,13 @@ void main() float sceneDepthLinear; float fragDepthLinear; if ( linear_depth == 1 ) { - sceneDepthLinear = -uncompress_depth_value(sceneDepth.z); + // Position buffer .z == 0 means no deferred geometry was rendered at this pixel + // (G-buffer was cleared to zero). Treat as max distance so particles render fully. + if (sceneDepth.z == 0.0) { + sceneDepthLinear = farZ; + } else { + sceneDepthLinear = -uncompress_depth_value(sceneDepth.z); + } fragDepthLinear = -fragPosition.z; } else { sceneDepthLinear = ( 2.0 * farZ * nearZ ) / ( farZ + nearZ - sceneDepth.x * (farZ-nearZ) ); diff --git a/code/def_files/data/effects/effect-v.sdr b/code/def_files/data/effects/effect-v.sdr index 3a86333b637..86244a7ee84 100644 --- a/code/def_files/data/effects/effect-v.sdr +++ b/code/def_files/data/effects/effect-v.sdr @@ -1,4 +1,15 @@ +#ifdef VULKAN +layout(location = 0) in vec4 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec4 vertTexCoord; +layout(location = 6) in float vertRadius; + +layout(location = 0) out vec4 fragTexCoord; +layout(location = 1) out vec4 fragColor; +layout(location = 2) out float fragRadius; +layout(location = 3) out vec4 fragPosition; +#else in vec4 vertPosition; in vec4 vertTexCoord; in vec4 vertColor; @@ -16,15 +27,21 @@ in float vertRadius; out vec4 fragTexCoord; out vec4 fragColor; #endif +#endif -layout (std140) uniform matrixData { +#ifdef VULKAN +layout(std140, set = 2, binding = 1) +#else +layout(std140) +#endif +uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; void main() { -#ifdef FLAG_EFFECT_GEOMETRY +#if !defined(VULKAN) && defined(FLAG_EFFECT_GEOMETRY) geoRadius = vertRadius; geoUvec = vertUvec; gl_Position = modelViewMatrix * vertPosition; diff --git a/code/def_files/data/effects/fog-f.sdr b/code/def_files/data/effects/fog-f.sdr index 3785d719722..6c822bb67e3 100644 --- 
a/code/def_files/data/effects/fog-f.sdr +++ b/code/def_files/data/effects/fog-f.sdr @@ -1,14 +1,27 @@ #include "gamma.sdr" +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D tex; +layout(set = 1, binding = 4) uniform sampler2D depth_tex; +#else in vec4 fragTexCoord; out vec4 fragOut0; uniform sampler2D tex; uniform sampler2D depth_tex; - -layout (std140) uniform genericData { +#endif + +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout (std140) +#endif +uniform genericData { vec3 fog_color; float fog_start; diff --git a/code/def_files/data/effects/fxaa-f.sdr b/code/def_files/data/effects/fxaa-f.sdr index ae2b28e2ec6..329b68384b7 100644 --- a/code/def_files/data/effects/fxaa-f.sdr +++ b/code/def_files/data/effects/fxaa-f.sdr @@ -487,10 +487,27 @@ FxaaFloat4 FxaaPixelShader( return FxaaFloat4(FxaaTexTop(tex, posM).xyz, lumaM); #endif } +#ifdef VULKAN +layout(set = 1, binding = 1) uniform sampler2D tex0; +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(std140, set = 2, binding = 0) uniform genericData { + float rt_w; + float rt_h; + float pad0; + float pad1; +}; +#else uniform sampler2D tex0; in vec2 v_rcpFrame; noperspective in vec2 v_pos; out vec4 fragOut0; +#endif + void main() { +#ifdef VULKAN + vec2 v_rcpFrame = vec2(1.0 / rt_w, 1.0 / rt_h); + vec2 v_pos = fragTexCoord; +#endif fragOut0 = FxaaPixelShader(v_pos, tex0, v_rcpFrame, FXAA_QUALITY_SUBPIX, FXAA_QUALITY_EDGE_THRESHOLD, FXAA_QUALITY_EDGE_THRESHOLD_MIN); } diff --git a/code/def_files/data/effects/fxaa-v.sdr b/code/def_files/data/effects/fxaa-v.sdr index 16aa5cc43e3..22a9053f2a1 100644 --- a/code/def_files/data/effects/fxaa-v.sdr +++ b/code/def_files/data/effects/fxaa-v.sdr @@ -1,4 +1,12 @@ +#ifdef VULKAN +layout(location = 0) out vec2 fragTexCoord; +void main() { + vec2 uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + fragTexCoord = uv; + 
gl_Position = vec4(uv * 2.0 - 1.0, 0.0, 1.0); +} +#else in vec4 vertPosition; out vec2 v_rcpFrame; @@ -14,3 +22,4 @@ void main() { v_rcpFrame = vec2(1.0/rt_w, 1.0/rt_h); v_pos = vertPosition.xy*0.5 + 0.5; } +#endif diff --git a/code/def_files/data/effects/fxaapre-f.sdr b/code/def_files/data/effects/fxaapre-f.sdr index 5756a847433..dbfc588863c 100644 --- a/code/def_files/data/effects/fxaapre-f.sdr +++ b/code/def_files/data/effects/fxaapre-f.sdr @@ -1,6 +1,12 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D tex; +#else in vec4 fragTexCoord; out vec4 fragOut0; uniform sampler2D tex; +#endif void main() { vec4 color = texture(tex, fragTexCoord.xy); fragOut0 = vec4(color.rgb, dot(color.rgb, vec3(0.299, 0.587, 0.114)) ); diff --git a/code/def_files/data/effects/irrmap-f.sdr b/code/def_files/data/effects/irrmap-f.sdr index 4400ff1f0f6..556480d22e5 100644 --- a/code/def_files/data/effects/irrmap-f.sdr +++ b/code/def_files/data/effects/irrmap-f.sdr @@ -1,16 +1,26 @@ //!#version 150 #include "gamma.sdr" //! #include "gamma.sdr" #define PI 3.1415926535897932384626433832795 + +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform samplerCube envmap; +#else uniform samplerCube envmap; +in vec4 fragTexCoord; +out vec4 fragOut0; +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { int face; }; -in vec4 fragTexCoord; - -out vec4 fragOut0; - // Iteratively optimised points to give low discrepancy distribution on arbitrary hemisphere. 
const vec3 points[128] = vec3[128]( vec3(-0.2268, 0.6185, 0.7523), diff --git a/code/def_files/data/effects/ls-f.sdr b/code/def_files/data/effects/ls-f.sdr index 037d47d71e1..cc4ff755da0 100644 --- a/code/def_files/data/effects/ls-f.sdr +++ b/code/def_files/data/effects/ls-f.sdr @@ -1,12 +1,26 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D textures[16]; +#define scene textures[0] +#define cockpit textures[1] +#else in vec4 fragTexCoord; out vec4 fragOut0; uniform sampler2D scene; uniform sampler2D cockpit; +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout (std140) +#endif +uniform genericData { vec2 sun_pos; float density; float weight; diff --git a/code/def_files/data/effects/main-f.sdr b/code/def_files/data/effects/main-f.sdr index 8a36e96d8e1..4e9e4c8fd7f 100644 --- a/code/def_files/data/effects/main-f.sdr +++ b/code/def_files/data/effects/main-f.sdr @@ -25,7 +25,12 @@ struct model_light { }; -layout (std140) uniform modelData { +#ifdef VULKAN +layout(set = 1, binding = 0, std140) +#else +layout(std140) +#endif +uniform modelData { mat4 modelViewMatrix; mat4 modelMatrix; mat4 viewMatrix; @@ -46,7 +51,7 @@ layout (std140) uniform modelData { vec4 clip_equation; float thruster_scale; - bool use_clip_plane; + int use_clip_plane; int n_lights; float defaultGloss; @@ -59,10 +64,10 @@ layout (std140) uniform modelData { vec3 emissionFactor; - bool alphaGloss; + int alphaGloss; - bool gammaSpec; - bool envGloss; + int gammaSpec; + int envGloss; int effect_num; int sBasemapIndex; @@ -76,7 +81,7 @@ layout (std140) uniform modelData { float vpwidth; float vpheight; - bool team_glow_enabled; + int team_glow_enabled; float znear; float zfar; @@ -86,6 +91,7 @@ layout (std140) uniform modelData { float fardist; int sGlowmapIndex; + int sSpecmapIndex; int sNormalmapIndex; int sAmbientmapIndex; @@ -95,23 
+101,34 @@ layout (std140) uniform modelData { int flags; }; -in VertexOutput { - mat3 tangentMatrix; +#ifdef VULKAN +layout(set = 1, binding = 1) uniform sampler2DArray materialTextures[16]; +#define sBasemap materialTextures[0] +#define sGlowmap materialTextures[1] +#define sSpecmap materialTextures[2] +#define sNormalmap materialTextures[3] +#define sAmbientmap materialTextures[5] +#define sMiscmap materialTextures[6] -#prereplace IF_FLAG_COMPILED MODEL_SDR_FLAG_FOG - float fogDist; -#prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_FOG +layout(set = 0, binding = 2) uniform sampler2DArray shadow_map; +layout(set = 1, binding = 5) uniform sampler2D sFramebuffer; +layout(location = 0) in VertexOutput { vec4 position; vec3 normal; vec4 texCoord; - -#prereplace IF_FLAG_COMPILED MODEL_SDR_FLAG_SHADOWS + mat3 tangentMatrix; + float fogDist; vec4 shadowUV[4]; vec4 shadowPos; -#prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_SHADOWS } vertIn; +layout(location = 0) out vec4 fragOut0; +layout(location = 1) out vec4 fragOut1; +layout(location = 2) out vec4 fragOut2; +layout(location = 3) out vec4 fragOut3; +layout(location = 4) out vec4 fragOut4; +#else #prereplace IF_FLAG_COMPILED MODEL_SDR_FLAG_DIFFUSE uniform sampler2DArray sBasemap; #prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_DIFFUSE @@ -135,8 +152,24 @@ uniform sampler2DArray sMiscmap; uniform sampler2DArray shadow_map; #prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_SHADOWS -out vec4 fragOut0; +in VertexOutput { + mat3 tangentMatrix; +#prereplace IF_FLAG_COMPILED MODEL_SDR_FLAG_FOG + float fogDist; +#prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_FOG + + vec4 position; + vec3 normal; + vec4 texCoord; + +#prereplace IF_FLAG_COMPILED MODEL_SDR_FLAG_SHADOWS + vec4 shadowUV[4]; + vec4 shadowPos; +#prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_SHADOWS +} vertIn; + +out vec4 fragOut0; #ifndef MODEL_SDR_FLAG_SHADOW_MAP out vec4 fragOut1; out vec4 fragOut2; @@ -144,9 +177,11 @@ out vec4 fragOut3; out vec4 fragOut4; #endif 
+#endif + vec3 FresnelLazarovEnv(vec3 specColor, vec3 view, vec3 normal, float gloss) { - // Fresnel for environment lighting + // Fresnel for environment lighting // Equation referenced from Dimitar Lazarov's presentation titled Physically Based Rendering in Call of Duty: Black Ops return specColor + (vec3(1.0) - specColor) * pow(1.0 - clamp(dot(view, normal), 0.0, 1.0), 5.0) / (4.0 - 3.0 * gloss); } @@ -206,15 +241,14 @@ void main() #ifdef MODEL_SDR_FLAG_SHADOW_MAP // need depth and depth squared for variance shadow maps fragOut0 = vec4(vertIn.position.z, vertIn.position.z * vertIn.position.z * VARIANCE_SHADOW_SCALE_INV, 0.0, 1.0); - return; #else + vec3 eyeDir = vec3(normalize(-vertIn.position).xyz); vec2 texCoord = vertIn.texCoord.xy; // setup our baseline values for base, emissive, fresnel, gloss, AO and normal // default specular value is set below, as it depends on the diffuse color - vec4 baseColor = color; vec4 emissiveColor = vec4(0.0, 0.0, 0.0, 1.0); float fresnelFactor = 0.0; @@ -223,6 +257,7 @@ void main() vec3 unitNormal = normalize(vertIn.normal); vec3 normal = unitNormal; + // Ambient occlusion map #prereplace IF_FLAG MODEL_SDR_FLAG_AMBIENT // red channel is ambient occlusion factor which only affects ambient lighting. // green is cavity occlusion factor which only affects diffuse and specular lighting. 
@@ -249,6 +284,7 @@ void main() distort = vec2(cos(vertIn.position.x*vertIn.position.w*0.005+anim_timer*20.0)*sin(vertIn.position.y*vertIn.position.w*0.005),sin(vertIn.position.x*vertIn.position.w*0.005+anim_timer*20.0)*cos(vertIn.position.y*vertIn.position.w*0.005))*0.03; } + // Diffuse map #prereplace IF_FLAG MODEL_SDR_FLAG_DIFFUSE vec2 diffuseTexCoord = texCoord; if (effect_num == 2) { @@ -256,7 +292,7 @@ void main() } baseColor = texture(sBasemap, vec3(diffuseTexCoord, float(sBasemapIndex))); - #prereplace IF_FLAG MODEL_SDR_FLAG_HDR + #prereplace IF_FLAG MODEL_SDR_FLAG_HDR baseColor.rgb = srgb_to_linear(baseColor.rgb); #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_HDR @@ -272,7 +308,7 @@ void main() // Anti-glint "trick" based on Valve's "Advanced VR Rendering" talk at GDC2015: // http://media.steampowered.com/apps/valve/2015/Alex_Vlachos_Advanced_VR_Rendering_GDC2015.pdf Page 43 - // basically make surfaces rougher if local normals change too fast in screenspace + // basically make surfaces rougher if local normals change too fast in screenspace vec2 normDx = dFdx(unitNormal.xy); vec2 normDy = dFdy(unitNormal.xy); float glossGeo = 1.0f - pow(clamp(max(dot(normDx,normDx), dot(normDy,normDy)),0.0,1.0),0.33); @@ -280,19 +316,20 @@ void main() // Now that we have a base color and min gloss value, compute the spec color vec4 specColor = vec4(baseColor.rgb * SPEC_FACTOR_NO_SPEC_MAP, glossData); - + + // Specular map #prereplace IF_FLAG MODEL_SDR_FLAG_SPEC specColor = texture(sSpecmap, vec3(texCoord, float(sSpecmapIndex))); #prereplace IF_FLAG MODEL_SDR_FLAG_ALPHA_MULT specColor *= alphaMult; #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_ALPHA_MULT - if(alphaGloss) glossData = specColor.a; - if(gammaSpec) { - specColor.rgb = max(specColor.rgb, vec3(0.03f)); // hardcoded minimum specular value. read John Hable's blog post titled 'Everything Is Shiny'. 
+ if(alphaGloss != 0) glossData = specColor.a; + if(gammaSpec != 0) { + specColor.rgb = max(specColor.rgb, vec3(0.03f)); // hardcoded minimum specular value. read John Hable's blog post titled 'Everything Is Shiny'. fresnelFactor = 1.0; } - + #prereplace IF_FLAG MODEL_SDR_FLAG_HDR specColor.rgb = srgb_to_linear(specColor.rgb); #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_HDR @@ -304,11 +341,12 @@ void main() vec4 teamMask = vec4(0.0); vec3 team_color_glow = vec3(0.0); + // Misc map / team colors #prereplace IF_FLAG MODEL_SDR_FLAG_MISC #prereplace IF_FLAG MODEL_SDR_FLAG_TEAMCOLOR teamMask = texture(sMiscmap, vec3(texCoord, float(sMiscmapIndex))); - //For team colors applied to a diffuse or spec map, we assume that the base color of the diffuse + //For team colors applied to a diffuse or spec map, we assume that the base color of the diffuse //at this point is vec3(0.5). To get accurate results, we subtract 0.5 from the team colors vec3 color_offset = vec3(-0.5) * (teamMask.x + teamMask.y); @@ -320,11 +358,11 @@ void main() specColor.rgb = linear_to_srgb(specColor.rgb); #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_HDR - baseColor.rgb += team_color; + baseColor.rgb += team_color; baseColor.rgb = max(baseColor.rgb, vec3(0.0)); // We need to make sure that nothing here ever goes negative specColor.rgb += team_color; specColor.rgb = max(specColor.rgb, vec3(0.03)); - + #prereplace IF_FLAG MODEL_SDR_FLAG_HDR baseColor.rgb = srgb_to_linear(baseColor.rgb); specColor.rgb = srgb_to_linear(specColor.rgb); @@ -347,12 +385,13 @@ void main() #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_LIGHT #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_DEFERRED + // Glow map #prereplace IF_FLAG MODEL_SDR_FLAG_GLOW vec3 glowColor = texture(sGlowmap, vec3(texCoord, float(sGlowmapIndex))).rgb; #prereplace IF_FLAG MODEL_SDR_FLAG_MISC #prereplace IF_FLAG MODEL_SDR_FLAG_TEAMCOLOR float glowColorLuminance = dot(glowColor, vec3(0.299, 0.587, 0.114)); - glowColor = team_glow_enabled ? 
mix(max(team_color_glow, vec3(0.0)), glowColor, clamp(glowColorLuminance - teamMask.b - teamMask.a, 0.0, 1.0)) : glowColor; + glowColor = (team_glow_enabled != 0) ? mix(max(team_color_glow, vec3(0.0)), glowColor, clamp(glowColorLuminance - teamMask.b - teamMask.a, 0.0, 1.0)) : glowColor; #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_TEAMCOLOR #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_MISC #prereplace IF_FLAG MODEL_SDR_FLAG_HDR @@ -382,12 +421,14 @@ void main() specColor.rgb *= fogDensityFinal; #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_FOG + // Desaturation #prereplace IF_FLAG MODEL_SDR_FLAG_DIFFUSE if(desaturate == 1) { baseColor.rgb = color.rgb * dot(vec3(1.0), baseColor.rgb) * 0.3333333; } #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_DIFFUSE - + + // Ship effects if (effect_num == 0) { float shinefactor = 1.0/(1.0 + pow(abs((fract(abs(texCoord.x))-anim_timer) * 1000.0), 2.0)) * 1000.0; emissiveColor.rgb += vec3(shinefactor); @@ -429,12 +470,18 @@ void main() fragOut0 = baseColor; +#ifdef VULKAN + // Vulkan has undefined values for unwritten outputs; zero-init G-buffer outputs + fragOut1 = vec4(0.0); + fragOut2 = vec4(0.0); + fragOut3 = vec4(0.0); + fragOut4 = vec4(0.0); +#endif #prereplace IF_FLAG MODEL_SDR_FLAG_DEFERRED fragOut1 = vec4(vertIn.position.xy, compress_depth_value(vertIn.position.z), aoFactors.x); fragOut2 = vec4(normal, glossData); fragOut3 = vec4(specColor.rgb, fresnelFactor); fragOut4 = emissiveColor; #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_DEFERRED - #endif -} \ No newline at end of file +} diff --git a/code/def_files/data/effects/main-g.sdr b/code/def_files/data/effects/main-g.sdr index 6904e113ed3..11973fcc080 100644 --- a/code/def_files/data/effects/main-g.sdr +++ b/code/def_files/data/effects/main-g.sdr @@ -52,7 +52,7 @@ layout (std140) uniform modelData { vec4 clip_equation; float thruster_scale; - bool use_clip_plane; + int use_clip_plane; int n_lights; float defaultGloss; @@ -64,10 +64,10 @@ layout (std140) uniform modelData { vec3 emissionFactor; - bool 
alphaGloss; + int alphaGloss; - bool gammaSpec; - bool envGloss; + int gammaSpec; + int envGloss; int effect_num; int sBasemapIndex; @@ -80,7 +80,7 @@ layout (std140) uniform modelData { float vpwidth; float vpheight; - bool team_glow_enabled; + int team_glow_enabled; float znear; float zfar; @@ -185,7 +185,7 @@ out VertexOutput { vertOut.shadowPos = vertIn[vert].shadowPos; #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_SHADOWS - if (use_clip_plane) { + if (use_clip_plane != 0) { gl_ClipDistance[0] = gl_in[vert].gl_ClipDistance[0]; } EmitVertex(); @@ -239,7 +239,7 @@ out VertexOutput { vertOut.shadowPos = vertIn[vert].shadowPos; #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_SHADOWS - if (use_clip_plane) { + if (use_clip_plane != 0) { gl_ClipDistance[0] = gl_in[vert].gl_ClipDistance[0]; } EmitVertex(); diff --git a/code/def_files/data/effects/main-v.sdr b/code/def_files/data/effects/main-v.sdr index c3aa9ad74c1..b7678f629af 100644 --- a/code/def_files/data/effects/main-v.sdr +++ b/code/def_files/data/effects/main-v.sdr @@ -1,16 +1,30 @@ + +#ifndef VULKAN //? 
#version 150 #extension GL_ARB_gpu_shader5: enable - +#endif +#if defined(VULKAN) && defined(MODEL_SDR_FLAG_SHADOW_MAP) +#extension GL_ARB_shader_viewport_layer_array : enable +#endif #include "shadows.sdr" #conditional_include +"LARGE_SHADER" "main_large.sdr" #conditional_include -"LARGE_SHADER" "main_small.sdr" +#ifdef VULKAN +layout(location = 0) in vec4 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec4 vertTexCoord; +layout(location = 3) in vec3 vertNormal; +layout(location = 4) in vec4 vertTangent; +layout(location = 5) in float vertModelID; +#else in vec4 vertPosition; in vec4 vertTexCoord; in vec3 vertNormal; in vec4 vertTangent; in float vertModelID; +#endif #define MAX_LIGHTS 8 @@ -26,7 +40,12 @@ struct model_light { float ml_sourceRadius; }; -layout (std140) uniform modelData { +#ifdef VULKAN +layout(set = 1, binding = 0, std140) +#else +layout(std140) +#endif +uniform modelData { mat4 modelViewMatrix; mat4 modelMatrix; mat4 viewMatrix; @@ -47,7 +66,7 @@ layout (std140) uniform modelData { vec4 clip_equation; float thruster_scale; - bool use_clip_plane; + int use_clip_plane; int n_lights; float defaultGloss; @@ -59,10 +78,10 @@ layout (std140) uniform modelData { vec3 emissionFactor; - bool alphaGloss; + int alphaGloss; - bool gammaSpec; - bool envGloss; + int gammaSpec; + int envGloss; int effect_num; int sBasemapIndex; @@ -75,7 +94,7 @@ layout (std140) uniform modelData { float vpwidth; float vpheight; - bool team_glow_enabled; + int team_glow_enabled; float znear; float zfar; @@ -96,6 +115,23 @@ layout (std140) uniform modelData { int flags; }; +#ifdef VULKAN +// Transform buffer for batched submodel rendering +layout(set = 1, binding = 3, std430) readonly buffer TransformBuffer { + mat4 transforms[]; +} transformBuf; + +// Outputs to fragment shader +layout(location = 0) out VertexOutput { + vec4 position; + vec3 normal; + vec4 texCoord; + mat3 tangentMatrix; + float fogDist; + vec4 shadowUV[4]; + vec4 shadowPos; +} 
vertOut; +#else #prereplace IF_FLAG_COMPILED MODEL_SDR_FLAG_TRANSFORM uniform samplerBuffer transform_tex; #prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_TRANSFORM @@ -138,6 +174,7 @@ void getModelTransform(inout mat4 transform, out bool invisible, int id, int mat transform[3].w = 1.0; } #prereplace ENDIF_FLAG_COMPILED MODEL_SDR_FLAG_TRANSFORM +#endif void main() { @@ -145,23 +182,50 @@ void main() vec3 normal; vec4 texCoord; mat4 orient = mat4(1.0); - mat4 scale = mat4(1.0); bool clipModel = false; - + + // Transform loading #prereplace IF_FLAG MODEL_SDR_FLAG_TRANSFORM +#ifdef VULKAN + int id = int(vertModelID); + orient = transformBuf.transforms[buffer_matrix_offset + id]; + clipModel = (orient[3].w >= 0.9); + orient[3].w = 1.0; +#else getModelTransform(orient, clipModel, int(vertModelID), buffer_matrix_offset); +#endif #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_TRANSFORM texCoord = textureMatrix * vertTexCoord; vec4 vertex = vertPosition; + + // Thruster scale #prereplace IF_FLAG MODEL_SDR_FLAG_THRUSTER if(vertex.z < -1.5) { vertex.z *= thruster_scale; } #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_THRUSTER + // Transform the normal into eye space and normalize the result. 
normal = normalize(mat3(modelViewMatrix) * mat3(orient) * vertNormal); position = modelViewMatrix * orient * vertex; + +#ifdef VULKAN + #ifdef MODEL_SDR_FLAG_SHADOW_MAP + gl_Position = shadow_proj_matrix[gl_InstanceIndex] * position; + gl_Position.z = clamp(gl_Position.z, 0.0, gl_Position.w); + gl_Layer = gl_InstanceIndex; + #else + gl_Position = projMatrix * position; + #endif + + // Clip invisible submodels by moving vertices off-screen + #prereplace IF_FLAG MODEL_SDR_FLAG_TRANSFORM + if (clipModel) { + gl_Position = vec4(-2.0, -2.0, -2.0, 1.0); + } + #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_TRANSFORM +#else #ifdef MODEL_SDR_FLAG_SHADOW_MAP gl_Position = position; #if !defined(GL_ARB_gpu_shader5) @@ -174,6 +238,7 @@ void main() #else gl_Position = projMatrix * position; #endif +#endif #prereplace IF_FLAG MODEL_SDR_FLAG_SHADOWS vec4 shadowPos = shadow_mv_matrix * modelMatrix * orient * vertPosition; vertOut.shadowPos = shadow_mv_matrix * modelMatrix * orient * vertPosition; @@ -188,10 +253,18 @@ void main() vec3 b = cross(normal, t) * vertTangent.w; vertOut.tangentMatrix = mat3(t, b, normal); + // Fog distance #prereplace IF_FLAG MODEL_SDR_FLAG_FOG vertOut.fogDist = gl_Position.z - fogNear; #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_FOG +#ifdef VULKAN + #prereplace IF_NOT_FLAG MODEL_SDR_FLAG_FOG + vertOut.fogDist = 0.0; + #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_FOG +#endif + // Clip invisible submodels (OpenGL path) +#ifndef VULKAN #prereplace IF_FLAG MODEL_SDR_FLAG_TRANSFORM if (clipModel) { // Clip this model by moving all vertices outside the clip volume @@ -201,8 +274,10 @@ void main() vertOut.clipModel = clipModel ? 
1.0 : 0.0; #endif #prereplace ENDIF_FLAG //MODEL_SDR_FLAG_TRANSFORM +#endif - if(use_clip_plane) { + // Clip plane + if(use_clip_plane != 0) { gl_ClipDistance[0] = dot(clip_equation, modelMatrix * orient * vertex); } else { gl_ClipDistance[0] = 1.0; @@ -213,6 +288,9 @@ void main() vertOut.normal = normal; vertOut.texCoord = texCoord; #else + #ifdef VULKAN + vertOut.position = position; + #endif vertOut.normal = normal; vertOut.texCoord = texCoord; #endif diff --git a/code/def_files/data/effects/msaa-f.sdr b/code/def_files/data/effects/msaa-f.sdr index 521834f6798..3e7c0db195a 100644 --- a/code/def_files/data/effects/msaa-f.sdr +++ b/code/def_files/data/effects/msaa-f.sdr @@ -1,5 +1,27 @@ #include "z-compress.sdr" +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; + +layout(location = 0) out vec4 fragOut0; +layout(location = 1) out vec4 fragOut1; +layout(location = 2) out vec4 fragOut2; +layout(location = 3) out vec4 fragOut3; +layout(location = 4) out vec4 fragOut4; + +layout(set = 1, binding = 1) uniform sampler2DMS msaaTex[6]; +#define texColor msaaTex[0] +#define texPos msaaTex[1] +#define texNormal msaaTex[2] +#define texSpecular msaaTex[3] +#define texEmissive msaaTex[4] +#define texDepth msaaTex[5] + +layout(std140, set = 2, binding = 0) uniform genericData { + int samples; + float fov; +}; +#else in vec4 fragTexCoord; out vec4 fragOut0; out vec4 fragOut1; @@ -19,6 +41,7 @@ layout (std140) uniform genericData { int samples; float fov; }; +#endif const float voxelDepth = 2.5f; const float voxelDepthFalloff = 2.5f; @@ -160,7 +183,11 @@ float getMedianDist(ivec2 texel) { void main() { vec2 texSize = textureSize(texColor); +#ifdef VULKAN + ivec2 texel = ivec2(texSize * fragTexCoord); +#else ivec2 texel = ivec2(texSize * fragTexCoord.xy); +#endif float texelWidthFactor = tan(fov / texSize.y); float dist = getMedianDist(texel); diff --git a/code/def_files/data/effects/nanovg-f.sdr b/code/def_files/data/effects/nanovg-f.sdr index 9433077661b..dbe41703069 
100644 --- a/code/def_files/data/effects/nanovg-f.sdr +++ b/code/def_files/data/effects/nanovg-f.sdr @@ -1,37 +1,44 @@ -// Enabled at all times but the define is kept here in case this should be configurable in the future -#define EDGE_AA - -layout(std140) uniform NanoVGUniformData { - mat3 scissorMat; - - mat3 paintMat; - - vec4 innerCol; +#ifdef VULKAN +layout (location = 0) in vec2 ftcoord; +layout (location = 1) in vec2 fpos; - vec4 outerCol; +layout (location = 0) out vec4 outColor; - vec2 scissorExt; - vec2 scissorScale; - - vec2 extent; - float radius; - float feather; - - float strokeMult; - float strokeThr; - int texType; - int type; - - vec2 viewSize; - int texArrayIndex; -}; +layout (set = 1, binding = 1) uniform sampler2DArray nvg_tex; +#else +// Enabled at all times but the define is kept here in case this should be configurable in the future +#define EDGE_AA uniform sampler2DArray nvg_tex; in vec2 ftcoord; in vec2 fpos; out vec4 outColor; +#endif + +#ifdef VULKAN +layout (set = 2, binding = 2, std140) +#else +layout(std140) +#endif +uniform NanoVGUniformData { + mat3 scissorMat; + mat3 paintMat; + vec4 innerCol; + vec4 outerCol; + vec2 scissorExt; + vec2 scissorScale; + vec2 extent; + float radius; + float feather; + float strokeMult; + float strokeThr; + int texType; + int type; + vec2 viewSize; + int texArrayIndex; +}; float sdroundrect(vec2 pt, vec2 ext, float rad) { vec2 ext2 = ext - vec2(rad,rad); diff --git a/code/def_files/data/effects/nanovg-v.sdr b/code/def_files/data/effects/nanovg-v.sdr index 48b5cbb9a0e..8247510faec 100644 --- a/code/def_files/data/effects/nanovg-v.sdr +++ b/code/def_files/data/effects/nanovg-v.sdr @@ -1,4 +1,29 @@ +#ifdef VULKAN +layout (location = 0) in vec4 vertPosition; +layout (location = 2) in vec4 vertTexCoord; + +layout (location = 0) out vec2 ftcoord; +layout (location = 1) out vec2 fpos; + +layout (set = 2, binding = 2, std140) uniform NanoVGUniformData { + mat3 scissorMat; + mat3 paintMat; + vec4 innerCol; + 
vec4 outerCol; + vec2 scissorExt; + vec2 scissorScale; + vec2 extent; + float radius; + float feather; + float strokeMult; + float strokeThr; + int texType; + int type; + vec2 viewSize; + int texArrayIndex; +}; +#else layout(std140) uniform NanoVGUniformData { mat3 scissorMat; @@ -29,9 +54,18 @@ in vec2 vertTexCoord; out vec2 ftcoord; out vec2 fpos; +#endif void main(void) { +#ifdef VULKAN + ftcoord = vertTexCoord.xy; + fpos = vertPosition.xy; + gl_Position = vec4(2.0 * vertPosition.x / viewSize.x - 1.0, + 1.0 - 2.0 * vertPosition.y / viewSize.y, + 0.0, 1.0); +#else ftcoord = vertTexCoord; fpos = vertPosition; gl_Position = vec4(2.0*vertPosition.x/viewSize.x - 1.0, 1.0 - 2.0*vertPosition.y/viewSize.y, 0, 1); +#endif } diff --git a/code/def_files/data/effects/passthrough-f.sdr b/code/def_files/data/effects/passthrough-f.sdr index 6e3134e068e..390126d1644 100644 --- a/code/def_files/data/effects/passthrough-f.sdr +++ b/code/def_files/data/effects/passthrough-f.sdr @@ -1,6 +1,26 @@ #include "gamma.sdr" +#ifdef VULKAN +layout(location = 0) in vec4 fragTexCoord; +layout(location = 1) in vec4 fragColor; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray baseMap; + +layout(std140, set = 2, binding = 0) uniform genericData { + mat4 modelMatrix; + vec4 color; + vec4 clipEquation; + int baseMapIndex; + int alphaTexture; + int noTexturing; + int srgb; + float intensity; + float alphaThreshold; + uint clipEnabled; +}; +#else in vec4 fragTexCoord; in vec4 fragColor; out vec4 fragOut0; @@ -11,10 +31,15 @@ layout (std140) uniform genericData { int noTexturing; int srgb; }; +#endif void main() { +#ifdef VULKAN + vec4 baseColor = texture(baseMap, vec3(fragTexCoord.xy, float(baseMapIndex))); +#else vec4 baseColor = texture(baseMap, fragTexCoord.xy); +#endif baseColor.rgb = (srgb == 1) ? srgb_to_linear(baseColor.rgb) : baseColor.rgb; vec4 blendColor = (srgb == 1) ? 
vec4(srgb_to_linear(fragColor.rgb), fragColor.a) : fragColor; diff --git a/code/def_files/data/effects/passthrough-v.sdr b/code/def_files/data/effects/passthrough-v.sdr index befd226d836..1b3b3b30694 100644 --- a/code/def_files/data/effects/passthrough-v.sdr +++ b/code/def_files/data/effects/passthrough-v.sdr @@ -1,3 +1,28 @@ +#ifdef VULKAN +layout(location = 0) in vec4 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec4 vertTexCoord; +layout(location = 0) out vec4 fragTexCoord; +layout(location = 1) out vec4 fragColor; + +layout(std140, set = 2, binding = 0) uniform genericData { + mat4 modelMatrix; + vec4 color; + vec4 clipEquation; + int baseMapIndex; + int alphaTexture; + int noTexturing; + int srgb; + float intensity; + float alphaThreshold; + uint clipEnabled; +}; + +layout(std140, set = 2, binding = 1) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; +#else in vec4 vertPosition; in vec4 vertTexCoord; in vec4 vertColor; @@ -8,10 +33,14 @@ layout (std140) uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; +#endif void main() { fragTexCoord = vertTexCoord; fragColor = vertColor; gl_Position = projMatrix * modelViewMatrix * vertPosition; +#ifdef VULKAN + gl_ClipDistance[0] = clipEnabled != 0u ? 
dot(modelMatrix * vertPosition, clipEquation) : 1.0; +#endif } diff --git a/code/def_files/data/effects/post-f.sdr b/code/def_files/data/effects/post-f.sdr index 08eafe5899e..6e12a5e20fb 100644 --- a/code/def_files/data/effects/post-f.sdr +++ b/code/def_files/data/effects/post-f.sdr @@ -1,11 +1,21 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D tex; +layout(set = 1, binding = 4) uniform sampler2D depth_tex; +#else in vec4 fragTexCoord; - out vec4 fragOut0; - uniform sampler2D tex; uniform sampler2D depth_tex; +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { float timer; float noise_amount; float saturation; @@ -19,7 +29,7 @@ layout (std140) uniform genericData { vec3 tint; float dither; - // these are blank, valid slots for modders to create custom effects + // these are blank, valid slots for modders to create custom effects // that can be defined in post_processing.tbl and coded below vec3 custom_effect_vec3_a; float custom_effect_float_a; diff --git a/code/def_files/data/effects/post-v.sdr b/code/def_files/data/effects/post-v.sdr index 12b4555353e..fca8e2cd41c 100644 --- a/code/def_files/data/effects/post-v.sdr +++ b/code/def_files/data/effects/post-v.sdr @@ -1,8 +1,19 @@ +#ifdef VULKAN +layout(location = 0) out vec2 fragTexCoord; +#else in vec4 vertPosition; in vec4 vertTexCoord; out vec4 fragTexCoord; +#endif + void main() { +#ifdef VULKAN + vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + fragTexCoord = pos; + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); +#else fragTexCoord = vertTexCoord; gl_Position = vertPosition; +#endif } diff --git a/code/def_files/data/effects/rocketui-f.sdr b/code/def_files/data/effects/rocketui-f.sdr index bffff254716..688d2ec5fe2 100644 --- a/code/def_files/data/effects/rocketui-f.sdr +++ 
b/code/def_files/data/effects/rocketui-f.sdr @@ -1,4 +1,13 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 1) in vec4 fragColor; +layout(location = 2) in vec2 fragScreenPosition; + +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray baseMap; +#else in vec2 fragTexCoord; in vec4 fragColor; in vec2 fragScreenPosition; @@ -6,12 +15,18 @@ in vec2 fragScreenPosition; out vec4 fragOut0; uniform sampler2DArray baseMap; - -layout (std140) uniform genericData { +#endif + +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout(std140) +#endif +uniform genericData { mat4 projMatrix; vec2 offset; - bool textured; + int textured; int baseMapIndex; float horizontalSwipeOffset; @@ -25,7 +40,7 @@ void main() { float distance = horizontalSwipeOffset - fragScreenPosition.x; vec4 color; - if (textured) { + if (textured != 0) { color = texture(baseMap, vec3(fragTexCoord, float(baseMapIndex))) * fragColor; } else { color = fragColor; diff --git a/code/def_files/data/effects/rocketui-v.sdr b/code/def_files/data/effects/rocketui-v.sdr index dd62c939226..07ae245966f 100644 --- a/code/def_files/data/effects/rocketui-v.sdr +++ b/code/def_files/data/effects/rocketui-v.sdr @@ -1,4 +1,13 @@ +#ifdef VULKAN +layout(location = 0) in vec2 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec2 vertTexCoord; + +layout(location = 0) out vec2 fragTexCoord; +layout(location = 1) out vec4 fragColor; +layout(location = 2) out vec2 fragScreenPosition; +#else in vec2 vertPosition; in vec4 vertColor; in vec2 vertTexCoord; @@ -6,12 +15,18 @@ in vec2 vertTexCoord; out vec2 fragTexCoord; out vec4 fragColor; out vec2 fragScreenPosition; - -layout (std140) uniform genericData { +#endif + +#ifdef VULKAN +layout(std140, set = 2, binding = 0) +#else +layout(std140) +#endif +uniform genericData { mat4 projMatrix; vec2 offset; - bool textured; + int textured; int baseMapIndex; float 
horizontalSwipeOffset; diff --git a/code/def_files/data/effects/shadows.sdr b/code/def_files/data/effects/shadows.sdr index a765482f30c..4440cf19208 100644 --- a/code/def_files/data/effects/shadows.sdr +++ b/code/def_files/data/effects/shadows.sdr @@ -104,8 +104,14 @@ vec4 transformToShadowMap(mat4 shadow_proj_matrix, int i, vec4 pos) { vec4 shadow_proj; shadow_proj = shadow_proj_matrix * pos; +#ifdef VULKAN + // Vulkan shadow projection: XY is [-1,1] → [0,1], Z is already [0,1] + shadow_proj.xy = shadow_proj.xy * 0.5 + 0.5; +#else + // OpenGL: all components [-1,1] → [0,1] shadow_proj += 1.0; shadow_proj *= 0.5; +#endif shadow_proj.w = shadow_proj.z; shadow_proj.z = float(i); return shadow_proj; diff --git a/code/def_files/data/effects/shield-impact-f.sdr b/code/def_files/data/effects/shield-impact-f.sdr index d5214d18f62..e847867a9b0 100644 --- a/code/def_files/data/effects/shield-impact-f.sdr +++ b/code/def_files/data/effects/shield-impact-f.sdr @@ -3,14 +3,28 @@ const float EMISSIVE_GAIN = 2.0; +#ifdef VULKAN +layout(location = 0) in vec4 fragImpactUV; +layout(location = 1) in float fragNormOffset; + +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray shieldMap; +#else in vec4 fragImpactUV; in float fragNormOffset; out vec4 fragOut0; uniform sampler2DArray shieldMap; - -layout (std140) uniform genericData { +#endif + +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { mat4 shieldModelViewMatrix; mat4 shieldProjMatrix; diff --git a/code/def_files/data/effects/shield-impact-v.sdr b/code/def_files/data/effects/shield-impact-v.sdr index a1260a63389..22f423cb3fb 100644 --- a/code/def_files/data/effects/shield-impact-v.sdr +++ b/code/def_files/data/effects/shield-impact-v.sdr @@ -1,17 +1,34 @@ +#ifdef VULKAN +layout(location = 0) in vec4 vertPosition; +layout(location = 3) in vec3 vertNormal; + +layout(location = 0) out vec4 fragImpactUV; +layout(location = 1) out float 
fragNormOffset; +#else in vec4 vertPosition; in vec3 vertNormal; out vec4 fragImpactUV; out float fragNormOffset; - - -layout (std140) uniform matrixData { +#endif + +#ifdef VULKAN +layout(set = 2, binding = 1, std140) +#else +layout(std140) +#endif +uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; -layout (std140) uniform genericData { +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { mat4 shieldModelViewMatrix; mat4 shieldProjMatrix; @@ -26,7 +43,6 @@ layout (std140) uniform genericData { void main() { gl_Position = projMatrix * modelViewMatrix * vertPosition; - //vec3 normal = normalize(mat3(modelViewMatrix) * vertNormal); fragNormOffset = dot(hitNormal, vertNormal); fragImpactUV = shieldProjMatrix * shieldModelViewMatrix * vertPosition; fragImpactUV += 1.0f; diff --git a/code/def_files/data/effects/tonemapping-f.sdr b/code/def_files/data/effects/tonemapping-f.sdr index 8011d0cdfe9..a63ff0769d9 100644 --- a/code/def_files/data/effects/tonemapping-f.sdr +++ b/code/def_files/data/effects/tonemapping-f.sdr @@ -1,11 +1,22 @@ #include "gamma.sdr" +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D tex; +#else in vec4 fragTexCoord; out vec4 fragOut0; uniform sampler2D tex; +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout(std140) +#endif +uniform genericData { float exposure; int tonemapper; float x0; //from here on these are for the PPC tonemappers diff --git a/code/def_files/data/effects/video-f.sdr b/code/def_files/data/effects/video-f.sdr index 4e4fab88952..8ea4044baa3 100644 --- a/code/def_files/data/effects/video-f.sdr +++ b/code/def_files/data/effects/video-f.sdr @@ -1,9 +1,25 @@ +#ifdef VULKAN +layout(location = 0) in vec4 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray 
textures[16]; +#define ytex textures[0] +#define utex textures[1] +#define vtex textures[2] +#else in vec4 fragTexCoord; out vec4 fragOut0; uniform sampler2DArray ytex; uniform sampler2DArray utex; uniform sampler2DArray vtex; -layout (std140) uniform movieData { +#endif + +#ifdef VULKAN +layout(std140, set = 2, binding = 4) +#else +layout(std140) +#endif +uniform movieData { float alpha; float pad[3]; }; diff --git a/code/def_files/data/effects/video-v.sdr b/code/def_files/data/effects/video-v.sdr index a855aa0a8ff..08411b84db6 100644 --- a/code/def_files/data/effects/video-v.sdr +++ b/code/def_files/data/effects/video-v.sdr @@ -1,8 +1,19 @@ +#ifdef VULKAN +layout(location = 0) in vec4 vertPosition; +layout(location = 2) in vec4 vertTexCoord; +layout(location = 0) out vec4 fragTexCoord; +#else in vec4 vertPosition; in vec4 vertTexCoord; out vec4 fragTexCoord; +#endif -layout (std140) uniform matrixData { +#ifdef VULKAN +layout(std140, set = 2, binding = 1) +#else +layout(std140) +#endif +uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; diff --git a/code/def_files/data/effects/volumetric-f.sdr b/code/def_files/data/effects/volumetric-f.sdr index d511fea68a9..8abaf600fd2 100644 --- a/code/def_files/data/effects/volumetric-f.sdr +++ b/code/def_files/data/effects/volumetric-f.sdr @@ -1,17 +1,31 @@ +#ifdef VULKAN +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; +layout(set = 1, binding = 1) uniform sampler2D tex2D[16]; +#define composite tex2D[0] +#define emissive tex2D[1] +layout(set = 1, binding = 4) uniform sampler2D depth_tex; +layout(set = 1, binding = 5) uniform sampler3D volume_tex; +layout(set = 1, binding = 6) uniform sampler3D noise_volume_tex; +#else in vec4 fragTexCoord; - out vec4 fragOut0; - layout (binding = 0) uniform sampler2D composite; layout (binding = 1) uniform sampler2D emissive; -layout (binding = 2) uniform sampler2D depth; +layout (binding = 2) uniform sampler2D depth_tex; layout (binding = 3) 
uniform sampler3D volume_tex; #ifdef NOISE layout (binding = 4) uniform sampler3D noise_volume_tex; #endif +#endif -layout (std140) uniform genericData { +#ifdef VULKAN +layout(set = 2, binding = 0, std140) +#else +layout (std140) +#endif +uniform genericData { mat4 p_inv; mat4 v_inv; vec3 camera; @@ -73,12 +87,19 @@ void main() vec2 fragcoordAngle = (fragTexCoord.xy - 0.5) * fov; fragcoordAngle.x *= aspect; - float depth = (2.0 * zNear * zFar / (zFar + zNear - (2 * texture(depth, fragTexCoord.xy).x - 1) * (zFar - zNear))) * sqrt(1.0 + tan(fragcoordAngle.x) * tan(fragcoordAngle.x) + tan(fragcoordAngle.y) * tan(fragcoordAngle.y)); + float rawDepth = texture(depth_tex, fragTexCoord.xy).x; +#ifdef VULKAN + // Vulkan depth range is [0,1] + float fragDepth = (zNear * zFar / (zFar - rawDepth * (zFar - zNear))) * sqrt(1.0 + tan(fragcoordAngle.x) * tan(fragcoordAngle.x) + tan(fragcoordAngle.y) * tan(fragcoordAngle.y)); +#else + // OpenGL depth range is [-1,1] + float fragDepth = (2.0 * zNear * zFar / (zFar + zNear - (2.0 * rawDepth - 1.0) * (zFar - zNear))) * sqrt(1.0 + tan(fragcoordAngle.x) * tan(fragcoordAngle.x) + tan(fragcoordAngle.y) * tan(fragcoordAngle.y)); +#endif //The t at which the ray enters the nebula cube float maxtMin = max(0,max(tMin.x, max(tMin.y, tMin.z))); //The t at which the ray leaves the nebula cube - float mintMax = min(depth, min(tMax.x, min(tMax.y, tMax.z))); + float mintMax = min(fragDepth, min(tMax.x, min(tMax.y, tMax.z))); //The cumulative one minus alpha value of the nebula. Corresponds to the multiplier of the current step when additively drawing to a fragment for which the steps closer to the camera are already drawn. 
float cumOMAlpha = 1; @@ -151,7 +172,7 @@ void main() //Emissive cumnebdist += stepcolor_alpha * stepsize_current; vec3 emissive_lod = textureLod(emissive, fragTexCoord.xy, clamp(cumnebdist * emissiveSpreadFactor, 0, float(textureQueryLevels(emissive) - 1))).rgb; - vec3 stepcolor_emissive = clamp(emissive_lod.rgb * pow(alphalim, 1.0 / (opacitydistance / ((depth - stept) * emissiveFalloff + 0.01))) * emissiveIntensity, 0, 1); + vec3 stepcolor_emissive = clamp(emissive_lod.rgb * pow(alphalim, 1.0 / (opacitydistance / ((fragDepth - stept) * emissiveFalloff + 0.01))) * emissiveIntensity, 0, 1); //Step finish vec3 stepcolor = clamp(stepcolor_diffuse + stepcolor_emissive, 0, 1); diff --git a/code/external_dll/externalcode.h b/code/external_dll/externalcode.h index eebdee4cae2..596a2b151a4 100644 --- a/code/external_dll/externalcode.h +++ b/code/external_dll/externalcode.h @@ -23,13 +23,23 @@ class SCP_ExternalCode } protected: - bool LoadExternal( const char* externlib ) + bool LoadExternal( const char* externlib, const char* basePath = nullptr ) { if ( !externlib ) return FALSE; m_library = SDL_LoadObject(externlib); + // check full path as a fallback for our own libraries + // NOTE: basePath is assumed to have a trailing slash! 
+ if ( !m_library && basePath ) + { + SCP_string fullpath = basePath; + fullpath += externlib; + + m_library = SDL_LoadObject(fullpath.c_str()); + } + #ifndef NDEBUG if (m_library == NULL) { @@ -113,4 +123,4 @@ typedef int (SCP_EXT_CALLCONV *SCPDLL_PFVERSION)(SCPDLL_Version*); v->major = Major; v->minor = Minor; v->patch = Patch;\ return 0; } -#endif /* EXTERNALCODE_H_INCLUDED_ */ \ No newline at end of file +#endif /* EXTERNALCODE_H_INCLUDED_ */ diff --git a/code/globalincs/pstypes.h b/code/globalincs/pstypes.h index c8c504ccc53..e5df7026327 100644 --- a/code/globalincs/pstypes.h +++ b/code/globalincs/pstypes.h @@ -597,6 +597,11 @@ inline void* memset_if_trivial_else_error(ImDrawListSplitter* memset_data, int c return ptr_memcpy(memcpy_dest, memcpy_src, count); } + inline void *memcpy_if_trivial_else_error(void *memcpy_dest, const void *memcpy_src, size_t count) + { + return ptr_memcpy(memcpy_dest, memcpy_src, count); + } + // MEMMOVE! const auto ptr_memmove = std::memmove; #define memmove memmove_if_trivial_else_error diff --git a/code/graphics/2d.cpp b/code/graphics/2d.cpp index 189227e8912..d9021ebfcac 100644 --- a/code/graphics/2d.cpp +++ b/code/graphics/2d.cpp @@ -80,6 +80,7 @@ gr_capability_def gr_capabilities[] = { GR_CAPABILITY_ENTRY(SEPARATE_BLEND_FUNCTIONS), GR_CAPABILITY_ENTRY(PERSISTENT_BUFFER_MAPPING), gr_capability_def {gr_capability::CAPABILITY_BPTC, "BPTC Texture Compression"}, //This one had a different parse string already! + gr_capability_def {gr_capability::CAPABILITY_S3TC, "S3TC Texture Compression"}, GR_CAPABILITY_ENTRY(LARGE_SHADER), GR_CAPABILITY_ENTRY(INSTANCED_RENDERING), }; @@ -1325,6 +1326,10 @@ void gr_close() graphics::paths::PathRenderer::shutdown(); + // Free bitmaps before destroying the graphics backend, since + // gf_bm_free_data needs the backend (texture manager, GL context, etc.) 
+ bm_close(); + switch (gr_screen.mode) { case GR_OPENGL: #ifdef WITH_OPENGL @@ -1340,13 +1345,11 @@ void gr_close() case GR_STUB: break; - + default: Int3(); // Invalid graphics mode } - bm_close(); - Gr_inited = 0; } @@ -2942,6 +2945,16 @@ void gr_flip(bool execute_scripting) model_process_cached_ui_render_instances(); + if (Cmdline_graphics_debug_output) { + output_uniform_debug_data(); + } + + // IMPORTANT: No rendering may happen after this point until gf_flip()/gr_setup_frame(). + // gr_reset_immediate_buffer() resets the write offset to 0, so any subsequent immediate + // buffer write would overwrite vertex data that already-recorded draw commands reference. + // In Vulkan (deferred submission), the GPU reads the final buffer state at submit time, + // so overwrites here silently corrupt earlier draws. OpenGL's immediate execution hides + // this, but it is still logically wrong for any deferred-submission backend. gr_reset_immediate_buffer(); // Do per frame operations on the matrix state @@ -2951,10 +2964,6 @@ void gr_flip(bool execute_scripting) mouse_reset_deltas(); - if (Cmdline_graphics_debug_output) { - output_uniform_debug_data(); - } - // Use this opportunity for retiring the uniform buffers uniform_buffer_managers_retire_buffers(); diff --git a/code/graphics/2d.h b/code/graphics/2d.h index 53dbc1980e2..929042405e5 100644 --- a/code/graphics/2d.h +++ b/code/graphics/2d.h @@ -262,7 +262,6 @@ struct vertex_format_data POSITION4, POSITION3, POSITION2, - SCREEN_POS, COLOR3, COLOR4, COLOR4F, @@ -338,8 +337,10 @@ enum class gr_capability { CAPABILITY_SEPARATE_BLEND_FUNCTIONS, CAPABILITY_PERSISTENT_BUFFER_MAPPING, CAPABILITY_BPTC, + CAPABILITY_S3TC, CAPABILITY_LARGE_SHADER, - CAPABILITY_INSTANCED_RENDERING + CAPABILITY_INSTANCED_RENDERING, + CAPABILITY_QUERIES_REUSABLE }; struct gr_capability_def { @@ -933,6 +934,10 @@ typedef struct screen { std::function gf_override_fog; + // ImGui backend integration + std::function gf_imgui_new_frame; + std::function 
gf_imgui_render_draw_data; + //OpenXR functions std::function()> gf_openxr_get_extensions; std::function gf_openxr_test_capabilities; @@ -1195,6 +1200,9 @@ inline void gr_post_process_restore_zbuffer() #define gr_override_fog GR_CALL(gr_screen.gf_override_fog) +#define gr_imgui_new_frame GR_CALL(gr_screen.gf_imgui_new_frame) +#define gr_imgui_render_draw_data GR_CALL(gr_screen.gf_imgui_render_draw_data) + inline void gr_render_primitives(material* material_info, primitive_type prim_type, vertex_layout* layout, diff --git a/code/graphics/matrix.cpp b/code/graphics/matrix.cpp index c7681159191..6b5f22fc03b 100644 --- a/code/graphics/matrix.cpp +++ b/code/graphics/matrix.cpp @@ -56,9 +56,18 @@ static void create_perspective_projection_matrix(matrix4 *out, float left, float out->a1d[5] = 2.0f * near_dist / (top - bottom); out->a1d[8] = (right + left) / (right - left); out->a1d[9] = (top + bottom) / (top - bottom); - out->a1d[10] = -(far_dist + near_dist) / (far_dist - near_dist); out->a1d[11] = -1.0f; - out->a1d[14] = -2.0f * far_dist * near_dist / (far_dist - near_dist); + + if (gr_screen.mode == GR_VULKAN) { + // Vulkan NDC Z range is [0, 1] (OpenGL is [-1, 1]) + // Y-flip is handled by negative viewport height (VK_KHR_maintenance1) + out->a1d[10] = -far_dist / (far_dist - near_dist); + out->a1d[14] = -far_dist * near_dist / (far_dist - near_dist); + } else { + // OpenGL NDC Z range is [-1, 1] + out->a1d[10] = -(far_dist + near_dist) / (far_dist - near_dist); + out->a1d[14] = -2.0f * far_dist * near_dist / (far_dist - near_dist); + } } static void create_orthographic_projection_matrix(matrix4* out, float left, float right, float bottom, float top, float near_dist, float far_dist) @@ -67,11 +76,20 @@ static void create_orthographic_projection_matrix(matrix4* out, float left, floa out->a1d[0] = 2.0f / (right - left); out->a1d[5] = 2.0f / (top - bottom); - out->a1d[10] = -2.0f / (far_dist - near_dist); out->a1d[12] = -(right + left) / (right - left); out->a1d[13] = 
-(top + bottom) / (top - bottom); - out->a1d[14] = -(far_dist + near_dist) / (far_dist - near_dist); out->a1d[15] = 1.0f; + + if (gr_screen.mode == GR_VULKAN) { + // Vulkan NDC Z range is [0, 1] (OpenGL is [-1, 1]) + // Y-flip is handled by negative viewport height (VK_KHR_maintenance1) + out->a1d[10] = -1.0f / (far_dist - near_dist); + out->a1d[14] = -near_dist / (far_dist - near_dist); + } else { + // OpenGL NDC Z range is [-1, 1] + out->a1d[10] = -2.0f / (far_dist - near_dist); + out->a1d[14] = -(far_dist + near_dist) / (far_dist - near_dist); + } } void gr_start_instance_matrix(const vec3d *offset, const matrix *rotation) @@ -286,7 +304,11 @@ void gr_end_2d_matrix() Assert( htl_2d_matrix_depth == 1 ); // reset viewport to what it was originally set to by the proj matrix - gr_set_viewport(gr_screen.offset_x, (gr_screen.max_h - gr_screen.offset_y - gr_screen.clip_height), gr_screen.clip_width, gr_screen.clip_height); + if (gr_screen.rendering_to_texture != -1) { + gr_set_viewport(gr_screen.offset_x, gr_screen.offset_y, gr_screen.clip_width, gr_screen.clip_height); + } else { + gr_set_viewport(gr_screen.offset_x, (gr_screen.max_h - gr_screen.offset_y - gr_screen.clip_height), gr_screen.clip_width, gr_screen.clip_height); + } gr_projection_matrix = gr_last_projection_matrix; diff --git a/code/graphics/opengl/gropengl.cpp b/code/graphics/opengl/gropengl.cpp index 79252e7eec3..bbe1355866a 100644 --- a/code/graphics/opengl/gropengl.cpp +++ b/code/graphics/opengl/gropengl.cpp @@ -46,6 +46,8 @@ #include "es_compatibility.h" #endif +#include "backends/imgui_impl_opengl3.h" + #include // minimum GL / GLES version we can reliably support is 3.2 @@ -1001,6 +1003,16 @@ int opengl_init_display_device() return 0; } +static void gr_opengl_imgui_new_frame() +{ + ImGui_ImplOpenGL3_NewFrame(); +} + +static void gr_opengl_imgui_render_draw_data() +{ + ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); +} + void gr_opengl_init_function_pointers() { gr_screen.gf_flip = 
gr_opengl_flip; @@ -1133,6 +1145,9 @@ void gr_opengl_init_function_pointers() gr_screen.gf_override_fog = gr_opengl_override_fog; + gr_screen.gf_imgui_new_frame = gr_opengl_imgui_new_frame; + gr_screen.gf_imgui_render_draw_data = gr_opengl_imgui_render_draw_data; + gr_screen.gf_openxr_get_extensions = gr_opengl_openxr_get_extensions; gr_screen.gf_openxr_test_capabilities = gr_opengl_openxr_test_capabilities; gr_screen.gf_openxr_create_session = gr_opengl_openxr_create_session; @@ -1556,10 +1571,14 @@ bool gr_opengl_is_capable(gr_capability capability) return GLAD_GL_ARB_buffer_storage != 0; case gr_capability::CAPABILITY_BPTC: return GLAD_GL_ARB_texture_compression_bptc != 0; + case gr_capability::CAPABILITY_S3TC: + return GLAD_GL_EXT_texture_compression_s3tc != 0; case gr_capability::CAPABILITY_LARGE_SHADER: return !Cmdline_no_large_shaders; case gr_capability::CAPABILITY_INSTANCED_RENDERING: return GLAD_GL_ARB_vertex_attrib_binding; + case gr_capability::CAPABILITY_QUERIES_REUSABLE: + return true; } diff --git a/code/graphics/opengl/gropengldeferred.cpp b/code/graphics/opengl/gropengldeferred.cpp index 52f733b3bdc..e036f0746ef 100644 --- a/code/graphics/opengl/gropengldeferred.cpp +++ b/code/graphics/opengl/gropengldeferred.cpp @@ -1,7 +1,7 @@ #include "gropengldeferred.h" -#include "globalincs/vmallocator.h" +#include "graphics/util/primitives.h" #include "ShaderProgram.h" #include "gropengldraw.h" @@ -704,69 +704,12 @@ void gr_opengl_draw_deferred_light_sphere(const vec3d *position) } -void gr_opengl_deferred_light_cylinder_init(int segments) // Generate a VBO of a cylinder of radius and height 1.0f, based on code at http://www.ogre3d.org/tikiwiki/ManualSphereMeshes +void gr_opengl_deferred_light_cylinder_init(int segments) { - unsigned int nVertex = (segments + 1) * 2 * 3 + 6; // Can someone verify this? 
- unsigned int nIndex = deferred_light_cylinder_icount = 12 * (segments + 1) - 6; //This too - float *Vertices = (float*)vm_malloc(sizeof(float) * nVertex); - float *pVertex = Vertices; - ushort *Indices = (ushort*)vm_malloc(sizeof(ushort) * nIndex); - ushort *pIndex = Indices; - - float fDeltaSegAngle = (2.0f * PI / segments); - unsigned short wVerticeIndex = 0 ; - - *pVertex++ = 0.0f; - *pVertex++ = 0.0f; - *pVertex++ = 0.0f; - wVerticeIndex ++; - *pVertex++ = 0.0f; - *pVertex++ = 0.0f; - *pVertex++ = 1.0f; - wVerticeIndex ++; - - for( int ring = 0; ring <= 1; ring++ ) { - float z0 = (float)ring; - - // Generate the group of segments for the current ring - for(int seg = 0; seg <= segments; seg++) { - float x0 = sinf(seg * fDeltaSegAngle); - float y0 = cosf(seg * fDeltaSegAngle); - - // Add one vertex to the strip which makes up the cylinder - *pVertex++ = x0; - *pVertex++ = y0; - *pVertex++ = z0; - - if (!ring) { - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = wVerticeIndex + (ushort)segments; - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - if(seg != segments) - { - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = 0; - } - wVerticeIndex ++; - } - else - { - if(seg != segments) - { - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = 1; - wVerticeIndex ++; - } - } - }; // end for seg - } // end for ring + auto mesh = graphics::util::generate_cylinder_mesh(segments); - deferred_light_cylinder_vcount = wVerticeIndex; + deferred_light_cylinder_vcount = static_cast(mesh.vertex_count); + deferred_light_cylinder_icount = mesh.index_count; glGetError(); @@ -775,17 +718,12 @@ void gr_opengl_deferred_light_cylinder_init(int segments) // Generate a VBO of a // make sure we have one if (deferred_light_cylinder_vbo) { glBindBuffer(GL_ARRAY_BUFFER, deferred_light_cylinder_vbo); - glBufferData(GL_ARRAY_BUFFER, nVertex 
* sizeof(float), Vertices, GL_STATIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, mesh.vertices.size() * sizeof(float), mesh.vertices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_cylinder_vbo); deferred_light_cylinder_vbo = 0; - - vm_free(Indices); - Indices = nullptr; - vm_free(Vertices); - Vertices = nullptr; return; } @@ -797,71 +735,25 @@ void gr_opengl_deferred_light_cylinder_init(int segments) // Generate a VBO of a // make sure we have one if (deferred_light_cylinder_ibo) { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, deferred_light_cylinder_ibo); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, nIndex * sizeof(ushort), Indices, GL_STATIC_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, mesh.indices.size() * sizeof(ushort), mesh.indices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_cylinder_ibo); deferred_light_cylinder_ibo = 0; - - vm_free(Indices); - Indices = nullptr; - vm_free(Vertices); - Vertices = nullptr; return; } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } - - vm_free(Indices); - Indices = nullptr; - vm_free(Vertices); - Vertices = nullptr; } -void gr_opengl_deferred_light_sphere_init(int rings, int segments) // Generate a VBO of a sphere of radius 1.0f, based on code at http://www.ogre3d.org/tikiwiki/ManualSphereMeshes +void gr_opengl_deferred_light_sphere_init(int rings, int segments) { - unsigned int nVertex = (rings + 1) * (segments+1) * 3; - unsigned int nIndex = deferred_light_sphere_icount = 6 * rings * (segments + 1); - float *Vertices = (float*)vm_malloc(sizeof(float) * nVertex); - float *pVertex = Vertices; - ushort *Indices = (ushort*)vm_malloc(sizeof(ushort) * nIndex); - ushort *pIndex = Indices; - - float fDeltaRingAngle = (PI / rings); - float fDeltaSegAngle = (2.0f * PI / segments); - unsigned short wVerticeIndex = 0 ; - - // Generate the group of rings for the sphere - for( int ring = 0; ring <= rings; ring++ ) { - float r0 = 
sinf (ring * fDeltaRingAngle); - float y0 = cosf (ring * fDeltaRingAngle); - - // Generate the group of segments for the current ring - for(int seg = 0; seg <= segments; seg++) { - float x0 = r0 * sinf(seg * fDeltaSegAngle); - float z0 = r0 * cosf(seg * fDeltaSegAngle); - - // Add one vertex to the strip which makes up the sphere - *pVertex++ = x0; - *pVertex++ = y0; - *pVertex++ = z0; - - if (ring != rings) { - // each vertex (except the last) has six indices pointing to it - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = wVerticeIndex + (ushort)segments; - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - wVerticeIndex ++; - } - }; // end for seg - } // end for ring + auto mesh = graphics::util::generate_sphere_mesh(rings, segments); - deferred_light_sphere_vcount = wVerticeIndex; + deferred_light_sphere_vcount = static_cast(mesh.vertex_count); + deferred_light_sphere_icount = mesh.index_count; glGetError(); @@ -870,17 +762,12 @@ void gr_opengl_deferred_light_sphere_init(int rings, int segments) // Generate a // make sure we have one if (deferred_light_sphere_vbo) { glBindBuffer(GL_ARRAY_BUFFER, deferred_light_sphere_vbo); - glBufferData(GL_ARRAY_BUFFER, nVertex * sizeof(float), Vertices, GL_STATIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, mesh.vertices.size() * sizeof(float), mesh.vertices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_sphere_vbo); deferred_light_sphere_vbo = 0; - - vm_free(Vertices); - Vertices = nullptr; - vm_free(Indices); - Indices = nullptr; return; } @@ -892,27 +779,17 @@ void gr_opengl_deferred_light_sphere_init(int rings, int segments) // Generate a // make sure we have one if (deferred_light_sphere_ibo) { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, deferred_light_sphere_ibo); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, nIndex * sizeof(ushort), Indices, GL_STATIC_DRAW); + 
glBufferData(GL_ELEMENT_ARRAY_BUFFER, mesh.indices.size() * sizeof(ushort), mesh.indices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_sphere_ibo); deferred_light_sphere_ibo = 0; - - vm_free(Vertices); - Vertices = nullptr; - vm_free(Indices); - Indices = nullptr; return; } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } - - vm_free(Vertices); - Vertices = nullptr; - vm_free(Indices); - Indices = nullptr; } void opengl_draw_sphere() diff --git a/code/graphics/opengl/gropenglpostprocessing.cpp b/code/graphics/opengl/gropenglpostprocessing.cpp index ea9b5c4fa80..0794705f150 100644 --- a/code/graphics/opengl/gropenglpostprocessing.cpp +++ b/code/graphics/opengl/gropenglpostprocessing.cpp @@ -12,6 +12,7 @@ #include "cmdline/cmdline.h" #include "def_files/def_files.h" +#include "graphics/shader_types.h" #include "graphics/grinternal.h" #include "graphics/openxr.h" #include "graphics/util/uniform_structs.h" @@ -786,46 +787,6 @@ void gr_opengl_post_process_restore_zbuffer() } } -static void set_fxaa_defines(SCP_stringstream& sflags) -{ - // Since we require OpenGL 3.2 we always have support for GLSL 130 - sflags << "#define FXAA_GLSL_120 0\n"; - sflags << "#define FXAA_GLSL_130 1\n"; - - if (GLSL_version >= 400) { - // The gather function became part of the standard with GLSL 4.00 - sflags << "#define FXAA_GATHER4_ALPHA 1\n"; - } - - switch (Gr_aa_mode) { - case AntiAliasMode::None: - sflags << "#define FXAA_QUALITY_PRESET 10\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/6.0)\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/12.0)\n"; - sflags << "#define FXAA_QUALITY_SUBPIX 0.33\n"; - break; - case AntiAliasMode::FXAA_Low: - sflags << "#define FXAA_QUALITY_PRESET 12\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/8.0)\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/16.0)\n"; - sflags << "#define FXAA_QUALITY_SUBPIX 0.33\n"; - break; - case 
AntiAliasMode::FXAA_Medium: - sflags << "#define FXAA_QUALITY_PRESET 26\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/12.0)\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/24.0)\n"; - sflags << "#define FXAA_QUALITY_SUBPIX 0.33\n"; - break; - case AntiAliasMode::FXAA_High: - sflags << "#define FXAA_QUALITY_PRESET 39\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/15.0)\n"; - sflags << "#define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/32.0)\n"; - sflags << "#define FXAA_QUALITY_SUBPIX 0.33\n"; - break; - default: - UNREACHABLE("Unhandled FXAA mode!"); - } -} void set_smaa_defines(SCP_stringstream& sflags) { // Define what GLSL version we use @@ -869,7 +830,7 @@ void opengl_post_shader_header(SCP_stringstream& sflags, shader_type shader_t, i snprintf(temp, 64, "#define SAMPLE_NUM %d\n", ls_params.samplenum); sflags << temp; } else if (shader_t == SDR_TYPE_POST_PROCESS_FXAA) { - set_fxaa_defines(sflags); + sflags << shader_get_fxaa_defines(Gr_aa_mode, GLSL_version >= 400); } else if (shader_t == SDR_TYPE_POST_PROCESS_SMAA_EDGE || shader_t == SDR_TYPE_POST_PROCESS_SMAA_BLENDING_WEIGHT || shader_t == SDR_TYPE_POST_PROCESS_SMAA_NEIGHBORHOOD_BLENDING) { set_smaa_defines(sflags); @@ -1218,4 +1179,4 @@ void opengl_post_process_shutdown() Post_active_shader_index = 0; Post_initialized = 0; -} \ No newline at end of file +} diff --git a/code/graphics/opengl/gropenglshader.cpp b/code/graphics/opengl/gropenglshader.cpp index 6baeb10bb6d..fd6c18ce253 100644 --- a/code/graphics/opengl/gropenglshader.cpp +++ b/code/graphics/opengl/gropenglshader.cpp @@ -12,6 +12,7 @@ #include "ShaderProgram.h" #include "cfile/cfile.h" +#include "graphics/shader_preprocess.h" #include "cmdline/cmdline.h" #include "def_files/def_files.h" #include "graphics/2d.h" @@ -84,141 +85,6 @@ opengl_uniform_block_binding GL_uniform_blocks[] = { * Static lookup reference for shader uniforms * When adding a new shader, list all associated uniforms and attributes here */ -// 
clang-format off -static opengl_shader_type_t GL_shader_types[] = { - { SDR_TYPE_MODEL, "main-v.sdr", "main-f.sdr", "main-g.sdr", - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD, opengl_vert_attrib::NORMAL, opengl_vert_attrib::TANGENT, opengl_vert_attrib::MODEL_ID }, "Model Rendering", false }, - - { SDR_TYPE_EFFECT_PARTICLE, "effect-v.sdr", "effect-f.sdr", "effect-g.sdr", - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD, opengl_vert_attrib::RADIUS, opengl_vert_attrib::COLOR }, "Particle Effects", false }, - - { SDR_TYPE_EFFECT_DISTORTION, "effect-distort-v.sdr", "effect-distort-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD, opengl_vert_attrib::RADIUS, opengl_vert_attrib::COLOR }, "Distortion Effects", false }, - - { SDR_TYPE_POST_PROCESS_MAIN, "post-v.sdr", "post-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Post Processing", false }, - - { SDR_TYPE_POST_PROCESS_BLUR, "post-v.sdr", "blur-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Gaussian Blur", false }, - - { SDR_TYPE_POST_PROCESS_BLOOM_COMP, "post-v.sdr", "bloom-comp-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Bloom Compositing", false }, - - { SDR_TYPE_POST_PROCESS_BRIGHTPASS, "post-v.sdr", "brightpass-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Bloom Brightpass", false }, - - { SDR_TYPE_POST_PROCESS_FXAA, "fxaa-v.sdr", "fxaa-f.sdr", 0, - { opengl_vert_attrib::POSITION }, "FXAA", false }, - - { SDR_TYPE_POST_PROCESS_FXAA_PREPASS, "post-v.sdr", "fxaapre-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "FXAA Prepass", false }, - - { SDR_TYPE_POST_PROCESS_LIGHTSHAFTS, "post-v.sdr", "ls-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Lightshafts", false }, - - { SDR_TYPE_POST_PROCESS_TONEMAPPING, "post-v.sdr", "tonemapping-f.sdr", 0, - { opengl_vert_attrib::POSITION, 
opengl_vert_attrib::TEXCOORD }, "Tonemapping", false }, - - { SDR_TYPE_DEFERRED_LIGHTING, "deferred-v.sdr", "deferred-f.sdr", 0, - { opengl_vert_attrib::POSITION }, "Deferred Lighting", false }, - - { SDR_TYPE_DEFERRED_CLEAR, "deferred-clear-v.sdr", "deferred-clear-f.sdr", 0, - { opengl_vert_attrib::POSITION }, "Clear Deferred Lighting Buffer", false }, - - { SDR_TYPE_VIDEO_PROCESS, "video-v.sdr", "video-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Video Playback", false }, - - { SDR_TYPE_PASSTHROUGH_RENDER, "passthrough-v.sdr", "passthrough-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD, opengl_vert_attrib::COLOR }, "Passthrough", false }, - - { SDR_TYPE_SHIELD_DECAL, "shield-impact-v.sdr", "shield-impact-f.sdr", 0, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::NORMAL }, "Shield Decals", false }, - - { SDR_TYPE_BATCHED_BITMAP, "batched-v.sdr", "batched-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD, opengl_vert_attrib::COLOR }, "Batched bitmaps", false }, - - { SDR_TYPE_DEFAULT_MATERIAL, "default-material.vert.spv.glsl", "default-material.frag.spv.glsl", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD, opengl_vert_attrib::COLOR }, "Default material", true }, - - { SDR_TYPE_NANOVG, "nanovg-v.sdr", "nanovg-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "NanoVG shader", false }, - - { SDR_TYPE_DECAL, "decal-v.sdr", "decal-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::MODEL_MATRIX }, "Decal rendering", false }, - - { SDR_TYPE_SCENE_FOG, "post-v.sdr", "fog-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Scene fogging", false }, - - { SDR_TYPE_VOLUMETRIC_FOG, "post-v.sdr", "volumetric-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Volumetric fogging", false }, - - { SDR_TYPE_ROCKET_UI, "rocketui-v.sdr", 
"rocketui-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::COLOR, opengl_vert_attrib::TEXCOORD }, "libRocket UI", false }, - - { SDR_TYPE_COPY, "post-v.sdr", "copy-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Texture copy", false }, - - { SDR_TYPE_COPY_WORLD, "passthrough-v.sdr", "copy-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Texture copy world space", false }, - - { SDR_TYPE_MSAA_RESOLVE, "post-v.sdr", "msaa-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "MSAA resolve shader", false }, - - { SDR_TYPE_POST_PROCESS_SMAA_EDGE, "smaa-edge-v.sdr", "smaa-edge-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "SMAA Edge detection", false }, - - { SDR_TYPE_POST_PROCESS_SMAA_BLENDING_WEIGHT, "smaa-blend-v.sdr", "smaa-blend-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "SMAA Blending weight calculation", false }, - - { SDR_TYPE_POST_PROCESS_SMAA_NEIGHBORHOOD_BLENDING, "smaa-neighbour-v.sdr", "smaa-neighbour-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "SMAA Neighborhood Blending", false }, - - { SDR_TYPE_ENVMAP_SPHERE_WARP, "post-v.sdr", "envmap-sphere-warp-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Environment Map Export", false }, - - { SDR_TYPE_IRRADIANCE_MAP_GEN, "post-v.sdr", "irrmap-f.sdr", nullptr, - { opengl_vert_attrib::POSITION, opengl_vert_attrib::TEXCOORD }, "Irradiance Map Generation", false }, -}; -// clang-format on - -/** - * Static lookup reference for shader variant uniforms - * When adding a new shader variant for a shader, list all associated uniforms and attributes here - */ -static opengl_shader_variant_t GL_shader_variants[] = { -//Model shader flags, both those set always as a compile flag as well as those that are usually runtime checks, need to be defined in this 
file -#define MODEL_SDR_FLAG_MODE_CPP_ARRAY -#include "def_files/data/effects/model_shader_flags.h" -#undef MODEL_SDR_FLAG_MODE_CPP_ARRAY - - {SDR_TYPE_EFFECT_PARTICLE, true, SDR_FLAG_PARTICLE_POINT_GEN, "FLAG_EFFECT_GEOMETRY", {opengl_vert_attrib::UVEC}, "Geometry shader point-based particles"}, - - {SDR_TYPE_DEFERRED_LIGHTING, false, SDR_FLAG_ENV_MAP, "ENV_MAP", {}, "Render ambient light with env and irrmaps"}, - - {SDR_TYPE_POST_PROCESS_BLUR, false, SDR_FLAG_BLUR_HORIZONTAL, "PASS_0", {}, "Horizontal blur pass"}, - - {SDR_TYPE_POST_PROCESS_BLUR, false, SDR_FLAG_BLUR_VERTICAL, "PASS_1", {}, "Vertical blur pass"}, - - {SDR_TYPE_NANOVG, false, SDR_FLAG_NANOVG_EDGE_AA, "EDGE_AA", {}, "NanoVG edge anti-alias"}, - - {SDR_TYPE_DECAL, false, SDR_FLAG_DECAL_USE_NORMAL_MAP, "USE_NORMAL_MAP", {}, "Decal use scene normal map"}, - - {SDR_TYPE_MSAA_RESOLVE, false, SDR_FLAG_MSAA_SAMPLES_4, "SAMPLES_4", {}, "Sets the MSAA resolve shader to 4 samples"}, - - {SDR_TYPE_MSAA_RESOLVE, false, SDR_FLAG_MSAA_SAMPLES_8, "SAMPLES_8", {}, "Sets the MSAA resolve shader to 8 samples"}, - - {SDR_TYPE_MSAA_RESOLVE, false, SDR_FLAG_MSAA_SAMPLES_16, "SAMPLES_16", {}, "Sets the MSAA resolve shader to 16 samples"}, - - {SDR_TYPE_VOLUMETRIC_FOG, false, SDR_FLAG_VOLUMETRICS_DO_EDGE_SMOOTHING, "DO_EDGE_SMOOTHING", {}, "Perform costly edge smoothing lookups"}, - - {SDR_TYPE_VOLUMETRIC_FOG, false, SDR_FLAG_VOLUMETRICS_NOISE, "NOISE", {}, "Add noise to volumetrics"}, - - {SDR_TYPE_COPY_WORLD, false, SDR_FLAG_COPY_FROM_ARRAY, "COPY_ARRAY", {}, "Expects to copy from an array texture"}, - - {SDR_TYPE_POST_PROCESS_TONEMAPPING, false, SDR_FLAG_TONEMAPPING_LINEAR_OUT, "LINEAR_OUT", {}, "Will make the tonemapper output in linear color space and not in sRGB"} -}; - -static const int GL_num_shader_variants = sizeof(GL_shader_variants) / sizeof(opengl_shader_variant_t); opengl_shader_t *Current_shader = NULL; @@ -312,6 +178,9 @@ static SCP_string opengl_shader_get_header(shader_type type_id, int flags, bool 
#else sflags << "#version " << GLSL_version << " es\n"; #endif + + sflags << "#define OPENGL\n"; + if (Detail.lighting < 3) { sflags << "#define FLAG_LIGHT_MODEL_BLINN_PHONG\n"; } @@ -328,13 +197,7 @@ static SCP_string opengl_shader_get_header(shader_type type_id, int flags, bool // ignore looking for variants. main post process, lightshafts, and FXAA shaders need special headers to be hacked in opengl_post_shader_header(sflags, type_id, flags); } else { - for (int i = 0; i < GL_num_shader_variants; ++i) { - opengl_shader_variant_t &variant = GL_shader_variants[i]; - - if (type_id == variant.type_id && flags & variant.flag) { - sflags << "#define " << variant.flag_text << "\n"; - } - } + sflags << shader_build_variant_defines(type_id, flags); } return sflags.str(); @@ -350,258 +213,17 @@ static SCP_string opengl_shader_get_header(shader_type type_id, int flags, bool * @param flags integer variable holding a combination of SDR_* flags * @return C-string holding the complete shader source code */ -static SCP_string opengl_load_shader(const char* filename) { - SCP_string content; - if (Enable_external_shaders) { - CFILE* cf_shader = cfopen(filename, "rt", CF_TYPE_EFFECTS); - - if (cf_shader != NULL) { - int len = cfilelength(cf_shader); - content.resize(len); - - cfread(&content[0], len + 1, 1, cf_shader); - cfclose(cf_shader); - - return content; - } - } - - //If we're still here, proceed with internals - nprintf(("shaders"," Loading built-in default shader for: %s\n", filename)); - auto def_shader = defaults_get_file(filename); - content.assign(reinterpret_cast(def_shader.data), def_shader.size); - - return content; -} - -static void handle_includes_impl(SCP_vector& include_stack, - SCP_stringstream& output, - int& include_counter, - const SCP_string& filename, - const SCP_string& original) { - include_stack.emplace_back(filename); - auto current_source_number = include_counter + 1; - - const char* INCLUDE_STRING = "#include"; - const char* 
CONDITIONAL_INCLUDE_STRING = "#conditional_include"; - SCP_stringstream input(original); - - int line_num = 1; - for (SCP_string line; std::getline(input, line);) { - auto include_start = line.find(CONDITIONAL_INCLUDE_STRING); - - if (include_start != SCP_string::npos) { - //This is a conditional include. Figure out whether to include, or whether not to. - // Conditional include syntax: #conditional_include (+|-)"capability" "filename" - // On +, include if capability is available, on -, include if not available - include_start += strlen(CONDITIONAL_INCLUDE_STRING) + 1; - bool require_capability = true; - - switch(line.at(include_start)) { - case '+': - require_capability = true; - break; - case '-': - require_capability = false; - break; - default: - Error(LOCATION, - "Shader %s:%d: Malformed conditional_include line. Expected + or -, got %c.", - filename.c_str(), - line_num, - line.at(include_start)); - break; - } - - auto first_quote = line.find('"', include_start); - auto second_quote = line.find('"', first_quote + 1); - - if (first_quote == SCP_string::npos || second_quote == SCP_string::npos) { - Error(LOCATION, - "Shader %s:%d: Malformed conditional_include line. Could not find both quote characters for capability.", - filename.c_str(), - line_num); - } - auto condition = line.substr(first_quote + 1, second_quote - first_quote - 1); - auto capability = std::find_if(&gr_capabilities[0], &gr_capabilities[gr_capabilities_num], - [condition](const gr_capability_def &ext_pair) { return !stricmp(ext_pair.parse_name, condition.c_str()); }); - if (capability == &gr_capabilities[gr_capabilities_num]) { - Error(LOCATION, - "Shader %s:%d: Malformed conditional_include line. Capability %s does not exist.", - filename.c_str(), - line_num, - condition.c_str()); - } - - //Prepare for including if capability is correct, skip otherwise. 
- if(gr_is_capable(capability->capability) == require_capability) - include_start = second_quote + 1 - strlen(INCLUDE_STRING); - else - include_start = SCP_string::npos - 1; - } - else { - //Only search for normal includes if it's not a conditional include. - include_start = line.find(INCLUDE_STRING); - } - - if (include_start != SCP_string::npos && include_start != SCP_string::npos - 1) { - auto first_quote = line.find('"', include_start + strlen(INCLUDE_STRING)); - auto second_quote = line.find('"', first_quote + 1); - - if (first_quote == SCP_string::npos || second_quote == SCP_string::npos) { - Error(LOCATION, - "Shader %s:%d: Malformed include line. Could not find both quote characters.", - filename.c_str(), - line_num); - } - - auto file_name = line.substr(first_quote + 1, second_quote - first_quote - 1); - auto existing_name = std::find_if(include_stack.begin(), include_stack.end(), [&file_name](const SCP_string& str) { - return str == file_name; - }); - if (existing_name != include_stack.end()) { - SCP_stringstream stack_string; - for (auto& name : include_stack) { - stack_string << "\t" << name << "\n"; - } - - Error(LOCATION, - "Shader %s:%d: Detected cyclic include! Previous includes (top level file first):\n%s", - filename.c_str(), - line_num, - stack_string.str().c_str()); - } - - ++include_counter; - // The second parameter defines which source string we are currently working with. 
We keep track of how many - // excludes have been in the file so far to specify this - output << "#line 1 " << include_counter + 1 << "\n"; - - handle_includes_impl(include_stack, - output, - include_counter, - file_name, - opengl_load_shader(file_name.c_str())); - - // We are done with the include file so now we can return to the original file - output << "#line " << line_num + 1 << " " << current_source_number << "\n"; - } else if (include_start != SCP_string::npos - 1) { - output << line << "\n"; - } - - ++line_num; - } - - include_stack.pop_back(); -} - -static SCP_string handle_includes(const char* filename, const SCP_string& original) { - SCP_stringstream output; - SCP_vector include_stack; - auto include_counter = 0; - - handle_includes_impl(include_stack, output, include_counter, filename, original); - - return output.str(); -} - -static SCP_string handle_predefines(const char* filename, const SCP_string& original){ - SCP_stringstream output; - SCP_unordered_map defines; - - //In any shader, define GLOBAL_FAR_Z - output << "#define GLOBAL_FAR_Z " << std::fixed << std::setprecision(2) << Max_draw_distance << std::defaultfloat << '\n'; - - const char* PREDEFINE_STRING = "#predefine"; - const char* PREREPLACE_STRING = "#prereplace"; - - SCP_stringstream input(original); - for (SCP_string line; std::getline(input, line);) { - auto predefine_start = line.find(PREDEFINE_STRING); - auto prereplace_start = line.find(PREREPLACE_STRING); - - if (predefine_start != SCP_string::npos){ - predefine_start += strlen(PREDEFINE_STRING); - - auto token_start = line.find(' ', predefine_start); - auto token_end = line.find(' ', token_start + 1); - - if (token_start == SCP_string::npos || token_end == SCP_string::npos) { - Error(LOCATION, - "Shader %s: Malformed predefine line. 
Could not find define token.", - filename); - } - - auto token = line.substr(token_start + 1, token_end - token_start - 1); - auto replaceWith = line.substr(token_end + 1); - - auto replaceStrToken = replaceWith.find("%s"); - if (replaceStrToken == SCP_string::npos || replaceWith.find("%s", replaceStrToken + 1) != SCP_string::npos){ - Error(LOCATION, - "Shader %s: Malformed predefine line. Replacing string must have exactly one %%s.", - filename); - } - if (defines.find(token) != defines.end()) { - Error(LOCATION, - "Shader %s: Malformed predefine line. Token %s is already defined.", - filename, - token.c_str()); - } - - defines.emplace(std::move(token), std::move(replaceWith)); - - output << "\n"; //At this point, don't mess with the linecount - } - else if (prereplace_start != SCP_string::npos){ - prereplace_start += strlen(PREREPLACE_STRING); - - auto token_start = line.find(' ', prereplace_start); - auto token_end = line.find(' ', token_start + 1); - - if (token_start == SCP_string::npos || token_end == SCP_string::npos) { - Error(LOCATION, - "Shader %s: Malformed prereplace line. Could not find define token.", - filename); - } - - auto token = line.substr(token_start + 1, token_end - token_start - 1); - auto replaceArg = line.substr(token_end + 1); - - auto replaceWithIt = defines.find(token); - if (replaceWithIt == defines.end()) { - Error(LOCATION, - "Shader %s: Malformed prereplace line. 
Could not find token %s.", - filename, - token.c_str()); - } - - size_t size = replaceWithIt->second.length() - 1 + replaceArg.size(); - std::unique_ptr buffer = make_unique(size); - - snprintf(buffer.get(), size, replaceWithIt->second.c_str(), replaceArg.c_str()); - buffer[size - 1] = '\0'; - - output << buffer.get() << "\n"; - } - else { - output << line << "\n"; - } - } - - return output.str(); -} - static SCP_vector opengl_get_shader_content(shader_type type_id, const char* filename, int flags, bool has_geo_shader, bool spirv_shader) { SCP_vector parts; if (spirv_shader) { // No need to add a header here or handle includes since the original compiler did that - parts.push_back(opengl_load_shader(filename)); + parts.push_back(shader_load_source(filename)); } else { parts.push_back(opengl_shader_get_header(type_id, flags, has_geo_shader)); - parts.push_back(handle_predefines(filename, handle_includes(filename, opengl_load_shader(filename)))); + parts.push_back(shader_preprocess_defines(filename, shader_preprocess_includes(filename, shader_load_source(filename)))); } return parts; @@ -812,9 +434,9 @@ static void opengl_set_default_uniforms(const opengl_shader_t& sdr) { void opengl_compile_shader_actual(shader_type sdr, const uint &flags, opengl_shader_t &new_shader) { - opengl_shader_type_t *sdr_info = &GL_shader_types[sdr]; + const ShaderTypeInfo *sdr_info = shader_get_type_info(sdr); - Assert(sdr_info->type_id == sdr); + Assert(sdr_info != nullptr); nprintf(("shaders","Compiling new shader:\n")); nprintf(("shaders"," %s\n", sdr_info->description)); @@ -822,27 +444,24 @@ void opengl_compile_shader_actual(shader_type sdr, const uint &flags, opengl_sha bool use_geo_sdr = false; // do we even have a geometry shader? 
- if (sdr_info->geo != NULL) { - for (int i = 0; i < GL_num_shader_variants; ++i) { - opengl_shader_variant_t *variant = &GL_shader_variants[i]; - - if (variant->type_id == sdr && flags & variant->flag && variant->use_geometry_sdr) { + if (sdr_info->geo != nullptr) { + shader_for_each_active_variant(sdr, flags, [&](const ShaderVariantInfo& variant) { + if (variant.use_geometry_sdr) { use_geo_sdr = true; - break; } - } + }); } auto vert_content = - opengl_get_shader_content(sdr_info->type_id, sdr_info->vert, flags, use_geo_sdr, sdr_info->spirv_shader); + opengl_get_shader_content(sdr, sdr_info->vert, flags, use_geo_sdr, sdr_info->spirv_shader); auto frag_content = - opengl_get_shader_content(sdr_info->type_id, sdr_info->frag, flags, use_geo_sdr, sdr_info->spirv_shader); + opengl_get_shader_content(sdr, sdr_info->frag, flags, use_geo_sdr, sdr_info->spirv_shader); SCP_vector geom_content; if (use_geo_sdr) { // read geometry shader geom_content = - opengl_get_shader_content(sdr_info->type_id, sdr_info->geo, flags, use_geo_sdr, sdr_info->spirv_shader); + opengl_get_shader_content(sdr, sdr_info->geo, flags, use_geo_sdr, sdr_info->spirv_shader); } auto shader_hash = get_shader_hash(vert_content, geom_content, frag_content); @@ -859,7 +478,7 @@ void opengl_compile_shader_actual(shader_type sdr, const uint &flags, opengl_sha for (size_t i = 0; i < GL_vertex_attrib_info.size(); ++i) { // Check that the enum values match the position in the vector to make accessing that information more efficient - Assertion(GL_vertex_attrib_info[i].attribute_id == (int)i, "Mistmatch between enum values and attribute vector detected!"); + Assertion(GL_vertex_attrib_info[i].attribute_id == i, "Mistmatch between enum values and attribute vector detected!"); // assign vert attribute binding locations before we link the shader glBindAttribLocation(program->getShaderHandle(), (GLint)i, GL_vertex_attrib_info[i].name.c_str()); @@ -887,7 +506,7 @@ void opengl_compile_shader_actual(shader_type sdr, 
const uint &flags, opengl_sha cache_program_binary(program->getShaderHandle(), shader_hash); } - new_shader.shader = sdr_info->type_id; + new_shader.shader = sdr; new_shader.flags = flags; new_shader.program = std::move(program); @@ -908,19 +527,13 @@ void opengl_compile_shader_actual(shader_type sdr, const uint &flags, opengl_sha nprintf(("shaders","Shader Variant Features:\n")); - // initialize all uniforms and attributes that are specific to this variant - for (int i = 0; i < GL_num_shader_variants; ++i) { - opengl_shader_variant_t &variant = GL_shader_variants[i]; - - if (sdr_info->type_id == variant.type_id && variant.flag & flags) { - for (auto& attr : variant.attributes) { - auto& attr_info = GL_vertex_attrib_info[attr]; - new_shader.program->initAttribute(attr_info.name, attr_info.default_value); - } - - nprintf(("shaders"," %s\n", variant.description)); + // initialize extra attributes from active variants + shader_for_each_active_variant(sdr, flags, [&](const ShaderVariantInfo& v) { + for (auto& attr : v.attributes) { + new_shader.program->initAttribute(GL_vertex_attrib_info[attr].name, GL_vertex_attrib_info[attr].default_value); } - } + nprintf(("shaders"," %s\n", v.description)); + }); opengl_set_default_uniforms(new_shader); } @@ -1105,8 +718,8 @@ void opengl_shader_set_default_material(bool textured, bool alpha, vec4* clr, fl { Current_shader->program->Uniforms.setTextureUniform("baseMap", 0); - opengl_set_generic_uniform_data( - [=](genericData_default_material_vert* data) { + opengl_set_generic_uniform_data( + [=](genericData_default_material_v_sdr* data) { if (textured) { data->noTexturing = 0; data->baseMapIndex = array_index; diff --git a/code/graphics/opengl/gropenglshader.h b/code/graphics/opengl/gropenglshader.h index 6df7875f554..8887ba9614f 100644 --- a/code/graphics/opengl/gropenglshader.h +++ b/code/graphics/opengl/gropenglshader.h @@ -12,6 +12,7 @@ #include "globalincs/pstypes.h" #include "graphics/2d.h" +#include 
"graphics/shader_types.h" #include "graphics/material.h" #include "graphics/opengl/gropengl.h" #include "graphics/util/UniformBuffer.h" @@ -26,18 +27,19 @@ class ShaderProgram; enum shader_stage { SDR_STAGE_VERTEX, SDR_STAGE_FRAGMENT, SDR_STAGE_GEOMETRY }; struct opengl_vert_attrib { - enum attrib_id { - POSITION, - COLOR, - TEXCOORD, - NORMAL, - TANGENT, - MODEL_ID, - RADIUS, - UVEC, - MODEL_MATRIX, - NUM_ATTRIBS, - }; + // Attribute location enum — aliases the shared VertexAttributeLocation enum + // from shader_types.h. + using attrib_id = VertexAttributeLocation; + static constexpr attrib_id POSITION = VATTRIB_POSITION; + static constexpr attrib_id COLOR = VATTRIB_COLOR; + static constexpr attrib_id TEXCOORD = VATTRIB_TEXCOORD; + static constexpr attrib_id NORMAL = VATTRIB_NORMAL; + static constexpr attrib_id TANGENT = VATTRIB_TANGENT; + static constexpr attrib_id MODEL_ID = VATTRIB_MODELID; + static constexpr attrib_id RADIUS = VATTRIB_RADIUS; + static constexpr attrib_id UVEC = VATTRIB_UVEC; + static constexpr attrib_id MODEL_MATRIX = VATTRIB_MODEL_MATRIX; + static constexpr attrib_id NUM_ATTRIBS = NUM_VERTEX_ATTRIBS; attrib_id attribute_id; SCP_string name; @@ -60,33 +62,6 @@ struct geometry_sdr_params int vertices_out; }; -struct opengl_shader_type_t { - shader_type type_id; - - const char *vert; - const char *frag; - const char *geo; - - SCP_vector attributes; - - const char* description; - - bool spirv_shader; // Specified if this shader was generated from SPIR-V -}; - -struct opengl_shader_variant_t { - shader_type type_id; - - bool use_geometry_sdr; - - int flag; - SCP_string flag_text; - - SCP_vector attributes; - - const char* description; -}; - struct opengl_shader_file_t { const char *vert; const char *frag; diff --git a/code/graphics/opengl/gropengltexture.cpp b/code/graphics/opengl/gropengltexture.cpp index 26dfa57fd9b..ff521da6945 100644 --- a/code/graphics/opengl/gropengltexture.cpp +++ b/code/graphics/opengl/gropengltexture.cpp @@ -421,30 
+421,26 @@ static int opengl_texture_set_level(int bitmap_handle, int bitmap_type, int bmap } // check for compressed image types - auto block_size = 0; - auto bm_handle = bm_is_compressed(bitmap_handle); - switch (bm_handle) { + auto bm_type = bm_is_compressed(bitmap_handle); + auto block_size = dds_block_size(bm_type); + switch (bm_type) { case DDS_DXT1: case DDS_CUBEMAP_DXT1: intFormat = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; - block_size = 8; break; case DDS_DXT3: case DDS_CUBEMAP_DXT3: intFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; - block_size = 16; break; case DDS_DXT5: case DDS_CUBEMAP_DXT5: intFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; - block_size = 16; break; case DDS_BC7: intFormat = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB; - block_size = 16; break; case KTX_ETC2_RGB: @@ -453,7 +449,7 @@ static int opengl_texture_set_level(int bitmap_handle, int bitmap_type, int bmap case KTX_ETC2_SRGBA_EAC: case KTX_ETC2_RGB_A1: case KTX_ETC2_SRGB_A1: - intFormat = ktx_map_ktx_format_to_gl_internal(bm_handle); + intFormat = ktx_map_ktx_format_to_gl_internal(bm_type); block_size = ktx_etc_block_size(intFormat); break; } @@ -474,8 +470,7 @@ static int opengl_texture_set_level(int bitmap_handle, int bitmap_type, int bmap auto mipmap_h = bmap_h; for (auto i = 0; i < mipmap_levels + base_level; i++) { - // size of data block (4x4) - dsize = ((mipmap_h + 3) / 4) * ((mipmap_w + 3) / 4) * block_size; + dsize = static_cast(dds_compressed_mip_size(mipmap_w, mipmap_h, block_size)); if (i >= base_level) { glCompressedTexSubImage3D(tSlot->texture_target, i - base_level, 0, 0, tSlot->array_index, mipmap_w, @@ -601,8 +596,7 @@ static int opengl_texture_set_level(int bitmap_handle, int bitmap_type, int bmap // check if it's a compressed cubemap first if (block_size > 0) { for (auto level = 0; level < mipmap_levels + base_level; level++) { - // size of data block (4x4) - dsize = ((mipmap_h + 3) / 4) * ((mipmap_w + 3) / 4) * block_size; + dsize = static_cast(dds_compressed_mip_size(mipmap_w, 
mipmap_h, block_size)); if (level >= base_level) { // We skipped ahead to the base level so we can start uploading frames now diff --git a/code/graphics/opengl/gropengltnl.cpp b/code/graphics/opengl/gropengltnl.cpp index 9ed9f6d6a8c..649531b1d32 100644 --- a/code/graphics/opengl/gropengltnl.cpp +++ b/code/graphics/opengl/gropengltnl.cpp @@ -77,7 +77,6 @@ static opengl_vertex_bind GL_array_binding_data[] = { vertex_format_data::POSITION4, 4, GL_FLOAT, GL_FALSE, opengl_vert_attrib::POSITION }, { vertex_format_data::POSITION3, 3, GL_FLOAT, GL_FALSE, opengl_vert_attrib::POSITION }, { vertex_format_data::POSITION2, 2, GL_FLOAT, GL_FALSE, opengl_vert_attrib::POSITION }, - { vertex_format_data::SCREEN_POS, 2, GL_INT, GL_FALSE, opengl_vert_attrib::POSITION }, { vertex_format_data::COLOR3, 3, GL_UNSIGNED_BYTE, GL_TRUE, opengl_vert_attrib::COLOR }, { vertex_format_data::COLOR4, 4, GL_UNSIGNED_BYTE, GL_TRUE, opengl_vert_attrib::COLOR }, { vertex_format_data::COLOR4F, 4, GL_FLOAT, GL_FALSE, opengl_vert_attrib::COLOR }, @@ -971,11 +970,6 @@ void opengl_tnl_set_model_material(model_material *material_info) if ( material_info->is_batched() ) { GL_state.Texture.Enable(10, GL_TEXTURE_BUFFER, opengl_get_transform_buffer_texture()); } - - if ( Deferred_lighting ) { - // don't blend if we're drawing to the g-buffers - GL_state.SetAlphaBlendMode(ALPHA_BLEND_NONE); - } } void opengl_tnl_set_material_particle(particle_material * material_info) @@ -1300,4 +1294,4 @@ void opengl_bind_vertex_layout_multiple(vertex_layout &layout, const SCP_vector< i++; } GL_state.Array.BindElementBuffer(indexBuffer); -} \ No newline at end of file +} diff --git a/code/graphics/render.cpp b/code/graphics/render.cpp index d99dc080cef..0b1a8cda0ca 100644 --- a/code/graphics/render.cpp +++ b/code/graphics/render.cpp @@ -36,13 +36,14 @@ static void gr_flash_internal(int r, int g, int b, int a, bool alpha_flash) render_material.set_blend_mode(ALPHA_BLEND_ALPHA_ADDITIVE); } - int glVertices[8] = { x1, y1, x1, y2, 
x2, y1, x2, y2 }; + float glVertices[8] = { (float)x1, (float)y1, (float)x1, (float)y2, + (float)x2, (float)y1, (float)x2, (float)y2 }; vertex_layout vert_def; - vert_def.add_vertex_component(vertex_format_data::SCREEN_POS, sizeof(int) * 2, 0); + vert_def.add_vertex_component(vertex_format_data::POSITION2, sizeof(float) * 2, 0); - gr_render_primitives_2d_immediate(&render_material, PRIM_TYPE_TRISTRIP, &vert_def, 4, glVertices, sizeof(int) * 8); + gr_render_primitives_2d_immediate(&render_material, PRIM_TYPE_TRISTRIP, &vert_def, 4, glVertices, sizeof(float) * 8); } void gr_flash(int r, int g, int b) { diff --git a/code/graphics/shader_preprocess.cpp b/code/graphics/shader_preprocess.cpp new file mode 100644 index 00000000000..b86e33297c5 --- /dev/null +++ b/code/graphics/shader_preprocess.cpp @@ -0,0 +1,229 @@ +#include "graphics/shader_preprocess.h" + +#include "cfile/cfile.h" +#include "def_files/def_files.h" +#include "graphics/2d.h" +#include "mod_table/mod_table.h" + +static void handle_includes_impl(SCP_vector& include_stack, + SCP_stringstream& output, + int& include_counter, + const SCP_string& filename, + const SCP_string& original) +{ + include_stack.emplace_back(filename); + auto current_source_number = include_counter + 1; + + const char* INCLUDE_STRING = "#include"; + const char* CONDITIONAL_INCLUDE_STRING = "#conditional_include"; + SCP_stringstream input(original); + + int line_num = 1; + for (SCP_string line; std::getline(input, line);) { + auto include_start = line.find(CONDITIONAL_INCLUDE_STRING); + + if (include_start != SCP_string::npos) { + include_start += strlen(CONDITIONAL_INCLUDE_STRING) + 1; + bool require_capability = true; + + switch (line.at(include_start)) { + case '+': + require_capability = true; + break; + case '-': + require_capability = false; + break; + default: + Error(LOCATION, + "Shader %s:%d: Malformed conditional_include line. 
Expected + or -, got %c.", + filename.c_str(), line_num, line.at(include_start)); + break; + } + + auto first_quote = line.find('"', include_start); + auto second_quote = line.find('"', first_quote + 1); + + if (first_quote == SCP_string::npos || second_quote == SCP_string::npos) { + Error(LOCATION, + "Shader %s:%d: Malformed conditional_include line. Could not find both quote characters for capability.", + filename.c_str(), line_num); + } + auto condition = line.substr(first_quote + 1, second_quote - first_quote - 1); + auto capability = std::find_if(&gr_capabilities[0], &gr_capabilities[gr_capabilities_num], + [condition](const gr_capability_def& ext_pair) { + return !stricmp(ext_pair.parse_name, condition.c_str()); + }); + if (capability == &gr_capabilities[gr_capabilities_num]) { + Error(LOCATION, + "Shader %s:%d: Malformed conditional_include line. Capability %s does not exist.", + filename.c_str(), line_num, condition.c_str()); + } + + if (gr_is_capable(capability->capability) == require_capability) + include_start = second_quote + 1 - strlen(INCLUDE_STRING); + else + include_start = SCP_string::npos - 1; + } else { + include_start = line.find(INCLUDE_STRING); + } + + if (include_start != SCP_string::npos && include_start != SCP_string::npos - 1) { + auto first_quote = line.find('"', include_start + strlen(INCLUDE_STRING)); + auto second_quote = line.find('"', first_quote + 1); + + if (first_quote == SCP_string::npos || second_quote == SCP_string::npos) { + Error(LOCATION, + "Shader %s:%d: Malformed include line. 
Could not find both quote characters.", + filename.c_str(), line_num); + } + + auto file_name = line.substr(first_quote + 1, second_quote - first_quote - 1); + auto existing_name = + std::find_if(include_stack.begin(), include_stack.end(), + [&file_name](const SCP_string& str) { return str == file_name; }); + if (existing_name != include_stack.end()) { + SCP_stringstream stack_string; + for (auto& name : include_stack) { + stack_string << "\t" << name << "\n"; + } + + Error(LOCATION, + "Shader %s:%d: Detected cyclic include! Previous includes (top level file first):\n%s", + filename.c_str(), line_num, stack_string.str().c_str()); + } + + ++include_counter; + output << "#line 1 " << include_counter + 1 << "\n"; + + handle_includes_impl(include_stack, output, include_counter, file_name, + shader_load_source(file_name)); + + output << "#line " << line_num + 1 << " " << current_source_number << "\n"; + } else if (include_start != SCP_string::npos - 1) { + output << line << "\n"; + } + + ++line_num; + } + + include_stack.pop_back(); +} + +SCP_string shader_load_source(const SCP_string& filename) +{ + SCP_string content; + + // Check external shaders first (modding support) + if (Enable_external_shaders) { + CFILE* cf_shader = cfopen(filename.c_str(), "rt", CF_TYPE_EFFECTS); + if (cf_shader != nullptr) { + int len = cfilelength(cf_shader); + content.resize(len); + cfread(content.data(), len + 1, 1, cf_shader); + cfclose(cf_shader); + return content; + } + } + + // Fall back to embedded defaults + auto def_shader = defaults_get_file(filename.c_str()); + if (def_shader.data != nullptr && def_shader.size > 0) { + content.assign(reinterpret_cast(def_shader.data), def_shader.size); + } else { + mprintf(("shader_load_source: Could not load shader source: %s\n", filename.c_str())); + } + + return content; +} + +SCP_string shader_preprocess_includes(const SCP_string& filename, const SCP_string& source) +{ + SCP_stringstream output; + SCP_vector include_stack; + auto 
include_counter = 0; + + handle_includes_impl(include_stack, output, include_counter, filename, source); + + return output.str(); +} + +SCP_string shader_preprocess_defines(const SCP_string& filename, const SCP_string& source) +{ + SCP_stringstream output; + SCP_unordered_map defines; + + //In any shader, define GLOBAL_FAR_Z + output << "#define GLOBAL_FAR_Z " << std::fixed << std::setprecision(2) << Max_draw_distance << std::defaultfloat << '\n'; + + const char* PREDEFINE_STRING = "#predefine"; + const char* PREREPLACE_STRING = "#prereplace"; + + SCP_stringstream input(source); + for (SCP_string line; std::getline(input, line);) { + auto predefine_start = line.find(PREDEFINE_STRING); + auto prereplace_start = line.find(PREREPLACE_STRING); + + if (predefine_start != SCP_string::npos) { + predefine_start += strlen(PREDEFINE_STRING); + + auto token_start = line.find(' ', predefine_start); + auto token_end = line.find(' ', token_start + 1); + + if (token_start == SCP_string::npos || token_end == SCP_string::npos) { + Error(LOCATION, "Shader %s: Malformed predefine line. Could not find define token.", + filename.c_str()); + } + + auto token = line.substr(token_start + 1, token_end - token_start - 1); + auto replaceWith = line.substr(token_end + 1); + + auto replaceStrToken = replaceWith.find("%s"); + if (replaceStrToken == SCP_string::npos || + replaceWith.find("%s", replaceStrToken + 1) != SCP_string::npos) { + Error(LOCATION, + "Shader %s: Malformed predefine line. Replacing string must have exactly one %%s.", + filename.c_str()); + } + if (defines.find(token) != defines.end()) { + Error(LOCATION, + "Shader %s: Malformed predefine line. 
Token %s is already defined.", + filename.c_str(), token.c_str()); + } + + defines.emplace(std::move(token), std::move(replaceWith)); + + output << "\n"; // Preserve line count + } else if (prereplace_start != SCP_string::npos) { + prereplace_start += strlen(PREREPLACE_STRING); + + auto token_start = line.find(' ', prereplace_start); + auto token_end = line.find(' ', token_start + 1); + + if (token_start == SCP_string::npos || token_end == SCP_string::npos) { + Error(LOCATION, "Shader %s: Malformed prereplace line. Could not find define token.", + filename.c_str()); + } + + auto token = line.substr(token_start + 1, token_end - token_start - 1); + auto replaceArg = line.substr(token_end + 1); + + auto replaceWithIt = defines.find(token); + if (replaceWithIt == defines.end()) { + Error(LOCATION, "Shader %s: Malformed prereplace line. Could not find token %s.", + filename.c_str(), token.c_str()); + } + + size_t size = replaceWithIt->second.length() - 1 + replaceArg.size(); + std::unique_ptr buffer = make_unique(size); + + snprintf(buffer.get(), size, replaceWithIt->second.c_str(), replaceArg.c_str()); + buffer[size - 1] = '\0'; + + output << buffer.get() << "\n"; + } else { + output << line << "\n"; + } + } + + return output.str(); +} diff --git a/code/graphics/shader_preprocess.h b/code/graphics/shader_preprocess.h new file mode 100644 index 00000000000..38bd876a611 --- /dev/null +++ b/code/graphics/shader_preprocess.h @@ -0,0 +1,7 @@ +#pragma once + +#include "globalincs/pstypes.h" + +SCP_string shader_load_source(const SCP_string& filename); +SCP_string shader_preprocess_includes(const SCP_string& filename, const SCP_string& source); +SCP_string shader_preprocess_defines(const SCP_string& filename, const SCP_string& source); diff --git a/code/graphics/shader_types.cpp b/code/graphics/shader_types.cpp new file mode 100644 index 00000000000..8cd7223ce8c --- /dev/null +++ b/code/graphics/shader_types.cpp @@ -0,0 +1,225 @@ +#include "graphics/shader_types.h" + +// Pull 
in MODEL_SDR_FLAG_* constants for the variant table +#define MODEL_SDR_FLAG_MODE_CPP +#include "def_files/data/effects/model_shader_flags.h" +#undef MODEL_SDR_FLAG_MODE_CPP +#undef SDR_FLAG + +// ========== Shared shader type table ========== +// Moved from gropenglshader.cpp — single source of truth for both backends. +// clang-format off +static ShaderTypeInfo SHADER_TYPES[] = { + { SDR_TYPE_MODEL, "main-v.sdr", "main-f.sdr", "main-g.sdr", + { VATTRIB_POSITION, VATTRIB_TEXCOORD, VATTRIB_NORMAL, VATTRIB_TANGENT, VATTRIB_MODELID }, "Model Rendering", false }, + + { SDR_TYPE_EFFECT_PARTICLE, "effect-v.sdr", "effect-f.sdr", "effect-g.sdr", + { VATTRIB_POSITION, VATTRIB_TEXCOORD, VATTRIB_RADIUS, VATTRIB_COLOR }, "Particle Effects", false }, + + { SDR_TYPE_EFFECT_DISTORTION, "effect-distort-v.sdr", "effect-distort-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD, VATTRIB_RADIUS, VATTRIB_COLOR }, "Distortion Effects", false }, + + { SDR_TYPE_POST_PROCESS_MAIN, "post-v.sdr", "post-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Post Processing", false }, + + { SDR_TYPE_POST_PROCESS_BLUR, "post-v.sdr", "blur-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Gaussian Blur", false }, + + { SDR_TYPE_POST_PROCESS_BLOOM_COMP, "post-v.sdr", "bloom-comp-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Bloom Compositing", false }, + + { SDR_TYPE_POST_PROCESS_BRIGHTPASS, "post-v.sdr", "brightpass-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Bloom Brightpass", false }, + + { SDR_TYPE_POST_PROCESS_FXAA, "fxaa-v.sdr", "fxaa-f.sdr", nullptr, + { VATTRIB_POSITION }, "FXAA", false }, + + { SDR_TYPE_POST_PROCESS_FXAA_PREPASS, "post-v.sdr", "fxaapre-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "FXAA Prepass", false }, + + { SDR_TYPE_POST_PROCESS_LIGHTSHAFTS, "post-v.sdr", "ls-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Lightshafts", false }, + + { SDR_TYPE_POST_PROCESS_TONEMAPPING, "post-v.sdr", 
"tonemapping-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Tonemapping", false }, + + { SDR_TYPE_DEFERRED_LIGHTING, "deferred-v.sdr", "deferred-f.sdr", nullptr, + { VATTRIB_POSITION }, "Deferred Lighting", false }, + + { SDR_TYPE_DEFERRED_CLEAR, "deferred-clear-v.sdr", "deferred-clear-f.sdr", nullptr, + { VATTRIB_POSITION }, "Clear Deferred Lighting Buffer", false }, + + { SDR_TYPE_VIDEO_PROCESS, "video-v.sdr", "video-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Video Playback", false }, + + { SDR_TYPE_PASSTHROUGH_RENDER, "passthrough-v.sdr", "passthrough-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD, VATTRIB_COLOR }, "Passthrough", false }, + + { SDR_TYPE_SHIELD_DECAL, "shield-impact-v.sdr", "shield-impact-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_NORMAL }, "Shield Decals", false }, + + { SDR_TYPE_BATCHED_BITMAP, "batched-v.sdr", "batched-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD, VATTRIB_COLOR }, "Batched bitmaps", false }, + + { SDR_TYPE_DEFAULT_MATERIAL, "default-material-v.sdr", "default-material-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD, VATTRIB_COLOR }, "Default material", false }, + + { SDR_TYPE_NANOVG, "nanovg-v.sdr", "nanovg-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "NanoVG shader", false }, + + { SDR_TYPE_DECAL, "decal-v.sdr", "decal-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_MODEL_MATRIX }, "Decal rendering", false }, + + { SDR_TYPE_SCENE_FOG, "post-v.sdr", "fog-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Scene fogging", false }, + + { SDR_TYPE_VOLUMETRIC_FOG, "post-v.sdr", "volumetric-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Volumetric fogging", false }, + + { SDR_TYPE_ROCKET_UI, "rocketui-v.sdr", "rocketui-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_COLOR, VATTRIB_TEXCOORD }, "libRocket UI", false }, + + { SDR_TYPE_COPY, "post-v.sdr", "copy-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Texture copy", false }, 
+ + { SDR_TYPE_COPY_WORLD, "passthrough-v.sdr", "copy-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Texture copy world space", false }, + + { SDR_TYPE_MSAA_RESOLVE, "post-v.sdr", "msaa-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "MSAA resolve shader", false }, + + { SDR_TYPE_POST_PROCESS_SMAA_EDGE, "smaa-edge-v.sdr", "smaa-edge-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "SMAA Edge detection", false }, + + { SDR_TYPE_POST_PROCESS_SMAA_BLENDING_WEIGHT, "smaa-blend-v.sdr", "smaa-blend-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "SMAA Blending weight calculation", false }, + + { SDR_TYPE_POST_PROCESS_SMAA_NEIGHBORHOOD_BLENDING, "smaa-neighbour-v.sdr", "smaa-neighbour-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "SMAA Neighborhood Blending", false }, + + { SDR_TYPE_ENVMAP_SPHERE_WARP, "post-v.sdr", "envmap-sphere-warp-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Environment Map Export", false }, + + { SDR_TYPE_IRRADIANCE_MAP_GEN, "post-v.sdr", "irrmap-f.sdr", nullptr, + { VATTRIB_POSITION, VATTRIB_TEXCOORD }, "Irradiance Map Generation", false }, +}; +// clang-format on + +// ========== Shared shader variant table ========== +// Moved from gropenglshader.cpp — single source of truth for both backends. +// MODEL flags come from model_shader_flags.h; others from 2d.h defines. 
+static ShaderVariantInfo SHADER_VARIANTS[] = { +#define MODEL_SDR_FLAG_MODE_CPP_ARRAY +#include "def_files/data/effects/model_shader_flags.h" +#undef MODEL_SDR_FLAG_MODE_CPP_ARRAY + + {SDR_TYPE_EFFECT_PARTICLE, true, SDR_FLAG_PARTICLE_POINT_GEN, "FLAG_EFFECT_GEOMETRY", {VATTRIB_UVEC}, "Geometry shader point-based particles"}, + + {SDR_TYPE_DEFERRED_LIGHTING, false, SDR_FLAG_ENV_MAP, "ENV_MAP", {}, "Render ambient light with env and irrmaps"}, + + {SDR_TYPE_POST_PROCESS_BLUR, false, SDR_FLAG_BLUR_HORIZONTAL, "PASS_0", {}, "Horizontal blur pass"}, + + {SDR_TYPE_POST_PROCESS_BLUR, false, SDR_FLAG_BLUR_VERTICAL, "PASS_1", {}, "Vertical blur pass"}, + + {SDR_TYPE_NANOVG, false, SDR_FLAG_NANOVG_EDGE_AA, "EDGE_AA", {}, "NanoVG edge anti-alias"}, + + {SDR_TYPE_DECAL, false, SDR_FLAG_DECAL_USE_NORMAL_MAP, "USE_NORMAL_MAP", {}, "Decal use scene normal map"}, + + {SDR_TYPE_MSAA_RESOLVE, false, SDR_FLAG_MSAA_SAMPLES_4, "SAMPLES_4", {}, "Sets the MSAA resolve shader to 4 samples"}, + + {SDR_TYPE_MSAA_RESOLVE, false, SDR_FLAG_MSAA_SAMPLES_8, "SAMPLES_8", {}, "Sets the MSAA resolve shader to 8 samples"}, + + {SDR_TYPE_MSAA_RESOLVE, false, SDR_FLAG_MSAA_SAMPLES_16, "SAMPLES_16", {}, "Sets the MSAA resolve shader to 16 samples"}, + + {SDR_TYPE_VOLUMETRIC_FOG, false, SDR_FLAG_VOLUMETRICS_DO_EDGE_SMOOTHING, "DO_EDGE_SMOOTHING", {}, "Perform costly edge smoothing lookups"}, + + {SDR_TYPE_VOLUMETRIC_FOG, false, SDR_FLAG_VOLUMETRICS_NOISE, "NOISE", {}, "Add noise to volumetrics"}, + + {SDR_TYPE_COPY_WORLD, false, SDR_FLAG_COPY_FROM_ARRAY, "COPY_ARRAY", {}, "Expects to copy from an array texture"}, + + {SDR_TYPE_POST_PROCESS_TONEMAPPING, false, SDR_FLAG_TONEMAPPING_LINEAR_OUT, "LINEAR_OUT", {}, "Will make the tonemapper output in linear color space and not in sRGB"} +}; + +const ShaderTypeInfo* shader_get_type_info(shader_type type) +{ + for (auto & i : SHADER_TYPES) { + if (i.type_id == type) { + return &i; + } + } + return nullptr; +} + +const ShaderVariantInfo* 
shader_get_variant_info(shader_type type, int flag) +{ + for (auto & i : SHADER_VARIANTS) { + if (i.type_id == type && i.flag == flag) { + return &i; + } + } + return nullptr; +} + +void shader_for_each_active_variant(shader_type type, unsigned int flags, const std::function& fn) +{ + for (auto & i : SHADER_VARIANTS) { + if (i.type_id == type && (flags & i.flag)) { + fn(i); + } + } +} + +SCP_string shader_build_variant_defines(shader_type type, unsigned int flags) +{ + SCP_string header; + shader_for_each_active_variant(type, flags, [&](const ShaderVariantInfo& v) { + header += "#define "; + header += v.flag_text; + header += "\n"; + }); + return header; +} + +SCP_string shader_get_fxaa_defines(AntiAliasMode aa_mode, bool gather4_alpha) +{ + SCP_string defines; + defines.reserve(256); + + defines += "#define FXAA_GLSL_120 0\n"; + defines += "#define FXAA_GLSL_130 1\n"; + + if (gather4_alpha) { + defines += "#define FXAA_GATHER4_ALPHA 1\n"; + } + + switch (aa_mode) { + case AntiAliasMode::None: + defines += "#define FXAA_QUALITY_PRESET 10\n"; + defines += "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/6.0)\n"; + defines += "#define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/12.0)\n"; + defines += "#define FXAA_QUALITY_SUBPIX 0.33\n"; + break; + case AntiAliasMode::FXAA_Low: + defines += "#define FXAA_QUALITY_PRESET 12\n"; + defines += "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/8.0)\n"; + defines += "#define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/16.0)\n"; + defines += "#define FXAA_QUALITY_SUBPIX 0.33\n"; + break; + case AntiAliasMode::FXAA_Medium: + defines += "#define FXAA_QUALITY_PRESET 26\n"; + defines += "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/12.0)\n"; + defines += "#define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/24.0)\n"; + defines += "#define FXAA_QUALITY_SUBPIX 0.33\n"; + break; + case AntiAliasMode::FXAA_High: + defines += "#define FXAA_QUALITY_PRESET 39\n"; + defines += "#define FXAA_QUALITY_EDGE_THRESHOLD (1.0/15.0)\n"; + defines += "#define 
FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/32.0)\n"; + defines += "#define FXAA_QUALITY_SUBPIX 0.33\n"; + break; + default: + UNREACHABLE("Unhandled FXAA mode!"); + } + + return defines; +} diff --git a/code/graphics/shader_types.h b/code/graphics/shader_types.h new file mode 100644 index 00000000000..04e49d47d39 --- /dev/null +++ b/code/graphics/shader_types.h @@ -0,0 +1,66 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" + +// Shared vertex attribute locations. +// Both backends (OpenGL and Vulkan) use these same location numbers +// for vertex shader inputs. +enum VertexAttributeLocation : uint32_t { + VATTRIB_POSITION = 0, // vec2, vec3, or vec4 + VATTRIB_COLOR = 1, // vec3/vec4 (normalized u8 or float) + VATTRIB_TEXCOORD = 2, // vec2 or vec4 + VATTRIB_NORMAL = 3, // vec3 + VATTRIB_TANGENT = 4, // vec4 + VATTRIB_MODELID = 5, // float + VATTRIB_RADIUS = 6, // float + VATTRIB_UVEC = 7, // vec3 + VATTRIB_MODEL_MATRIX = 8, // Occupies locations 8-11 + + NUM_VERTEX_ATTRIBS = 9, // counts the enumerators above, not locations (MODEL_MATRIX alone spans 8-11) - NOTE(review): confirm nothing sizes per-location storage with this +}; + +// Shared shader type info — matches the original opengl_shader_type_t layout. +struct ShaderTypeInfo { + shader_type type_id; + + const char *vert; + const char *frag; + const char *geo; // nullptr in the type table for shaders that list no geometry stage + + SCP_vector attributes; + + const char* description; + + bool spirv_shader; +}; + +// Shared shader variant info — matches the original opengl_shader_variant_t layout. +struct ShaderVariantInfo { + shader_type type_id; + + bool use_geometry_sdr; + + int flag; // compared with == by shader_get_variant_info, tested with & by shader_for_each_active_variant + SCP_string flag_text; + + SCP_vector attributes; + + const char* description; +}; + +// Lookup helper: find type info by shader_type. Returns nullptr if not found. +const ShaderTypeInfo* shader_get_type_info(shader_type type); + +// Lookup helper: find variant info by (type, flag). Returns nullptr if not found. +const ShaderVariantInfo* shader_get_variant_info(shader_type type, int flag); + +// Iterate all variants matching type whose flag bit is set in flags. 
+void shader_for_each_active_variant(shader_type type, unsigned int flags, const std::function& fn); + +// Build the variant #define header for a shader given type+flags. +SCP_string shader_build_variant_defines(shader_type type, unsigned int flags); + +// Returns FXAA preprocessor defines for the given AA mode. +// gather4_alpha: whether the backend supports textureGather. +SCP_string shader_get_fxaa_defines(AntiAliasMode aa_mode, bool gather4_alpha); diff --git a/code/graphics/shaders/compiled/default-material.frag.spv b/code/graphics/shaders/compiled/default-material.frag.spv deleted file mode 100644 index 72854a6895e..00000000000 Binary files a/code/graphics/shaders/compiled/default-material.frag.spv and /dev/null differ diff --git a/code/graphics/shaders/compiled/default-material.frag.spv.glsl b/code/graphics/shaders/compiled/default-material.frag.spv.glsl deleted file mode 100644 index 060a12e4634..00000000000 --- a/code/graphics/shaders/compiled/default-material.frag.spv.glsl +++ /dev/null @@ -1,51 +0,0 @@ -#version 150 - -layout(std140) uniform genericData -{ - mat4 modelMatrix; - vec4 color; - vec4 clipEquation; - int baseMapIndex; - int alphaTexture; - int noTexturing; - int srgb; - float intensity; - float alphaThreshold; - uint clipEnabled; -} _39; - -uniform sampler2DArray baseMap; - -in vec4 fragTexCoord; -in vec4 fragColor; -out vec4 fragOut0; - -void main() -{ - vec4 _48 = texture(baseMap, vec3(fragTexCoord.xy, float(_39.baseMapIndex))); - if (_39.alphaThreshold > _48.w) - { - discard; - } - bool _66 = _39.srgb == 1; - vec3 _146; - if (_66) - { - _146 = pow(_48.xyz, vec3(2.2000000476837158203125)); - } - else - { - _146 = _48.xyz; - } - vec4 _148; - if (_66) - { - _148 = vec4(pow(fragColor.xyz, vec3(2.2000000476837158203125)), fragColor.w); - } - else - { - _148 = fragColor; - } - fragOut0 = mix(mix(vec4(_146.x, _146.y, _146.z, _48.w) * _148, vec4(_148.xyz, _146.x * _148.w), vec4(float(_39.alphaTexture))), _148, vec4(float(_39.noTexturing))) * 
_39.intensity; -} - diff --git a/code/graphics/shaders/compiled/default-material.vert.spv b/code/graphics/shaders/compiled/default-material.vert.spv deleted file mode 100644 index b1ad969cb55..00000000000 Binary files a/code/graphics/shaders/compiled/default-material.vert.spv and /dev/null differ diff --git a/code/graphics/shaders/compiled/default-material.vert.spv.glsl b/code/graphics/shaders/compiled/default-material.vert.spv.glsl deleted file mode 100644 index 60decc4f843..00000000000 --- a/code/graphics/shaders/compiled/default-material.vert.spv.glsl +++ /dev/null @@ -1,41 +0,0 @@ -#version 150 - -out float gl_ClipDistance[1]; - -layout(std140) uniform genericData -{ - mat4 modelMatrix; - vec4 color; - vec4 clipEquation; - int baseMapIndex; - int alphaTexture; - int noTexturing; - int srgb; - float intensity; - float alphaThreshold; - uint clipEnabled; -} _22; - -layout(std140) uniform matrixData -{ - mat4 modelViewMatrix; - mat4 projMatrix; -} _36; - -out vec4 fragTexCoord; -in vec4 vertTexCoord; -out vec4 fragColor; -in vec4 vertColor; -in vec4 vertPosition; - -void main() -{ - fragTexCoord = vertTexCoord; - fragColor = vertColor * _22.color; - gl_Position = (_36.projMatrix * _36.modelViewMatrix) * vertPosition; - if (_22.clipEnabled != 0u) - { - gl_ClipDistance[0] = dot(_22.clipEquation, _22.modelMatrix * vertPosition); - } -} - diff --git a/code/graphics/shaders/compiled/default-material_structs.frag.h b/code/graphics/shaders/compiled/default-material_structs.frag.h index 7367cf86e2c..dfea92140d0 100644 --- a/code/graphics/shaders/compiled/default-material_structs.frag.h +++ b/code/graphics/shaders/compiled/default-material_structs.frag.h @@ -4,7 +4,7 @@ #include #include -struct genericData_default_material_frag { +struct genericData_default_material_f_sdr { SPIRV_FLOAT_MAT_4x4 modelMatrix; SPIRV_FLOAT_VEC4 color; SPIRV_FLOAT_VEC4 clipEquation; @@ -16,14 +16,14 @@ struct genericData_default_material_frag { float alphaThreshold; std::uint32_t clipEnabled; 
}; -static_assert(sizeof(genericData_default_material_frag) == 124, "Size of struct genericData_default_material_frag does not match what is expected for the uniform block!"); -static_assert(offsetof(genericData_default_material_frag, modelMatrix) == 0, "Offset of member modelMatrix does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, color) == 64, "Offset of member color does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, clipEquation) == 80, "Offset of member clipEquation does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, baseMapIndex) == 96, "Offset of member baseMapIndex does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, alphaTexture) == 100, "Offset of member alphaTexture does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, noTexturing) == 104, "Offset of member noTexturing does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, srgb) == 108, "Offset of member srgb does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, intensity) == 112, "Offset of member intensity does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, alphaThreshold) == 116, "Offset of member alphaThreshold does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_frag, clipEnabled) == 120, "Offset of member clipEnabled does not match the uniform buffer offset!"); +static_assert(sizeof(genericData_default_material_f_sdr) == 124, "Size of struct genericData_default_material_f_sdr does not match what is expected for the uniform block!"); +static_assert(offsetof(genericData_default_material_f_sdr, modelMatrix) == 0, "Offset of member modelMatrix does not 
match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, color) == 64, "Offset of member color does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, clipEquation) == 80, "Offset of member clipEquation does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, baseMapIndex) == 96, "Offset of member baseMapIndex does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, alphaTexture) == 100, "Offset of member alphaTexture does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, noTexturing) == 104, "Offset of member noTexturing does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, srgb) == 108, "Offset of member srgb does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, intensity) == 112, "Offset of member intensity does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, alphaThreshold) == 116, "Offset of member alphaThreshold does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_f_sdr, clipEnabled) == 120, "Offset of member clipEnabled does not match the uniform buffer offset!"); diff --git a/code/graphics/shaders/compiled/default-material_structs.vert.h b/code/graphics/shaders/compiled/default-material_structs.vert.h index 9a4530cacdd..153748b22ee 100644 --- a/code/graphics/shaders/compiled/default-material_structs.vert.h +++ b/code/graphics/shaders/compiled/default-material_structs.vert.h @@ -4,7 +4,7 @@ #include #include -struct genericData_default_material_vert { +struct genericData_default_material_v_sdr { SPIRV_FLOAT_MAT_4x4 modelMatrix; SPIRV_FLOAT_VEC4 color; SPIRV_FLOAT_VEC4 clipEquation; @@ -16,21 +16,21 @@ struct 
genericData_default_material_vert { float alphaThreshold; std::uint32_t clipEnabled; }; -static_assert(sizeof(genericData_default_material_vert) == 124, "Size of struct genericData_default_material_vert does not match what is expected for the uniform block!"); -static_assert(offsetof(genericData_default_material_vert, modelMatrix) == 0, "Offset of member modelMatrix does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, color) == 64, "Offset of member color does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, clipEquation) == 80, "Offset of member clipEquation does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, baseMapIndex) == 96, "Offset of member baseMapIndex does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, alphaTexture) == 100, "Offset of member alphaTexture does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, noTexturing) == 104, "Offset of member noTexturing does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, srgb) == 108, "Offset of member srgb does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, intensity) == 112, "Offset of member intensity does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, alphaThreshold) == 116, "Offset of member alphaThreshold does not match the uniform buffer offset!"); -static_assert(offsetof(genericData_default_material_vert, clipEnabled) == 120, "Offset of member clipEnabled does not match the uniform buffer offset!"); -struct matrixData_default_material_vert { +static_assert(sizeof(genericData_default_material_v_sdr) == 124, "Size of struct genericData_default_material_v_sdr does not match what is expected for the uniform 
block!"); +static_assert(offsetof(genericData_default_material_v_sdr, modelMatrix) == 0, "Offset of member modelMatrix does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, color) == 64, "Offset of member color does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, clipEquation) == 80, "Offset of member clipEquation does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, baseMapIndex) == 96, "Offset of member baseMapIndex does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, alphaTexture) == 100, "Offset of member alphaTexture does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, noTexturing) == 104, "Offset of member noTexturing does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, srgb) == 108, "Offset of member srgb does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, intensity) == 112, "Offset of member intensity does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, alphaThreshold) == 116, "Offset of member alphaThreshold does not match the uniform buffer offset!"); +static_assert(offsetof(genericData_default_material_v_sdr, clipEnabled) == 120, "Offset of member clipEnabled does not match the uniform buffer offset!"); +struct matrixData_default_material_v_sdr { SPIRV_FLOAT_MAT_4x4 modelViewMatrix; SPIRV_FLOAT_MAT_4x4 projMatrix; }; -static_assert(sizeof(matrixData_default_material_vert) == 128, "Size of struct matrixData_default_material_vert does not match what is expected for the uniform block!"); -static_assert(offsetof(matrixData_default_material_vert, modelViewMatrix) == 0, "Offset of member modelViewMatrix does not match the uniform buffer offset!"); 
-static_assert(offsetof(matrixData_default_material_vert, projMatrix) == 64, "Offset of member projMatrix does not match the uniform buffer offset!"); +static_assert(sizeof(matrixData_default_material_v_sdr) == 128, "Size of struct matrixData_default_material_v_sdr does not match what is expected for the uniform block!"); +static_assert(offsetof(matrixData_default_material_v_sdr, modelViewMatrix) == 0, "Offset of member modelViewMatrix does not match the uniform buffer offset!"); +static_assert(offsetof(matrixData_default_material_v_sdr, projMatrix) == 64, "Offset of member projMatrix does not match the uniform buffer offset!"); diff --git a/code/graphics/shaders/compiled/vulkan.frag.spv b/code/graphics/shaders/compiled/vulkan.frag.spv deleted file mode 100644 index d267d4906c2..00000000000 Binary files a/code/graphics/shaders/compiled/vulkan.frag.spv and /dev/null differ diff --git a/code/graphics/shaders/compiled/vulkan.frag.spv.glsl b/code/graphics/shaders/compiled/vulkan.frag.spv.glsl deleted file mode 100644 index baa87038095..00000000000 --- a/code/graphics/shaders/compiled/vulkan.frag.spv.glsl +++ /dev/null @@ -1,10 +0,0 @@ -#version 150 - -out vec4 outColor; -in vec3 fragColor; - -void main() -{ - outColor = vec4(fragColor, 1.0); -} - diff --git a/code/graphics/shaders/compiled/vulkan.vert.spv b/code/graphics/shaders/compiled/vulkan.vert.spv deleted file mode 100644 index c3e37b6e672..00000000000 Binary files a/code/graphics/shaders/compiled/vulkan.vert.spv and /dev/null differ diff --git a/code/graphics/shaders/compiled/vulkan.vert.spv.glsl b/code/graphics/shaders/compiled/vulkan.vert.spv.glsl deleted file mode 100644 index 6e396118336..00000000000 --- a/code/graphics/shaders/compiled/vulkan.vert.spv.glsl +++ /dev/null @@ -1,13 +0,0 @@ -#version 150 - -const vec2 _20[3] = vec2[](vec2(0.0, -0.5), vec2(0.5), vec2(-0.5, 0.5)); -const vec3 _29[3] = vec3[](vec3(1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0), vec3(0.0, 0.0, 1.0)); - -out vec3 fragColor; - -void main() -{ - 
gl_Position = vec4(_20[gl_VertexID], 0.0, 1.0); - fragColor = _29[gl_VertexID]; -} - diff --git a/code/graphics/shaders/compiled/vulkan_structs.frag.h b/code/graphics/shaders/compiled/vulkan_structs.frag.h deleted file mode 100644 index 37f6fcd2e46..00000000000 --- a/code/graphics/shaders/compiled/vulkan_structs.frag.h +++ /dev/null @@ -1,6 +0,0 @@ - -#pragma once - -#include -#include - diff --git a/code/graphics/shaders/compiled/vulkan_structs.vert.h b/code/graphics/shaders/compiled/vulkan_structs.vert.h deleted file mode 100644 index 37f6fcd2e46..00000000000 --- a/code/graphics/shaders/compiled/vulkan_structs.vert.h +++ /dev/null @@ -1,6 +0,0 @@ - -#pragma once - -#include -#include - diff --git a/code/graphics/shaders/default-material.vert b/code/graphics/shaders/default-material.vert deleted file mode 100644 index 2529adaf8a5..00000000000 --- a/code/graphics/shaders/default-material.vert +++ /dev/null @@ -1,42 +0,0 @@ -#version 450 -#extension GL_ARB_separate_shader_objects : enable - -layout (location = 0) in vec4 vertPosition; -layout (location = 1) in vec4 vertColor; -layout (location = 2) in vec4 vertTexCoord; - -layout (location = 0) out vec4 fragTexCoord; -layout (location = 1) out vec4 fragColor; - -layout (binding = 0, std140) uniform matrixData { - mat4 modelViewMatrix; - mat4 projMatrix; -}; - -layout (binding = 1, std140) uniform genericData { - mat4 modelMatrix; - - vec4 color; - - vec4 clipEquation; - - int baseMapIndex; - int alphaTexture; - int noTexturing; - int srgb; - - float intensity; - float alphaThreshold; - bool clipEnabled; -}; - -void main() -{ - fragTexCoord = vertTexCoord; - fragColor = vertColor * color; - gl_Position = projMatrix * modelViewMatrix * vertPosition; - - if (clipEnabled) { - gl_ClipDistance[0] = dot(clipEquation, modelMatrix * vertPosition); - } -} diff --git a/code/graphics/shaders/vulkan.frag b/code/graphics/shaders/vulkan.frag deleted file mode 100644 index 84daf5e0d0e..00000000000 --- 
a/code/graphics/shaders/vulkan.frag +++ /dev/null @@ -1,10 +0,0 @@ -#version 450 -#extension GL_ARB_separate_shader_objects : enable - -layout(location = 0) in vec3 fragColor; - -layout(location = 0) out vec4 outColor; - -void main() { - outColor = vec4(fragColor, 1.0); -} diff --git a/code/graphics/shaders/vulkan.vert b/code/graphics/shaders/vulkan.vert deleted file mode 100644 index ec247650646..00000000000 --- a/code/graphics/shaders/vulkan.vert +++ /dev/null @@ -1,21 +0,0 @@ -#version 450 -#extension GL_ARB_separate_shader_objects : enable - -layout(location = 0) out vec3 fragColor; - -vec2 positions[3] = vec2[]( - vec2(0.0, -0.5), - vec2(0.5, 0.5), - vec2(-0.5, 0.5) -); - -vec3 colors[3] = vec3[]( - vec3(1.0, 0.0, 0.0), - vec3(0.0, 1.0, 0.0), - vec3(0.0, 0.0, 1.0) -); - -void main() { - gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0); - fragColor = colors[gl_VertexIndex]; -} diff --git a/code/graphics/shadows.cpp b/code/graphics/shadows.cpp index 3e86d2c709b..eb4566ad959 100644 --- a/code/graphics/shadows.cpp +++ b/code/graphics/shadows.cpp @@ -100,11 +100,19 @@ void shadows_construct_light_proj(light_frustum_info *shadow_data) shadow_data->proj_matrix.a1d[0] = 2.0f / ( shadow_data->max.xyz.x - shadow_data->min.xyz.x ); shadow_data->proj_matrix.a1d[5] = 2.0f / ( shadow_data->max.xyz.y - shadow_data->min.xyz.y ); - shadow_data->proj_matrix.a1d[10] = -2.0f / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); shadow_data->proj_matrix.a1d[12] = -(shadow_data->max.xyz.x + shadow_data->min.xyz.x) / ( shadow_data->max.xyz.x - shadow_data->min.xyz.x ); shadow_data->proj_matrix.a1d[13] = -(shadow_data->max.xyz.y + shadow_data->min.xyz.y) / ( shadow_data->max.xyz.y - shadow_data->min.xyz.y ); - shadow_data->proj_matrix.a1d[14] = -(shadow_data->max.xyz.z + shadow_data->min.xyz.z) / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); shadow_data->proj_matrix.a1d[15] = 1.0f; + + if (gr_screen.mode == GR_VULKAN) { + // Vulkan uses [0, 1] depth range + 
shadow_data->proj_matrix.a1d[10] = -1.0f / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + shadow_data->proj_matrix.a1d[14] = -shadow_data->min.xyz.z / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + } else { + // OpenGL uses [-1, 1] depth range + shadow_data->proj_matrix.a1d[10] = -2.0f / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + shadow_data->proj_matrix.a1d[14] = -(shadow_data->max.xyz.z + shadow_data->min.xyz.z) / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + } } void shadows_debug_show_frustum(matrix* orient, vec3d *pos, float fov, float aspect, float z_near, float z_far) diff --git a/code/graphics/util/primitives.cpp b/code/graphics/util/primitives.cpp new file mode 100644 index 00000000000..4e7e0e739b1 --- /dev/null +++ b/code/graphics/util/primitives.cpp @@ -0,0 +1,125 @@ +#include "graphics/util/primitives.h" + +#include + + +namespace graphics::util { + +generated_mesh generate_sphere_mesh(int rings, int segments) +{ + generated_mesh mesh; + + unsigned int nVertex = (rings + 1) * (segments + 1) * 3; + unsigned int nIndex = 6 * rings * (segments + 1); + + mesh.vertices.reserve(nVertex); + mesh.indices.reserve(nIndex); + + float fDeltaRingAngle = (PI / rings); + float fDeltaSegAngle = (2.0f * PI / segments); + unsigned short wVerticeIndex = 0; + + // Generate the group of rings for the sphere + for (int ring = 0; ring <= rings; ring++) { + float r0 = sinf(ring * fDeltaRingAngle); + float y0 = cosf(ring * fDeltaRingAngle); + + // Generate the group of segments for the current ring + for (int seg = 0; seg <= segments; seg++) { + float x0 = r0 * sinf(seg * fDeltaSegAngle); + float z0 = r0 * cosf(seg * fDeltaSegAngle); + + // Add one vertex to the strip which makes up the sphere + mesh.vertices.push_back(x0); + mesh.vertices.push_back(y0); + mesh.vertices.push_back(z0); + + if (ring != rings) { + // each vertex (except the last) has six indices pointing to it + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + 
mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(wVerticeIndex + (ushort)segments); + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + mesh.indices.push_back(wVerticeIndex + 1); + mesh.indices.push_back(wVerticeIndex); + wVerticeIndex++; + } + } + } + + mesh.vertex_count = wVerticeIndex; + mesh.index_count = nIndex; + + return mesh; +} + +generated_mesh generate_cylinder_mesh(int segments) +{ + generated_mesh mesh; + + unsigned int nVertex = ((segments + 1) * 2 * 3) + 6; + unsigned int nIndex = (12 * (segments + 1)) - 6; + + mesh.vertices.reserve(nVertex); + mesh.indices.reserve(nIndex); + + float fDeltaSegAngle = (2.0f * PI / segments); + unsigned short wVerticeIndex = 0; + + // Bottom cap center vertex + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(0.0f); + wVerticeIndex++; + + // Top cap center vertex + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(1.0f); + wVerticeIndex++; + + for (int ring = 0; ring <= 1; ring++) { + auto z0 = (float)ring; + + // Generate the group of segments for the current ring + for (int seg = 0; seg <= segments; seg++) { + float x0 = sinf(seg * fDeltaSegAngle); + float y0 = cosf(seg * fDeltaSegAngle); + + // Add one vertex to the strip which makes up the cylinder + mesh.vertices.push_back(x0); + mesh.vertices.push_back(y0); + mesh.vertices.push_back(z0); + + if (!ring) { + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(wVerticeIndex + (ushort)segments); + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + mesh.indices.push_back(wVerticeIndex + 1); + mesh.indices.push_back(wVerticeIndex); + if (seg != segments) { + mesh.indices.push_back(wVerticeIndex + 1); + mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(0); + } + wVerticeIndex++; + } else { + if (seg != segments) { + mesh.indices.push_back(wVerticeIndex + 1); + 
mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(1); + wVerticeIndex++; + } + } + } + } + + mesh.vertex_count = wVerticeIndex; + mesh.index_count = nIndex; + + return mesh; +} + +} + diff --git a/code/graphics/util/primitives.h b/code/graphics/util/primitives.h new file mode 100644 index 00000000000..a10b2b46e07 --- /dev/null +++ b/code/graphics/util/primitives.h @@ -0,0 +1,37 @@ +#pragma once + +#include "globalincs/pstypes.h" + + +namespace graphics::util { + +struct generated_mesh { + SCP_vector vertices; // position-only, 3 floats per vertex + SCP_vector indices; + unsigned int vertex_count; // number of unique vertices generated + unsigned int index_count; // number of indices +}; + +/** + * @brief Generate a unit sphere mesh (radius 1.0) suitable for deferred light volumes + * + * Based on http://www.ogre3d.org/tikiwiki/ManualSphereMeshes + * + * @param rings Number of horizontal rings + * @param segments Number of vertical segments + * @return generated_mesh containing position-only vertices and triangle indices + */ +generated_mesh generate_sphere_mesh(int rings, int segments); + +/** + * @brief Generate a unit cylinder mesh (radius 1.0, height 1.0) suitable for deferred light volumes + * + * Based on http://www.ogre3d.org/tikiwiki/ManualSphereMeshes + * + * @param segments Number of radial segments + * @return generated_mesh containing position-only vertices and triangle indices + */ +generated_mesh generate_cylinder_mesh(int segments); + +} + diff --git a/code/graphics/vulkan/RenderFrame.cpp b/code/graphics/vulkan/RenderFrame.cpp deleted file mode 100644 index ae113fd1031..00000000000 --- a/code/graphics/vulkan/RenderFrame.cpp +++ /dev/null @@ -1,100 +0,0 @@ - -#include "RenderFrame.h" - -namespace graphics { -namespace vulkan { - -RenderFrame::RenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue) - : m_device(device), m_swapChain(swapChain), m_graphicsQueue(graphicsQueue), 
m_presentQueue(presentQueue) -{ - constexpr vk::SemaphoreCreateInfo semaphoreCreateInfo; - constexpr vk::FenceCreateInfo fenceCreateInfo; - - m_imageAvailableSemaphore = device.createSemaphoreUnique(semaphoreCreateInfo); - m_renderingFinishedSemaphore = device.createSemaphoreUnique(semaphoreCreateInfo); - m_frameInFlightFence = device.createFenceUnique(fenceCreateInfo); -} -void RenderFrame::waitForFinish() -{ - if (!m_inFlight) { - return; - } - - // waitForFences can theoretically return a timeout, but as this passes the maximum uint64_t value in microseconds, - // this won't happen in practice, and the result can be ignored. - (void)m_device.waitForFences(m_frameInFlightFence.get(), true, std::numeric_limits::max()); - m_device.resetFences(m_frameInFlightFence.get()); - - // That frame is now definitely not in flight anymore so we can call the functions that depend on that - for (const auto& finishFunc : m_frameFinishedCallbacks) { - finishFunc(); - } - m_frameFinishedCallbacks.clear(); - - // Our fence has been signaled so we are no longer in flight and ready to be reused - m_inFlight = false; -} -void RenderFrame::onFrameFinished(std::function finishFunc) -{ - m_frameFinishedCallbacks.push_back(std::move(finishFunc)); -} -uint32_t RenderFrame::acquireSwapchainImage() -{ - Assertion(!m_inFlight, "Cannot acquire swapchain image when frame is still in flight."); - - uint32_t imageIndex; - vk::Result res = m_device.acquireNextImageKHR(m_swapChain, - std::numeric_limits::max(), - m_imageAvailableSemaphore.get(), - nullptr, - &imageIndex); - // TODO: This should handle at least VK_SUBOPTIMAL_KHR, which means that the swap chain is no longer - // optimal and should be recreated. 
- (void)res; - - m_swapChainIdx = imageIndex; - - return imageIndex; -} -void RenderFrame::submitAndPresent(const std::vector& cmdBuffers) -{ - Assertion(!m_inFlight, "Cannot submit a frame for presentation when it is still in flight."); - - const std::array waitStages = {vk::PipelineStageFlagBits::eColorAttachmentOutput}; - const std::array waitSemaphores = {m_imageAvailableSemaphore.get()}; - - vk::SubmitInfo submitInfo; - submitInfo.waitSemaphoreCount = 1; - submitInfo.pWaitDstStageMask = waitStages.data(); - submitInfo.pWaitSemaphores = waitSemaphores.data(); - - submitInfo.commandBufferCount = static_cast(cmdBuffers.size()); - submitInfo.pCommandBuffers = cmdBuffers.data(); - - const std::array signalSemaphores = {m_renderingFinishedSemaphore.get()}; - submitInfo.signalSemaphoreCount = 1; - submitInfo.pSignalSemaphores = signalSemaphores.data(); - - m_graphicsQueue.submit(submitInfo, m_frameInFlightFence.get()); - - // This frame is now officially in flight - m_inFlight = true; - - vk::PresentInfoKHR presentInfo; - presentInfo.waitSemaphoreCount = 1; - presentInfo.pWaitSemaphores = signalSemaphores.data(); - - const std::array swapChains = {m_swapChain}; - presentInfo.swapchainCount = 1; - presentInfo.pSwapchains = swapChains.data(); - presentInfo.pImageIndices = &m_swapChainIdx; - presentInfo.pResults = nullptr; - - vk::Result res = m_presentQueue.presentKHR(presentInfo); - // TODO: This should handle at least VK_SUBOPTIMAL_KHR, which means that the swap chain is no longer - // optimal and should be recreated. 
- (void)res; -} - -} // namespace vulkan -} // namespace graphics diff --git a/code/graphics/vulkan/RenderFrame.h b/code/graphics/vulkan/RenderFrame.h deleted file mode 100644 index a5c2ad51c33..00000000000 --- a/code/graphics/vulkan/RenderFrame.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include "globalincs/pstypes.h" - -#include - -namespace graphics { -namespace vulkan { - -class RenderFrame { - public: - RenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue); - - void waitForFinish(); - - uint32_t acquireSwapchainImage(); - - void onFrameFinished(std::function finishFunc); - - void submitAndPresent(const std::vector& cmdBuffers); - - private: - vk::Device m_device; - vk::SwapchainKHR m_swapChain; - vk::Queue m_graphicsQueue; - vk::Queue m_presentQueue; - - vk::UniqueSemaphore m_imageAvailableSemaphore; - vk::UniqueSemaphore m_renderingFinishedSemaphore; - vk::UniqueFence m_frameInFlightFence; - SCP_vector> m_frameFinishedCallbacks; - - bool m_inFlight = false; - - uint32_t m_swapChainIdx = 0; -}; - -} // namespace vulkan -} // namespace graphics diff --git a/code/graphics/vulkan/VulkanBuffer.cpp b/code/graphics/vulkan/VulkanBuffer.cpp new file mode 100644 index 00000000000..6108afae0c3 --- /dev/null +++ b/code/graphics/vulkan/VulkanBuffer.cpp @@ -0,0 +1,783 @@ +#include "VulkanBuffer.h" +#include "VulkanDeletionQueue.h" +#include "VulkanDraw.h" + +#include "globalincs/pstypes.h" + + +namespace graphics::vulkan { + +namespace { +VulkanBufferManager* g_bufferManager = nullptr; +} + +VulkanBufferManager* getBufferManager() +{ + Assertion(g_bufferManager != nullptr, "Vulkan BufferManager not initialized!"); + return g_bufferManager; +} + +void setBufferManager(VulkanBufferManager* manager) +{ + g_bufferManager = manager; +} + +VulkanBufferManager::VulkanBufferManager() = default; + +VulkanBufferManager::~VulkanBufferManager() +{ + if (m_initialized) { + shutdown(); + } +} + +bool 
VulkanBufferManager::createOneShotBuffer(vk::Flags<vk::BufferUsageFlagBits> usage, const void* data, size_t size, vk::Buffer& buf, VulkanAllocation& alloc) const
+{
+	// Creates an exclusive-mode buffer of `size` bytes, backs it with CpuToGpu
+	// memory and fills it once from `data`. Returns false on failure; a buffer
+	// or allocation created before the failing step is released again.
+	vk::BufferCreateInfo bufferInfo;
+	bufferInfo.size = size;
+	bufferInfo.usage = usage;
+	bufferInfo.sharingMode = vk::SharingMode::eExclusive;
+
+	try {
+		buf = m_device.createBuffer(bufferInfo);
+	} catch (const vk::SystemError& e) {
+		mprintf(("Failed to create buffer: %s\n", e.what()));
+		return false;
+	}
+
+	if (!m_memoryManager->allocateBufferMemory(buf, MemoryUsage::CpuToGpu, alloc)) {
+		m_device.destroyBuffer(buf);
+		buf = nullptr;
+		mprintf(("Failed to allocate buffer memory!\n"));
+		return false;
+	}
+
+	void* mapped = m_memoryManager->mapMemory(alloc);
+	if (mapped) {
+		memcpy(mapped, data, size);
+		m_memoryManager->flushMemory(alloc, 0, size);
+		m_memoryManager->unmapMemory(alloc);
+	} else {
+		m_memoryManager->freeAllocation(alloc);
+		m_device.destroyBuffer(buf);
+		buf = nullptr;
+
+		mprintf(("Failed to map buffer memory!\n"));
+		return false;
+	}
+	return true;
+}
+
+// ========== Frame bump allocator ==========
+
+// Creates the backing buffer for one per-frame bump allocator: a single
+// CpuToGpu buffer usable as vertex/index/uniform/storage/transfer-dst that
+// stays mapped (alloc.mappedPtr) until it is shut down or replaced.
+// On failure nothing is left allocated and false is returned.
+bool VulkanBufferManager::createFrameAllocBuffer(FrameBumpAllocator& alloc, size_t size)
+{
+	vk::BufferCreateInfo bufferInfo;
+	bufferInfo.size = size;
+	bufferInfo.usage = vk::BufferUsageFlagBits::eVertexBuffer
+		| vk::BufferUsageFlagBits::eIndexBuffer
+		| vk::BufferUsageFlagBits::eUniformBuffer
+		| vk::BufferUsageFlagBits::eStorageBuffer
+		| vk::BufferUsageFlagBits::eTransferDst;
+	bufferInfo.sharingMode = vk::SharingMode::eExclusive;
+
+	try {
+		alloc.buffer = m_device.createBuffer(bufferInfo);
+	} catch (const vk::SystemError& e) {
+		mprintf(("Failed to create frame allocator buffer: %s\n", e.what()));
+		return false;
+	}
+
+	if (!m_memoryManager->allocateBufferMemory(alloc.buffer, MemoryUsage::CpuToGpu, alloc.allocation)) {
+		m_device.destroyBuffer(alloc.buffer);
+		alloc.buffer = nullptr;
+		mprintf(("Failed to allocate frame allocator buffer memory!\n"));
+		return false;
+	}
+
+	alloc.mappedPtr = m_memoryManager->mapMemory(alloc.allocation);
+	if (!alloc.mappedPtr) {
+		m_memoryManager->freeAllocation(alloc.allocation);
+		m_device.destroyBuffer(alloc.buffer);
+		alloc.buffer = nullptr;
+		alloc.allocation = {};
+		mprintf(("Failed to map frame allocator buffer!\n"));
+		return false;
+	}
+
+	alloc.capacity = size;
+	alloc.cursor = 0;
+	return true;
+}
+
+// Creates one bump allocator buffer per entry of m_frameAllocs. A creation
+// failure trips Verify.
+void VulkanBufferManager::initFrameAllocators()
+{
+	for (auto & m_frameAlloc : m_frameAllocs) {
+		Verify(createFrameAllocBuffer(m_frameAlloc, FRAME_ALLOC_INITIAL_SIZE));
+	}
+	mprintf(("Frame bump allocators initialized: %u x %zuKB\n",
+		MAX_FRAMES_IN_FLIGHT, FRAME_ALLOC_INITIAL_SIZE / 1024));
+}
+
+// Unmaps, destroys and frees every per-frame allocator buffer immediately
+// (no deferred deletion) and resets its bookkeeping.
+void VulkanBufferManager::shutdownFrameAllocators()
+{
+	for (auto & alloc : m_frameAllocs) {
+		if (alloc.mappedPtr) {
+			m_memoryManager->unmapMemory(alloc.allocation);
+			alloc.mappedPtr = nullptr;
+		}
+		if (alloc.buffer) {
+			m_device.destroyBuffer(alloc.buffer);
+			alloc.buffer = nullptr;
+		}
+		if (alloc.allocation.isValid()) {
+			m_memoryManager->freeAllocation(alloc.allocation);
+			alloc.allocation = {};
+		}
+		alloc.capacity = 0;
+		alloc.cursor = 0;
+	}
+}
+
+// Reserves `size` bytes in the current frame's bump allocator and returns the
+// byte offset of the reservation inside that allocator's buffer. The buffer is
+// replaced by a larger one when the request does not fit.
+size_t VulkanBufferManager::bumpAllocate(size_t size)
+{
+	auto& alloc = m_frameAllocs[m_currentFrame];
+
+	// Align cursor up to the UBO offset alignment. The mask form requires
+	// m_uboAlignment to be a power of two.
+	size_t alignedOffset = (alloc.cursor + m_uboAlignment - 1) & ~(static_cast<size_t>(m_uboAlignment) - 1);
+
+	if (alignedOffset + size > alloc.capacity) {
+		// growFrameAllocator() sizes the replacement from the current capacity
+		// and cursor only, not from this request, so a single doubling can
+		// still be too small for one large allocation. Keep growing until the
+		// request fits; extra rounds only happen for such oversized requests.
+		do {
+			growFrameAllocator();
+		} while (size > alloc.capacity);
+
+		// After growth, cursor is 0 so alignedOffset is 0
+		alignedOffset = 0;
+		Assertion(size <= alloc.capacity, "Frame allocator growth failed to provide enough capacity");
+	}
+
+	alloc.cursor = alignedOffset + size;
+	return alignedOffset;
+}
+
+// Replaces the current frame's allocator buffer with a new, empty one of at
+// least twice the capacity. The old buffer is unmapped and handed to the
+// deletion queue; the new buffer starts with cursor 0.
+void VulkanBufferManager::growFrameAllocator()
+{
+	auto& alloc = m_frameAllocs[m_currentFrame];
+
+	// Start from double the current capacity
+	size_t newCapacity = alloc.capacity > 0 ? alloc.capacity * 2 : FRAME_ALLOC_INITIAL_SIZE;
+	// Keep doubling until the bytes already handed out this frame would fit.
+	// The size of the pending request is not known here; bumpAllocate() calls
+	// this again if the result is still too small for it.
+	while (newCapacity < alloc.cursor) {
+		newCapacity *= 2;
+	}
+
+	mprintf(("Growing frame allocator %u: %zuKB -> %zuKB\n",
+		m_currentFrame, alloc.capacity / 1024, newCapacity / 1024));
+
+	// Queue old buffer for deferred destruction - the deletion queue's FRAMES_TO_WAIT=2
+	// ensures the old buffer survives through current frame's GPU execution.
+	// Existing handles with frameAllocBuffer pointing to the old buffer remain valid.
+	auto* deletionQueue = getDeletionQueue();
+	if (alloc.mappedPtr) {
+		m_memoryManager->unmapMemory(alloc.allocation);
+	}
+	deletionQueue->queueBuffer(alloc.buffer, alloc.allocation);
+
+	// Create new buffer
+	alloc = {};
+	Verify(createFrameAllocBuffer(alloc, newCapacity));
+}
+
+// ========== Init / Shutdown ==========
+
+// Stores the device/queue parameters, creates the fallback vertex-color,
+// texcoord and uniform buffers and the per-frame bump allocators.
+// Returns false (leaving the manager uninitialized) when already initialized,
+// when device or memoryManager is null, or when a fallback buffer cannot be
+// created. A minUboAlignment of 0 is replaced by 256.
+bool VulkanBufferManager::init(vk::Device device,
+	VulkanMemoryManager* memoryManager,
+	uint32_t graphicsQueueFamily,
+	uint32_t transferQueueFamily,
+	uint32_t minUboAlignment)
+{
+	if (m_initialized) {
+		mprintf(("VulkanBufferManager::init called when already initialized!\n"));
+		return false;
+	}
+
+	if (!device || !memoryManager) {
+		mprintf(("VulkanBufferManager::init called with null device or memory manager!\n"));
+		return false;
+	}
+
+	m_device = device;
+	m_memoryManager = memoryManager;
+	m_graphicsQueueFamily = graphicsQueueFamily;
+	m_transferQueueFamily = transferQueueFamily;
+	m_currentFrame = 0;
+	m_uboAlignment = minUboAlignment > 0 ? minUboAlignment : 256;
+
+	// Releases a fallback buffer created earlier in this function. shutdown()
+	// returns early while m_initialized is false, so a failed init() has to
+	// clean up after itself or the earlier buffers would leak.
+	auto releaseFallback = [this](vk::Buffer& buffer, VulkanAllocation& allocation) {
+		if (buffer) {
+			m_device.destroyBuffer(buffer);
+			buffer = nullptr;
+		}
+		if (allocation.isValid()) {
+			m_memoryManager->freeAllocation(allocation);
+			allocation = {};
+		}
+	};
+
+	// Create fallback color buffer with white (1,1,1,1) for shaders expecting vertColor
+	std::array<float, 4> whiteColor = { 1.0f, 1.0f, 1.0f, 1.0f };
+	if (!createOneShotBuffer(vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eTransferDst, whiteColor.data(), sizeof(whiteColor), m_fallbackColorBuffer, m_fallbackColorAllocation)) {
+		mprintf(("VulkanBufferManager::init could not create fallback color buffer\n"));
+		return false;
+	}
+
+	// Create fallback texcoord buffer with (0,0,0,0)
+	std::array<float, 4> zeroTexCoord = { 0.0f, 0.0f, 0.0f, 0.0f };
+	if (!createOneShotBuffer(vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eTransferDst, zeroTexCoord.data(), sizeof(zeroTexCoord), m_fallbackTexCoordBuffer, m_fallbackTexCoordAllocation)) {
+		mprintf(("VulkanBufferManager::init could not create fallback texcoord buffer\n"));
+		releaseFallback(m_fallbackColorBuffer, m_fallbackColorAllocation);
+		return false;
+	}
+
+	// Create fallback uniform buffer (zeros) for uninitialized descriptor set bindings
+	// Without this, descriptor set UBO bindings left unwritten after pool reset
+	// contain undefined data, causing intermittent rendering failures
+	// NOTE(review): the element type and length of dummy_ubo are not visible in
+	// this copy of the patch (template arguments missing) - restore them from
+	// the upstream file.
+	std::array dummy_ubo = {};
+	if (!createOneShotBuffer(vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer, dummy_ubo.data(), sizeof(dummy_ubo), m_fallbackUniformBuffer, m_fallbackUniformAllocation)) {
+		mprintf(("VulkanBufferManager::init could not create fallback uniform buffer\n"));
+		releaseFallback(m_fallbackTexCoordBuffer, m_fallbackTexCoordAllocation);
+		releaseFallback(m_fallbackColorBuffer, m_fallbackColorAllocation);
+		return false;
+	}
+
+	initFrameAllocators();
+
+	m_initialized = true;
+	mprintf(("Vulkan Buffer Manager initialized (frame bump allocator, UBO alignment=%u, %u frames)\n",
+		m_uboAlignment, MAX_FRAMES_IN_FLIGHT));
+	return true;
+}
+
+// Destroys the fallback buffers, every remaining non-streaming buffer and the
+// frame allocators immediately (no deferred deletion), then resets all
+// bookkeeping. Does nothing when the manager is not initialized.
+void VulkanBufferManager::shutdown()
+{
+	if (!m_initialized) {
+		return;
+	}
+
+	// Destroy fallback color buffer
+	if (m_fallbackColorBuffer) {
+		m_device.destroyBuffer(m_fallbackColorBuffer);
+		m_fallbackColorBuffer = nullptr;
+	}
+	if (m_fallbackColorAllocation.isValid()) {
+		m_memoryManager->freeAllocation(m_fallbackColorAllocation);
+		m_fallbackColorAllocation = {};
+	}
+
+	// Destroy fallback texcoord buffer
+	if (m_fallbackTexCoordBuffer) {
+		m_device.destroyBuffer(m_fallbackTexCoordBuffer);
+		m_fallbackTexCoordBuffer = nullptr;
+	}
+	if (m_fallbackTexCoordAllocation.isValid()) {
+		m_memoryManager->freeAllocation(m_fallbackTexCoordAllocation);
+		m_fallbackTexCoordAllocation = {};
+	}
+
+	// Destroy fallback uniform buffer
+	if (m_fallbackUniformBuffer) {
+		m_device.destroyBuffer(m_fallbackUniformBuffer);
+		m_fallbackUniformBuffer = nullptr;
+	}
+	if (m_fallbackUniformAllocation.isValid()) {
+		m_memoryManager->freeAllocation(m_fallbackUniformAllocation);
+		m_fallbackUniformAllocation = {};
+	}
+
+	// Free all remaining static buffers. Streaming buffers own no Vulkan
+	// objects of their own; their storage goes away with the frame allocators.
+	for (auto& bufferObj : m_buffers) {
+		if (bufferObj.valid) {
+			if (!bufferObj.isStreaming() && bufferObj.buffer) {
+				m_device.destroyBuffer(bufferObj.buffer);
+			}
+			if (!bufferObj.isStreaming() && bufferObj.allocation.isValid()) {
+				m_memoryManager->freeAllocation(bufferObj.allocation);
+			}
+			bufferObj.valid = false;
+		}
+	}
+
+	shutdownFrameAllocators();
+
+	m_buffers.clear();
+	m_freeIndices.clear();
+	m_activeBufferCount = 0;
+	m_totalBufferMemory = 0;
+	m_initialized = false;
+
+	mprintf(("Vulkan Buffer Manager shutdown\n"));
+}
+
+// Selects the frame-in-flight slot (frameIndex modulo MAX_FRAMES_IN_FLIGHT)
+// and rewinds that slot's bump allocator to the start of its buffer.
+void VulkanBufferManager::setCurrentFrame(uint32_t frameIndex)
+{
+	m_currentFrame = frameIndex % MAX_FRAMES_IN_FLIGHT;
+	// Reset bump cursor — safe because the GPU fence for this frame-in-flight
+	// was already waited on before setCurrentFrame is called.
+	m_frameAllocs[m_currentFrame].cursor = 0;
+}
+
+// ========== Buffer usage / memory helpers ==========
+
+// Maps a BufferType to Vulkan usage flags. eTransferDst is always included;
+// a type not listed in the switch yields eTransferDst only.
+vk::BufferUsageFlags VulkanBufferManager::getVkUsageFlags(BufferType type)
+{
+	vk::BufferUsageFlags flags = vk::BufferUsageFlagBits::eTransferDst;
+
+	switch (type) {
+	case BufferType::Vertex:
+		flags |= vk::BufferUsageFlagBits::eVertexBuffer;
+		break;
+	case BufferType::Index:
+		flags |= vk::BufferUsageFlagBits::eIndexBuffer;
+		break;
+	case BufferType::Uniform:
+		flags |= vk::BufferUsageFlagBits::eUniformBuffer;
+		break;
+	}
+
+	return flags;
+}
+
+// Maps a usage hint to the memory category requested from the memory manager.
+MemoryUsage VulkanBufferManager::getMemoryUsage(BufferUsageHint hint)
+{
+	switch (hint) {
+	case BufferUsageHint::Static:
+		// Static data would ideally live in device-local memory filled through
+		// a staging buffer. For simplicity it currently uses CpuToGpu, which
+		// allows direct host writes.
+		return MemoryUsage::CpuToGpu;
+
+	case BufferUsageHint::Dynamic:
+	case BufferUsageHint::Streaming:
+		// Frequently updated data needs to be host visible
+		return MemoryUsage::CpuToGpu;
+
+	case BufferUsageHint::PersistentMapping:
+		// Persistent mapping requires host visible memory
+		return MemoryUsage::CpuOnly;
+
+	default:
+		return MemoryUsage::CpuToGpu;
+	}
+}
+
+// ========== Buffer create / delete ==========
+
+// Registers a new logical buffer and returns its handle. No Vulkan buffer is
+// created here; that happens when data is first uploaded. Slots released by
+// deleteBuffer() are reused before the table grows.
+gr_buffer_handle VulkanBufferManager::createBuffer(BufferType type, BufferUsageHint usage)
+{
+	Verify(m_initialized);
+
+	VulkanBufferObject bufferObj;
+	bufferObj.type = type;
+	bufferObj.usage = usage;
+	bufferObj.valid = true;
+	// Note: actual buffer creation is deferred until data is uploaded
+
+	int index;
+	if (!m_freeIndices.empty()) {
+		// Reuse a freed slot
+		index = m_freeIndices.back();
+		m_freeIndices.pop_back();
+		m_buffers[index] = bufferObj;
+	} else {
+		// Add new slot
+		index = static_cast<int>(m_buffers.size());
+		m_buffers.push_back(bufferObj);
+	}
+
+	++m_activeBufferCount;
+	return gr_buffer_handle(index);
+}
+
+void
VulkanBufferManager::deleteBuffer(gr_buffer_handle handle) +{ + Verify(m_initialized && isValidHandle(handle)); + + VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + Verify(bufferObj.valid); + + if (!bufferObj.isStreaming()) { + // Queue static buffer for deferred destruction + auto* deletionQueue = getDeletionQueue(); + if (bufferObj.buffer) { + deletionQueue->queueBuffer(bufferObj.buffer, bufferObj.allocation); + m_totalBufferMemory -= bufferObj.dataSize; + } + bufferObj.buffer = nullptr; + bufferObj.allocation = {}; + bufferObj.dataSize = 0; + } else { + // Streaming buffers have no per-buffer resources — just mark invalid + } + + --m_activeBufferCount; + bufferObj.valid = false; + + // Add to free list for reuse + m_freeIndices.push_back(handle.value()); +} + +// ========== createOrResizeBuffer (static only) ========== + +bool VulkanBufferManager::createOrResizeBuffer(VulkanBufferObject& bufferObj, size_t size) +{ + Assertion(!bufferObj.isStreaming(), "createOrResizeBuffer called on streaming buffer!"); + + // If buffer exists and is large enough, no-op + if (bufferObj.buffer && bufferObj.dataSize >= size) { + return true; + } + + // Save old buffer info for data copy + vk::Buffer oldBuffer = bufferObj.buffer; + VulkanAllocation oldAllocation = bufferObj.allocation; + size_t oldDataSize = bufferObj.dataSize; + + // Create new buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = size; + bufferInfo.usage = getVkUsageFlags(bufferObj.type); + + // Handle queue family sharing + uint32_t queueFamilies[] = {m_graphicsQueueFamily, m_transferQueueFamily}; + if (m_graphicsQueueFamily != m_transferQueueFamily) { + bufferInfo.sharingMode = vk::SharingMode::eConcurrent; + bufferInfo.queueFamilyIndexCount = 2; + bufferInfo.pQueueFamilyIndices = queueFamilies; + } else { + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + } + + try { + bufferObj.buffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to 
create Vulkan buffer: %s\n", e.what())); + bufferObj.buffer = oldBuffer; + return false; + } + + // Allocate memory + MemoryUsage memUsage = getMemoryUsage(bufferObj.usage); + if (!m_memoryManager->allocateBufferMemory(bufferObj.buffer, memUsage, bufferObj.allocation)) { + m_device.destroyBuffer(bufferObj.buffer); + bufferObj.buffer = oldBuffer; + bufferObj.allocation = oldAllocation; + return false; + } + + // Copy existing data from old buffer + if (oldBuffer && oldDataSize > 0) { + void* oldMapped = m_memoryManager->mapMemory(oldAllocation); + void* newMapped = m_memoryManager->mapMemory(bufferObj.allocation); + Verify(oldMapped); + Verify(newMapped); + + size_t copySize = std::min(oldDataSize, size); + memcpy(newMapped, oldMapped, copySize); + m_memoryManager->flushMemory(bufferObj.allocation, 0, copySize); + + m_memoryManager->unmapMemory(oldAllocation); + m_memoryManager->unmapMemory(bufferObj.allocation); + } + + // Queue old buffer for deferred destruction + if (oldBuffer) { + auto* deletionQueue = getDeletionQueue(); + deletionQueue->queueBuffer(oldBuffer, oldAllocation); + m_totalBufferMemory -= oldDataSize; + } + + bufferObj.dataSize = size; + m_totalBufferMemory += size; + + return true; +} + +// ========== Buffer data updates ========== + +void VulkanBufferManager::updateBufferData(gr_buffer_handle handle, size_t size, const void* data) +{ + Verify(m_initialized && isValidHandle(handle)); + + if (size == 0) { + mprintf(("WARNING: updateBufferData called with size 0\n")); + return; + } + + VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + Verify(bufferObj.valid); + + if (bufferObj.isStreaming()) { + auto& alloc = m_frameAllocs[m_currentFrame]; + + if (data) { + // Pattern A: full replacement — allocate and copy + size_t offset = bumpAllocate(size); + memcpy(static_cast(alloc.mappedPtr) + offset, data, size); + m_memoryManager->flushMemory(alloc.allocation, offset, size); + + bufferObj.frameAllocBuffer = alloc.buffer; + 
/**
 * Write `size` bytes from `data` into the buffer at byte `offset`.
 *
 * Streaming/Dynamic buffers write into the current frame's bump allocator
 * through its mapped pointer; if the buffer has no allocation for this frame
 * yet, one is made here first. Static buffers are mapped, patched, flushed
 * and unmapped again.
 *
 * The handle must be valid and the manager initialized (both enforced with
 * Verify). `data` is passed to memcpy without a null check.
 */
void VulkanBufferManager::updateBufferDataOffset(gr_buffer_handle handle, size_t offset, size_t size, const void* data)
{
	Verify(m_initialized && isValidHandle(handle));

	VulkanBufferObject& bufferObj = m_buffers[handle.value()];
	Verify(bufferObj.valid);

	if (bufferObj.isStreaming()) {
		// Auto-allocate if not yet allocated this frame. This happens when
		// the caller skips updateBufferData (e.g. gr_add_to_immediate_buffer
		// when the data fits the existing buffer size).
		if (bufferObj.frameAllocFrame != m_currentFrame) {
			// Keep at least the previous size so later offset writes still fit,
			// but grow if this write reaches past it.
			size_t allocSize = std::max(bufferObj.dataSize, offset + size);
			Verify(allocSize > 0);
			auto& fa = m_frameAllocs[m_currentFrame];
			size_t allocOffset = bumpAllocate(allocSize);
			// Only the range written below is filled in; nothing is copied
			// over from the buffer's previous allocation.
			bufferObj.frameAllocBuffer = fa.buffer;
			bufferObj.frameAllocOffset = allocOffset;
			bufferObj.dataSize = allocSize;
			bufferObj.frameAllocFrame = m_currentFrame;
		}

		// The write must stay inside this buffer's sub-allocation
		Verify(offset + size <= bufferObj.dataSize);

		auto& alloc = m_frameAllocs[m_currentFrame];
		// Position inside the shared frame buffer = sub-allocation base + caller offset
		size_t totalOffset = bufferObj.frameAllocOffset + offset;
		// NOTE(review): the static_cast target type is missing in this copy of the source — confirm against the repository
		memcpy(static_cast(alloc.mappedPtr) + totalOffset, data, size);
		m_memoryManager->flushMemory(alloc.allocation, totalOffset, size);
	} else {
		// Static path: the buffer must already exist and be large enough;
		// this function never creates or resizes a static buffer.
		Verify(bufferObj.buffer);
		Verify(offset + size <= bufferObj.dataSize);

		// Map, update region, and unmap
		void* mapped = m_memoryManager->mapMemory(bufferObj.allocation);
		Verify(mapped);
		// NOTE(review): the static_cast target type is missing in this copy of the source — confirm against the repository
		memcpy(static_cast(mapped) + offset, data, size);
		m_memoryManager->flushMemory(bufferObj.allocation, offset, size);
		m_memoryManager->unmapMemory(bufferObj.allocation);
	}
}
+void VulkanBufferManager::flushMappedBuffer(gr_buffer_handle handle, size_t offset, size_t size) +{ + Verify(m_initialized && isValidHandle(handle)); + + VulkanBufferObject const& bufferObj = m_buffers[handle.value()]; + Verify(bufferObj.valid); + + if (bufferObj.isStreaming()) { + // Adjust offset for current frame's allocation + Verify(bufferObj.frameAllocFrame == m_currentFrame); + auto& alloc = m_frameAllocs[m_currentFrame]; + m_memoryManager->flushMemory(alloc.allocation, bufferObj.frameAllocOffset + offset, size); + } else { + m_memoryManager->flushMemory(bufferObj.allocation, offset, size); + } +} + +// ========== Uniform buffer binding ========== + +void VulkanBufferManager::bindUniformBuffer(uniform_block_type blockType, size_t offset, size_t size, gr_buffer_handle buffer) const +{ + // Resolve the full offset NOW (frame base + caller offset) so the binding + // captures the correct allocation. The vk::Buffer is still looked up at + // draw time (via handle) to survive buffer recreation. 
+ size_t resolvedOffset = getFrameBaseOffset(buffer) + offset; + + auto* drawManager = getDrawManager(); + drawManager->setPendingUniformBinding(blockType, buffer, + static_cast(resolvedOffset), + static_cast(size)); +} + +// ========== Buffer queries ========== + +vk::Buffer VulkanBufferManager::getVkBuffer(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return nullptr; + } + + const VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + if (!bufferObj.valid) { + return nullptr; + } + + if (bufferObj.isStreaming()) { + // Streaming buffers return the frame allocator buffer they were uploaded to + Verify(bufferObj.frameAllocFrame == m_currentFrame); + return bufferObj.frameAllocBuffer; + } else { + return bufferObj.buffer; + } +} + +size_t VulkanBufferManager::getBufferSize(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return 0; + } + + const VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + if (!bufferObj.valid) { + return 0; + } + + return bufferObj.dataSize; +} + +size_t VulkanBufferManager::getFrameBaseOffset(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return 0; + } + + const VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + if (!bufferObj.valid) { + return 0; + } + + if (bufferObj.isStreaming()) { + // Return the bump allocator offset for the most recent upload this frame. + // Stale handle detection: if frameAllocFrame != m_currentFrame, this buffer + // was not uploaded this frame and the offset would be meaningless (the bump + // allocator has been reset). This indicates a buffer marked Streaming/Dynamic + // is being bound for rendering without being uploaded first. 
+ Verify(bufferObj.frameAllocFrame == m_currentFrame); + return bufferObj.frameAllocOffset; + } else { + return 0; + } +} + +bool VulkanBufferManager::isValidHandle(gr_buffer_handle handle) const +{ + if (!handle.isValid()) { + return false; + } + if (static_cast(handle.value()) >= m_buffers.size()) { + return false; + } + return m_buffers[handle.value()].valid; +} + +VulkanBufferObject* VulkanBufferManager::getBufferObject(gr_buffer_handle handle) +{ + if (!isValidHandle(handle)) { + return nullptr; + } + return &m_buffers[handle.value()]; +} + +const VulkanBufferObject* VulkanBufferManager::getBufferObject(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return nullptr; + } + return &m_buffers[handle.value()]; +} + +// ========== gr_screen function pointer implementations ========== + +gr_buffer_handle vulkan_create_buffer(BufferType type, BufferUsageHint usage) +{ + auto* bufferManager = getBufferManager(); + return bufferManager->createBuffer(type, usage); +} + +void vulkan_delete_buffer(gr_buffer_handle handle) +{ + auto* bufferManager = getBufferManager(); + bufferManager->deleteBuffer(handle); +} + +void vulkan_update_buffer_data(gr_buffer_handle handle, size_t size, const void* data) +{ + auto* bufferManager = getBufferManager(); + bufferManager->updateBufferData(handle, size, data); +} + +void vulkan_update_buffer_data_offset(gr_buffer_handle handle, size_t offset, size_t size, const void* data) +{ + auto* bufferManager = getBufferManager(); + bufferManager->updateBufferDataOffset(handle, offset, size, data); +} + +void* vulkan_map_buffer(gr_buffer_handle handle) +{ + auto* bufferManager = getBufferManager(); + void* result = bufferManager->mapBuffer(handle); + Verify(result); + return result; +} + +void vulkan_flush_mapped_buffer(gr_buffer_handle handle, size_t offset, size_t size) +{ + auto* bufferManager = getBufferManager(); + bufferManager->flushMappedBuffer(handle, offset, size); +} + +void 
/**
 * @brief Per-frame bump allocator for streaming/dynamic buffers
 *
 * Two of these exist (one per frame-in-flight). At frame start the cursor
 * resets to 0. Each streaming upload bumps the cursor forward.
 * The buffer is persistently mapped for the lifetime of the allocator.
 */
struct FrameBumpAllocator {
	// Backing VkBuffer; streaming buffer objects record it as their frameAllocBuffer
	vk::Buffer buffer;
	// Memory bound to `buffer`; this is what flushMemory is called on after CPU writes
	VulkanAllocation allocation = {};
	// CPU address of the start of the mapped buffer; uploads are written at mappedPtr + offset
	void* mappedPtr = nullptr;
	// Presumably the total size of `buffer` in bytes — TODO confirm against createFrameAllocBuffer
	size_t capacity = 0;
	// Next free byte offset within this frame's buffer
	size_t cursor = 0;
};
/**
 * @brief Manages GPU buffer creation, updates, and destruction
 *
 * Streaming/Dynamic buffers are sub-allocated from a global per-frame bump
 * allocator (two large VkBuffers, one per frame-in-flight). Static buffers
 * keep their own VkBuffer. PersistentMapping buffers are handled separately.
 *
 * Buffers are addressed by gr_buffer_handle, whose value is an index into
 * the internal buffer table; indices of deleted buffers are recycled.
 */
class VulkanBufferManager {
public:
	VulkanBufferManager();
	~VulkanBufferManager();

	// Non-copyable
	VulkanBufferManager(const VulkanBufferManager&) = delete;
	VulkanBufferManager& operator=(const VulkanBufferManager&) = delete;

	/**
	 * @brief Initialize the buffer manager
	 * @param device The Vulkan logical device
	 * @param memoryManager The memory manager for allocations
	 * @param graphicsQueueFamily Graphics queue family index
	 * @param transferQueueFamily Transfer queue family index
	 * @param minUboAlignment Minimum uniform buffer offset alignment from device limits
	 * @return true on success
	 */
	bool init(vk::Device device,
		VulkanMemoryManager* memoryManager,
		uint32_t graphicsQueueFamily,
		uint32_t transferQueueFamily,
		uint32_t minUboAlignment);

	/**
	 * @brief Shutdown and free all buffers
	 */
	void shutdown();

	/**
	 * @brief Set the current frame index and reset the bump allocator cursor
	 * Must be called at the start of each frame before any buffer updates
	 * @param frameIndex The current frame index (0 to MAX_FRAMES_IN_FLIGHT-1)
	 */
	void setCurrentFrame(uint32_t frameIndex);

	/**
	 * @brief Get the current frame index
	 */
	uint32_t getCurrentFrame() const { return m_currentFrame; }

	/**
	 * @brief Get the Vulkan logical device
	 */
	vk::Device getDevice() const { return m_device; }

	/**
	 * @brief Create a new buffer
	 * @param type The buffer type (Vertex, Index, Uniform)
	 * @param usage Usage hint for optimization
	 * @return Handle to the created buffer, or invalid handle on failure
	 */
	gr_buffer_handle createBuffer(BufferType type, BufferUsageHint usage);

	/**
	 * @brief Delete a buffer
	 *
	 * A static buffer's VkBuffer is queued for deferred destruction rather
	 * than destroyed immediately. The handle's index is recycled afterwards.
	 * @param handle The buffer to delete
	 */
	void deleteBuffer(gr_buffer_handle handle);

	/**
	 * @brief Update buffer data (full replacement)
	 *
	 * A size of 0 is logged and ignored. A null @p data pointer only
	 * allocates (or resizes) storage without writing anything.
	 * @param handle The buffer to update
	 * @param size Size of data in bytes
	 * @param data Pointer to data, or nullptr to allocate without writing
	 */
	void updateBufferData(gr_buffer_handle handle, size_t size, const void* data);

	/**
	 * @brief Update buffer data at an offset
	 *
	 * Streaming buffers without an allocation for the current frame get one
	 * automatically. Static buffers must already be large enough.
	 * @param handle The buffer to update
	 * @param offset Offset in bytes
	 * @param size Size of data in bytes
	 * @param data Pointer to data
	 */
	void updateBufferDataOffset(gr_buffer_handle handle, size_t offset, size_t size, const void* data);

	/**
	 * @brief Map buffer for CPU access
	 *
	 * Streaming buffers must have been allocated during the current frame;
	 * the returned pointer then addresses their sub-allocation.
	 * @param handle The buffer to map
	 * @return Pointer to mapped memory, or nullptr on failure
	 */
	void* mapBuffer(gr_buffer_handle handle);

	/**
	 * @brief Flush a range of a mapped buffer
	 * @param handle The buffer to flush
	 * @param offset Offset in bytes, relative to the start of this buffer's data
	 * @param size Size of range in bytes
	 */
	void flushMappedBuffer(gr_buffer_handle handle, size_t offset, size_t size);

	/**
	 * @brief Bind uniform buffer to a binding slot
	 *
	 * The frame base offset is resolved at call time and the binding is
	 * recorded as pending on the draw manager.
	 * @param blockType The uniform block type
	 * @param offset Offset within the buffer
	 * @param size Size of the bound range
	 * @param buffer The buffer to bind
	 */
	void bindUniformBuffer(uniform_block_type blockType, size_t offset, size_t size, gr_buffer_handle buffer) const;

	/**
	 * @brief Get the Vulkan buffer handle for the current frame
	 *
	 * For streaming buffers this is the frame allocator buffer they were
	 * uploaded to, and the upload must have happened this frame.
	 * @param handle The buffer handle
	 * @return The VkBuffer, or VK_NULL_HANDLE if invalid
	 */
	vk::Buffer getVkBuffer(gr_buffer_handle handle) const;

	/**
	 * @brief Get buffer size
	 * For streaming buffers, returns the current frame allocation size.
	 * For static buffers, returns the total buffer size.
	 * @param handle The buffer handle
	 * @return Size in bytes, or 0 if invalid
	 */
	size_t getBufferSize(gr_buffer_handle handle) const;

	/**
	 * @brief Get the base offset for the current frame's allocation
	 * For streaming buffers, returns the bump allocator offset.
	 * For static buffers, returns 0.
	 * @param handle The buffer handle
	 * @return Byte offset for current frame's allocation
	 */
	size_t getFrameBaseOffset(gr_buffer_handle handle) const;

	/**
	 * @brief Check if a handle is valid
	 *
	 * A handle is valid when it is set, its index lies inside the buffer
	 * table, and the entry at that index is marked valid.
	 */
	bool isValidHandle(gr_buffer_handle handle) const;

	/**
	 * @brief Get statistics
	 */
	size_t getBufferCount() const { return m_activeBufferCount; }
	size_t getTotalBufferMemory() const { return m_totalBufferMemory; }

	/**
	 * @brief Get the constant white color buffer for fallback vertex colors
	 * This buffer contains vec4(1,1,1,1) for shaders expecting vertColor
	 */
	vk::Buffer getFallbackColorBuffer() const { return m_fallbackColorBuffer; }

	/**
	 * @brief Get the constant zero texcoord buffer for fallback vertex texcoords
	 * This buffer contains vec4(0,0,0,0) for shaders expecting vertTexCoord
	 */
	vk::Buffer getFallbackTexCoordBuffer() const { return m_fallbackTexCoordBuffer; }

	/**
	 * @brief Get the fallback uniform buffer for uninitialized descriptor bindings
	 * This buffer contains zeros and is used to pre-fill all UBO descriptor bindings
	 * to avoid undefined behavior from uninitialized descriptors after pool reset
	 */
	vk::Buffer getFallbackUniformBuffer() const { return m_fallbackUniformBuffer; }

	/**
	 * @brief Get a ready-to-use DescriptorBufferInfo for the fallback uniform buffer
	 * Covers the whole buffer: offset 0, range FALLBACK_UNIFORM_BUFFER_SIZE.
	 */
	vk::DescriptorBufferInfo getFallbackUniformBufferInfo() const {
		return {m_fallbackUniformBuffer, 0, FALLBACK_UNIFORM_BUFFER_SIZE};
	}

	/**
	 * @brief Get the size of the fallback uniform buffer
	 */
	static size_t getFallbackUniformBufferSize() { return FALLBACK_UNIFORM_BUFFER_SIZE; }

private:
	/**
	 * @brief Create a one-shot buffer (used in initialization only).
	 * NOTE(review): the template argument of vk::Flags is missing in this copy of the source — confirm against the repository
	 */
	bool createOneShotBuffer(vk::Flags usage, const void* data, size_t size, vk::Buffer& buf, VulkanAllocation& alloc) const;

	/**
	 * @brief Convert BufferType to Vulkan usage flags
	 */
	static vk::BufferUsageFlags getVkUsageFlags(BufferType type) ;

	/**
	 * @brief Convert BufferUsageHint to memory usage
	 */
	static MemoryUsage getMemoryUsage(BufferUsageHint hint) ;

	/**
	 * @brief Create or resize a static buffer
	 * Streaming buffers must NOT call this — they use the frame bump allocator.
	 * An existing buffer that is already large enough is left untouched;
	 * otherwise the old contents are copied into the new buffer.
	 * @return true if the buffer is at least @p size bytes afterwards
	 */
	bool createOrResizeBuffer(VulkanBufferObject& bufferObj, size_t size);

	/**
	 * @brief Get buffer object from handle
	 * @return Pointer to the object, or nullptr if the handle is not valid
	 */
	VulkanBufferObject* getBufferObject(gr_buffer_handle handle);
	const VulkanBufferObject* getBufferObject(gr_buffer_handle handle) const;

	// Frame bump allocator
	// Starting size of each per-frame buffer: 4 MiB
	static constexpr size_t FRAME_ALLOC_INITIAL_SIZE = 4 * 1024 * 1024;

	bool createFrameAllocBuffer(FrameBumpAllocator& alloc, size_t size);
	void initFrameAllocators();
	void shutdownFrameAllocators();
	// Reserves `size` bytes in the current frame's allocator and returns their byte offset
	size_t bumpAllocate(size_t size);
	void growFrameAllocator();

	// One allocator per frame-in-flight, indexed by m_currentFrame
	// NOTE(review): the std::array template arguments are missing in this copy of the source
	std::array m_frameAllocs;
	// Presumably replaced by init()'s minUboAlignment — TODO confirm
	uint32_t m_uboAlignment = 256;

	vk::Device m_device;
	VulkanMemoryManager* m_memoryManager = nullptr;

	uint32_t m_graphicsQueueFamily = 0;
	uint32_t m_transferQueueFamily = 0;
	uint32_t m_currentFrame = 0;

	// Buffer table; a gr_buffer_handle's value is an index into it
	SCP_vector m_buffers;
	SCP_vector m_freeIndices; // Recycled buffer indices

	// Fallback color buffer containing white (1,1,1,1) for vertex data without colors
	vk::Buffer m_fallbackColorBuffer;
	VulkanAllocation m_fallbackColorAllocation;

	// Fallback texcoord buffer containing (0,0,0,0) for vertex data without texcoords
	vk::Buffer m_fallbackTexCoordBuffer;
	VulkanAllocation m_fallbackTexCoordAllocation;

	// Fallback uniform buffer (zeros) for uninitialized descriptor set UBO bindings
	static constexpr size_t FALLBACK_UNIFORM_BUFFER_SIZE = 4096;
	vk::Buffer m_fallbackUniformBuffer;
	VulkanAllocation m_fallbackUniformAllocation;

	// Statistics reported by getBufferCount() / getTotalBufferMemory()
	size_t m_activeBufferCount = 0;
	size_t m_totalBufferMemory = 0;

	bool m_initialized = false;
};
template + constexpr ArrayView(const T (&arr)[N]) : data(arr), size(N) {} + + template + ArrayView(const std::array& arr) : data(arr.data()), size(N) {} + + const T* begin() const { return data; } + const T* end() const { return data + size; } +}; + +static constexpr uint32_t MAX_FRAMES_IN_FLIGHT = 2; + +// Standard pixel formats for rendering targets +static constexpr vk::Format HDR_COLOR_FORMAT = vk::Format::eR16G16B16A16Sfloat; +static constexpr vk::Format LDR_COLOR_FORMAT = vk::Format::eR8G8B8A8Unorm; +static constexpr vk::Format SHADOW_DEPTH_FORMAT = vk::Format::eD32Sfloat; + +// Return the correct image aspect flags for a given format: +// depth-only → eDepth +// depth+stencil → eDepth | eStencil +// everything else → eColor +inline vk::ImageAspectFlags imageAspectFromFormat(vk::Format format) +{ + switch (format) { + case vk::Format::eD16Unorm: + case vk::Format::eD32Sfloat: + return vk::ImageAspectFlagBits::eDepth; + case vk::Format::eD16UnormS8Uint: + case vk::Format::eD24UnormS8Uint: + case vk::Format::eD32SfloatS8Uint: + return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + default: + return vk::ImageAspectFlagBits::eColor; + } +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanConvert.cpp b/code/graphics/vulkan/VulkanConvert.cpp new file mode 100644 index 00000000000..c9c90bf2bb9 --- /dev/null +++ b/code/graphics/vulkan/VulkanConvert.cpp @@ -0,0 +1,273 @@ +#include "VulkanConvert.h" + + +namespace graphics::vulkan { + +void convertBlendMode(gr_alpha_blend mode, vk::BlendFactor& srcFactor, vk::BlendFactor& dstFactor) +{ + // Based on SetAlphaBlendMode in gropenglstate.cpp + switch (mode) { + case ALPHA_BLEND_NONE: + srcFactor = vk::BlendFactor::eOne; + dstFactor = vk::BlendFactor::eZero; + break; + case ALPHA_BLEND_ALPHA_ADDITIVE: + srcFactor = vk::BlendFactor::eSrcAlpha; + dstFactor = vk::BlendFactor::eOne; + break; + case ALPHA_BLEND_ALPHA_BLEND_ALPHA: + srcFactor = vk::BlendFactor::eSrcAlpha; + 
dstFactor = vk::BlendFactor::eOneMinusSrcAlpha; + break; + case ALPHA_BLEND_ALPHA_BLEND_SRC_COLOR: + srcFactor = vk::BlendFactor::eSrcColor; + dstFactor = vk::BlendFactor::eOneMinusSrcColor; + break; + case ALPHA_BLEND_ADDITIVE: + srcFactor = vk::BlendFactor::eOne; + dstFactor = vk::BlendFactor::eOne; + break; + case ALPHA_BLEND_PREMULTIPLIED: + srcFactor = vk::BlendFactor::eOne; + dstFactor = vk::BlendFactor::eOneMinusSrcAlpha; + break; + default: + srcFactor = vk::BlendFactor::eOne; + dstFactor = vk::BlendFactor::eZero; + break; + } +} + +void convertDepthMode(gr_zbuffer_type type, vk::CompareOp& compareOp, bool& writeEnable) +{ + // Based on SetZbufferType in gropenglstate.cpp + switch (type) { + case ZBUFFER_TYPE_NONE: + compareOp = vk::CompareOp::eAlways; + writeEnable = false; + break; + case ZBUFFER_TYPE_READ: + compareOp = vk::CompareOp::eLess; + writeEnable = false; + break; + case ZBUFFER_TYPE_WRITE: + compareOp = vk::CompareOp::eAlways; + writeEnable = true; + break; + case ZBUFFER_TYPE_FULL: + compareOp = vk::CompareOp::eLess; + writeEnable = true; + break; + default: + compareOp = vk::CompareOp::eAlways; + writeEnable = false; + break; + } +} + +vk::CompareOp convertStencilCompare(ComparisionFunction func) +{ + switch (func) { + case ComparisionFunction::Never: + return vk::CompareOp::eNever; + case ComparisionFunction::Less: + return vk::CompareOp::eLess; + case ComparisionFunction::Equal: + return vk::CompareOp::eEqual; + case ComparisionFunction::LessOrEqual: + return vk::CompareOp::eLessOrEqual; + case ComparisionFunction::Greater: + return vk::CompareOp::eGreater; + case ComparisionFunction::NotEqual: + return vk::CompareOp::eNotEqual; + case ComparisionFunction::GreaterOrEqual: + return vk::CompareOp::eGreaterOrEqual; + case ComparisionFunction::Always: + default: + return vk::CompareOp::eAlways; + } +} + +vk::StencilOp convertStencilOp(StencilOperation op) +{ + switch (op) { + case StencilOperation::Keep: + return vk::StencilOp::eKeep; + case 
StencilOperation::Zero: + return vk::StencilOp::eZero; + case StencilOperation::Replace: + return vk::StencilOp::eReplace; + case StencilOperation::Increment: + return vk::StencilOp::eIncrementAndClamp; + case StencilOperation::Decrement: + return vk::StencilOp::eDecrementAndClamp; + case StencilOperation::Invert: + return vk::StencilOp::eInvert; + case StencilOperation::IncrementWrap: + return vk::StencilOp::eIncrementAndWrap; + case StencilOperation::DecrementWrap: + return vk::StencilOp::eDecrementAndWrap; + default: + return vk::StencilOp::eKeep; + } +} + +vk::PrimitiveTopology convertPrimitiveType(primitive_type type) +{ + switch (type) { + case PRIM_TYPE_POINTS: + return vk::PrimitiveTopology::ePointList; + case PRIM_TYPE_LINES: + return vk::PrimitiveTopology::eLineList; + case PRIM_TYPE_LINESTRIP: + return vk::PrimitiveTopology::eLineStrip; + case PRIM_TYPE_TRIS: + return vk::PrimitiveTopology::eTriangleList; + case PRIM_TYPE_TRISTRIP: + return vk::PrimitiveTopology::eTriangleStrip; + case PRIM_TYPE_TRIFAN: + return vk::PrimitiveTopology::eTriangleFan; + default: + return vk::PrimitiveTopology::eTriangleList; + } +} + +vk::CullModeFlags convertCullMode(bool cullEnabled) +{ + return cullEnabled ? vk::CullModeFlagBits::eBack : vk::CullModeFlagBits::eNone; +} + +bool isBlendingEnabled(gr_alpha_blend mode) +{ + return mode != ALPHA_BLEND_NONE; +} + +vk::PipelineColorBlendAttachmentState createColorBlendAttachment(gr_alpha_blend mode, const bvec4& colorWriteMask) +{ + vk::PipelineColorBlendAttachmentState attachment; + + attachment.blendEnable = isBlendingEnabled(mode) ? 
VK_TRUE : VK_FALSE; + + vk::BlendFactor srcFactor, dstFactor; + convertBlendMode(mode, srcFactor, dstFactor); + + attachment.srcColorBlendFactor = srcFactor; + attachment.dstColorBlendFactor = dstFactor; + attachment.colorBlendOp = vk::BlendOp::eAdd; + + // Alpha blend - same as color for most modes + attachment.srcAlphaBlendFactor = srcFactor; + attachment.dstAlphaBlendFactor = dstFactor; + attachment.alphaBlendOp = vk::BlendOp::eAdd; + + // Color write mask from material + vk::ColorComponentFlags writeMask; + if (colorWriteMask.x) writeMask |= vk::ColorComponentFlagBits::eR; + if (colorWriteMask.y) writeMask |= vk::ColorComponentFlagBits::eG; + if (colorWriteMask.z) writeMask |= vk::ColorComponentFlagBits::eB; + if (colorWriteMask.w) writeMask |= vk::ColorComponentFlagBits::eA; + attachment.colorWriteMask = writeMask; + + return attachment; +} + +vk::PipelineDepthStencilStateCreateInfo createDepthStencilState( + gr_zbuffer_type depthMode, + bool stencilEnabled, + ComparisionFunction stencilFunc, + const material::StencilOp* frontOp, + const material::StencilOp* backOp, + uint32_t stencilMask) +{ + vk::PipelineDepthStencilStateCreateInfo info; + + // Depth settings + vk::CompareOp depthCompare; + bool depthWrite; + convertDepthMode(depthMode, depthCompare, depthWrite); + + info.depthTestEnable = (depthMode != ZBUFFER_TYPE_NONE) ? VK_TRUE : VK_FALSE; + info.depthWriteEnable = depthWrite ? VK_TRUE : VK_FALSE; + info.depthCompareOp = depthCompare; + info.depthBoundsTestEnable = VK_FALSE; + info.minDepthBounds = 0.0f; + info.maxDepthBounds = 1.0f; + + // Stencil settings + info.stencilTestEnable = stencilEnabled ? 
VK_TRUE : VK_FALSE; + + if (stencilEnabled) { + // Front face stencil + info.front.compareOp = convertStencilCompare(stencilFunc); + info.front.compareMask = 0xFF; + info.front.writeMask = stencilMask; + info.front.reference = 0; // Set dynamically + + if (frontOp) { + info.front.failOp = convertStencilOp(frontOp->stencilFailOperation); + info.front.depthFailOp = convertStencilOp(frontOp->depthFailOperation); + info.front.passOp = convertStencilOp(frontOp->successOperation); + } else { + info.front.failOp = vk::StencilOp::eKeep; + info.front.depthFailOp = vk::StencilOp::eKeep; + info.front.passOp = vk::StencilOp::eKeep; + } + + // Back face stencil + info.back.compareOp = convertStencilCompare(stencilFunc); + info.back.compareMask = 0xFF; + info.back.writeMask = stencilMask; + info.back.reference = 0; + + if (backOp) { + info.back.failOp = convertStencilOp(backOp->stencilFailOperation); + info.back.depthFailOp = convertStencilOp(backOp->depthFailOperation); + info.back.passOp = convertStencilOp(backOp->successOperation); + } else { + info.back.failOp = vk::StencilOp::eKeep; + info.back.depthFailOp = vk::StencilOp::eKeep; + info.back.passOp = vk::StencilOp::eKeep; + } + } + + return info; +} + +vk::PipelineRasterizationStateCreateInfo createRasterizationState( + bool cullEnabled, + int fillMode, + bool frontFaceCW, + bool depthBiasEnabled) +{ + vk::PipelineRasterizationStateCreateInfo info; + + info.depthClampEnable = VK_FALSE; + info.rasterizerDiscardEnable = VK_FALSE; + + // Fill mode + switch (fillMode) { + case GR_FILL_MODE_WIRE: + info.polygonMode = vk::PolygonMode::eLine; + break; + case GR_FILL_MODE_SOLID: + default: + info.polygonMode = vk::PolygonMode::eFill; + break; + } + + info.cullMode = convertCullMode(cullEnabled); + info.frontFace = frontFaceCW ? vk::FrontFace::eClockwise : vk::FrontFace::eCounterClockwise; + + // Depth bias - actual values set dynamically via vkCmdSetDepthBias + info.depthBiasEnable = depthBiasEnabled ? 
namespace graphics::vulkan {

/**
 * @brief Convert FSO alpha blend mode to Vulkan blend factors
 *
 * Unrecognized modes behave like ALPHA_BLEND_NONE (source one, destination zero).
 * @param mode FSO blend mode
 * @param srcFactor Output source blend factor
 * @param dstFactor Output destination blend factor
 */
void convertBlendMode(gr_alpha_blend mode, vk::BlendFactor& srcFactor, vk::BlendFactor& dstFactor);

/**
 * @brief Convert FSO depth buffer type to Vulkan compare op and write mask
 *
 * READ and FULL compare with eLess, NONE and WRITE with eAlways; WRITE and
 * FULL enable depth writes. Unrecognized types behave like NONE.
 * @param type FSO zbuffer type
 * @param compareOp Output compare operation
 * @param writeEnable Output depth write enable
 */
void convertDepthMode(gr_zbuffer_type type, vk::CompareOp& compareOp, bool& writeEnable);

/**
 * @brief Convert FSO stencil comparison function to Vulkan compare op
 * @param func FSO comparison function
 * @return Vulkan compare operation (eAlways for unrecognized values)
 */
vk::CompareOp convertStencilCompare(ComparisionFunction func);

/**
 * @brief Convert FSO stencil operation to Vulkan stencil op
 *
 * Increment/Decrement map to the clamping variants, IncrementWrap/DecrementWrap
 * to the wrapping variants.
 * @param op FSO stencil operation
 * @return Vulkan stencil operation (eKeep for unrecognized values)
 */
vk::StencilOp convertStencilOp(StencilOperation op);

/**
 * @brief Convert FSO primitive type to Vulkan topology
 * @param type FSO primitive type
 * @return Vulkan primitive topology (triangle list for unrecognized values)
 */
vk::PrimitiveTopology convertPrimitiveType(primitive_type type);

/**
 * @brief Convert FSO cull mode to Vulkan cull mode
 * @param cullEnabled Whether culling is enabled
 * @return eBack when culling is enabled, eNone otherwise
 */
vk::CullModeFlags convertCullMode(bool cullEnabled);

/**
 * @brief Check if a blend mode requires blending to be enabled
 * @param mode FSO blend mode
 * @return true for every mode except ALPHA_BLEND_NONE
 */
bool isBlendingEnabled(gr_alpha_blend mode);

/**
 * @brief Create a complete color blend attachment state
 *
 * The alpha channel uses the same blend factors as the color channels, and
 * both use the add blend op.
 * @param mode FSO blend mode
 * @param colorWriteMask Per-channel write enables (x = R, y = G, z = B, w = A);
 *                       defaults to writing all four channels
 * @return Vulkan color blend attachment state
 */
vk::PipelineColorBlendAttachmentState createColorBlendAttachment(gr_alpha_blend mode,
	const bvec4& colorWriteMask = {true, true, true, true});

/**
 * @brief Create depth stencil state create info
 *
 * The per-face stencil state is only filled in when stencilEnabled is true.
 * Both faces then use stencilFunc, a compare mask of 0xFF and a reference of
 * 0 (the reference is set dynamically).
 * @param depthMode FSO depth buffer mode
 * @param stencilEnabled Whether stencil testing is enabled
 * @param stencilFunc Stencil comparison function
 * @param frontOp Front face stencil operations (nullptr = keep for all operations)
 * @param backOp Back face stencil operations (nullptr = keep for all operations)
 * @param stencilMask Stencil write mask
 * @return Vulkan depth stencil state create info
 */
vk::PipelineDepthStencilStateCreateInfo createDepthStencilState(
	gr_zbuffer_type depthMode,
	bool stencilEnabled = false,
	ComparisionFunction stencilFunc = ComparisionFunction::Always,
	const material::StencilOp* frontOp = nullptr,
	const material::StencilOp* backOp = nullptr,
	uint32_t stencilMask = 0xFF);

/**
 * @brief Create rasterization state create info
 *
 * Depth bias factors are left at zero here; the actual values are set
 * dynamically via vkCmdSetDepthBias. Line width is fixed at 1.0.
 * @param cullEnabled Whether back-face culling is enabled
 * @param fillMode Polygon fill mode: GR_FILL_MODE_WIRE selects line
 *                 rasterization; GR_FILL_MODE_SOLID and every other value
 *                 select filled polygons. There is no point mode. The default
 *                 of 0 is presumably not GR_FILL_MODE_WIRE and so takes the
 *                 fill path — TODO confirm
 * @param frontFaceCW Front face winding (true = CW, false = CCW)
 * @param depthBiasEnabled Whether depth bias is enabled for the pipeline
 * @return Vulkan rasterization state create info
 */
vk::PipelineRasterizationStateCreateInfo createRasterizationState(
	bool cullEnabled = true,
	int fillMode = 0,
	bool frontFaceCW = true,
	bool depthBiasEnabled = false);

} // namespace graphics::vulkan
@@ + +#include "VulkanDeferred.h" + +#include + +#include "VulkanRenderer.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" +#include "VulkanDescriptorManager.h" +#include "VulkanPipeline.h" +#include "VulkanState.h" +#include "VulkanDraw.h" +#include "VulkanPostProcessing.h" +#include "gr_vulkan.h" + +#include "cmdline/cmdline.h" +#include "graphics/2d.h" +#include "graphics/matrix.h" +#include "graphics/material.h" +#include "graphics/grinternal.h" +#include "graphics/shadows.h" +#include "lighting/lighting.h" +#include "mission/missionparse.h" +#include "nebula/neb.h" +#include "nebula/volumetrics.h" +#include "render/3d.h" + + +namespace graphics::vulkan { + +namespace { + +bool s_vulkanOverrideFog = false; + +} // anonymous namespace + +/** + * @brief Transition all 6 MSAA images between two layout states. + * + * Builds a 6-barrier batch (5 color + 1 depth) and submits via pipelineBarrier. + */ +static void transitionMsaaImages(vk::CommandBuffer cmd, VulkanPostProcessor* pp, + vk::ImageLayout colorOldLayout, vk::ImageLayout colorNewLayout, + vk::AccessFlags colorSrcAccess, vk::AccessFlags colorDstAccess, + vk::ImageLayout depthOldLayout, vk::ImageLayout depthNewLayout, + vk::AccessFlags depthSrcAccess, vk::AccessFlags depthDstAccess, + vk::PipelineStageFlags srcStage, vk::PipelineStageFlags dstStage) +{ + std::array barriers; + + std::array msaaImages = { + pp->deferred().msaaColorImage(), + pp->deferred().msaaPositionImage(), + pp->deferred().msaaNormalImage(), + pp->deferred().msaaSpecularImage(), + pp->deferred().msaaEmissiveImage(), + }; + for (size_t i = 0; i < msaaImages.size(); ++i) { + barriers[i].srcAccessMask = colorSrcAccess; + barriers[i].dstAccessMask = colorDstAccess; + barriers[i].oldLayout = colorOldLayout; + barriers[i].newLayout = colorNewLayout; + barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[i].image = msaaImages[i]; + barriers[i].subresourceRange 
= {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + } + + barriers[5].srcAccessMask = depthSrcAccess; + barriers[5].dstAccessMask = depthDstAccess; + barriers[5].oldLayout = depthOldLayout; + barriers[5].newLayout = depthNewLayout; + barriers[5].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[5].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[5].image = pp->deferred().msaaDepthImage(); + barriers[5].subresourceRange = {imageAspectFromFormat(pp->getDepthFormat()), 0, 1, 0, 1}; + + cmd.pipelineBarrier(srcStage, dstStage, {}, nullptr, nullptr, barriers); +} + +// ========== Deferred Lighting ========== + +void vulkan_deferred_lighting_begin(bool clearNonColorBufs) +{ + if (!light_deferred_enabled()) { + return; + } + + auto* pp = getPostProcessor(); + if (!pp || !pp->deferred().isInitialized()) { + return; + } + + auto* renderer = getRendererInstance(); + if (!renderer->isSceneRendering()) { + return; + } + + auto* stateTracker = getStateTracker(); + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + const bool msaaActive = (Cmdline_msaa_enabled > 0 && pp->deferred().isMsaaInitialized()); + + // End the current G-buffer render pass to perform the color→emissive copy. + // All 6 color attachments transition to eShaderReadOnlyOptimal (finalLayout). + cmd.endRenderPass(); + + // Copy scene color → non-MSAA emissive (pre-deferred content becomes emissive). + // Skip both post-barriers — conditional MSAA/non-MSAA code below handles transitions. 
+ copyImageToImage(cmd, + pp->getSceneColorImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferSrcOptimal, + pp->deferred().emissiveImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferDstOptimal, + pp->getSceneExtent()); + + if (msaaActive) { + // --- MSAA path --- + // Transition scene color: eTransferSrcOptimal → eShaderReadOnlyOptimal + // (will be sampled inside MSAA pass to fill emissive) + // Transition non-MSAA emissive: eTransferDstOptimal → eShaderReadOnlyOptimal (preserved for later) + { + std::array barriers; + + barriers[0].srcAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[0].dstAccessMask = vk::AccessFlagBits::eShaderRead; + barriers[0].oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[0].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = pp->getSceneColorImage(); + barriers[0].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + barriers[1].srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[1].dstAccessMask = {}; + barriers[1].oldLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[1].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = pp->deferred().emissiveImage(); + barriers[1].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, nullptr, nullptr, barriers); + } + + // Transition MSAA images to expected initial layouts + pp->deferred().transitionMsaaForBegin(cmd); + + // Begin MSAA G-buffer render pass (eClear — clears all attachments) + { + auto extent = pp->getSceneExtent(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass 
= pp->deferred().msaaRenderPass(); + rpBegin.framebuffer = pp->deferred().msaaFramebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + std::array clearValues{}; + clearValues[VulkanDeferredGBuffer::GBUF_ATT_COLOR].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_POSITION].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_NORMAL].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_SPECULAR].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_EMISSIVE].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::MSAA_COLOR_ATTACHMENT_COUNT].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + stateTracker->setRenderPass(pp->deferred().msaaRenderPass(), 0); + stateTracker->setColorAttachmentCount(VulkanDeferredGBuffer::MSAA_COLOR_ATTACHMENT_COUNT); + stateTracker->setCurrentSampleCount(renderer->getMsaaSampleCount()); + } + + // Fill MSAA emissive with pre-deferred scene content (starfield, backgrounds). + // Draw a fullscreen tri sampling non-MSAA scene color, writing to all attachments. + // Only emissive (attachment 4) matters — the other attachments will be overwritten + // by model rendering. Use per-attachment color write mask to write only att 4. 
+ { + auto* pipelineMgr = getPipelineManager(); + + PipelineConfig config; + config.shaderType = SDR_TYPE_COPY; + config.primitiveType = PRIM_TYPE_TRIS; + config.depthMode = ZBUFFER_TYPE_NONE; + config.blendMode = ALPHA_BLEND_NONE; + config.cullEnabled = false; + config.depthWriteEnabled = false; + config.renderPass = pp->deferred().msaaRenderPass(); + config.sampleCount = renderer->getMsaaSampleCount(); + config.colorAttachmentCount = VulkanDeferredGBuffer::MSAA_COLOR_ATTACHMENT_COUNT; + + // Per-attachment blend: only write to emissive + config.perAttachmentBlendEnabled = true; + for (uint32_t i = 0; i < config.colorAttachmentCount; ++i) { + config.attachmentBlends[i].blendMode = ALPHA_BLEND_NONE; + config.attachmentBlends[i].writeMask = {false, false, false, false}; + } + config.attachmentBlends[VulkanDeferredGBuffer::GBUF_ATT_EMISSIVE].writeMask = {true, true, true, true}; + + vertex_layout emptyLayout; + vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout); + if (pipeline) { + // Use drawFullscreenTriangle pattern but inline since we're already in a render pass + auto* descriptorMgr = getDescriptorManager(); + + cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); + + auto extent = pp->getSceneExtent(); + vk::Viewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = static_cast(extent.width); + viewport.height = static_cast(extent.height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + cmd.setViewport(0, viewport); + vk::Rect2D scissor; + scissor.offset = vk::Offset2D(0, 0); + scissor.extent = extent; + cmd.setScissor(0, scissor); + + // Bind descriptors with scene color as source + DescriptorWriter writer; + writer.reset(descriptorMgr->getDevice(), descriptorMgr->getFallbacks()); + + vk::DescriptorSet globalSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Global); + Verify(globalSet); + writer.writeSet(globalSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Global)); + + 
vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material); + Verify(materialSet); + writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material)); + // Scene color at texture array slot 0 + { + std::array texImages; + texImages.fill(descriptorMgr->getFallbacks().texture2D); + texImages[0] = {pp->getSceneColorSampler(), pp->getSceneColorView(), vk::ImageLayout::eShaderReadOnlyOptimal}; + writer.setImageArray(MaterialBinding::TextureArray, texImages); + } + + vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw); + Verify(perDrawSet); + writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw)); + writer.flush(); + cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, + pipelineMgr->getPipelineLayout(), + static_cast(DescriptorSetIndex::Global), globalSet, {}); + cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, + pipelineMgr->getPipelineLayout(), + static_cast(DescriptorSetIndex::Material), materialSet, {}); + cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, + pipelineMgr->getPipelineLayout(), + static_cast(DescriptorSetIndex::PerDraw), perDrawSet, {}); + + cmd.draw(3, 1, 0, 0); + } + } + } else { + // --- Non-MSAA path (original) --- + // Transition scene color back to eColorAttachmentOptimal. + // Transition emissive to eShaderReadOnlyOptimal (where transitionGbufForResume expects it). 
+ { + std::array barriers; + + barriers[0].srcAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[0].dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + barriers[0].oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[0].newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = pp->getSceneColorImage(); + barriers[0].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + barriers[1].srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[1].dstAccessMask = {}; + barriers[1].oldLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[1].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = pp->deferred().emissiveImage(); + barriers[1].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, nullptr, nullptr, barriers); + } + + // Transition G-buffer attachments 1-5 from eShaderReadOnlyOptimal → eColorAttachmentOptimal + pp->deferred().transitionForResume(cmd); + + // Resume G-buffer render pass with eLoad + { + auto extent = pp->getSceneExtent(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = pp->deferred().renderPassLoad(); + rpBegin.framebuffer = pp->deferred().framebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + std::array clearValues{}; + clearValues[VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + 
stateTracker->setRenderPass(pp->deferred().renderPassLoad(), 0);
+        }
+
+        // Optionally clear non-color G-buffer attachments
+        if (clearNonColorBufs) {
+            vk::ClearAttachment clearAtt;
+            clearAtt.aspectMask = vk::ImageAspectFlagBits::eColor;
+            clearAtt.clearValue.color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f});
+
+            auto extent = pp->getSceneExtent();
+            vk::ClearRect clearRect;
+            clearRect.rect.offset = vk::Offset2D(0, 0);
+            clearRect.rect.extent = extent;
+            clearRect.baseArrayLayer = 0;
+            clearRect.layerCount = 1;
+
+            for (uint32_t att : {VulkanDeferredGBuffer::GBUF_ATT_POSITION,
+                                 VulkanDeferredGBuffer::GBUF_ATT_NORMAL,
+                                 VulkanDeferredGBuffer::GBUF_ATT_SPECULAR,
+                                 VulkanDeferredGBuffer::GBUF_ATT_COMPOSITE}) {
+                clearAtt.colorAttachment = att;
+                cmd.clearAttachments(clearAtt, clearRect);
+            }
+        }
+    }
+
+    Deferred_lighting = true;
+}
+
+/**
+ * @brief Resolve the MSAA G-buffer into the non-MSAA G-buffer and resume rendering.
+ *
+ * Does nothing unless MSAA is enabled on the command line and the MSAA
+ * resources of the post processor are initialized. Otherwise it:
+ *  1. ends the MSAA G-buffer render pass and resets the tracked sample count to 1x,
+ *  2. moves all 6 MSAA images to eShaderReadOnlyOptimal,
+ *  3. runs a fullscreen-triangle resolve pass that samples them and writes the
+ *     non-MSAA G-buffer (5 color attachments + depth),
+ *  4. returns the MSAA images to their attachment layouts,
+ *  5. prepares scene color, composite and the remaining G-buffer attachments,
+ *  6. resumes the non-MSAA G-buffer render pass with load semantics.
+ */
+void vulkan_deferred_lighting_msaa()
+{
+    if (Cmdline_msaa_enabled <= 0) {
+        return;
+    }
+
+    auto* pp = getPostProcessor();
+    if (!pp || !pp->deferred().isMsaaInitialized()) {
+        return;
+    }
+
+    auto* stateTracker = getStateTracker();
+    vk::CommandBuffer cmd = stateTracker->getCommandBuffer();
+
+    // End MSAA G-buffer render pass.
+    // With finalLayout == subpass layout, all attachments stay in their subpass layouts:
+    // colors remain eColorAttachmentOptimal, depth remains eDepthStencilAttachmentOptimal.
+    cmd.endRenderPass();
+
+    // Reset sample count to 1x (resolve and subsequent passes are non-MSAA)
+    stateTracker->setCurrentSampleCount(vk::SampleCountFlagBits::e1);
+
+    // Transition all 6 MSAA images to eShaderReadOnlyOptimal for the resolve shader.
+    // We use explicit barriers instead of render pass finalLayout transitions to
+    // ensure the validation layer tracks the layout changes correctly.
+    transitionMsaaImages(cmd, pp,
+        vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eShaderReadOnlyOptimal,
+        vk::AccessFlagBits::eColorAttachmentWrite, vk::AccessFlagBits::eShaderRead,
+        vk::ImageLayout::eDepthStencilAttachmentOptimal, vk::ImageLayout::eShaderReadOnlyOptimal,
+        vk::AccessFlagBits::eDepthStencilAttachmentWrite, vk::AccessFlagBits::eShaderRead,
+        vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eLateFragmentTests,
+        vk::PipelineStageFlagBits::eFragmentShader);
+
+    // Begin resolve render pass (non-MSAA, writes to standard G-buffer images)
+    {
+        auto extent = pp->getSceneExtent();
+        vk::RenderPassBeginInfo rpBegin;
+        rpBegin.renderPass = pp->deferred().msaaResolveRenderPass();
+        rpBegin.framebuffer = pp->deferred().msaaResolveFramebuffer();
+        rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+        rpBegin.renderArea.extent = extent;
+        // 6 attachments: 5 color + depth. loadOp=eDontCare for all (fully overwritten).
+        std::array<vk::ClearValue, 6> clearValues{};
+        clearValues[VulkanDeferredGBuffer::MSAA_COLOR_ATTACHMENT_COUNT].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+        rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+        rpBegin.pClearValues = clearValues.data();
+        cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+
+        auto* pipelineMgr = getPipelineManager();
+        auto* descriptorMgr = getDescriptorManager();
+        auto* bufferMgr = getBufferManager();
+        auto* texMgr = getTextureManager();
+
+        PipelineConfig config;
+        config.shaderType = SDR_TYPE_MSAA_RESOLVE;
+        config.primitiveType = PRIM_TYPE_TRIS;
+        config.depthMode = ZBUFFER_TYPE_FULL;
+        config.blendMode = ALPHA_BLEND_NONE;
+        config.cullEnabled = false;
+        config.depthWriteEnabled = true;
+        config.renderPass = pp->deferred().msaaResolveRenderPass();
+        // Same constant that indexes the depth clear value above (5 color attachments).
+        config.colorAttachmentCount = VulkanDeferredGBuffer::MSAA_COLOR_ATTACHMENT_COUNT;
+
+        vertex_layout emptyLayout;
+        vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+        if (pipeline) {
+            cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+            vk::Viewport viewport;
+            viewport.x = 0.0f;
+            viewport.y = 0.0f;
+            viewport.width = static_cast<float>(extent.width);
+            viewport.height = static_cast<float>(extent.height);
+            viewport.minDepth = 0.0f;
+            viewport.maxDepth = 1.0f;
+            cmd.setViewport(0, viewport);
+            vk::Rect2D scissor;
+            scissor.offset = vk::Offset2D(0, 0);
+            scissor.extent = extent;
+            cmd.setScissor(0, scissor);
+
+            DescriptorWriter writer;
+            writer.reset(descriptorMgr->getDevice(), descriptorMgr->getFallbacks());
+
+            // Global set (fallback — resolve shader doesn't use global bindings)
+            vk::DescriptorSet globalSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Global);
+            Verify(globalSet);
+            writer.writeSet(globalSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Global));
+
+            // Material set: All 6 MSAA textures in binding 1 array (elements 0-5)
+            // [0]=color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=depth
+            vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+            Verify(materialSet);
+            writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material));
+
+            // Build texture array: elements 0-5 are MSAA textures, 6-15 are fallback
+            vk::Sampler nearestSampler = texMgr->getSampler(
+                vk::Filter::eNearest, vk::Filter::eNearest,
+                vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false);
+
+            std::array<vk::DescriptorImageInfo, 16> texImages;
+            // MSAA textures at slots 0-5
+            // Fill all slots with MSAA color view (validation checks ALL elements
+            // even though the shader only accesses 0-5 — sample count must match).
+            texImages.fill({nearestSampler, pp->deferred().msaaColorView(), vk::ImageLayout::eShaderReadOnlyOptimal});
+            texImages[1] = {nearestSampler, pp->deferred().msaaPositionView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+            texImages[2] = {nearestSampler, pp->deferred().msaaNormalView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+            texImages[3] = {nearestSampler, pp->deferred().msaaSpecularView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+            texImages[4] = {nearestSampler, pp->deferred().msaaEmissiveView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+            texImages[5] = {nearestSampler, pp->deferred().msaaDepthView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+            writer.setImageArray(MaterialBinding::TextureArray, texImages);
+
+            // PerDraw set: GenericData UBO with {samples, fov} at binding 0
+            vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+            Verify(perDrawSet);
+            writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw));
+
+            struct MsaaResolveData {
+                int samples;
+                float fov;
+            } resolveData;
+            resolveData.samples = Cmdline_msaa_enabled;
+            resolveData.fov = g3_get_hfov(Proj_fov);
+
+            // Each frame owns one fixed-size slot of the resolve UBO; the same size
+            // is used for the slot offset and for the bound descriptor range.
+            constexpr uint32_t resolveUboSlotSize = 256;
+            static_assert(sizeof(MsaaResolveData) <= resolveUboSlotSize,
+                "MsaaResolveData must fit into one resolve UBO slot");
+
+            uint32_t frame = bufferMgr->getCurrentFrame();
+            uint32_t slotOffset = frame * resolveUboSlotSize;
+            memcpy(static_cast<uint8_t*>(pp->deferred().msaaResolveUBOMapped()) + slotOffset,
+                &resolveData, sizeof(resolveData));
+
+            writer.setBuffer(PerDrawBinding::GenericData,
+                {pp->deferred().msaaResolveUBO(), slotOffset, resolveUboSlotSize});
+            writer.flush();
+            cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+                pipelineMgr->getPipelineLayout(),
+                static_cast<uint32_t>(DescriptorSetIndex::Global), globalSet, {});
+            cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+                pipelineMgr->getPipelineLayout(),
+                static_cast<uint32_t>(DescriptorSetIndex::Material), materialSet, {});
+            cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+                pipelineMgr->getPipelineLayout(),
+                static_cast<uint32_t>(DescriptorSetIndex::PerDraw), perDrawSet, {});
+
+            // Fullscreen triangle: 3 vertices, no vertex buffer (empty vertex layout).
+            cmd.draw(3, 1, 0, 0);
+        }
+
+        cmd.endRenderPass();
+    }
+
+    // Restore MSAA images to their resting layout (attachment-optimal) so they
+    // match the validation layer's tracking state for the next frame.
+    transitionMsaaImages(cmd, pp,
+        vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eColorAttachmentOptimal,
+        vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eColorAttachmentWrite,
+        vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eDepthStencilAttachmentOptimal,
+        vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eDepthStencilAttachmentWrite,
+        vk::PipelineStageFlagBits::eFragmentShader,
+        vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eEarlyFragmentTests);
+
+    // After resolve, the non-MSAA G-buffer has properly resolved data.
+    // Color attachments 0-4 are in eShaderReadOnlyOptimal (from resolve pass finalLayout).
+    // Depth is in eDepthStencilAttachmentOptimal.
+    // Subsequent deferred_lighting_end/finish operate on the non-MSAA G-buffer unchanged.
+
+    // Transition scene color from eShaderReadOnlyOptimal → eColorAttachmentOptimal
+    // (deferred_lighting_end resumes the non-MSAA gbuf pass and needs scene color writable)
+    {
+        vk::ImageMemoryBarrier barrier;
+        barrier.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+        barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+        barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+        barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.image = pp->getSceneColorImage();
+        barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+        cmd.pipelineBarrier(
+            vk::PipelineStageFlagBits::eColorAttachmentOutput,
+            vk::PipelineStageFlagBits::eColorAttachmentOutput,
+            {}, nullptr, nullptr, barrier);
+    }
+
+    // Composite is not part of the resolve framebuffer, so its layout is
+    // indeterminate (UNDEFINED on first frame, eTransferSrcOptimal from
+    // previous frame's composite→scene copy, etc.). Use oldLayout=eUndefined
+    // to transition it regardless of current state — content will be fully
+    // overwritten by emissive→composite copy in deferred_lighting_finish().
+    {
+        vk::ImageMemoryBarrier barrier;
+        barrier.srcAccessMask = {};
+        barrier.dstAccessMask = {};
+        barrier.oldLayout = vk::ImageLayout::eUndefined;
+        barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.image = pp->deferred().compositeImage();
+        barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+        cmd.pipelineBarrier(
+            vk::PipelineStageFlagBits::eTopOfPipe,
+            vk::PipelineStageFlagBits::eColorAttachmentOutput,
+            {}, nullptr, nullptr, barrier);
+    }
+
+    // Transition G-buffer attachments 1-5 for resume
+    // (all now in eShaderReadOnlyOptimal: 1-4 from resolve finalLayout, 5 from above)
+    pp->deferred().transitionForResume(cmd);
+
+    // Resume the non-MSAA G-buffer render pass with eLoad
+    {
+        auto extent = pp->getSceneExtent();
+        vk::RenderPassBeginInfo rpBegin;
+        rpBegin.renderPass = pp->deferred().renderPassLoad();
+        rpBegin.framebuffer = pp->deferred().framebuffer();
+        rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+        rpBegin.renderArea.extent = extent;
+        // 7 attachments: 6 color + depth; depth is indexed by the color attachment count.
+        std::array<vk::ClearValue, 7> clearValues{};
+        clearValues[VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+        rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+        rpBegin.pClearValues = clearValues.data();
+        cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+        stateTracker->setRenderPass(pp->deferred().renderPassLoad(), 0);
+        stateTracker->setColorAttachmentCount(VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT);
+    }
+}
+
+void vulkan_deferred_lighting_end()
+{
+    if (!Deferred_lighting) {
+        return;
+    }
+
+    Deferred_lighting = false;
+
+    // After this, rendering goes back to writing only attachment 0.
+ // The pipeline still has 6 blend states (matching the G-buffer render pass) + // but the shader only outputs to location 0. Attachments 1-5 are untouched. +} + +void vulkan_deferred_lighting_finish() +{ + if (!light_deferred_enabled()) { + return; + } + + auto* pp = getPostProcessor(); + if (!pp || !pp->deferred().isInitialized()) { + return; + } + + auto* renderer = getRendererInstance(); + if (!renderer->isSceneRendering()) { + return; + } + + auto* stateTracker = getStateTracker(); + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + // 1. End G-buffer render pass + // All 6 color attachments → eShaderReadOnlyOptimal + // Depth → eDepthStencilAttachmentOptimal + cmd.endRenderPass(); + + // 2. Copy emissive → composite (the emissive data becomes the base for light accumulation) + // Emissive → eShaderReadOnlyOptimal (done), composite → eColorAttachmentOptimal (for light accum) + copyImageToImage(cmd, + pp->deferred().emissiveImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal, + pp->deferred().compositeImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eColorAttachmentOptimal, + pp->getSceneExtent()); + + // 3. Render deferred lights (begins + ends light accum render pass internally) + // After this, composite is in eShaderReadOnlyOptimal + pp->renderDeferredLights(cmd); + + // 4. 
Fog rendering (between light accumulation and forward rendering) + // Matches OpenGL flow in opengl_deferred_lighting_finish() + bool bDrawFullNeb = The_mission.flags[Mission::Mission_Flags::Fullneb] + && Neb2_render_mode != NEB2_RENDER_NONE && !s_vulkanOverrideFog; + bool bDrawNebVolumetrics = The_mission.volumetrics + && The_mission.volumetrics->get_enabled() && !s_vulkanOverrideFog; + + bool fogRendered = false; + if (bDrawFullNeb) { + // Scene fog reads composite + depth → writes scene color + pp->renderSceneFog(cmd); + fogRendered = true; + + if (bDrawNebVolumetrics) { + // Copy scene color → composite so volumetric reads the fogged result + copyImageToImage(cmd, + pp->getSceneColorImage(), vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal, + pp->deferred().compositeImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal, + pp->getSceneExtent()); + } + } + if (bDrawNebVolumetrics) { + // Volumetric fog reads composite + emissive + depth + 3D volumes → writes scene color + pp->renderVolumetricFog(cmd); + fogRendered = true; + } + + if (!fogRendered) { + // No fog — copy composite → scene color (existing behavior) + // Skip src post-barrier (composite not used again in this path) + copyImageToImage(cmd, + pp->deferred().compositeImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferSrcOptimal, + pp->getSceneColorImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eColorAttachmentOptimal, + pp->getSceneExtent()); + } + + // 5. Switch to scene render pass for forward transparent objects + // After light accumulation, use the 2-attachment scene render pass instead + // of the 6-attachment G-buffer pass. Forward-rendered transparent objects + // only write to fragOut0 — using the G-buffer pass would leave undefined + // values at attachment locations 1-5. 
+ renderer->setUseGbufRenderPass(false); + stateTracker->setColorAttachmentCount(1); + + // Resume scene render pass (loadOp=eLoad) with depth preserved + { + auto extent = pp->getSceneExtent(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = pp->getSceneRenderPassLoad(); + rpBegin.framebuffer = pp->getSceneFramebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + stateTracker->setRenderPass(pp->getSceneRenderPassLoad(), 0); + } +} + +void vulkan_override_fog(bool set_override) { + s_vulkanOverrideFog = set_override; +} + +// ========== Shadow Map Rendering ========== + +} // namespace graphics::vulkan + + + +extern bool gr_htl_projection_matrix_set; + + +namespace graphics::vulkan { + +namespace { +bool Glowpoint_override_save = false; +bool Restore_swapchain_after_shadow_pass = false; +} // anonymous namespace + +void vulkan_shadow_map_start(matrix4* shadow_view_matrix, const matrix* light_matrix, vec3d* eye_pos) +{ + if (Shadow_quality == ShadowQuality::Disabled || !getRendererInstance()->supportsShaderViewportLayerOutput()) { + return; + } + + auto* pp = getPostProcessor(); + if (!pp) { + return; + } + + // Lazy-init shadow resources + if (!pp->shadow().isInitialized()) { + if (!pp->initShadowPass()) { + return; + } + } + + auto* stateTracker = getStateTracker(); + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + // End the current render pass but keep track if we need to resume the swapchain render pass or the scene render pass afterwards. 
+ Restore_swapchain_after_shadow_pass = !getRendererInstance()->isSceneRendering(); + cmd.endRenderPass(); + + // Shadow render pass is always non-MSAA (1x sample count) + stateTracker->setCurrentSampleCount(vk::SampleCountFlagBits::e1); + + // Begin shadow render pass (eClear for both color and depth) + { + int shadowSize = pp->shadow().textureSize(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = pp->shadow().renderPass(); + rpBegin.framebuffer = pp->shadow().framebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = vk::Extent2D(static_cast(shadowSize), static_cast(shadowSize)); + + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + stateTracker->setRenderPass(pp->shadow().renderPass(), 0); + stateTracker->setColorAttachmentCount(1); + } + + // Set viewport and scissor to shadow texture size + { + const int shadowSize = pp->shadow().textureSize(); + + stateTracker->setViewport(0.0f, 0.0f, static_cast(shadowSize), static_cast(shadowSize), 0.0f, 1.0f); + stateTracker->setScissor(0, 0, static_cast(shadowSize), static_cast(shadowSize)); + } + + Rendering_to_shadow_map = true; + Glowpoint_override_save = Glowpoint_override; + Glowpoint_override = true; + + gr_htl_projection_matrix_set = true; + + gr_set_view_matrix(eye_pos, light_matrix); + + *shadow_view_matrix = gr_view_matrix; +} + +void vulkan_shadow_map_end() +{ + if (!Rendering_to_shadow_map) { + return; + } + + auto* pp = getPostProcessor(); + auto* stateTracker = getStateTracker(); + auto* renderer = getRendererInstance(); + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + gr_end_view_matrix(); + Rendering_to_shadow_map = false; + + gr_zbuffer_set(ZBUFFER_TYPE_FULL); + + 
Glowpoint_override = Glowpoint_override_save; + gr_htl_projection_matrix_set = false; + + // End shadow render pass (color transitions to eShaderReadOnlyOptimal via finalLayout) + cmd.endRenderPass(); + + if ( Restore_swapchain_after_shadow_pass ) { + renderer->resumeSwapChainPass(); + } else if ( renderer->isUsingGbufRenderPass() ) { + const bool msaaActive = (Cmdline_msaa_enabled > 0 && pp->deferred().isMsaaInitialized()); + + if ( msaaActive ) { + // Resume MSAA G-buffer render pass + pp->deferred().transitionMsaaForResume(cmd); + + auto extent = pp->getSceneExtent(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = pp->deferred().msaaRenderPassLoad(); + rpBegin.framebuffer = pp->deferred().msaaFramebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + std::array clearValues{}; + clearValues[VulkanDeferredGBuffer::MSAA_COLOR_ATTACHMENT_COUNT].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + stateTracker->setRenderPass(pp->deferred().msaaRenderPassLoad(), 0); + stateTracker->setColorAttachmentCount(VulkanDeferredGBuffer::MSAA_COLOR_ATTACHMENT_COUNT); + stateTracker->setCurrentSampleCount(getRendererInstance()->getMsaaSampleCount()); + } else { + // Transition scene color: eShaderReadOnlyOptimal → eColorAttachmentOptimal + // (Scene color was in eShaderReadOnlyOptimal from ending G-buffer pass before shadow start) + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = pp->getSceneColorImage(); + 
barrier.subresourceRange = { vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1 }; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, nullptr, nullptr, barrier); + } + + // Transition G-buffer attachments 1-5 for resume + pp->deferred().transitionForResume(cmd); + + // Resume G-buffer render pass with eLoad + auto extent = pp->getSceneExtent(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = pp->deferred().renderPassLoad(); + rpBegin.framebuffer = pp->deferred().framebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + + std::array clearValues{}; + clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + stateTracker->setRenderPass(pp->deferred().renderPassLoad(), 0); + stateTracker->setColorAttachmentCount(VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT); + } + } else { + renderer->resumeSceneRendering(); + } + + // Restore viewport and scissor to scene size + { + vk::Viewport viewport; + viewport.x = static_cast(gr_screen.offset_x); + viewport.y = static_cast(gr_screen.offset_y); + viewport.width = static_cast(gr_screen.clip_width); + viewport.height = static_cast(gr_screen.clip_height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + cmd.setViewport(0, viewport); + + vk::Rect2D scissor; + scissor.offset = vk::Offset2D(gr_screen.offset_x, gr_screen.offset_y); + scissor.extent = vk::Extent2D(static_cast(gr_screen.clip_width), static_cast(gr_screen.clip_height)); + cmd.setScissor(0, scissor); + } +} + +// ========== Decal Pass ========== + +void vulkan_start_decal_pass() +{ + auto* renderer = getRendererInstance(); + auto* pp = getPostProcessor(); + auto* stateTracker = getStateTracker(); + + if (!renderer->isSceneRendering() || !pp || 
!pp->deferred().isInitialized()) { + return; + } + + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + // End the G-buffer render pass (transitions all color attachments to eShaderReadOnlyOptimal) + cmd.endRenderPass(); + + // Copy scene depth → samplable depth copy (for fragment depth reconstruction) + pp->copySceneDepth(cmd); + + // Copy G-buffer normal → samplable normal copy (for angle rejection) + pp->deferred().copyNormal(cmd); + + // Transition scene color: eShaderReadOnlyOptimal → eColorAttachmentOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = pp->getSceneColorImage(); + barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, nullptr, nullptr, barrier); + } + + // Transition G-buffer attachments 1-5 for render pass resume + pp->deferred().transitionForResume(cmd); + + // Resume G-buffer render pass with eLoad + { + auto extent = pp->getSceneExtent(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = pp->deferred().renderPassLoad(); + rpBegin.framebuffer = pp->deferred().framebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + + std::array clearValues{}; + clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + stateTracker->setRenderPass(pp->deferred().renderPassLoad(), 0); + 
stateTracker->setColorAttachmentCount(VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT); + } + + // Restore viewport (Y-flipped for Vulkan scene rendering) + auto extent = pp->getSceneExtent(); + stateTracker->setViewport(0.0f, + static_cast(extent.height), + static_cast(extent.width), + -static_cast(extent.height)); +} + +void vulkan_stop_decal_pass() +{ + // No-op — decals draw within the resumed G-buffer render pass +} + +void vulkan_render_decals(decal_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int num_elements, + const indexed_vertex_source& buffers, + const gr_buffer_handle& instance_buffer, + int num_instances) +{ + if (!material_info || !layout || num_instances <= 0) { + return; + } + + auto* stateTracker = getStateTracker(); + auto* pipelineManager = getPipelineManager(); + auto* descManager = getDescriptorManager(); + auto* bufferManager = getBufferManager(); + auto* drawManager = getDrawManager(); + auto* texManager = getTextureManager(); + auto* pp = getPostProcessor(); + + // Set up matrices + gr_matrix_set_uniforms(); + + // Build pipeline config for decal rendering + PipelineConfig config; + config.shaderType = SDR_TYPE_DECAL; + config.primitiveType = prim_type; + config.depthMode = material_info->get_depth_mode(); + config.depthWriteEnabled = false; + config.cullEnabled = false; + config.frontFaceCW = false; + config.blendMode = material_info->get_blend_mode(); + config.renderPass = stateTracker->getCurrentRenderPass(); + config.colorAttachmentCount = stateTracker->getColorAttachmentCount(); + + // Per-attachment blend: active attachments (color, normal, emissive) get + // the material's blend mode with RGB-only write mask. Inactive attachments get + // write mask = 0 to avoid corrupting G-buffer data. 
+ config.perAttachmentBlendEnabled = true; + for (uint32_t i = 0; i < config.colorAttachmentCount; ++i) { + config.attachmentBlends[i].blendMode = ALPHA_BLEND_NONE; + config.attachmentBlends[i].writeMask = {false, false, false, false}; + } + // Color/diffuse — use material blend mode 0 + config.attachmentBlends[VulkanDeferredGBuffer::GBUF_ATT_COLOR].blendMode = material_info->get_blend_mode(0); + config.attachmentBlends[VulkanDeferredGBuffer::GBUF_ATT_COLOR].writeMask = {true, true, true, false}; + // Normal — always additive + config.attachmentBlends[VulkanDeferredGBuffer::GBUF_ATT_NORMAL].blendMode = ALPHA_BLEND_ADDITIVE; + config.attachmentBlends[VulkanDeferredGBuffer::GBUF_ATT_NORMAL].writeMask = {true, true, true, false}; + // Emissive — use material blend mode 2 + config.attachmentBlends[VulkanDeferredGBuffer::GBUF_ATT_EMISSIVE].blendMode = material_info->get_blend_mode(2); + config.attachmentBlends[VulkanDeferredGBuffer::GBUF_ATT_EMISSIVE].writeMask = {true, true, true, false}; + + // Get or create pipeline + vk::Pipeline pipeline = pipelineManager->getPipeline(config, *layout); + if (!pipeline) { + mprintf(("vulkan_render_decals: Failed to get pipeline!\n")); + return; + } + + stateTracker->bindPipeline(pipeline, pipelineManager->getPipelineLayout()); + + // Helper: get DescriptorBufferInfo from pending binding (null buffer = fallback) + auto getPendingBufInfo = [&](size_t blockIdx) -> vk::DescriptorBufferInfo { + const auto& pending = drawManager->getPendingUniformBinding(blockIdx); + if (pending.valid) { + vk::Buffer buf = bufferManager->getVkBuffer(pending.bufferHandle); + if (buf) { + return {buf, pending.offset, pending.size}; + } + } + return {}; + }; + + DescriptorWriter writer; + writer.reset(descManager->getDevice(), descManager->getFallbacks()); + + // Set 0: Global (all fallback) + vk::DescriptorSet globalSet = descManager->allocateFrameSet(DescriptorSetIndex::Global); + Verify(globalSet); + writer.writeSet(globalSet, 
VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Global)); + + // Set 1: Material + vk::DescriptorSet materialSet = descManager->allocateFrameSet(DescriptorSetIndex::Material); + Verify(materialSet); + writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material)); + + // Binding 2: DecalGlobals UBO + writer.setBuffer(MaterialBinding::DecalGlobals, + getPendingBufInfo(static_cast(uniform_block_type::DecalGlobals))); + + // Binding 1: decal textures (diffuse, glow, normal as texture array) + drawManager->bindMaterialTextures(material_info, &writer); + + // Binding 4: scene depth copy (for fragment depth reconstruction) + { + vk::Sampler nearestSampler = texManager->getSampler( + vk::Filter::eNearest, vk::Filter::eNearest, + vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false); + vk::ImageView depthView = pp->getSceneDepthCopyView(); + if (depthView && nearestSampler) { + writer.setImage(MaterialBinding::DepthMap, {nearestSampler, depthView, vk::ImageLayout::eShaderReadOnlyOptimal}); + } + } + + // Binding 6: G-buffer normal copy (for angle rejection) + { + vk::Sampler nearestSampler = texManager->getSampler( + vk::Filter::eNearest, vk::Filter::eNearest, + vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false); + vk::ImageView normalView = pp->deferred().normalCopyView(); + if (normalView && nearestSampler) { + writer.setImage(MaterialBinding::DistortionMap, {nearestSampler, normalView, vk::ImageLayout::eShaderReadOnlyOptimal}); + } + } + + // Set 2: PerDraw + vk::DescriptorSet perDrawSet = descManager->allocateFrameSet(DescriptorSetIndex::PerDraw); + Verify(perDrawSet); + writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw)); + writer.setBuffer(PerDrawBinding::Matrices, + getPendingBufInfo(static_cast(uniform_block_type::Matrices))); + writer.setBuffer(PerDrawBinding::DecalInfo, + getPendingBufInfo(static_cast(uniform_block_type::DecalInfo))); + writer.flush(); + 
stateTracker->bindDescriptorSet(DescriptorSetIndex::Global, globalSet); + stateTracker->bindDescriptorSet(DescriptorSetIndex::Material, materialSet); + stateTracker->bindDescriptorSet(DescriptorSetIndex::PerDraw, perDrawSet); + + // Bind vertex buffers: binding 0 = box VBO, binding 1 = instance buffer + vk::Buffer boxVBO = bufferManager->getVkBuffer(buffers.Vbuffer_handle); + vk::Buffer boxIBO = bufferManager->getVkBuffer(buffers.Ibuffer_handle); + vk::Buffer instBuf = bufferManager->getVkBuffer(instance_buffer); + + if (!boxVBO || !boxIBO || !instBuf) { + mprintf(("vulkan_render_decals: Missing buffer(s)!\n")); + return; + } + + stateTracker->bindVertexBuffer(0, boxVBO, 0); + + // Instance buffer needs frame base offset for streaming buffers + size_t instFrameOffset = bufferManager->getFrameBaseOffset(instance_buffer); + stateTracker->bindVertexBuffer(1, instBuf, static_cast(instFrameOffset)); + + stateTracker->bindIndexBuffer(boxIBO, 0, vk::IndexType::eUint32); + + // Flush dynamic state and draw + stateTracker->applyDynamicState(); + + auto cmdBuffer = stateTracker->getCommandBuffer(); + cmdBuffer.drawIndexed( + static_cast(num_elements), // index count + static_cast(num_instances), // instance count + 0, // first index + 0, // vertex offset + 0 // first instance + ); +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanDeferred.h b/code/graphics/vulkan/VulkanDeferred.h new file mode 100644 index 00000000000..0c52bfadffa --- /dev/null +++ b/code/graphics/vulkan/VulkanDeferred.h @@ -0,0 +1,39 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "graphics/material.h" + +struct matrix; +struct matrix4; +struct vec3d; + + +namespace graphics::vulkan { + +// Deferred lighting pipeline entry points (gr_screen.gf_* implementations) +void vulkan_deferred_lighting_begin(bool clearNonColorBufs); +void vulkan_deferred_lighting_msaa(); +void vulkan_deferred_lighting_end(); +void vulkan_deferred_lighting_finish(); 
+ +// Fog control +void vulkan_override_fog(bool set_override); + +// Shadow map rendering +void vulkan_shadow_map_start(matrix4* shadow_view_matrix, const matrix* light_matrix, vec3d* eye_pos); +void vulkan_shadow_map_end(); + +// Decal pass +void vulkan_start_decal_pass(); +void vulkan_stop_decal_pass(); +void vulkan_render_decals(decal_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int num_elements, + const indexed_vertex_source& buffers, + const gr_buffer_handle& instance_buffer, + int num_instances); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanDeletionQueue.cpp b/code/graphics/vulkan/VulkanDeletionQueue.cpp new file mode 100644 index 00000000000..82727a9ab22 --- /dev/null +++ b/code/graphics/vulkan/VulkanDeletionQueue.cpp @@ -0,0 +1,195 @@ + +#include "VulkanDeletionQueue.h" + + +namespace graphics::vulkan { + +namespace { +VulkanDeletionQueue* g_deletionQueue = nullptr; +} + +VulkanDeletionQueue* getDeletionQueue() +{ + Assertion(g_deletionQueue != nullptr, "Vulkan DeletionQueue not initialized!"); + return g_deletionQueue; +} + +void setDeletionQueue(VulkanDeletionQueue* queue) +{ + g_deletionQueue = queue; +} + +VulkanDeletionQueue::~VulkanDeletionQueue() +{ + shutdown(); +} + +void VulkanDeletionQueue::init(vk::Device device, VulkanMemoryManager* memoryManager) +{ + m_device = device; + m_memoryManager = memoryManager; + m_initialized = true; +} + +void VulkanDeletionQueue::shutdown() +{ + if (!m_initialized) { + return; + } + + flushAll(); + m_initialized = false; +} + +void VulkanDeletionQueue::queueBuffer(vk::Buffer buffer, VulkanAllocation allocation) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueBuffer called before initialization!"); + if (!buffer) { + return; + } + + PendingDestruction pending; + pending.resource = PendingBuffer{buffer, allocation}; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void 
VulkanDeletionQueue::queueImage(vk::Image image, VulkanAllocation allocation) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueImage called before initialization!"); + if (!image) { + return; + } + + PendingDestruction pending; + pending.resource = PendingImage{image, allocation}; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::queueImageView(vk::ImageView imageView) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueImageView called before initialization!"); + if (!imageView) { + return; + } + + PendingDestruction pending; + pending.resource = imageView; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::queueFramebuffer(vk::Framebuffer framebuffer) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueFramebuffer called before initialization!"); + if (!framebuffer) { + return; + } + + PendingDestruction pending; + pending.resource = framebuffer; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::queueRenderPass(vk::RenderPass renderPass) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueRenderPass called before initialization!"); + if (!renderPass) { + return; + } + + PendingDestruction pending; + pending.resource = renderPass; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::queueSampler(vk::Sampler sampler) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueSampler called before initialization!"); + if (!sampler) { + return; + } + + PendingDestruction pending; + pending.resource = sampler; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::processDestructions() +{ + Assertion(m_initialized, "VulkanDeletionQueue::processDestructions called before initialization!"); + if 
(m_pendingDestructions.empty()) { + return; + } + + auto it = m_pendingDestructions.begin(); + while (it != m_pendingDestructions.end()) { + if (it->framesRemaining > 0) { + it->framesRemaining--; + ++it; + } else { + destroyResource(it->resource); + it = m_pendingDestructions.erase(it); + } + } +} + +void VulkanDeletionQueue::flushAll() +{ + if (!m_initialized) { + return; + } + + for (const auto& pending : m_pendingDestructions) { + destroyResource(pending.resource); + } + m_pendingDestructions.clear(); +} + +void VulkanDeletionQueue::destroyResource(const PendingResource& resource) +{ + std::visit([this](auto&& res) -> void { + using T = std::decay_t; + + if constexpr (std::is_same_v) { + if (res.buffer) { + m_device.destroyBuffer(res.buffer); + } + if (res.allocation.isValid() && m_memoryManager) { + VulkanAllocation alloc = res.allocation; // Copy for non-const ref + m_memoryManager->freeAllocation(alloc); + } + } else if constexpr (std::is_same_v) { + if (res.image) { + m_device.destroyImage(res.image); + } + if (res.allocation.isValid() && m_memoryManager) { + VulkanAllocation alloc = res.allocation; // Copy for non-const ref + m_memoryManager->freeAllocation(alloc); + } + } else if constexpr (std::is_same_v) { + if (res) { + m_device.destroyImageView(res); + } + } else if constexpr (std::is_same_v) { + if (res) { + m_device.destroyFramebuffer(res); + } + } else if constexpr (std::is_same_v) { + if (res) { + m_device.destroyRenderPass(res); + } + } else if constexpr (std::is_same_v) { + if (res) { + m_device.destroySampler(res); + } + } + }, resource); +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanDeletionQueue.h b/code/graphics/vulkan/VulkanDeletionQueue.h new file mode 100644 index 00000000000..6ce0923a82c --- /dev/null +++ b/code/graphics/vulkan/VulkanDeletionQueue.h @@ -0,0 +1,118 @@ + +#pragma once + +#include "globalincs/pstypes.h" +#include "VulkanConstants.h" +#include "VulkanMemory.h" + +#include +#include +#include 
+ + +namespace graphics::vulkan { + +/** + * @brief Unified deferred resource deletion queue for Vulkan + * + * Resources that may still be referenced by in-flight command buffers are + * queued here instead of being destroyed immediately. After waiting the + * configured number of frames, they are safely destroyed. + * + * This prevents validation errors like "can't be called on VkImageView that + * is currently in use by VkDescriptorSet". + */ +class VulkanDeletionQueue { +public: + static constexpr uint32_t FRAMES_TO_WAIT = MAX_FRAMES_IN_FLIGHT; + + VulkanDeletionQueue() = default; + ~VulkanDeletionQueue(); + + void init(vk::Device device, VulkanMemoryManager* memoryManager); + void shutdown(); + + /** + * @brief Queue a buffer for deferred destruction + */ + void queueBuffer(vk::Buffer buffer, VulkanAllocation allocation); + + /** + * @brief Queue an image for deferred destruction + */ + void queueImage(vk::Image image, VulkanAllocation allocation); + + /** + * @brief Queue an image view for deferred destruction + */ + void queueImageView(vk::ImageView imageView); + + /** + * @brief Queue a framebuffer for deferred destruction + */ + void queueFramebuffer(vk::Framebuffer framebuffer); + + /** + * @brief Queue a render pass for deferred destruction + */ + void queueRenderPass(vk::RenderPass renderPass); + + /** + * @brief Queue a sampler for deferred destruction + */ + void queueSampler(vk::Sampler sampler); + + /** + * @brief Process pending destructions - call once per frame + * + * Decrements frame counters and destroys resources that have waited + * enough frames. + */ + void processDestructions(); + + /** + * @brief Flush all pending destructions immediately + * + * Used during shutdown when we know the device is idle. 
+ */ + void flushAll(); + +private: + struct PendingBuffer { + vk::Buffer buffer; + VulkanAllocation allocation; + }; + + struct PendingImage { + vk::Image image; + VulkanAllocation allocation; + }; + + using PendingResource = std::variant< + PendingBuffer, + PendingImage, + vk::ImageView, + vk::Framebuffer, + vk::RenderPass, + vk::Sampler + >; + + struct PendingDestruction { + PendingResource resource; + uint32_t framesRemaining; + }; + + void destroyResource(const PendingResource& resource); + + vk::Device m_device; + VulkanMemoryManager* m_memoryManager = nullptr; + SCP_vector m_pendingDestructions; + bool m_initialized = false; +}; + +// Global deletion queue instance +VulkanDeletionQueue* getDeletionQueue(); +void setDeletionQueue(VulkanDeletionQueue* queue); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanDescriptorManager.cpp b/code/graphics/vulkan/VulkanDescriptorManager.cpp new file mode 100644 index 00000000000..363133497c0 --- /dev/null +++ b/code/graphics/vulkan/VulkanDescriptorManager.cpp @@ -0,0 +1,383 @@ +#include "VulkanDescriptorManager.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" + + +namespace graphics::vulkan { + +// ========== Static set templates ========== + +static constexpr DescriptorBindingTemplate s_globalBindings[] = { + {GlobalBinding::Lights, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {GlobalBinding::DeferredData, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {GlobalBinding::ShadowMap, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment, vk::ImageViewType::e2DArray}, + {GlobalBinding::EnvMap, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment, vk::ImageViewType::eCube}, + {GlobalBinding::IrradianceMap, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment, 
vk::ImageViewType::eCube}, +}; +static constexpr DescriptorSetTemplate s_globalTemplate(s_globalBindings); + +static constexpr DescriptorBindingTemplate s_materialBindings[] = { + {MaterialBinding::ModelData, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {MaterialBinding::TextureArray, vk::DescriptorType::eCombinedImageSampler, 16, vk::ShaderStageFlagBits::eFragment, vk::ImageViewType::e2DArray}, + {MaterialBinding::DecalGlobals, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {MaterialBinding::TransformSSBO, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eVertex}, + {MaterialBinding::DepthMap, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {MaterialBinding::SceneColor, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {MaterialBinding::DistortionMap, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, +}; +static constexpr DescriptorSetTemplate s_materialTemplate(s_materialBindings); + +static constexpr DescriptorBindingTemplate s_perDrawBindings[] = { + {PerDrawBinding::GenericData, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {PerDrawBinding::Matrices, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {PerDrawBinding::NanoVGData, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {PerDrawBinding::DecalInfo, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment}, + {PerDrawBinding::MovieData, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eFragment}, +}; +static constexpr DescriptorSetTemplate s_perDrawTemplate(s_perDrawBindings); + +// ========== Static 
uniform binding mappings ========== + +static constexpr VulkanDescriptorManager::UniformBindingEntry s_globalUBOs[] = { + {GlobalBinding::Lights, uniform_block_type::Lights}, + {GlobalBinding::DeferredData, uniform_block_type::DeferredGlobals}, +}; + +static constexpr VulkanDescriptorManager::UniformBindingEntry s_materialUBOs[] = { + {MaterialBinding::ModelData, uniform_block_type::ModelData}, + {MaterialBinding::DecalGlobals, uniform_block_type::DecalGlobals}, +}; + +static constexpr VulkanDescriptorManager::UniformBindingEntry s_perDrawUBOs[] = { + {PerDrawBinding::GenericData, uniform_block_type::GenericData}, + {PerDrawBinding::Matrices, uniform_block_type::Matrices}, + {PerDrawBinding::NanoVGData, uniform_block_type::NanoVGData}, + {PerDrawBinding::DecalInfo, uniform_block_type::DecalInfo}, + {PerDrawBinding::MovieData, uniform_block_type::MovieData}, +}; + + +// ========== DescriptorFallbacks ========== + +const vk::DescriptorImageInfo& DescriptorFallbacks::getImage(vk::ImageViewType t) const +{ + switch (t) { + case vk::ImageViewType::e2D: return texture2D; + case vk::ImageViewType::e2DArray: return texture2DArray; + case vk::ImageViewType::eCube: return textureCube; + case vk::ImageViewType::e3D: return texture3D; + default: + Assertion(false, "DescriptorFallbacks::getImage: unhandled ImageViewType %d", static_cast(t)); + return texture2D; + } +} + +// ========== DescriptorWriter template-based methods ========== + +void DescriptorWriter::writeSet(vk::DescriptorSet set, const DescriptorSetTemplate& tmpl) +{ + Verify(m_fallbacks); + + // Clear binding slots for this set + m_bindingSlots = {}; + + for (const auto& b : tmpl) { + Verify(m_writeCount < MAX_WRITES); + Verify(b.binding < MAX_BINDINGS_PER_SET); + + auto& w = m_writes[m_writeCount++]; + w = vk::WriteDescriptorSet(); + w.dstSet = set; + w.dstBinding = b.binding; + w.descriptorCount = b.count; + w.descriptorType = b.type; + + auto& slot = m_bindingSlots[b.binding]; + slot.count = b.count; + 
slot.viewType = b.viewType; + + bool isImage = (b.type == vk::DescriptorType::eCombinedImageSampler); + if (isImage) { + Verify(m_imageInfoCount + b.count <= MAX_IMAGE_INFOS); + auto* dst = &m_imageInfos[m_imageInfoCount]; + const auto& fallbackImg = m_fallbacks->getImage(b.viewType); + for (uint32_t j = 0; j < b.count; ++j) { + dst[j] = fallbackImg; + } + w.pImageInfo = dst; + slot.imageInfo = dst; + m_imageInfoCount += b.count; + } else { + Verify(m_bufferInfoCount < MAX_BUFFER_INFOS); + m_bufferInfos[m_bufferInfoCount] = m_fallbacks->buffer; + w.pBufferInfo = &m_bufferInfos[m_bufferInfoCount]; + slot.bufferInfo = &m_bufferInfos[m_bufferInfoCount++]; + } + } +} + +void DescriptorWriter::setBuffer(uint32_t binding, const vk::DescriptorBufferInfo& info) +{ + Verify(binding < MAX_BINDINGS_PER_SET); + auto& slot = m_bindingSlots[binding]; + Verify(slot.bufferInfo); + if (info.buffer) { + *slot.bufferInfo = info; + } else { + *slot.bufferInfo = m_fallbacks->buffer; + } +} + +void DescriptorWriter::setImage(uint32_t binding, const vk::DescriptorImageInfo& info) +{ + Verify(binding < MAX_BINDINGS_PER_SET); + auto& slot = m_bindingSlots[binding]; + Verify(slot.imageInfo); + if (info.imageView) { + *slot.imageInfo = info; + } else { + *slot.imageInfo = m_fallbacks->getImage(slot.viewType); + } +} + +void DescriptorWriter::setImageArray(uint32_t binding, ArrayView infos) +{ + Verify(binding < MAX_BINDINGS_PER_SET); + auto& slot = m_bindingSlots[binding]; + Verify(slot.imageInfo); + Verify(infos.size <= slot.count); + memcpy(slot.imageInfo, infos.data, infos.size * sizeof(vk::DescriptorImageInfo)); +} + +// ========== Global descriptor manager ========== + +static VulkanDescriptorManager* g_descriptorManager = nullptr; + +VulkanDescriptorManager* getDescriptorManager() +{ + Assertion(g_descriptorManager != nullptr, "Vulkan DescriptorManager not initialized!"); + return g_descriptorManager; +} + +void setDescriptorManager(VulkanDescriptorManager* manager) +{ + 
g_descriptorManager = manager; +} + +bool VulkanDescriptorManager::init(vk::Device device) +{ + if (m_initialized) { + return true; + } + + m_device = device; + + createSetLayouts(); + createDescriptorPools(); + + m_initialized = true; + mprintf(("VulkanDescriptorManager: Initialized\n")); + return true; +} + +void VulkanDescriptorManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Wait for device idle before destroying + m_device.waitIdle(); + + // Destroy pools (automatically frees allocated sets) + for (auto& poolChain : m_framePools) { + poolChain.clear(); + } + + // Destroy layouts + for (auto& layout : m_setLayouts) { + layout.reset(); + } + + m_initialized = false; + mprintf(("VulkanDescriptorManager: Shutdown complete\n")); +} + +void VulkanDescriptorManager::buildFallbacks(VulkanBufferManager* bufMgr, VulkanTextureManager* texMgr) +{ + m_fallbacks.buffer = bufMgr->getFallbackUniformBufferInfo(); + m_fallbacks.texture2D = texMgr->getFallbackTextureInfo2D(); + m_fallbacks.texture2DArray = texMgr->getFallbackTextureInfo2DArray(); + m_fallbacks.textureCube = texMgr->getFallbackTextureInfoCube(); + m_fallbacks.texture3D = texMgr->getFallbackTextureInfo3D(); + mprintf(("VulkanDescriptorManager: Fallbacks built\n")); +} + +const DescriptorSetTemplate& VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex setIndex) +{ + switch (setIndex) { + case DescriptorSetIndex::Global: return s_globalTemplate; + case DescriptorSetIndex::Material: return s_materialTemplate; + case DescriptorSetIndex::PerDraw: return s_perDrawTemplate; + default: + Assertion(false, "Invalid DescriptorSetIndex!"); + return s_globalTemplate; + } +} + +vk::DescriptorSetLayout VulkanDescriptorManager::getSetLayout(DescriptorSetIndex setIndex) const +{ + return m_setLayouts[static_cast(setIndex)].get(); +} + +vk::DescriptorSet VulkanDescriptorManager::allocateFrameSet(DescriptorSetIndex setIndex) +{ + if (!m_initialized) { + return {}; + } + + vk::DescriptorSetLayout layout = 
m_setLayouts[static_cast(setIndex)].get(); + auto& pools = m_framePools[m_currentFrame]; + + // Try allocating from the last pool in the list + if (!pools.empty()) { + vk::DescriptorSetAllocateInfo allocInfo; + allocInfo.descriptorPool = pools.back().get(); + allocInfo.descriptorSetCount = 1; + allocInfo.pSetLayouts = &layout; + + try { + auto sets = m_device.allocateDescriptorSets(allocInfo); + return sets[0]; + } catch (const vk::OutOfPoolMemoryError&) { + // Pool exhausted, fall through to create a new one + } catch (const vk::FragmentedPoolError&) { + // Pool fragmented, fall through to create a new one + } + } + + // Create a new pool and retry + pools.push_back(createFramePool()); + mprintf(("VulkanDescriptorManager: Grew frame %u pool count to %zu\n", + m_currentFrame, pools.size())); + + vk::DescriptorSetAllocateInfo allocInfo; + allocInfo.descriptorPool = pools.back().get(); + allocInfo.descriptorSetCount = 1; + allocInfo.pSetLayouts = &layout; + + try { + auto sets = m_device.allocateDescriptorSets(allocInfo); + return sets[0]; + } catch (const vk::SystemError& e) { + mprintf(("VulkanDescriptorManager: Failed to allocate frame descriptor set after pool growth: %s\n", e.what())); + return {}; + } +} + +void VulkanDescriptorManager::beginFrame() +{ + if (!m_initialized) { + return; + } + + auto& pools = m_framePools[m_currentFrame]; + + // Reset all pools for the current frame + for (auto& pool : pools) { + m_device.resetDescriptorPool(pool.get()); + } + + // If we grew beyond the initial pool, shrink back to 1 to reclaim memory + // (the single pool will grow again next frame if needed) + if (pools.size() > 1) { + vk::UniqueDescriptorPool first = std::move(pools[0]); + pools.clear(); + pools.push_back(std::move(first)); + } +} + +void VulkanDescriptorManager::endFrame() +{ + // Advance to next frame + m_currentFrame = (m_currentFrame + 1) % MAX_FRAMES_IN_FLIGHT; +} + +ArrayView +VulkanDescriptorManager::getUniformBindings(DescriptorSetIndex setIndex) +{ + 
switch (setIndex) { + case DescriptorSetIndex::Global: return {s_globalUBOs, std::size(s_globalUBOs)}; + case DescriptorSetIndex::Material: return {s_materialUBOs, std::size(s_materialUBOs)}; + case DescriptorSetIndex::PerDraw: return {s_perDrawUBOs, std::size(s_perDrawUBOs)}; + default: return {nullptr, 0}; + } +} + +void VulkanDescriptorManager::createSetLayouts() +{ + m_setLayouts[static_cast(DescriptorSetIndex::Global)] = createSetLayout(s_globalTemplate); + m_setLayouts[static_cast(DescriptorSetIndex::Material)] = createSetLayout(s_materialTemplate); + m_setLayouts[static_cast(DescriptorSetIndex::PerDraw)] = createSetLayout(s_perDrawTemplate); + + mprintf(("VulkanDescriptorManager: Created %zu descriptor set layouts\n", + static_cast(DescriptorSetIndex::Count))); +} + +vk::UniqueDescriptorPool VulkanDescriptorManager::createFramePool() +{ + // Pool sizes per chunk - supports ~330 draw calls (3 sets each) + // If more are needed, additional pools are created automatically + constexpr uint32_t MAX_SETS_PER_POOL = 1024; + constexpr uint32_t MAX_UNIFORM_BUFFERS = MAX_SETS_PER_POOL * 9; // up to 9 UBOs per draw + constexpr uint32_t MAX_SAMPLERS = MAX_SETS_PER_POOL * 16; // up to 16 samplers per material set + + SCP_vector poolSizes = { + { vk::DescriptorType::eUniformBuffer, MAX_UNIFORM_BUFFERS }, + { vk::DescriptorType::eCombinedImageSampler, MAX_SAMPLERS }, + { vk::DescriptorType::eStorageBuffer, MAX_SETS_PER_POOL }, + }; + + vk::DescriptorPoolCreateInfo poolInfo; + poolInfo.maxSets = MAX_SETS_PER_POOL; + poolInfo.poolSizeCount = static_cast(poolSizes.size()); + poolInfo.pPoolSizes = poolSizes.data(); + + return m_device.createDescriptorPoolUnique(poolInfo); +} + +void VulkanDescriptorManager::createDescriptorPools() +{ + // Create one initial pool per frame (more will be added on demand) + for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + m_framePools[i].push_back(createFramePool()); + } + + mprintf(("VulkanDescriptorManager: Created %u frame pool 
chains\n", + MAX_FRAMES_IN_FLIGHT)); +} + +vk::UniqueDescriptorSetLayout VulkanDescriptorManager::createSetLayout( + const DescriptorSetTemplate& tmpl) +{ + SCP_vector vkBindings; + vkBindings.reserve(tmpl.size); + + for (const auto& b : tmpl) { + vk::DescriptorSetLayoutBinding binding; + binding.binding = b.binding; + binding.descriptorType = b.type; + binding.descriptorCount = b.count; + binding.stageFlags = b.stages; + binding.pImmutableSamplers = nullptr; + vkBindings.push_back(binding); + } + + vk::DescriptorSetLayoutCreateInfo layoutInfo; + layoutInfo.bindingCount = static_cast(vkBindings.size()); + layoutInfo.pBindings = vkBindings.data(); + + return m_device.createDescriptorSetLayoutUnique(layoutInfo); +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanDescriptorManager.h b/code/graphics/vulkan/VulkanDescriptorManager.h new file mode 100644 index 00000000000..a3f8352f466 --- /dev/null +++ b/code/graphics/vulkan/VulkanDescriptorManager.h @@ -0,0 +1,298 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "VulkanConstants.h" + +#include +#include + + +namespace graphics::vulkan { + +class VulkanBufferManager; +class VulkanTextureManager; + +// ========== Descriptor Set Templates ========== + +struct DescriptorBindingTemplate { + uint32_t binding; + vk::DescriptorType type; + uint32_t count; // 1 for most, 16 for texture array + vk::ShaderStageFlags stages; + vk::ImageViewType viewType; // only meaningful for eCombinedImageSampler + + constexpr DescriptorBindingTemplate(uint32_t binding_, vk::DescriptorType type_, + uint32_t count_, vk::ShaderStageFlags stages_, + vk::ImageViewType viewType_ = vk::ImageViewType::e2D) + : binding(binding_), type(type_), count(count_), stages(stages_), viewType(viewType_) {} +}; + +struct DescriptorSetTemplate : ArrayView { + using ArrayView::ArrayView; +}; + +struct DescriptorFallbacks { + vk::DescriptorBufferInfo buffer; + vk::DescriptorImageInfo texture2D; + 
vk::DescriptorImageInfo texture2DArray; + vk::DescriptorImageInfo textureCube; + vk::DescriptorImageInfo texture3D; + + const vk::DescriptorImageInfo& getImage(vk::ImageViewType t) const; +}; + +/** + * @brief Stack-allocated batch writer for descriptor set updates. + * + * Usage: reset() + writeSet() (pre-fills all bindings with fallbacks) + * + setBuffer/setImage overrides for real data + flush(). + */ +class DescriptorWriter { +public: + static constexpr uint32_t MAX_WRITES = 32; + static constexpr uint32_t MAX_BUFFER_INFOS = 20; + static constexpr uint32_t MAX_IMAGE_INFOS = 24; + static constexpr uint32_t MAX_BINDINGS_PER_SET = 16; + + void reset(vk::Device device, const DescriptorFallbacks& fallbacks) { + m_device = device; + m_fallbacks = &fallbacks; + m_writeCount = 0; + m_bufferInfoCount = 0; + m_imageInfoCount = 0; + } + + void writeSet(vk::DescriptorSet set, const DescriptorSetTemplate& tmpl); + + void setBuffer(uint32_t binding, const vk::DescriptorBufferInfo& info); + void setImage(uint32_t binding, const vk::DescriptorImageInfo& info); + void setImageArray(uint32_t binding, ArrayView infos); + + void flush() { + if (m_writeCount > 0) { + m_device.updateDescriptorSets(m_writeCount, m_writes.data(), 0, nullptr); + } + m_writeCount = 0; + m_bufferInfoCount = 0; + m_imageInfoCount = 0; + } + +private: + // Per-binding lookup for the current writeSet, indexed by binding number. + // Populated by writeSet, used by setBuffer/setImage/setImageArray for O(1) access. 
+ struct BindingSlot { + vk::DescriptorBufferInfo* bufferInfo = nullptr; // non-null for buffer bindings + vk::DescriptorImageInfo* imageInfo = nullptr; // non-null for image bindings + uint32_t count = 0; // descriptor count (1 or 16 for arrays) + vk::ImageViewType viewType = vk::ImageViewType::e2D; // for fallback lookup + }; + + vk::Device m_device; + const DescriptorFallbacks* m_fallbacks = nullptr; + + std::array m_writes; + std::array m_bufferInfos; + std::array m_imageInfos; + std::array m_bindingSlots; + uint32_t m_writeCount = 0; + uint32_t m_bufferInfoCount = 0; + uint32_t m_imageInfoCount = 0; +}; + +/** + * @brief Descriptor set indices for the 3-tier layout + * + * Set 0: Global - per-frame data (lights, deferred globals, shadow maps) + * Set 1: Material - per-material data (model data, textures) + * Set 2: Per-Draw - per-draw-call data (generic data, matrices, etc.) + */ +enum class DescriptorSetIndex : uint32_t { + Global = 0, + Material = 1, + PerDraw = 2, + + Count = 3 +}; + +// ========== Descriptor Binding Constants ========== + +// Global Set (Set 0) bindings — per-frame data +namespace GlobalBinding { + static constexpr uint32_t Lights = 0; // UBO: light data + static constexpr uint32_t DeferredData = 1; // UBO: deferred globals + static constexpr uint32_t ShadowMap = 2; // sampler2D: shadow map + static constexpr uint32_t EnvMap = 3; // samplerCube: environment map + static constexpr uint32_t IrradianceMap = 4; // samplerCube: irradiance map +} + +// Material Set (Set 1) bindings — per-material data +namespace MaterialBinding { + static constexpr uint32_t ModelData = 0; // UBO: model/material data + static constexpr uint32_t TextureArray = 1; // sampler2D[16]: material textures + static constexpr uint32_t DecalGlobals = 2; // UBO: decal globals + static constexpr uint32_t TransformSSBO = 3; // SSBO: batched transforms + static constexpr uint32_t DepthMap = 4; // sampler2D: depth (soft particles) + static constexpr uint32_t SceneColor = 5; // 
sampler2D: scene color (distortion) + static constexpr uint32_t DistortionMap = 6; // sampler2D: distortion texture +} + +// Texture array slot indices (elements within MaterialBinding::TextureArray) +namespace TextureSlot { + static constexpr uint32_t BaseMap = 0; + static constexpr uint32_t GlowMap = 1; + static constexpr uint32_t SpecMap = 2; + static constexpr uint32_t NormalMap = 3; + static constexpr uint32_t HeightMap = 4; + static constexpr uint32_t AmbientMap = 5; + static constexpr uint32_t MiscMap = 6; +} + +// PerDraw Set (Set 2) bindings — per-draw-call data +namespace PerDrawBinding { + static constexpr uint32_t GenericData = 0; // UBO: generic shader data + static constexpr uint32_t Matrices = 1; // UBO: transform matrices + static constexpr uint32_t NanoVGData = 2; // UBO: NanoVG data + static constexpr uint32_t DecalInfo = 3; // UBO: per-decal info + static constexpr uint32_t MovieData = 4; // UBO: movie playback data +} + + +/** + * @brief Manages Vulkan descriptor sets, pools, and layouts + * + * Provides descriptor set allocation and update functionality. + * Uses per-frame pools for transient descriptors. + */ +class VulkanDescriptorManager { +public: + static constexpr uint32_t MAX_TEXTURE_BINDINGS = 16; // Texture array size + + VulkanDescriptorManager() = default; + ~VulkanDescriptorManager() = default; + + // Non-copyable + VulkanDescriptorManager(const VulkanDescriptorManager&) = delete; + VulkanDescriptorManager& operator=(const VulkanDescriptorManager&) = delete; + + /** + * @brief Initialize descriptor manager + * @param device Vulkan logical device + * @return true on success + */ + bool init(vk::Device device); + + /** + * @brief Shutdown and release resources + */ + void shutdown(); + + /** + * @brief Build fallback descriptor values from buffer/texture managers. + * Must be called after buffer and texture managers are initialized. 
+ */ + void buildFallbacks(VulkanBufferManager* bufMgr, VulkanTextureManager* texMgr); + + /** + * @brief Get the fallback descriptor values + */ + const DescriptorFallbacks& getFallbacks() const { return m_fallbacks; } + + /** + * @brief Get the set template for a given set index + */ + static const DescriptorSetTemplate& getSetTemplate(DescriptorSetIndex setIndex); + + /** + * @brief Get descriptor set layout for a given set index + */ + vk::DescriptorSetLayout getSetLayout(DescriptorSetIndex setIndex) const; + + /** + * @brief Get all descriptor set layouts (for pipeline layout creation) + * @return Reference to the UniqueDescriptorSetLayout array (Global, Material, PerDraw) + */ + const auto& getAllSetLayouts() const { return m_setLayouts; } + + /** + * @brief Allocate a descriptor set from the per-frame pool + * @param setIndex Which set type to allocate + * @return Allocated descriptor set, or null handle on failure + */ + vk::DescriptorSet allocateFrameSet(DescriptorSetIndex setIndex); + + /** + * @brief Begin a new frame - reset current frame's pool + */ + void beginFrame(); + + /** + * @brief End current frame - advance to next pool + */ + void endFrame(); + + /** + * @brief Get current frame index + */ + uint32_t getCurrentFrame() const { return m_currentFrame; } + + /** + * @brief Get the Vulkan device (for DescriptorWriter) + */ + vk::Device getDevice() const { return m_device; } + + /** + * @brief Entry mapping a UBO binding to its uniform_block_type + */ + struct UniformBindingEntry { + uint32_t binding; + uniform_block_type blockType; + }; + + /** + * @brief Get the UBO bindings for a given descriptor set + */ + static ArrayView getUniformBindings(DescriptorSetIndex setIndex); + +private: + /** + * @brief Create all descriptor set layouts + */ + void createSetLayouts(); + + /** + * @brief Create descriptor pools + */ + void createDescriptorPools(); + + /** + * @brief Create a single descriptor set layout + */ + vk::UniqueDescriptorSetLayout 
createSetLayout(const DescriptorSetTemplate& tmpl); + + /** + * @brief Create a new descriptor pool with standard sizes + */ + vk::UniqueDescriptorPool createFramePool(); + + vk::Device m_device; + + // Descriptor set layouts (one per set type) + std::array(DescriptorSetIndex::Count)> m_setLayouts; + + // Per-frame descriptor pools (growable - new pools added on demand) + std::array, MAX_FRAMES_IN_FLIGHT> m_framePools; + + // Pre-built fallback descriptor values + DescriptorFallbacks m_fallbacks{}; + + uint32_t m_currentFrame = 0; + bool m_initialized = false; +}; + +// Global descriptor manager access +VulkanDescriptorManager* getDescriptorManager(); +void setDescriptorManager(VulkanDescriptorManager* manager); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanDraw.cpp b/code/graphics/vulkan/VulkanDraw.cpp new file mode 100644 index 00000000000..a81cae8b92e --- /dev/null +++ b/code/graphics/vulkan/VulkanDraw.cpp @@ -0,0 +1,2076 @@ +#include "VulkanDraw.h" + +#include +#include + +#include "VulkanState.h" +#include "VulkanBuffer.h" +#include "VulkanPipeline.h" +#include "VulkanShader.h" +#include "VulkanTexture.h" +#include "VulkanRenderer.h" +#include "VulkanPostProcessing.h" +#include "VulkanDescriptorManager.h" +#include "VulkanDeletionQueue.h" +#include "VulkanMemory.h" +#include "VulkanConstants.h" +#include "gr_vulkan.h" +#include "VulkanVertexFormat.h" +#include "bmpman/bmpman.h" +#include "ddsutils/ddsutils.h" +#include "graphics/grinternal.h" +#include "graphics/material.h" +#include "graphics/matrix.h" +#include "graphics/util/primitives.h" +#include "graphics/util/uniform_structs.h" +#include "graphics/shadows.h" +#include "lighting/lighting.h" +#include "graphics/util/UniformBuffer.h" + +#define MODEL_SDR_FLAG_MODE_CPP +#include "def_files/data/effects/model_shader_flags.h" + +namespace graphics::vulkan { + + +// Convert FSO texture addressing mode to Vulkan sampler address mode +static vk::SamplerAddressMode 
convertTextureAddressing(int mode) +{ + switch (mode) { + case TMAP_ADDRESS_MIRROR: + return vk::SamplerAddressMode::eMirroredRepeat; + case TMAP_ADDRESS_CLAMP: + return vk::SamplerAddressMode::eClampToEdge; + case TMAP_ADDRESS_WRAP: + default: + return vk::SamplerAddressMode::eRepeat; + } +} + +// Global draw manager pointer +static VulkanDrawManager* g_drawManager = nullptr; + +// ========== Transform buffer for batched submodel rendering ========== +// Per-frame sub-allocating buffer. Multiple draw lists may upload transforms +// in a single frame (e.g. space view + HUD targeting). Because Vulkan defers +// command submission until flip(), each upload must be preserved — we append +// rather than overwrite, and bind the SSBO with the per-upload byte offset. + +// SSBO descriptor offsets must be aligned to minStorageBufferOffsetAlignment. +// The Vulkan spec guarantees this value is <= 256, so 256 is always safe. +static constexpr size_t SSBO_OFFSET_ALIGNMENT = 256; + +struct TransformBufferState { + vk::Buffer buffer; + VulkanAllocation allocation; + size_t capacity = 0; // allocated bytes + size_t writeOffset = 0; // append cursor (resets each frame) + size_t lastUploadOffset = 0; // byte offset of most recent upload + size_t lastUploadSize = 0; // byte size of most recent upload +}; +static std::array g_transformBuffers; +static uint32_t g_lastTransformWriteFrame = UINT32_MAX; + +void vulkan_update_transform_buffer(void* data, size_t size) +{ + if (!data || size == 0) { + return; + } + + auto* descManager = getDescriptorManager(); + uint32_t frameIdx = descManager->getCurrentFrame(); + auto& tb = g_transformBuffers[frameIdx]; + + // Reset write cursor on first call of each frame + if (g_lastTransformWriteFrame != frameIdx) { + tb.writeOffset = 0; + g_lastTransformWriteFrame = frameIdx; + } + + // Align the write offset for SSBO descriptor binding + size_t alignedOffset = (tb.writeOffset + SSBO_OFFSET_ALIGNMENT - 1) & ~(SSBO_OFFSET_ALIGNMENT - 1); + size_t 
needed = alignedOffset + size; + + auto* memManager = getMemoryManager(); + + // Resize if needed, preserving data already written this frame + if (needed > tb.capacity) { + size_t newCapacity = std::max(needed * 2, static_cast(4096)); + + auto* bufferManager = getBufferManager(); + vk::Device device = bufferManager->getDevice(); + + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = static_cast(newCapacity); + bufferInfo.usage = vk::BufferUsageFlagBits::eStorageBuffer; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer newBuffer; + VulkanAllocation newAllocation; + + try { + newBuffer = device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("vulkan_update_transform_buffer: Failed to create buffer: %s\n", e.what())); + return; + } + + Verify(memManager->allocateBufferMemory(newBuffer, MemoryUsage::CpuToGpu, newAllocation)); + + // Copy data already written this frame from old buffer + if (tb.buffer && tb.writeOffset > 0) { + void* oldMapped = memManager->mapMemory(tb.allocation); + void* newMapped = memManager->mapMemory(newAllocation); + Verify(oldMapped); + Verify(newMapped); + memcpy(newMapped, oldMapped, tb.writeOffset); + memManager->unmapMemory(tb.allocation); + memManager->unmapMemory(newAllocation); + } + + // Defer destruction of old buffer + if (tb.buffer) { + auto* deletionQueue = getDeletionQueue(); + deletionQueue->queueBuffer(tb.buffer, tb.allocation); + } + + tb.buffer = newBuffer; + tb.allocation = newAllocation; + tb.capacity = newCapacity; + } + + // Upload new data at the aligned offset + void* mapped = memManager->mapMemory(tb.allocation); + Verify(mapped); + memcpy(static_cast(mapped) + alignedOffset, data, size); + memManager->flushMemory(tb.allocation, alignedOffset, size); + memManager->unmapMemory(tb.allocation); + + tb.lastUploadOffset = alignedOffset; + tb.lastUploadSize = size; + tb.writeOffset = alignedOffset + size; +} + +VulkanDrawManager* getDrawManager() +{ + Assertion(g_drawManager 
!= nullptr, "Vulkan DrawManager not initialized!"); + return g_drawManager; +} + +void setDrawManager(VulkanDrawManager* manager) +{ + g_drawManager = manager; +} + +bool VulkanDrawManager::init(vk::Device device) +{ + if (m_initialized) { + return true; + } + + m_device = device; + + initSphereBuffers(); + + m_initialized = true; + mprintf(("VulkanDrawManager: Initialized\n")); + return true; +} + +void VulkanDrawManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Destroy transform SSBO buffers (static globals, not tracked by deletion queue) + auto* bufferManager = getBufferManager(); + auto* memManager = getMemoryManager(); + if (bufferManager && memManager) { + vk::Device device = bufferManager->getDevice(); + for (auto& tb : g_transformBuffers) { + if (tb.buffer) { + device.destroyBuffer(tb.buffer); + memManager->freeAllocation(tb.allocation); + tb.buffer = nullptr; + tb.capacity = 0; + tb.writeOffset = 0; + } + } + } + + shutdownSphereBuffers(); + + m_initialized = false; + mprintf(("VulkanDrawManager: Shutdown complete\n")); +} + +void VulkanDrawManager::clear() +{ + (void)this; + auto* stateTracker = getStateTracker(); + + // Use the current clip/scissor region for clearing, matching OpenGL behavior. + // In OpenGL, glClear() respects the scissor test - if a clip region is set, + // only that region is cleared. Without this, HUD code that does + // gr_set_clip(panel) + gr_clear() would wipe the entire screen in Vulkan. 
+ vk::ClearAttachment clearAttachment; + clearAttachment.aspectMask = vk::ImageAspectFlagBits::eColor; + clearAttachment.colorAttachment = 0; + clearAttachment.clearValue.color = stateTracker->getClearColor(); + + vk::ClearRect clearRect; + if (stateTracker->isScissorEnabled()) { + // Respect the current clip region (matches OpenGL scissor behavior) + clearRect.rect.offset = vk::Offset2D(gr_screen.offset_x + gr_screen.clip_left, + gr_screen.offset_y + gr_screen.clip_top); + clearRect.rect.extent = vk::Extent2D(static_cast(gr_screen.clip_width), + static_cast(gr_screen.clip_height)); + } else { + clearRect.rect.offset = vk::Offset2D(0, 0); + clearRect.rect.extent = vk::Extent2D(static_cast(gr_screen.max_w), + static_cast(gr_screen.max_h)); + } + clearRect.baseArrayLayer = 0; + clearRect.layerCount = 1; + + auto cmdBuffer = stateTracker->getCommandBuffer(); + cmdBuffer.clearAttachments(1, &clearAttachment, 1, &clearRect); +} + +void VulkanDrawManager::setClearColor(int r, int g, int b) +{ + (void)this; + auto* stateTracker = getStateTracker(); + + float fr = static_cast(r) / 255.0f; + float fg = static_cast(g) / 255.0f; + float fb = static_cast(b) / 255.0f; + + // Apply HDR gamma if needed + if (High_dynamic_range) { + const float SRGB_GAMMA = 2.2f; + fr = powf(fr, SRGB_GAMMA); + fg = powf(fg, SRGB_GAMMA); + fb = powf(fb, SRGB_GAMMA); + } + + stateTracker->setClearColor(fr, fg, fb, 1.0f); + + // Also update gr_screen for compatibility + gr_screen.current_clear_color.red = static_cast(r); + gr_screen.current_clear_color.green = static_cast(g); + gr_screen.current_clear_color.blue = static_cast(b); + gr_screen.current_clear_color.alpha = 255; +} + +void VulkanDrawManager::setClip(int x, int y, int w, int h, int resize_mode) +{ + (void)this; + auto* stateTracker = getStateTracker(); + + // Clamp values + x = std::max(x, 0); + y = std::max(y, 0); + + int to_resize = (resize_mode != GR_RESIZE_NONE && resize_mode != GR_RESIZE_REPLACE && + (gr_screen.custom_size || 
(gr_screen.rendering_to_texture != -1))); + + int max_w = (to_resize) ? gr_screen.max_w_unscaled : gr_screen.max_w; + int max_h = (to_resize) ? gr_screen.max_h_unscaled : gr_screen.max_h; + + if ((gr_screen.rendering_to_texture != -1) && to_resize) { + gr_unsize_screen_pos(&max_w, &max_h); + } + + if (resize_mode != GR_RESIZE_REPLACE) { + if (x >= max_w) x = max_w - 1; + if (y >= max_h) y = max_h - 1; + if (x + w > max_w) w = max_w - x; + if (y + h > max_h) h = max_h - y; + w = std::min(w, max_w); + h = std::min(h, max_h); + } + + // Store unscaled values + gr_screen.offset_x_unscaled = x; + gr_screen.offset_y_unscaled = y; + gr_screen.clip_left_unscaled = 0; + gr_screen.clip_right_unscaled = w - 1; + gr_screen.clip_top_unscaled = 0; + gr_screen.clip_bottom_unscaled = h - 1; + gr_screen.clip_width_unscaled = w; + gr_screen.clip_height_unscaled = h; + + if (to_resize) { + gr_resize_screen_pos(&x, &y, &w, &h, resize_mode); + } else { + gr_unsize_screen_pos(&gr_screen.offset_x_unscaled, &gr_screen.offset_y_unscaled); + gr_unsize_screen_pos(&gr_screen.clip_right_unscaled, &gr_screen.clip_bottom_unscaled); + gr_unsize_screen_pos(&gr_screen.clip_width_unscaled, &gr_screen.clip_height_unscaled); + } + + // Update gr_screen clip state (scaled values) + gr_screen.offset_x = x; + gr_screen.offset_y = y; + gr_screen.clip_left = 0; + gr_screen.clip_top = 0; + gr_screen.clip_right = w - 1; + gr_screen.clip_bottom = h - 1; + gr_screen.clip_width = w; + gr_screen.clip_height = h; + + gr_screen.clip_aspect = i2fl(w) / i2fl(h); + gr_screen.clip_center_x = (gr_screen.clip_left + gr_screen.clip_right) * 0.5f; + gr_screen.clip_center_y = (gr_screen.clip_top + gr_screen.clip_bottom) * 0.5f; + + // Check if full screen (disable scissor) + if ((x == 0) && (y == 0) && (w == max_w) && (h == max_h)) { + stateTracker->setScissorEnabled(false); + return; + } + + // Enable scissor test + stateTracker->setScissorEnabled(true); + stateTracker->setScissor(x, y, static_cast(w), static_cast(h)); +} 
+ +void VulkanDrawManager::resetClip() +{ + (void)this; + auto* stateTracker = getStateTracker(); + + int max_w = gr_screen.max_w; + int max_h = gr_screen.max_h; + + gr_screen.offset_x = gr_screen.offset_x_unscaled = 0; + gr_screen.offset_y = gr_screen.offset_y_unscaled = 0; + gr_screen.clip_left = gr_screen.clip_left_unscaled = 0; + gr_screen.clip_top = gr_screen.clip_top_unscaled = 0; + gr_screen.clip_right = gr_screen.clip_right_unscaled = max_w - 1; + gr_screen.clip_bottom = gr_screen.clip_bottom_unscaled = max_h - 1; + gr_screen.clip_width = gr_screen.clip_width_unscaled = max_w; + gr_screen.clip_height = gr_screen.clip_height_unscaled = max_h; + + if (gr_screen.custom_size) { + gr_unsize_screen_pos(&gr_screen.clip_right_unscaled, &gr_screen.clip_bottom_unscaled); + gr_unsize_screen_pos(&gr_screen.clip_width_unscaled, &gr_screen.clip_height_unscaled); + } + + gr_screen.clip_aspect = i2fl(max_w) / i2fl(max_h); + gr_screen.clip_center_x = (gr_screen.clip_left + gr_screen.clip_right) * 0.5f; + gr_screen.clip_center_y = (gr_screen.clip_top + gr_screen.clip_bottom) * 0.5f; + + stateTracker->setScissorEnabled(false); +} + +int VulkanDrawManager::zbufferGet() const +{ + if (!gr_global_zbuffering) { + return GR_ZBUFF_NONE; + } + return m_zbufferMode; +} + +int VulkanDrawManager::zbufferSet(int mode) +{ + auto* stateTracker = getStateTracker(); + + int prev = m_zbufferMode; + m_zbufferMode = mode; + + // Update FSO global state + if (mode == GR_ZBUFF_NONE) { + gr_zbuffering = 0; + } else { + gr_zbuffering = 1; + } + gr_zbuffering_mode = mode; + + gr_zbuffer_type zbufType; + switch (mode) { + case GR_ZBUFF_NONE: + zbufType = ZBUFFER_TYPE_NONE; + break; + case GR_ZBUFF_READ: + zbufType = ZBUFFER_TYPE_READ; + break; + case GR_ZBUFF_WRITE: + zbufType = ZBUFFER_TYPE_WRITE; + break; + case GR_ZBUFF_FULL: + default: + zbufType = ZBUFFER_TYPE_FULL; + break; + } + stateTracker->setZBufferMode(zbufType); + + return prev; +} + +void VulkanDrawManager::zbufferClear(int mode) +{ + 
auto* stateTracker = getStateTracker(); + + if (mode) { + // Enable zbuffering and clear + gr_zbuffering = 1; + gr_zbuffering_mode = GR_ZBUFF_FULL; + gr_global_zbuffering = 1; + m_zbufferMode = GR_ZBUFF_FULL; + stateTracker->setZBufferMode(ZBUFFER_TYPE_FULL); + + // Clear depth buffer + vk::ClearAttachment clearAttachment; + clearAttachment.aspectMask = vk::ImageAspectFlagBits::eDepth; + clearAttachment.clearValue.depthStencil.depth = 1.0f; + clearAttachment.clearValue.depthStencil.stencil = 0; + + vk::ClearRect clearRect; + clearRect.rect.offset = vk::Offset2D(0, 0); + clearRect.rect.extent = vk::Extent2D(static_cast(gr_screen.max_w), + static_cast(gr_screen.max_h)); + clearRect.baseArrayLayer = 0; + clearRect.layerCount = 1; + + stateTracker->getCommandBuffer().clearAttachments(1, &clearAttachment, 1, &clearRect); + } else { + // Disable zbuffering + gr_zbuffering = 0; + gr_zbuffering_mode = GR_ZBUFF_NONE; + gr_global_zbuffering = 0; + m_zbufferMode = GR_ZBUFF_NONE; + stateTracker->setZBufferMode(ZBUFFER_TYPE_NONE); + } +} + +int VulkanDrawManager::stencilSet(int mode) +{ + auto* stateTracker = getStateTracker(); + + int prev = m_stencilMode; + m_stencilMode = mode; + gr_stencil_mode = mode; + + stateTracker->setStencilMode(mode); + + // Set stencil reference based on mode + if (mode == GR_STENCIL_READ || mode == GR_STENCIL_WRITE) { + stateTracker->setStencilReference(1); + } else { + stateTracker->setStencilReference(0); + } + + return prev; +} + +void VulkanDrawManager::stencilClear() +{ + (void)this; + auto* stateTracker = getStateTracker(); + + // Clear stencil buffer + vk::ClearAttachment clearAttachment; + clearAttachment.aspectMask = vk::ImageAspectFlagBits::eStencil; + clearAttachment.clearValue.depthStencil.depth = 1.0f; + clearAttachment.clearValue.depthStencil.stencil = 0; + + vk::ClearRect clearRect; + clearRect.rect.offset = vk::Offset2D(0, 0); + clearRect.rect.extent = vk::Extent2D(static_cast(gr_screen.max_w), + static_cast(gr_screen.max_h)); + 
clearRect.baseArrayLayer = 0; + clearRect.layerCount = 1; + + stateTracker->getCommandBuffer().clearAttachments(1, &clearAttachment, 1, &clearRect); +} + +int VulkanDrawManager::setCull(int cull) +{ + auto* stateTracker = getStateTracker(); + + int prev = m_cullEnabled ? 1 : 0; + m_cullEnabled = (cull != 0); + + stateTracker->setCullMode(m_cullEnabled); + + return prev; +} + +void VulkanDrawManager::renderPrimitivesCommon(material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle, size_t buffer_offset, + int* statCounter) +{ + if (!material_info || !layout || n_verts <= 0) { + return; + } + + if (statCounter != nullptr) { + (*statCounter)++; + } + + // Apply material state and bind pipeline + if (!applyMaterial(material_info, prim_type, layout)) { + return; + } + + // Bind vertex buffer and issue the draw call + bindVertexBuffer(buffer_handle, buffer_offset); + draw(prim_type, offset, n_verts); +} + +void VulkanDrawManager::renderPrimitives(material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle, size_t buffer_offset) +{ + renderPrimitivesCommon(material_info, prim_type, layout, offset, n_verts, + buffer_handle, buffer_offset, &m_frameStats.renderPrimitiveCalls); +} + +void VulkanDrawManager::renderPrimitivesBatched(batched_bitmap_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle) +{ + renderPrimitivesCommon(material_info, prim_type, layout, offset, n_verts, + buffer_handle, 0, &m_frameStats.renderBatchedCalls); +} + +void VulkanDrawManager::renderPrimitivesParticle(particle_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle) +{ + renderPrimitivesCommon(material_info, prim_type, layout, offset, n_verts, + buffer_handle, 0, 
&m_frameStats.renderParticleCalls); +} + +void VulkanDrawManager::renderPrimitivesDistortion(distortion_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle) +{ + // Distortion intentionally tracks no dedicated frame stat counter. + renderPrimitivesCommon(material_info, prim_type, layout, offset, n_verts, + buffer_handle, 0, nullptr); +} + +void VulkanDrawManager::renderMovie(movie_material* material_info, primitive_type prim_type, + vertex_layout* layout, int n_verts, gr_buffer_handle buffer_handle, + size_t buffer_offset) +{ + renderPrimitivesCommon(material_info, prim_type, layout, 0, n_verts, + buffer_handle, buffer_offset, &m_frameStats.renderMovieCalls); +} + +void VulkanDrawManager::renderNanoVG(nanovg_material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle) +{ + renderPrimitivesCommon(material_info, prim_type, layout, offset, n_verts, + buffer_handle, 0, &m_frameStats.renderNanoVGCalls); +} + +void VulkanDrawManager::renderRocketPrimitives(interface_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int n_indices, gr_buffer_handle vertex_buffer, + gr_buffer_handle index_buffer) +{ + if (!material_info || !layout || n_indices <= 0) { + return; + } + + m_frameStats.renderRocketCalls++; + + if (!applyMaterial(material_info, prim_type, layout)) { + return; + } + bindVertexBuffer(vertex_buffer, 0); + bindIndexBuffer(index_buffer); + drawIndexed(prim_type, n_indices, 0, 0); +} + +void VulkanDrawManager::renderModel(model_material* material_info, indexed_vertex_source* vert_source, + vertex_buffer* bufferp, size_t texi) +{ + if (!material_info || !vert_source || !bufferp) { + return; + } + + m_frameStats.renderModelCalls++; + + // Validate buffers + if (!vert_source->Vbuffer_handle.isValid() || !vert_source->Ibuffer_handle.isValid()) { + nprintf(("Vulkan", "VulkanDrawManager: 
renderModel called with invalid buffer handles\n")); + return; + } + + if (texi >= bufferp->tex_buf.size()) { + nprintf(("Vulkan", "VulkanDrawManager: renderModel texi out of range\n")); + return; + } + + auto* stateTracker = getStateTracker(); + + // Get buffer data for this texture/draw + buffer_data* datap = &bufferp->tex_buf[texi]; + + if (datap->n_verts == 0) { + return; // Nothing to draw + } + + // Apply model material state and bind pipeline + // Model rendering always uses triangles + if (!applyMaterial(material_info, PRIM_TYPE_TRIS, &bufferp->layout)) { + return; + } + + // Bind vertex buffer with the model's vertex offset + auto* bufferManager = getBufferManager(); + + vk::Buffer vbuffer = bufferManager->getVkBuffer(vert_source->Vbuffer_handle); + vk::Buffer ibuffer = bufferManager->getVkBuffer(vert_source->Ibuffer_handle); + + Assertion(vbuffer, "VulkanDrawManager::renderModel got null vertex buffer from valid handle!"); + Assertion(ibuffer, "VulkanDrawManager::renderModel got null index buffer from valid handle!"); + + // Bind vertex buffer at offset 0 (start of heap buffer), matching OpenGL behavior. + // The Base_vertex_offset in drawIndexed handles the heap allocation offset. + stateTracker->bindVertexBuffer(0, vbuffer, 0); + + // Determine index type based on VB_FLAG_LARGE_INDEX flag + vk::IndexType indexType = (datap->flags & VB_FLAG_LARGE_INDEX) ? + vk::IndexType::eUint32 : vk::IndexType::eUint16; + + // Bind index buffer at the model's heap allocation offset. + // The firstIndex (from datap->index_offset) handles per-mesh offset within the model. + stateTracker->bindIndexBuffer(ibuffer, static_cast(vert_source->Index_offset), indexType); + + // Base vertex offset: accounts for heap allocation position + per-mesh vertex offset. + // This matches OpenGL's glDrawElementsBaseVertex usage. 
+ auto baseVertex = static_cast(vert_source->Base_vertex_offset + bufferp->vertex_num_offset); + + // Calculate first index + // The index_offset in buffer_data is in bytes, need to convert to index count + uint32_t firstIndex; + if (indexType == vk::IndexType::eUint32) { + firstIndex = static_cast(datap->index_offset / sizeof(uint32_t)); + } else { + firstIndex = static_cast(datap->index_offset / sizeof(uint16_t)); + } + + // Issue indexed draw call + m_frameStats.drawIndexedCalls++; + m_frameStats.totalIndices += static_cast(datap->n_verts); + + // Flush any dirty dynamic state before draw + stateTracker->applyDynamicState(); + + // Shadow map rendering uses MAX_SHADOW_CASCADES instances (one per cascade), routed via gl_InstanceIndex → gl_Layer + uint32_t instanceCount = Rendering_to_shadow_map ? MAX_SHADOW_CASCADES : 1; + + auto cmdBuffer = stateTracker->getCommandBuffer(); + cmdBuffer.drawIndexed( + static_cast(datap->n_verts), // index count + instanceCount, // instance count + firstIndex, // first index + baseVertex, // vertex offset + 0 // first instance + ); +} + +void VulkanDrawManager::setFillMode(int mode) +{ + m_fillMode = mode; +} + +int VulkanDrawManager::setColorBuffer(int mode) +{ + int prev = m_colorBufferEnabled ? 
1 : 0; + m_colorBufferEnabled = (mode != 0); + return prev; +} + +void VulkanDrawManager::setTextureAddressing(int mode) +{ + m_textureAddressing = mode; +} + +void VulkanDrawManager::setDepthBiasEnabled(bool enabled) +{ + m_depthBiasEnabled = enabled; +} + +void VulkanDrawManager::setDepthTextureOverride(vk::DescriptorImageInfo info) +{ + m_depthTextureInfo = info; +} + +void VulkanDrawManager::clearDepthTextureOverride() +{ + m_depthTextureInfo = vk::DescriptorImageInfo(); +} + +void VulkanDrawManager::setSceneColorOverride(vk::DescriptorImageInfo info) +{ + m_sceneColorInfo = info; +} + +void VulkanDrawManager::setDistMapOverride(vk::DescriptorImageInfo info) +{ + m_distMapInfo = info; +} + +void VulkanDrawManager::clearDistortionOverrides() +{ + m_sceneColorInfo = vk::DescriptorImageInfo(); + m_distMapInfo = vk::DescriptorImageInfo(); +} + +void VulkanDrawManager::clearStates() +{ + auto* stateTracker = getStateTracker(); + + // Match OpenGL's gr_opengl_clear_states() behavior: + // gr_zbias(0), gr_zbuffer_set(ZBUFFER_TYPE_READ), gr_set_cull(0), + // gr_set_fill_mode(GR_FILL_MODE_SOLID) + m_zbufferMode = GR_ZBUFF_READ; + m_stencilMode = GR_STENCIL_NONE; + m_cullEnabled = false; + m_fillMode = GR_FILL_MODE_SOLID; + m_colorBufferEnabled = true; + m_textureAddressing = TMAP_ADDRESS_WRAP; + m_depthBiasEnabled = false; + + gr_zbuffering = 1; + gr_zbuffering_mode = GR_ZBUFF_READ; + gr_global_zbuffering = 1; + gr_stencil_mode = GR_STENCIL_NONE; + + stateTracker->setZBufferMode(ZBUFFER_TYPE_READ); + stateTracker->setStencilMode(GR_STENCIL_NONE); + stateTracker->setCullMode(false); + stateTracker->setScissorEnabled(false); + stateTracker->setDepthBias(0.0f, 0.0f); + stateTracker->setLineWidth(1.0f); + + // Clear pending uniform bindings + clearPendingUniformBindings(); + + // NOTE: Do NOT call resetClip() here. OpenGL's gr_opengl_clear_states() does + // not reset the clip region, and callers (e.g. 
model_render_immediate) rely on + // the clip/offset state surviving through clear_states for subsequent 2D draws. +} + +void VulkanDrawManager::setPendingUniformBinding(uniform_block_type blockType, gr_buffer_handle bufferHandle, + vk::DeviceSize offset, vk::DeviceSize size) +{ + auto index = static_cast(blockType); + if (index >= NUM_UNIFORM_BLOCK_TYPES) { + return; + } + + m_pendingUniformBindings[index].bufferHandle = bufferHandle; + m_pendingUniformBindings[index].offset = offset; + m_pendingUniformBindings[index].size = size; + m_pendingUniformBindings[index].valid = bufferHandle.isValid(); +} + +void VulkanDrawManager::clearPendingUniformBindings() +{ + for (auto& binding : m_pendingUniformBindings) { + binding.valid = false; + binding.bufferHandle = gr_buffer_handle(); + binding.offset = 0; + binding.size = 0; + } +} + +void VulkanDrawManager::resetFrameStats() +{ + m_frameStats = {}; +} + +void VulkanDrawManager::printFrameStats() +{ + // Print summary every frame for the first 200 frames, then every 60 frames + bool shouldPrint = (m_frameStatsFrameNum < 200) || (m_frameStatsFrameNum % 60 == 0); + + if (shouldPrint) { + mprintf(("FRAME %d STATS: draws=%d indexed=%d verts=%d idxs=%d | applyMat=%d/%d fails | noPipeline=%d sdrNeg1=%d\n", + m_frameStatsFrameNum, + m_frameStats.drawCalls, + m_frameStats.drawIndexedCalls, + m_frameStats.totalVertices, + m_frameStats.totalIndices, + m_frameStats.applyMaterialFailures, + m_frameStats.applyMaterialCalls, + m_frameStats.noPipelineSkips, + m_frameStats.shaderHandleNeg1)); + mprintf((" CALLS: prim=%d batch=%d model=%d particle=%d nanovg=%d rocket=%d movie=%d\n", + m_frameStats.renderPrimitiveCalls, + m_frameStats.renderBatchedCalls, + m_frameStats.renderModelCalls, + m_frameStats.renderParticleCalls, + m_frameStats.renderNanoVGCalls, + m_frameStats.renderRocketCalls, + m_frameStats.renderMovieCalls)); + } + + m_frameStatsFrameNum++; +} + + +PipelineConfig VulkanDrawManager::buildPipelineConfig(material* mat, 
primitive_type prim_type) const +{ + PipelineConfig config; + + // Get shader info from material + int shaderHandle = mat->get_shader_handle(); + auto* shaderManager = getShaderManager(); + if (shaderHandle >= 0) { + const auto* shaderModule = shaderManager->getShaderByHandle(shaderHandle); + if (shaderModule) { + config.shaderType = shaderModule->type; + config.shaderFlags = shaderModule->flags; + } + } + + // Primitive type + config.primitiveType = prim_type; + + // Depth mode + config.depthMode = mat->get_depth_mode(); + + // Blend mode + config.blendMode = mat->get_blend_mode(); + + // Cull mode + config.cullEnabled = mat->get_cull_mode(); + + // Fill mode + config.fillMode = mat->get_fill_mode(); + + // Front face winding: match OpenGL which defaults to CCW and only switches to CW + // for model rendering (opengl_tnl_set_model_material sets GL_CW). + config.frontFaceCW = (config.shaderType == SDR_TYPE_MODEL && !(config.shaderFlags & MODEL_SDR_FLAG_SHADOW_MAP)); + + // Depth write + config.depthWriteEnabled = (config.depthMode == ZBUFFER_TYPE_FULL || + config.depthMode == ZBUFFER_TYPE_WRITE); + + // Stencil state + config.stencilEnabled = mat->is_stencil_enabled(); + if (config.stencilEnabled) { + config.stencilFunc = mat->get_stencil_func().compare; + config.stencilMask = mat->get_stencil_func().mask; + config.frontStencilOp = mat->get_front_stencil_op(); + config.backStencilOp = mat->get_back_stencil_op(); + } + + // Color write mask + config.colorWriteMask = mat->get_color_mask(); + + // Override color write mask if color buffer writes are disabled + if (!m_colorBufferEnabled) { + config.colorWriteMask = {false, false, false, false}; + } + + // Fill mode and depth bias from draw manager state + config.depthBiasEnabled = m_depthBiasEnabled; + + // Get current render pass, attachment count, and sample count from state tracker + auto* stateTracker = getStateTracker(); + config.renderPass = stateTracker->getCurrentRenderPass(); + config.colorAttachmentCount = 
stateTracker->getColorAttachmentCount(); + config.sampleCount = stateTracker->getCurrentSampleCount(); + + return config; +} + +bool VulkanDrawManager::bindMaterialTextures(material* mat, DescriptorWriter* writer) const +{ + auto* texManager = getTextureManager(); + + // Get sampler matching current texture addressing mode and fallback texture + vk::SamplerAddressMode addressMode = convertTextureAddressing(m_textureAddressing); + vk::Sampler sampler = texManager->getSampler( + vk::Filter::eLinear, vk::Filter::eLinear, addressMode, true, 0.0f, true); + // OpenGL skips applying texture addressing for AABITMAP, INTERFACE, and CUBEMAP + // types - they always stay clamped. We need a clamp sampler for those cases. + vk::Sampler clampSampler = texManager->getSampler( + vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerAddressMode::eClampToEdge, true, 0.0f, true); + auto fallbackTexInfo = texManager->getFallbackTextureInfo2DArray(); + fallbackTexInfo.sampler = sampler; + + std::array textureInfos; + textureInfos.fill(fallbackTexInfo); + + // Check for movie material - needs special YUV texture handling + auto* movieMat = dynamic_cast(mat); + if (movieMat) { + auto loadYuvTexture = [&](int handle, uint32_t slot) { + if (handle < 0 || slot >= textureInfos.size()) return; + auto* texSlot = texManager->getTextureSlot(handle); + if (!texSlot || !texSlot->imageView) { + // Load on demand - YUV planes are 8bpp grayscale + bitmap* bmp = bm_lock(handle, 8, BMP_TEX_OTHER); + if (bmp) { + texManager->bm_data(handle, bmp, bm_is_compressed(handle)); + bm_unlock(handle); + texSlot = texManager->getTextureSlot(handle); + } + } + if (texSlot && texSlot->imageView) { + textureInfos[slot].imageView = texSlot->imageView; + } + }; + + loadYuvTexture(movieMat->getYtex(), 0); // Y at index 0 + loadYuvTexture(movieMat->getUtex(), 1); // U at index 1 + loadYuvTexture(movieMat->getVtex(), 2); // V at index 2 + + writer->setImageArray(MaterialBinding::TextureArray, textureInfos); + return true; 
+ } + + // Helper to set texture at a specific slot - loads on-demand if not present + static int texLogCount = 0; + + // Get material's expected texture type for the base map + int materialTextureType = mat->get_texture_type(); + + auto setTexture = [&](int textureHandle, uint32_t slot, bool isBaseMap = false) { + if (textureHandle < 0 || slot >= textureInfos.size()) { + return; + } + + // Determine bitmap type - match OpenGL's gr_opengl_tcache_set logic: + // Override material texture type with bitmap's own type if not NORMAL + int bitmapType = isBaseMap ? materialTextureType : TCACHE_TYPE_NORMAL; + int overrideType = bm_get_tcache_type(textureHandle); + if (overrideType != TCACHE_TYPE_NORMAL) { + bitmapType = overrideType; + } + + // OpenGL skips applying texture addressing for AABITMAP, INTERFACE, and + // CUBEMAP types - they always stay clamped (gropengltexture.cpp:1140-1141). + // Match that behavior by using a clamp sampler for these types. + if (bitmapType == TCACHE_TYPE_AABITMAP || bitmapType == TCACHE_TYPE_INTERFACE + || bitmapType == TCACHE_TYPE_CUBEMAP) { + textureInfos[slot].sampler = clampSampler; + } + + auto* texSlot = texManager->getTextureSlot(textureHandle); + + // If texture isn't loaded, try to load it on-demand (like OpenGL does) + if (!texSlot || !texSlot->imageView) { + // Determine bpp and flags - matches OpenGL's opengl_determine_bpp_and_flags + ushort lockFlags = 0; + int bpp = 16; + + switch (bitmapType) { + case TCACHE_TYPE_AABITMAP: + lockFlags = BMP_AABITMAP; + bpp = 8; + break; + case TCACHE_TYPE_INTERFACE: + case TCACHE_TYPE_XPARENT: + lockFlags = BMP_TEX_XPARENT; + if (bm_get_type(textureHandle) == BM_TYPE_PCX) { + bpp = 16; + } else { + bpp = 32; + } + break; + case TCACHE_TYPE_COMPRESSED: + switch (bm_is_compressed(textureHandle)) { + case DDS_DXT1: + bpp = 24; + lockFlags = BMP_TEX_DXT1; + break; + case DDS_DXT3: + bpp = 32; + lockFlags = BMP_TEX_DXT3; + break; + case DDS_DXT5: + bpp = 32; + lockFlags = BMP_TEX_DXT5; + break; + 
default: + bpp = 32; + lockFlags = BMP_TEX_OTHER; + break; + } + break; + case TCACHE_TYPE_NORMAL: + default: + lockFlags = BMP_TEX_OTHER; + if (bm_get_type(textureHandle) == BM_TYPE_PCX) { + bpp = 16; // PCX locking only works with bpp=16 + } else { + if (bm_has_alpha_channel(textureHandle)) { + bpp = 32; + } else { + bpp = 24; + } + } + break; + } + + // Lock bitmap with appropriate flags + bitmap* bmp = bm_lock(textureHandle, bpp, lockFlags); + if (bmp) { + // Upload texture + texManager->bm_data(textureHandle, bmp, bm_is_compressed(textureHandle)); + bm_unlock(textureHandle); + + // Re-get the slot after upload + texSlot = texManager->getTextureSlot(textureHandle); + + if (texLogCount < 20) { + mprintf(("bindMaterialTextures: loaded tex %d (type=%d bpp=%d lockFlags=0x%x bmType=%d), slot=%p\n", + textureHandle, bitmapType, bpp, lockFlags, static_cast(bm_get_type(textureHandle)), texSlot)); + texLogCount++; + } + } + } + + if (texSlot && texSlot->imageView) { + textureInfos[slot].imageView = texSlot->imageView; + } else { + if (texLogCount < 20) { + mprintf(("bindMaterialTextures: slot %u handle %d FAILED to load\n", + slot, textureHandle)); + texLogCount++; + } + } + }; + + // Bind material textures to their slots + // Base map uses material's texture type (may be AABITMAP for fonts) + setTexture(mat->get_texture_map(TM_BASE_TYPE), TextureSlot::BaseMap, true); + setTexture(mat->get_texture_map(TM_GLOW_TYPE), TextureSlot::GlowMap); + + // Specular - prefer spec_gloss if available + int specMap = mat->get_texture_map(TM_SPEC_GLOSS_TYPE); + if (specMap < 0) { + specMap = mat->get_texture_map(TM_SPECULAR_TYPE); + } + setTexture(specMap, TextureSlot::SpecMap); + + setTexture(mat->get_texture_map(TM_NORMAL_TYPE), TextureSlot::NormalMap); + setTexture(mat->get_texture_map(TM_HEIGHT_TYPE), TextureSlot::HeightMap); + setTexture(mat->get_texture_map(TM_AMBIENT_TYPE), TextureSlot::AmbientMap); + setTexture(mat->get_texture_map(TM_MISC_TYPE), TextureSlot::MiscMap); + + // 
Update the texture array in the descriptor set + // All slots now have valid views (either actual texture or fallback) + writer->setImageArray(MaterialBinding::TextureArray, textureInfos); + + return true; +} + +bool VulkanDrawManager::applyMaterial(material* mat, primitive_type prim_type, vertex_layout* layout) +{ + auto* stateTracker = getStateTracker(); + auto* pipelineManager = getPipelineManager(); + auto* descManager = getDescriptorManager(); + auto* bufferManager = getBufferManager(); + + if (!mat || !layout) { + return false; + } + + m_frameStats.applyMaterialCalls++; + + // Build pipeline configuration from material + PipelineConfig config = buildPipelineConfig(mat, prim_type); + + // Track shader handle issues + if (mat->get_shader_handle() < 0) { + m_frameStats.shaderHandleNeg1++; + } + + // Check if we have a valid render pass + if (!config.renderPass) { + m_frameStats.applyMaterialFailures++; + mprintf(("VulkanDrawManager: applyMaterial FAIL - no render pass (shaderType=%d)\n", + static_cast(config.shaderType))); + return false; + } + + // Get or create pipeline + vk::Pipeline pipeline = pipelineManager->getPipeline(config, *layout); + if (!pipeline) { + m_frameStats.applyMaterialFailures++; + mprintf(("VulkanDrawManager: applyMaterial FAIL - no pipeline (shaderType=%d handle=%d)\n", + static_cast(config.shaderType), mat->get_shader_handle())); + return false; + } + + // Bind pipeline with layout + stateTracker->bindPipeline(pipeline, pipelineManager->getPipelineLayout()); + + // Bind fallback vertex buffers for attributes the layout doesn't provide but the shader needs + if (pipelineManager->needsFallbackAttribute(*layout, config.shaderType, VATTRIB_COLOR)) { + vk::Buffer fallbackColor = bufferManager->getFallbackColorBuffer(); + if (fallbackColor) { + stateTracker->bindVertexBuffer(FALLBACK_COLOR_BINDING, fallbackColor, 0); + } + } + if (pipelineManager->needsFallbackAttribute(*layout, config.shaderType, VATTRIB_TEXCOORD)) { + vk::Buffer 
fallbackTexCoord = bufferManager->getFallbackTexCoordBuffer(); + if (fallbackTexCoord) { + stateTracker->bindVertexBuffer(FALLBACK_TEXCOORD_BINDING, fallbackTexCoord, 0); + } + } + + // Allocate and bind descriptor sets for this draw. + // Template-based writer pre-fills all bindings with fallbacks, + // then we overwrite only the bindings that have real data. + { + DescriptorWriter writer; + writer.reset(descManager->getDevice(), descManager->getFallbacks()); + + // Bind pending UBOs for a given descriptor set + auto bindPendingUBOs = [&](DescriptorSetIndex targetSet) { + for (const auto& entry : VulkanDescriptorManager::getUniformBindings(targetSet)) { + vk::DescriptorBufferInfo bufInfo; + const auto& pending = m_pendingUniformBindings[static_cast(entry.blockType)]; + if (pending.valid) { + vk::Buffer buf = bufferManager->getVkBuffer(pending.bufferHandle); + if (buf) { + bufInfo = vk::DescriptorBufferInfo(buf, pending.offset, pending.size); + } + } + writer.setBuffer(entry.binding, bufInfo); + } + }; + + // Set 0: Global + vk::DescriptorSet globalSet = descManager->allocateFrameSet(DescriptorSetIndex::Global); + Verify(globalSet); + writer.writeSet(globalSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Global)); + bindPendingUBOs(DescriptorSetIndex::Global); + { + auto* pp = getPostProcessor(); + if (pp && pp->shadow().isInitialized()) { + writer.setImage(GlobalBinding::ShadowMap, pp->getShadowTextureInfo()); + } + } + + // Set 1: Material + vk::DescriptorSet materialSet = descManager->allocateFrameSet(DescriptorSetIndex::Material); + Verify(materialSet); + writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material)); + bindPendingUBOs(DescriptorSetIndex::Material); + { + uint32_t tfIdx = descManager->getCurrentFrame(); + auto& tf = g_transformBuffers[tfIdx]; + if (tf.buffer && tf.lastUploadSize > 0) { + writer.setBuffer(MaterialBinding::TransformSSBO, {tf.buffer, + static_cast(tf.lastUploadOffset), + 
static_cast(tf.lastUploadSize)}); + } + } + writer.setImage(MaterialBinding::DepthMap, m_depthTextureInfo); + writer.setImage(MaterialBinding::SceneColor, m_sceneColorInfo); + writer.setImage(MaterialBinding::DistortionMap, m_distMapInfo); + bindMaterialTextures(mat, &writer); + + // Set 2: PerDraw + vk::DescriptorSet perDrawSet = descManager->allocateFrameSet(DescriptorSetIndex::PerDraw); + Verify(perDrawSet); + writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw)); + bindPendingUBOs(DescriptorSetIndex::PerDraw); + writer.flush(); + stateTracker->bindDescriptorSet(DescriptorSetIndex::Global, globalSet); + stateTracker->bindDescriptorSet(DescriptorSetIndex::Material, materialSet); + stateTracker->bindDescriptorSet(DescriptorSetIndex::PerDraw, perDrawSet); + } + + // Update tracked state for FSO compatibility + stateTracker->setZBufferMode(mat->get_depth_mode()); + stateTracker->setCullMode(mat->get_cull_mode()); + + if (mat->is_stencil_enabled()) { + stateTracker->setStencilMode(GR_STENCIL_READ); + stateTracker->setStencilReference(mat->get_stencil_func().ref); + } else { + stateTracker->setStencilMode(GR_STENCIL_NONE); + } + + // Set depth bias if needed + stateTracker->setDepthBias(static_cast(mat->get_depth_bias()), 0.0f); + + return true; +} + +void VulkanDrawManager::bindVertexBuffer(gr_buffer_handle handle, size_t offset) +{ + (void)this; + auto* bufferManager = getBufferManager(); + auto* stateTracker = getStateTracker(); + + if (!handle.isValid()) { + return; + } + + vk::Buffer buffer = bufferManager->getVkBuffer(handle); + if (buffer) { + // Add frame base offset for ring buffer support + // This maps the caller's offset into the current frame's span + size_t frameOffset = bufferManager->getFrameBaseOffset(handle); + size_t totalOffset = frameOffset + offset; + stateTracker->bindVertexBuffer(0, buffer, static_cast(totalOffset)); + } +} + +void VulkanDrawManager::bindIndexBuffer(gr_buffer_handle handle) +{ + 
(void)this; + auto* bufferManager = getBufferManager(); + auto* stateTracker = getStateTracker(); + + if (!handle.isValid()) { + return; + } + + vk::Buffer buffer = bufferManager->getVkBuffer(handle); + if (buffer) { + // Add frame base offset for ring buffer support (mirrors bindVertexBuffer) + size_t frameOffset = bufferManager->getFrameBaseOffset(handle); + stateTracker->bindIndexBuffer(buffer, static_cast(frameOffset), vk::IndexType::eUint32); + } +} + +void VulkanDrawManager::draw(primitive_type prim_type, int first_vertex, int vertex_count) +{ + auto* stateTracker = getStateTracker(); + + Assertion(stateTracker->getCurrentPipeline(), + "draw() called with no bound pipeline! prim_type=%d first_vertex=%d vertex_count=%d", + static_cast(prim_type), first_vertex, vertex_count); + if (!stateTracker->getCurrentPipeline()) { + m_frameStats.noPipelineSkips++; + return; + } + + m_frameStats.drawCalls++; + m_frameStats.totalVertices += vertex_count; + + // Flush any dirty dynamic state (viewport, scissor, depth bias, stencil ref) + // before issuing the draw command. applyMaterial sets these AFTER bindPipeline, + // so they may be dirty even when the pipeline didn't change. + stateTracker->applyDynamicState(); + + auto cmdBuffer = stateTracker->getCommandBuffer(); + cmdBuffer.draw(static_cast(vertex_count), + 1, + static_cast(first_vertex), + 0); +} + +void VulkanDrawManager::drawIndexed(primitive_type prim_type, int index_count, int first_index, int vertex_offset) +{ + auto* stateTracker = getStateTracker(); + + Assertion(stateTracker->getCurrentPipeline(), + "drawIndexed() called with no bound pipeline! 
prim_type=%d index_count=%d first_index=%d vertex_offset=%d", + static_cast(prim_type), index_count, first_index, vertex_offset); + if (!stateTracker->getCurrentPipeline()) { + m_frameStats.noPipelineSkips++; + return; + } + + m_frameStats.drawIndexedCalls++; + m_frameStats.totalIndices += index_count; + + // Flush any dirty dynamic state before draw + stateTracker->applyDynamicState(); + + auto cmdBuffer = stateTracker->getCommandBuffer(); + cmdBuffer.drawIndexed(static_cast(index_count), + 1, + static_cast(first_index), + vertex_offset, + 0); +} + +void VulkanDrawManager::initSphereBuffers() +{ + auto* bufferManager = getBufferManager(); + + auto mesh = graphics::util::generate_sphere_mesh(16, 16); + + m_sphereIndexCount = mesh.index_count; + + m_sphereVBO = bufferManager->createBuffer(BufferType::Vertex, BufferUsageHint::Static); + bufferManager->updateBufferData(m_sphereVBO, mesh.vertices.size() * sizeof(float), mesh.vertices.data()); + + m_sphereIBO = bufferManager->createBuffer(BufferType::Index, BufferUsageHint::Static); + bufferManager->updateBufferData(m_sphereIBO, mesh.indices.size() * sizeof(ushort), mesh.indices.data()); + + m_sphereVertexLayout.add_vertex_component(vertex_format_data::POSITION3, sizeof(float) * 3, 0); + + mprintf(("VulkanDrawManager: Sphere mesh created (%u vertices, %u indices)\n", + mesh.vertex_count, mesh.index_count)); +} + +void VulkanDrawManager::shutdownSphereBuffers() +{ + auto* bufferManager = getBufferManager(); + + if (m_sphereVBO.isValid()) { + bufferManager->deleteBuffer(m_sphereVBO); + m_sphereVBO = gr_buffer_handle::invalid(); + } + if (m_sphereIBO.isValid()) { + bufferManager->deleteBuffer(m_sphereIBO); + m_sphereIBO = gr_buffer_handle::invalid(); + } +} + +void VulkanDrawManager::drawSphere(material* material_def) +{ + if (!material_def || m_sphereIndexCount == 0) { + return; + } + + auto* stateTracker = getStateTracker(); + + auto* bufferManager = getBufferManager(); + + if (!applyMaterial(material_def, 
PRIM_TYPE_TRIS, &m_sphereVertexLayout)) { + return; + } + + // Bind sphere vertex buffer + vk::Buffer vbo = bufferManager->getVkBuffer(m_sphereVBO); + if (!vbo) { + return; + } + stateTracker->bindVertexBuffer(0, vbo, 0); + + // Bind sphere index buffer with uint16 indices (matching the ushort mesh data) + vk::Buffer ibo = bufferManager->getVkBuffer(m_sphereIBO); + if (!ibo) { + return; + } + stateTracker->bindIndexBuffer(ibo, 0, vk::IndexType::eUint16); + + drawIndexed(PRIM_TYPE_TRIS, static_cast(m_sphereIndexCount), 0, 0); +} + +} // namespace graphics::vulkan + + + +// PostProcessing_override is defined in globalincs/systemvars.cpp + + + +namespace graphics::vulkan { + +// ========== gr_screen function pointer implementations ========== +// These free functions are assigned to gr_screen.gf_* in gr_vulkan.cpp. + +namespace { + +// Helper to set up GenericData uniform for default material shader +// Similar to opengl_shader_set_default_material() in gropenglshader.cpp +void vulkan_set_default_material_uniforms(material* material_info) +{ + if (!material_info) { + return; + } + + // Get uniform buffer for GenericData + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, sizeof(genericData_default_material_v_sdr)); + auto* data = buffer.aligner().addTypedElement(); + + // Get base map from material + int base_map = material_info->get_texture_map(TM_BASE_TYPE); + bool textured = (base_map >= 0); + bool alpha = (material_info->get_texture_type() == TCACHE_TYPE_AABITMAP); + + // Texturing flags + if (textured) { + data->noTexturing = 0; + // Get array index for animated texture arrays + auto* texSlot = getTextureManager()->getTextureSlot(base_map); + data->baseMapIndex = texSlot ? static_cast(texSlot->arrayIndex) : 0; + } else { + data->noTexturing = 1; + data->baseMapIndex = 0; + } + + // Alpha texture flag + data->alphaTexture = alpha ? 1 : 0; + + // HDR / intensity settings + data->srgb = High_dynamic_range ? 
1 : 0; + data->intensity = material_info->get_color_scale(); + + // Alpha threshold + data->alphaThreshold = getStateTracker()->getAlphaThreshold(); + + // Color from material + vec4 clr = material_info->get_color(); + data->color.a1d[0] = clr.xyzw.x; + data->color.a1d[1] = clr.xyzw.y; + data->color.a1d[2] = clr.xyzw.z; + data->color.a1d[3] = clr.xyzw.w; + + // Clip plane + const auto& clip_plane = material_info->get_clip_plane(); + if (clip_plane.enabled) { + data->clipEnabled = 1; + + data->clipEquation.a1d[0] = clip_plane.normal.xyz.x; + data->clipEquation.a1d[1] = clip_plane.normal.xyz.y; + data->clipEquation.a1d[2] = clip_plane.normal.xyz.z; + // Calculate 'd' value: d = -dot(normal, position) + data->clipEquation.a1d[3] = -((clip_plane.normal.xyz.x * clip_plane.position.xyz.x) + + (clip_plane.normal.xyz.y * clip_plane.position.xyz.y) + + (clip_plane.normal.xyz.z * clip_plane.position.xyz.z)); + + // Model matrix (identity for now, material doesn't provide one) + vm_matrix4_set_identity(&data->modelMatrix); + } else { + data->clipEnabled = 0; + vm_matrix4_set_identity(&data->modelMatrix); + data->clipEquation.a1d[0] = 0.0f; + data->clipEquation.a1d[1] = 0.0f; + data->clipEquation.a1d[2] = 0.0f; + data->clipEquation.a1d[3] = 0.0f; + } + + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(genericData_default_material_v_sdr), buffer.bufferHandle()); +} + +} // anonymous namespace + +int vulkan_zbuffer_get() +{ + auto* drawManager = getDrawManager(); + return drawManager->zbufferGet(); +} + +int vulkan_zbuffer_set(int mode) +{ + auto* drawManager = getDrawManager(); + return drawManager->zbufferSet(mode); +} + +void vulkan_zbuffer_clear(int mode) +{ + auto* drawManager = getDrawManager(); + drawManager->zbufferClear(mode); +} + +int vulkan_stencil_set(int mode) +{ + auto* drawManager = getDrawManager(); + return drawManager->stencilSet(mode); +} + +void vulkan_stencil_clear() +{ + auto* drawManager = 
getDrawManager(); + drawManager->stencilClear(); +} + +void vulkan_set_fill_mode(int mode) +{ + auto* drawManager = getDrawManager(); + // GR_FILL_MODE_WIRE = 1, GR_FILL_MODE_SOLID = 2 + drawManager->setFillMode(mode); +} + +void vulkan_clear() +{ + auto* drawManager = getDrawManager(); + drawManager->clear(); +} + +void vulkan_reset_clip() +{ + auto* drawManager = getDrawManager(); + drawManager->resetClip(); +} + +void vulkan_set_clear_color(int r, int g, int b) +{ + auto* drawManager = getDrawManager(); + drawManager->setClearColor(r, g, b); +} + +void vulkan_set_clip(int x, int y, int w, int h, int resize_mode) +{ + auto* drawManager = getDrawManager(); + drawManager->setClip(x, y, w, h, resize_mode); +} + +int vulkan_set_cull(int cull) +{ + auto* drawManager = getDrawManager(); + return drawManager->setCull(cull); +} + +int vulkan_set_color_buffer(int mode) +{ + auto* drawManager = getDrawManager(); + return drawManager->setColorBuffer(mode); +} + +void vulkan_set_texture_addressing(int mode) +{ + auto* drawManager = getDrawManager(); + drawManager->setTextureAddressing(mode); +} + +void vulkan_set_line_width(float width) +{ + auto* stateTracker = getStateTracker(); + if (width <= 1.0f) { + stateTracker->setLineWidth(width); + } + gr_screen.line_width = width; +} + +void vulkan_clear_states() +{ + auto* drawManager = getDrawManager(); + drawManager->clearStates(); +} + +void vulkan_scene_texture_begin() +{ + auto* renderer = getRendererInstance(); + + // Switch to HDR scene render pass when post-processing is enabled + auto* pp = getPostProcessor(); + if (pp && pp->isInitialized() && Gr_post_processing_enabled && !PostProcessing_override) { + renderer->beginSceneRendering(); + High_dynamic_range = true; + } else { + // Fallback: just clear within the current swap chain pass + auto* stateTracker = getStateTracker(); + auto cmdBuffer = stateTracker->getCommandBuffer(); + + std::array clearAttachments; + clearAttachments[0].aspectMask = 
vk::ImageAspectFlagBits::eColor; + clearAttachments[0].colorAttachment = 0; + clearAttachments[0].clearValue.color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + + clearAttachments[1].aspectMask = vk::ImageAspectFlagBits::eDepth; + clearAttachments[1].clearValue.depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + + vk::ClearRect clearRect; + clearRect.rect.offset = vk::Offset2D(0, 0); + clearRect.rect.extent = vk::Extent2D(static_cast(gr_screen.max_w), + static_cast(gr_screen.max_h)); + clearRect.baseArrayLayer = 0; + clearRect.layerCount = 1; + + cmdBuffer.clearAttachments(clearAttachments, clearRect); + } +} + +void vulkan_scene_texture_end() +{ + auto* renderer = getRendererInstance(); + + // If we were rendering to the HDR scene target, switch back to swap chain + if (renderer->isSceneRendering()) { + renderer->endSceneRendering(); + } + + High_dynamic_range = false; +} + +void vulkan_copy_effect_texture() +{ + auto* renderer = getRendererInstance(); + + // Only copy if we're actively rendering the HDR scene + if (!renderer->isSceneRendering()) { + return; + } + + renderer->copyEffectTexture(); +} + +void vulkan_draw_sphere(material* material_def, float /*rad*/) +{ + auto* drawManager = getDrawManager(); + drawManager->drawSphere(material_def); +} + +void vulkan_render_shield_impact(shield_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + gr_buffer_handle buffer_handle, + int n_verts) +{ + auto* drawManager = getDrawManager(); + + // Compute impact projection matrices + float radius = material_info->get_impact_radius(); + vec3d min_v, max_v; + min_v.xyz.x = min_v.xyz.y = min_v.xyz.z = -radius; + max_v.xyz.x = max_v.xyz.y = max_v.xyz.z = radius; + + matrix4 impact_projection; + vm_matrix4_set_orthographic(&impact_projection, &max_v, &min_v); + + matrix impact_orient = material_info->get_impact_orient(); + vec3d impact_pos = material_info->get_impact_pos(); + + matrix4 impact_transform; + 
vm_matrix4_set_inverse_transform(&impact_transform, &impact_orient, &impact_pos); + + // Set shield impact uniform data (GenericData UBO) + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, + sizeof(graphics::generic_data::shield_impact_data)); + auto* data = buffer.aligner().addTypedElement(); + data->hitNormal = impact_orient.vec.fvec; + data->shieldProjMatrix = impact_projection; + data->shieldModelViewMatrix = impact_transform; + data->shieldMapIndex = 0; // Vulkan binds textures individually, always layer 0 + data->srgb = High_dynamic_range ? 1 : 0; + data->color = material_info->get_color(); + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(graphics::generic_data::shield_impact_data), buffer.bufferHandle()); + + // Set matrix uniforms + gr_matrix_set_uniforms(); + + // Draw the shield mesh + drawManager->renderPrimitives(material_info, prim_type, layout, 0, n_verts, buffer_handle, 0); +} + +void vulkan_render_model(model_material* material_info, + indexed_vertex_source* vert_source, + vertex_buffer* bufferp, + size_t texi) +{ + // ModelData UBO (matrices, lights, material params) is already bound by the model + // rendering pipeline (model_draw_list::render_buffer) before this function is called. + // Do NOT call vulkan_set_default_material_uniforms here - that would set GenericData + // uniforms for SDR_TYPE_DEFAULT_MATERIAL, but models use SDR_TYPE_MODEL with ModelData. 
+ + auto* drawManager = getDrawManager(); + drawManager->renderModel(material_info, vert_source, bufferp, texi); +} + +void vulkan_render_primitives(material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle, + size_t buffer_offset) +{ + // Set up uniform buffers before rendering (like OpenGL does) + gr_matrix_set_uniforms(); + vulkan_set_default_material_uniforms(material_info); + + auto* drawManager = getDrawManager(); + drawManager->renderPrimitives(material_info, prim_type, layout, offset, n_verts, buffer_handle, buffer_offset); +} + +void vulkan_render_primitives_particle(particle_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle) +{ + auto* renderer = getRendererInstance(); + auto* drawManager = getDrawManager(); + auto* pp = getPostProcessor(); + + // In deferred mode, once the G-buffer pass has ended the position texture + // (view-space XYZ) is in eShaderReadOnlyOptimal and free to sample. + bool usePosTexture = light_deferred_enabled() + && !renderer->isUsingGbufRenderPass() + && pp && pp->deferred().isInitialized(); + + if (!usePosTexture) { + // Non-deferred path: copy hardware depth buffer + renderer->copySceneDepthForParticles(); + } + + // Set up matrices + gr_matrix_set_uniforms(); + + // Set effect_data GenericData UBO (matching OpenGL's opengl_tnl_set_material_particle) + { + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, + sizeof(graphics::generic_data::effect_data)); + auto* data = buffer.aligner().addTypedElement(); + + data->window_width = static_cast(gr_screen.max_w); + data->window_height = static_cast(gr_screen.max_h); + data->nearZ = Min_draw_distance; + data->farZ = Max_draw_distance; + data->srgb = High_dynamic_range ? 1 : 0; + data->blend_alpha = material_info->get_blend_mode() != ALPHA_BLEND_ADDITIVE ? 
1 : 0; + // In deferred mode, bind the G-buffer position texture (view-space XYZ) + // so linear_depth=1 reads .z directly (matches OpenGL behavior). + // Otherwise use the NDC conversion path with the hardware depth copy. + data->linear_depth = usePosTexture ? 1 : 0; + + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(graphics::generic_data::effect_data), buffer.bufferHandle()); + } + + // Set depth texture override + if (usePosTexture) { + // Deferred path: bind G-buffer position texture directly + auto* texMgr = getTextureManager(); + auto nearestSampler = texMgr->getSampler(vk::Filter::eNearest, vk::Filter::eNearest, + vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false); + drawManager->setDepthTextureOverride( + {nearestSampler, pp->deferred().positionView(), vk::ImageLayout::eShaderReadOnlyOptimal}); + } else if (renderer->isSceneDepthCopied() && pp) { + // Non-deferred path: bind the hardware depth copy + auto* texMgr = getTextureManager(); + auto nearestSampler = texMgr->getSampler(vk::Filter::eNearest, vk::Filter::eNearest, + vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false); + drawManager->setDepthTextureOverride( + {nearestSampler, pp->getSceneDepthCopyView(), vk::ImageLayout::eShaderReadOnlyOptimal}); + } + + drawManager->renderPrimitivesParticle(material_info, prim_type, layout, offset, n_verts, buffer_handle); + + // Clear the override + drawManager->clearDepthTextureOverride(); +} + +void vulkan_render_primitives_distortion(distortion_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle) +{ + auto* drawManager = getDrawManager(); + auto* pp = getPostProcessor(); + + // Set up matrices + gr_matrix_set_uniforms(); + + // Set effect_distort_data GenericData UBO (16 bytes) + { + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, + 
sizeof(graphics::generic_data::effect_distort_data)); + auto* data = buffer.aligner().addTypedElement(); + + data->window_width = static_cast(gr_screen.max_w); + data->window_height = static_cast(gr_screen.max_h); + data->use_offset = material_info->get_thruster_rendering() ? 1.0f : 0.0f; + + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(graphics::generic_data::effect_distort_data), buffer.bufferHandle()); + } + + // Set scene color override (binding 5) — snapshot of scene color for distortion sampling + if (pp) drawManager->setSceneColorOverride(pp->getSceneEffectTextureInfo()); + + // Set distortion map override (binding 6) — ping-pong noise texture for thrusters + if (material_info->get_thruster_rendering() && pp) + drawManager->setDistMapOverride(pp->getDistortionTextureInfo()); + + drawManager->renderPrimitivesDistortion(material_info, prim_type, layout, offset, n_verts, buffer_handle); + + // Clear overrides so subsequent draws use fallback textures + drawManager->clearDistortionOverrides(); +} + +void vulkan_render_movie(movie_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int n_verts, + gr_buffer_handle buffer, + size_t buffer_offset) +{ + gr_matrix_set_uniforms(); + vulkan_set_default_material_uniforms(material_info); + + auto* drawManager = getDrawManager(); + drawManager->renderMovie(material_info, prim_type, layout, n_verts, buffer, buffer_offset); +} + +void vulkan_render_nanovg(nanovg_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle) +{ + // NanoVG shader reads from NanoVGData UBO (set 2 binding 2), not GenericData. + // The NanoVGRenderer binds NanoVGData before calling gr_render_nanovg(). + + // NanoVG uses its own software scissor (scissorMat/scissorExt in the fragment shader). 
+ // Disable hardware scissor to match nanovg_gl.h which calls glDisable(GL_SCISSOR_TEST). + // Without this, NanoVG draws get clipped by gr_set_clip's hardware scissor. + auto* stateTracker = getStateTracker(); + bool savedScissorEnabled = stateTracker->isScissorEnabled(); + stateTracker->setScissorEnabled(false); + + auto* drawManager = getDrawManager(); + drawManager->renderNanoVG(material_info, prim_type, layout, offset, n_verts, buffer_handle); + + // Restore scissor state + stateTracker->setScissorEnabled(savedScissorEnabled); +} + +void vulkan_render_primitives_batched(batched_bitmap_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle) +{ + gr_matrix_set_uniforms(); + vulkan_set_default_material_uniforms(material_info); + + auto* drawManager = getDrawManager(); + drawManager->renderPrimitivesBatched(material_info, prim_type, layout, offset, n_verts, buffer_handle); +} + +void vulkan_render_rocket_primitives(interface_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int n_indices, + gr_buffer_handle vertex_buffer, + gr_buffer_handle index_buffer) +{ + // Set up 2D orthographic projection (matches OpenGL's gr_opengl_render_rocket_primitives) + gr_set_2d_matrix(); + + // Fill GenericData UBO with rocketui_data layout (NOT default material layout). + // The rocketui shader reads projMatrix, offset, textured, baseMapIndex, and + // horizontalSwipeOffset from GenericData — a completely different layout than + // the default material shader's genericData. + { + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, + sizeof(graphics::generic_data::rocketui_data)); + auto* data = buffer.aligner().addTypedElement(); + + data->projMatrix = gr_projection_matrix; + + const vec2d& offset = material_info->get_offset(); + data->offset = offset; + data->textured = material_info->is_textured() ? 
1 : 0; + data->baseMapIndex = 0; // Vulkan texture array: always layer 0 + data->horizontalSwipeOffset = material_info->get_horizontal_swipe(); + + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(graphics::generic_data::rocketui_data), buffer.bufferHandle()); + } + + // Matrices UBO is still needed for descriptor set completeness + gr_matrix_set_uniforms(); + + auto* drawManager = getDrawManager(); + drawManager->renderRocketPrimitives(material_info, prim_type, layout, n_indices, vertex_buffer, index_buffer); + + gr_end_2d_matrix(); +} + +void vulkan_calculate_irrmap() +{ + if (ENVMAP < 0 || gr_screen.irrmap_render_target < 0) { + return; + } + + auto* renderer = getRendererInstance(); + auto* stateTracker = getStateTracker(); + auto* texManager = getTextureManager(); + auto* descManager = getDescriptorManager(); + auto* bufferManager = getBufferManager(); + auto* pipelineManager = getPipelineManager(); + if (!renderer || !stateTracker || !texManager || !descManager || !bufferManager || !pipelineManager) { + return; + } + + // Get envmap cubemap view + auto* envSlot = bm_get_slot(ENVMAP, true); + if (!envSlot || !envSlot->gr_info) { + return; + } + auto* envTs = static_cast(envSlot->gr_info); + vk::ImageView envmapView = envTs->isCubemap ? 
envTs->cubeImageView : envTs->imageView; + if (!envmapView) { + return; + } + + // Get irrmap render target (cubemap with per-face framebuffers) + auto* irrSlot = bm_get_slot(gr_screen.irrmap_render_target, true); + if (!irrSlot || !irrSlot->gr_info) { + return; + } + auto* irrTs = static_cast(irrSlot->gr_info); + if (!irrTs->isCubemap || !irrTs->renderPass) { + return; + } + + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + // End the current swap chain render pass + cmd.endRenderPass(); + + // Create pipeline for irradiance map generation + PipelineConfig config; + config.shaderType = SDR_TYPE_IRRADIANCE_MAP_GEN; + config.vertexLayoutHash = 0; + config.primitiveType = PRIM_TYPE_TRIS; + config.depthMode = ZBUFFER_TYPE_NONE; + config.blendMode = ALPHA_BLEND_NONE; + config.cullEnabled = false; + config.depthWriteEnabled = false; + config.renderPass = irrTs->renderPass; + + vertex_layout emptyLayout; + vk::Pipeline pipeline = pipelineManager->getPipeline(config, emptyLayout); + if (!pipeline) { + mprintf(("vulkan_calculate_irrmap: Failed to get pipeline!\n")); + return; + } + + vk::PipelineLayout pipelineLayout = pipelineManager->getPipelineLayout(); + + // Create a small host-visible UBO for the 6 face indices + // minUniformBufferOffsetAlignment is typically 256 bytes + const uint32_t UBO_SLOT_SIZE = 256; // Safe alignment for all GPUs + const uint32_t UBO_TOTAL_SIZE = 6 * UBO_SLOT_SIZE; + + vk::Device device = bufferManager->getDevice(); + auto* memManager = getMemoryManager(); + + vk::BufferCreateInfo uboBufInfo; + uboBufInfo.size = UBO_TOTAL_SIZE; + uboBufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer; + uboBufInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer faceUBO; + VulkanAllocation faceUBOAlloc; + try { + faceUBO = device.createBuffer(uboBufInfo); + } catch (const vk::SystemError& e) { + mprintf(("vulkan_calculate_irrmap: Failed to create face UBO: %s\n", e.what())); + return; + } + + if 
(!memManager->allocateBufferMemory(faceUBO, MemoryUsage::CpuToGpu, faceUBOAlloc)) { + device.destroyBuffer(faceUBO); + return; + } + + // Map and write face indices + auto* mapped = static_cast(memManager->mapMemory(faceUBOAlloc)); + if (!mapped) { + device.destroyBuffer(faceUBO); + memManager->freeAllocation(faceUBOAlloc); + return; + } + memset(mapped, 0, UBO_TOTAL_SIZE); + for (int i = 0; i < 6; i++) { + *reinterpret_cast(mapped + (i * UBO_SLOT_SIZE)) = i; + } + memManager->unmapMemory(faceUBOAlloc); + + vk::Extent2D irrExtent(irrTs->width, irrTs->height); + + for (size_t face = 0; face < irrTs->cubeFaceFramebuffers.size(); face++) { + vk::Framebuffer fb = irrTs->cubeFaceFramebuffers[face]; + if (!fb) { + continue; + } + + // Begin render pass for this face (loadOp=eClear, finalLayout=eShaderReadOnlyOptimal) + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = irrTs->renderPass; + rpBegin.framebuffer = fb; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = irrExtent; + + vk::ClearValue clearValue; + clearValue.color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + rpBegin.clearValueCount = 1; + rpBegin.pClearValues = &clearValue; + + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); + + // Set viewport and scissor + vk::Viewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = static_cast(irrExtent.width); + viewport.height = static_cast(irrExtent.height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + cmd.setViewport(0, viewport); + + vk::Rect2D scissor; + scissor.offset = vk::Offset2D(0, 0); + scissor.extent = irrExtent; + cmd.setScissor(0, scissor); + + DescriptorWriter writer; + writer.reset(device, descManager->getFallbacks()); + + // Set 0: Global (all fallback) + vk::DescriptorSet globalSet = descManager->allocateFrameSet(DescriptorSetIndex::Global); + Verify(globalSet); + writer.writeSet(globalSet, 
VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Global)); + + // Set 1: Material (envmap cubemap at element 0 of texture array) + vk::DescriptorSet materialSet = descManager->allocateFrameSet(DescriptorSetIndex::Material); + Verify(materialSet); + writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material)); + { + std::array texImages; + texImages.fill(descManager->getFallbacks().texture2D); + texImages[0].imageView = envmapView; + writer.setImageArray(MaterialBinding::TextureArray, texImages); + } + + // Set 2: PerDraw (face UBO at binding 0) + vk::DescriptorSet perDrawSet = descManager->allocateFrameSet(DescriptorSetIndex::PerDraw); + Verify(perDrawSet); + writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw)); + writer.setBuffer(PerDrawBinding::GenericData, {faceUBO, + static_cast(face) * UBO_SLOT_SIZE, UBO_SLOT_SIZE}); + writer.flush(); + + // Bind all descriptor sets + cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, + 0, {globalSet, materialSet, perDrawSet}, {}); + + // Draw fullscreen triangle + cmd.draw(3, 1, 0, 0); + cmd.endRenderPass(); + } + + // Queue UBO for deferred destruction (safe to destroy after frame submission) + getDeletionQueue()->queueBuffer(faceUBO, faceUBOAlloc); + + // Resume the swap chain pass (irrmap is always called before scene rendering begins) + renderer->resumeSwapChainPass(); + + mprintf(("vulkan_calculate_irrmap: Generated irradiance cubemap (%ux%u)\n", irrTs->width, irrTs->height)); +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanDraw.h b/code/graphics/vulkan/VulkanDraw.h new file mode 100644 index 00000000000..436ec04cafd --- /dev/null +++ b/code/graphics/vulkan/VulkanDraw.h @@ -0,0 +1,463 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "graphics/material.h" +#include "VulkanPipeline.h" + +#include +#include + + +namespace graphics::vulkan 
{ + +class DescriptorWriter; + +/** + * @brief Tracks a pending uniform buffer binding + * Stores handle instead of raw vk::Buffer to survive buffer recreation. + * The offset is fully resolved at bind time (includes frame base offset) + * to prevent stale lastWriteStreamOffset if the buffer is updated between bind and draw. + */ +struct PendingUniformBinding { + gr_buffer_handle bufferHandle; // FSO buffer handle - lookup vk::Buffer at draw time + vk::DeviceSize offset = 0; // Fully resolved offset (frame base + caller offset) + vk::DeviceSize size = 0; + bool valid = false; +}; + +/** + * @brief Handles Vulkan draw command recording + * + * Provides functions to record draw commands to the command buffer, + * including primitive rendering, batched rendering, and special effects. + */ +class VulkanDrawManager { +public: + VulkanDrawManager() = default; + ~VulkanDrawManager() = default; + + // Non-copyable + VulkanDrawManager(const VulkanDrawManager&) = delete; + VulkanDrawManager& operator=(const VulkanDrawManager&) = delete; + + /** + * @brief Initialize draw manager + */ + bool init(vk::Device device); + + /** + * @brief Shutdown and release resources + */ + void shutdown(); + + // ========== Clear Operations ========== + + /** + * @brief Clear the color buffer + */ + void clear(); + + /** + * @brief Set clear color + */ + void setClearColor(int r, int g, int b); + + // ========== Clipping ========== + + /** + * @brief Set clip region (scissor) + */ + void setClip(int x, int y, int w, int h, int resize_mode); + + /** + * @brief Reset clip to full screen + */ + void resetClip(); + + // ========== Z-Buffer ========== + + /** + * @brief Get current zbuffer mode + */ + int zbufferGet() const; + + /** + * @brief Set zbuffer mode + * @return Previous mode + */ + int zbufferSet(int mode); + + /** + * @brief Clear zbuffer + */ + void zbufferClear(int mode); + + // ========== Stencil ========== + + /** + * @brief Set stencil mode + * @return Previous mode + */ + int 
stencilSet(int mode); + + /** + * @brief Clear stencil buffer + */ + void stencilClear(); + + // ========== Culling ========== + + /** + * @brief Set cull mode + * @return Previous mode + */ + int setCull(int cull); + + // ========== Primitive Rendering ========== + + /** + * @brief Render primitives with material + */ + void renderPrimitives(material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle, size_t buffer_offset); + + /** + * @brief Render batched bitmaps + */ + void renderPrimitivesBatched(batched_bitmap_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle); + + /** + * @brief Render particles + */ + void renderPrimitivesParticle(particle_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle); + + /** + * @brief Render distortion effect + */ + void renderPrimitivesDistortion(distortion_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle); + + /** + * @brief Render movie frame + */ + void renderMovie(movie_material* material_info, primitive_type prim_type, + vertex_layout* layout, int n_verts, gr_buffer_handle buffer_handle, + size_t buffer_offset); + + /** + * @brief Render NanoVG UI + */ + void renderNanoVG(nanovg_material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle); + + /** + * @brief Render Rocket UI primitives (indexed) + */ + void renderRocketPrimitives(interface_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int n_indices, gr_buffer_handle vertex_buffer, + gr_buffer_handle index_buffer); + + /** + * @brief Render 3D model with indexed geometry + * @param material_info Model material settings + * @param vert_source Indexed 
vertex source with buffer handles + * @param bufferp Vertex buffer with layout and texture info + * @param texi Index into tex_buf array for this draw + */ + void renderModel(model_material* material_info, indexed_vertex_source* vert_source, + vertex_buffer* bufferp, size_t texi); + + /** + * @brief Draw a unit sphere with the given material + * Used for debug visualization and deferred light volumes + */ + void drawSphere(material* material_def); + + // ========== Render State ========== + + /** + * @brief Set polygon fill mode (GR_FILL_MODE_SOLID / GR_FILL_MODE_WIRE) + */ + void setFillMode(int mode); + + /** + * @brief Set color buffer write enable + * @return Previous state (1 = was enabled, 0 = was disabled) + */ + int setColorBuffer(int mode); + + /** + * @brief Set texture addressing mode (TMAP_ADDRESS_WRAP/MIRROR/CLAMP) + */ + void setTextureAddressing(int mode); + + /** + * @brief Enable or disable depth bias in pipeline + */ + void setDepthBiasEnabled(bool enabled); + + /** + * @brief Set depth texture override for soft particle rendering + * + * When set, applyMaterial() binds this texture to Material set binding 4 + * instead of the fallback white texture. Must be set before the render call + * and cleared afterwards. 
+ */ + void setDepthTextureOverride(vk::DescriptorImageInfo info); + + /** + * @brief Clear depth texture override (reverts to fallback) + */ + void clearDepthTextureOverride(); + + /** + * @brief Set scene color texture override for binding 5 (distortion effects) + */ + void setSceneColorOverride(vk::DescriptorImageInfo info); + + /** + * @brief Set distortion map texture override for binding 6 (distortion effects) + */ + void setDistMapOverride(vk::DescriptorImageInfo info); + + /** + * @brief Clear distortion texture overrides (bindings 5 and 6, reverts to fallback) + */ + void clearDistortionOverrides(); + + /** + * @brief Get current texture addressing mode + */ + int getTextureAddressing() const { return m_textureAddressing; } + + /** + * @brief Clear all graphics states to defaults + */ + void clearStates(); + + // ========== Uniform Buffers ========== + + /** + * @brief Set a pending uniform buffer binding + * @param blockType The uniform block type + * @param bufferHandle The FSO buffer handle (looked up at bind time) + * @param offset Offset within the buffer + * @param size Size of the bound range + */ + void setPendingUniformBinding(uniform_block_type blockType, gr_buffer_handle bufferHandle, + vk::DeviceSize offset, vk::DeviceSize size); + + /** + * @brief Clear all pending uniform bindings + */ + void clearPendingUniformBindings(); + + /** + * @brief Get a pending uniform binding by block type index + */ + const PendingUniformBinding& getPendingUniformBinding(size_t index) const { + Assertion(index < NUM_UNIFORM_BLOCK_TYPES, "getPendingUniformBinding: index %zu out of range!", index); + return m_pendingUniformBindings[index]; + } + + /** + * @brief Bind material textures to descriptor set (public for decal rendering) + * @param writer Texture array is written into the writer's current set via setImageArray + */ + bool bindMaterialTextures(material* mat, DescriptorWriter* writer) const; + + /** + * @brief Reset per-frame diagnostic counters (called at 
start of frame) + */ + void resetFrameStats(); + + /** + * @brief Print per-frame diagnostic summary (called at end of frame) + */ + void printFrameStats(); + +private: + /** + * @brief Shared implementation for the non-indexed renderPrimitives* variants + * + * Applies the material/pipeline, binds the vertex buffer, and issues a + * non-indexed draw. The only per-variant differences are the optional frame + * stat counter and the concrete material subtype (passed as material*). + * + * @param statCounter Optional per-variant FrameStats counter to increment (may be null) + */ + void renderPrimitivesCommon(material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle, size_t buffer_offset, + int* statCounter); + + /** + * @brief Apply material state and bind pipeline + * @return true if pipeline was successfully bound + */ + bool applyMaterial(material* mat, primitive_type prim_type, vertex_layout* layout); + + /** + * @brief Build pipeline config from material + */ + PipelineConfig buildPipelineConfig(material* mat, primitive_type prim_type) const; + + /** + * @brief Bind vertex buffer from handle + */ + void bindVertexBuffer(gr_buffer_handle handle, size_t offset = 0); + + /** + * @brief Bind index buffer from handle + */ + void bindIndexBuffer(gr_buffer_handle handle); + + /** + * @brief Issue draw call + */ + void draw(primitive_type prim_type, int first_vertex, int vertex_count); + + /** + * @brief Issue indexed draw call + */ + void drawIndexed(primitive_type prim_type, int index_count, int first_index, int vertex_offset); + + /** + * @brief Create sphere VBO/IBO from shared mesh generator + */ + void initSphereBuffers(); + + /** + * @brief Destroy sphere VBO/IBO + */ + void shutdownSphereBuffers(); + + vk::Device m_device; + + // Current render state + int m_zbufferMode = GR_ZBUFF_FULL; + int m_stencilMode = GR_STENCIL_NONE; + bool m_cullEnabled = true; + int m_fillMode = 
GR_FILL_MODE_SOLID; + bool m_colorBufferEnabled = true; + int m_textureAddressing = TMAP_ADDRESS_WRAP; + bool m_depthBiasEnabled = false; + + // Pending uniform buffer bindings (indexed by uniform_block_type) + static constexpr size_t NUM_UNIFORM_BLOCK_TYPES = static_cast(uniform_block_type::NUM_BLOCK_TYPES); + std::array m_pendingUniformBindings; + + // Per-frame diagnostic counters + struct FrameStats { + int drawCalls = 0; + int drawIndexedCalls = 0; + int applyMaterialCalls = 0; + int applyMaterialFailures = 0; + int noPipelineSkips = 0; + int shaderHandleNeg1 = 0; + int totalVertices = 0; + int totalIndices = 0; + + // Per-function call counters + int renderPrimitiveCalls = 0; + int renderBatchedCalls = 0; + int renderModelCalls = 0; + int renderParticleCalls = 0; + int renderNanoVGCalls = 0; + int renderRocketCalls = 0; + int renderMovieCalls = 0; + }; + FrameStats m_frameStats; + int m_frameStatsFrameNum = 0; + + // Texture overrides for material bindings 4-6. + vk::DescriptorImageInfo m_depthTextureInfo; // binding 4: depth/position for soft particles + vk::DescriptorImageInfo m_sceneColorInfo; // binding 5: scene color for distortion + vk::DescriptorImageInfo m_distMapInfo; // binding 6: distortion map + + // Pre-built sphere mesh for draw_sphere / deferred light volumes + gr_buffer_handle m_sphereVBO; + gr_buffer_handle m_sphereIBO; + unsigned int m_sphereIndexCount = 0; + vertex_layout m_sphereVertexLayout; + + bool m_initialized = false; +}; + +// Global draw manager access +VulkanDrawManager* getDrawManager(); +void setDrawManager(VulkanDrawManager* manager); + +// ========== gr_screen function pointer implementations ========== +// These free functions implement gr_screen.gf_* function pointers. +// They are assigned in gr_vulkan.cpp::init_function_pointers(). 
+ +// Clear operations +void vulkan_clear(); +void vulkan_set_clear_color(int r, int g, int b); + +// Clipping +void vulkan_set_clip(int x, int y, int w, int h, int resize_mode); +void vulkan_reset_clip(); + +// Z-buffer +int vulkan_zbuffer_get(); +int vulkan_zbuffer_set(int mode); +void vulkan_zbuffer_clear(int mode); + +// Stencil +int vulkan_stencil_set(int mode); +void vulkan_stencil_clear(); + +// Render state +int vulkan_set_cull(int cull); +int vulkan_set_color_buffer(int mode); +void vulkan_set_fill_mode(int mode); +void vulkan_set_texture_addressing(int mode); +void vulkan_set_line_width(float width); +void vulkan_clear_states(); + +// Scene texture +void vulkan_scene_texture_begin(); +void vulkan_scene_texture_end(); +void vulkan_copy_effect_texture(); + +// 3D primitives +void vulkan_draw_sphere(material* material_def, float rad); +void vulkan_render_shield_impact(shield_material* material_info, primitive_type prim_type, + vertex_layout* layout, gr_buffer_handle buffer_handle, int n_verts); +void vulkan_render_model(model_material* material_info, indexed_vertex_source* vert_source, + vertex_buffer* bufferp, size_t texi); +void vulkan_render_primitives(material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle, size_t buffer_offset); +void vulkan_render_primitives_particle(particle_material* material_info, + primitive_type prim_type, vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_primitives_distortion(distortion_material* material_info, + primitive_type prim_type, vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_primitives_batched(batched_bitmap_material* material_info, + primitive_type prim_type, vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_movie(movie_material* material_info, primitive_type prim_type, + vertex_layout* 
layout, int n_verts, gr_buffer_handle buffer, size_t buffer_offset); +void vulkan_render_nanovg(nanovg_material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_rocket_primitives(interface_material* material_info, + primitive_type prim_type, vertex_layout* layout, int n_indices, + gr_buffer_handle vertex_buffer, gr_buffer_handle index_buffer); + +// Transform buffer for batched submodel rendering +void vulkan_update_transform_buffer(void* data, size_t size); + +// Environment mapping +void vulkan_calculate_irrmap(); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanMemory.cpp b/code/graphics/vulkan/VulkanMemory.cpp new file mode 100644 index 00000000000..779d587b655 --- /dev/null +++ b/code/graphics/vulkan/VulkanMemory.cpp @@ -0,0 +1,294 @@ +#define VMA_IMPLEMENTATION +#include "VulkanMemory.h" + +#include "globalincs/pstypes.h" + + +namespace graphics::vulkan { + +namespace { +VulkanMemoryManager* g_memoryManager = nullptr; +} + +VulkanMemoryManager* getMemoryManager() +{ + return g_memoryManager; +} + +void setMemoryManager(VulkanMemoryManager* manager) +{ + g_memoryManager = manager; +} + +VulkanMemoryManager::VulkanMemoryManager() = default; + +VulkanMemoryManager::~VulkanMemoryManager() +{ + if (m_initialized) { + shutdown(); + } +} + +bool VulkanMemoryManager::init(vk::Instance instance, vk::PhysicalDevice physicalDevice, vk::Device device) +{ + if (m_initialized) { + mprintf(("VulkanMemoryManager::init called when already initialized!\n")); + return false; + } + + // Log memory properties for diagnostics + auto memoryProperties = physicalDevice.getMemoryProperties(); + mprintf(("Vulkan Memory Manager initializing (VMA)\n")); + mprintf((" Memory heaps: %u\n", memoryProperties.memoryHeapCount)); + for (uint32_t i = 0; i < memoryProperties.memoryHeapCount; ++i) { + const auto& heap = memoryProperties.memoryHeaps[i]; + mprintf((" Heap %u: 
%zu MB%s\n", + i, + static_cast(heap.size / (1024 * 1024)), + (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) ? " (device local)" : "")); + } + + // Resolve Vulkan function pointers for VMA from the dynamic dispatcher + VmaVulkanFunctions vulkanFunctions = {}; + auto const& d = VULKAN_HPP_DEFAULT_DISPATCHER; + vulkanFunctions.vkGetInstanceProcAddr = d.vkGetInstanceProcAddr; + vulkanFunctions.vkGetDeviceProcAddr = d.vkGetDeviceProcAddr; + vulkanFunctions.vkGetPhysicalDeviceProperties = d.vkGetPhysicalDeviceProperties; + vulkanFunctions.vkGetPhysicalDeviceMemoryProperties = d.vkGetPhysicalDeviceMemoryProperties; + vulkanFunctions.vkAllocateMemory = d.vkAllocateMemory; + vulkanFunctions.vkFreeMemory = d.vkFreeMemory; + vulkanFunctions.vkMapMemory = d.vkMapMemory; + vulkanFunctions.vkUnmapMemory = d.vkUnmapMemory; + vulkanFunctions.vkFlushMappedMemoryRanges = d.vkFlushMappedMemoryRanges; + vulkanFunctions.vkInvalidateMappedMemoryRanges = d.vkInvalidateMappedMemoryRanges; + vulkanFunctions.vkBindBufferMemory = d.vkBindBufferMemory; + vulkanFunctions.vkBindImageMemory = d.vkBindImageMemory; + vulkanFunctions.vkGetBufferMemoryRequirements = d.vkGetBufferMemoryRequirements; + vulkanFunctions.vkGetImageMemoryRequirements = d.vkGetImageMemoryRequirements; + vulkanFunctions.vkCreateBuffer = d.vkCreateBuffer; + vulkanFunctions.vkDestroyBuffer = d.vkDestroyBuffer; + vulkanFunctions.vkCreateImage = d.vkCreateImage; + vulkanFunctions.vkDestroyImage = d.vkDestroyImage; + vulkanFunctions.vkCmdCopyBuffer = d.vkCmdCopyBuffer; + vulkanFunctions.vkGetBufferMemoryRequirements2KHR = d.vkGetBufferMemoryRequirements2; + vulkanFunctions.vkGetImageMemoryRequirements2KHR = d.vkGetImageMemoryRequirements2; + vulkanFunctions.vkBindBufferMemory2KHR = d.vkBindBufferMemory2; + vulkanFunctions.vkBindImageMemory2KHR = d.vkBindImageMemory2; + vulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = d.vkGetPhysicalDeviceMemoryProperties2; + + VmaAllocatorCreateInfo allocatorInfo = {}; + 
allocatorInfo.vulkanApiVersion = VK_API_VERSION_1_1; + allocatorInfo.physicalDevice = static_cast(physicalDevice); + allocatorInfo.device = static_cast(device); + allocatorInfo.instance = static_cast(instance); + allocatorInfo.pVulkanFunctions = &vulkanFunctions; + + VkResult result = vmaCreateAllocator(&allocatorInfo, &m_allocator); + if (result != VK_SUCCESS) { + mprintf(("Failed to create VMA allocator! VkResult: %d\n", static_cast(result))); + return false; + } + + mprintf(("Vulkan Memory Manager initialized (VMA)\n")); + m_initialized = true; + return true; +} + +void VulkanMemoryManager::shutdown() +{ + if (!m_initialized) { + return; + } + + if (m_allocationCount > 0) { + mprintf(("WARNING: VulkanMemoryManager shutdown with %zu allocations still active!\n", m_allocationCount)); + } + + if (m_allocator != VK_NULL_HANDLE) { + vmaDestroyAllocator(m_allocator); + m_allocator = VK_NULL_HANDLE; + } + + m_allocationCount = 0; + m_totalAllocatedBytes = 0; + m_initialized = false; +} + +VmaMemoryUsage VulkanMemoryManager::toVmaUsage(MemoryUsage usage) +{ + switch (usage) { + case MemoryUsage::GpuOnly: + return VMA_MEMORY_USAGE_GPU_ONLY; + case MemoryUsage::CpuToGpu: + return VMA_MEMORY_USAGE_CPU_TO_GPU; + case MemoryUsage::GpuToCpu: + return VMA_MEMORY_USAGE_GPU_TO_CPU; + case MemoryUsage::CpuOnly: + return VMA_MEMORY_USAGE_CPU_ONLY; + default: + return VMA_MEMORY_USAGE_CPU_TO_GPU; + } +} + +bool VulkanMemoryManager::allocateBufferMemory(vk::Buffer buffer, MemoryUsage usage, VulkanAllocation& allocation) +{ + if (!m_initialized) { + mprintf(("VulkanMemoryManager::allocateBufferMemory called before initialization!\n")); + return false; + } + + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.usage = toVmaUsage(usage); + + VmaAllocationInfo allocInfo; + VkResult result = vmaAllocateMemoryForBuffer( + m_allocator, + static_cast(buffer), + &allocCreateInfo, + &allocation.vmaAlloc, + &allocInfo); + + if (result != VK_SUCCESS) { + mprintf(("Failed to 
allocate buffer memory via VMA! VkResult: %d (allocations: %zu, total: %zu bytes)\n", + static_cast(result), m_allocationCount, m_totalAllocatedBytes)); + allocation.vmaAlloc = VK_NULL_HANDLE; + return false; + } + + // Bind the memory to the buffer + result = vmaBindBufferMemory(m_allocator, allocation.vmaAlloc, static_cast(buffer)); + if (result != VK_SUCCESS) { + mprintf(("Failed to bind buffer memory via VMA! VkResult: %d\n", static_cast(result))); + vmaFreeMemory(m_allocator, allocation.vmaAlloc); + allocation.vmaAlloc = VK_NULL_HANDLE; + return false; + } + + allocation.size = allocInfo.size; + allocation.mappedPtr = allocInfo.pMappedData; + + ++m_allocationCount; + m_totalAllocatedBytes += static_cast(allocation.size); + + return true; +} + +bool VulkanMemoryManager::allocateImageMemory(vk::Image image, MemoryUsage usage, VulkanAllocation& allocation) +{ + if (!m_initialized) { + mprintf(("VulkanMemoryManager::allocateImageMemory called before initialization!\n")); + return false; + } + + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.usage = toVmaUsage(usage); + + VmaAllocationInfo allocInfo; + VkResult result = vmaAllocateMemoryForImage( + m_allocator, + static_cast(image), + &allocCreateInfo, + &allocation.vmaAlloc, + &allocInfo); + + if (result != VK_SUCCESS) { + mprintf(("Failed to allocate image memory via VMA! VkResult: %d (allocations: %zu, total: %zu bytes)\n", + static_cast(result), m_allocationCount, m_totalAllocatedBytes)); + allocation.vmaAlloc = VK_NULL_HANDLE; + return false; + } + + // Bind the memory to the image + result = vmaBindImageMemory(m_allocator, allocation.vmaAlloc, static_cast(image)); + if (result != VK_SUCCESS) { + mprintf(("Failed to bind image memory via VMA! 
VkResult: %d\n", static_cast(result))); + vmaFreeMemory(m_allocator, allocation.vmaAlloc); + allocation.vmaAlloc = VK_NULL_HANDLE; + return false; + } + + allocation.size = allocInfo.size; + allocation.mappedPtr = allocInfo.pMappedData; + + ++m_allocationCount; + m_totalAllocatedBytes += static_cast(allocation.size); + + return true; +} + +void VulkanMemoryManager::freeAllocation(VulkanAllocation& allocation) +{ + if (!m_initialized || !allocation.isValid()) { + return; + } + + // Unmap if still mapped — VMA asserts map count == 0 on free + if (allocation.mappedPtr != nullptr) { + vmaUnmapMemory(m_allocator, allocation.vmaAlloc); + allocation.mappedPtr = nullptr; + } + + vmaFreeMemory(m_allocator, allocation.vmaAlloc); + + --m_allocationCount; + m_totalAllocatedBytes -= static_cast(allocation.size); + + allocation.vmaAlloc = VK_NULL_HANDLE; + allocation.size = 0; + allocation.mappedPtr = nullptr; +} + +void* VulkanMemoryManager::mapMemory(VulkanAllocation& allocation) +{ + if (!m_initialized || !allocation.isValid()) { + return nullptr; + } + + if (allocation.mappedPtr != nullptr) { + // Already mapped (VMA supports nested map calls via refcount) + return allocation.mappedPtr; + } + + VkResult result = vmaMapMemory(m_allocator, allocation.vmaAlloc, &allocation.mappedPtr); + if (result != VK_SUCCESS) { + mprintf(("Failed to map memory via VMA! 
VkResult: %d\n", static_cast(result))); + allocation.mappedPtr = nullptr; + return nullptr; + } + + return allocation.mappedPtr; +} + +void VulkanMemoryManager::unmapMemory(VulkanAllocation& allocation) +{ + if (!m_initialized || !allocation.isValid() || allocation.mappedPtr == nullptr) { + return; + } + + vmaUnmapMemory(m_allocator, allocation.vmaAlloc); + allocation.mappedPtr = nullptr; +} + +void VulkanMemoryManager::flushMemory(const VulkanAllocation& allocation, vk::DeviceSize offset, vk::DeviceSize size) +{ + if (!m_initialized || !allocation.isValid()) { + return; + } + + vmaFlushAllocation(m_allocator, allocation.vmaAlloc, + static_cast(offset), + (size == VK_WHOLE_SIZE) ? allocation.size : static_cast(size)); +} + +void VulkanMemoryManager::invalidateMemory(const VulkanAllocation& allocation, vk::DeviceSize offset, vk::DeviceSize size) +{ + if (!m_initialized || !allocation.isValid()) { + return; + } + + vmaInvalidateAllocation(m_allocator, allocation.vmaAlloc, + static_cast(offset), + (size == VK_WHOLE_SIZE) ? allocation.size : static_cast(size)); +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanMemory.h b/code/graphics/vulkan/VulkanMemory.h new file mode 100644 index 00000000000..615c48f601f --- /dev/null +++ b/code/graphics/vulkan/VulkanMemory.h @@ -0,0 +1,148 @@ +#pragma once + +#include "globalincs/pstypes.h" + +#include + +// VMA requires the Vulkan function pointers. We use the dynamic dispatcher, +// so we supply them via VMA_DYNAMIC_VULKAN_FUNCTIONS. +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0 +#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1 + +#include + + +namespace graphics::vulkan { + +// Forward declarations +class VulkanRenderer; + +/** + * @brief Memory allocation info returned when allocating GPU memory. + * + * Wraps a VmaAllocation handle. Callers should use isValid() instead of + * checking internal fields directly. 
/**
 * @brief Memory manager for Vulkan GPU memory allocations, backed by VMA.
 *
 * VMA sub-allocates from large VkDeviceMemory blocks, avoiding the
 * per-object allocation limit (maxMemoryAllocationCount) that the
 * previous simple allocator could hit with large mods.
 *
 * Every operation other than init() is a no-op (or returns failure) until
 * init() has succeeded.
 */
class VulkanMemoryManager {
public:
	VulkanMemoryManager();
	~VulkanMemoryManager();

	// Non-copyable
	VulkanMemoryManager(const VulkanMemoryManager&) = delete;
	VulkanMemoryManager& operator=(const VulkanMemoryManager&) = delete;

	/**
	 * @brief Initialize the memory manager
	 * @param instance The Vulkan instance (presumably handed to the VMA allocator;
	 *        the implementation is not visible here - TODO confirm)
	 * @param physicalDevice The physical device to query memory properties from
	 * @param device The logical device for allocations
	 * @return true on success
	 */
	bool init(vk::Instance instance, vk::PhysicalDevice physicalDevice, vk::Device device);

	/**
	 * @brief Shutdown and free all allocations
	 */
	void shutdown();

	/**
	 * @brief Allocate memory for a buffer
	 *
	 * The memory is also bound to the buffer. If allocation or binding fails,
	 * allocation.vmaAlloc is left as VK_NULL_HANDLE.
	 *
	 * @param buffer The buffer to allocate memory for
	 * @param usage The intended memory usage pattern
	 * @param[out] allocation Output allocation info
	 * @return true on success
	 */
	bool allocateBufferMemory(vk::Buffer buffer, MemoryUsage usage, VulkanAllocation& allocation);

	/**
	 * @brief Allocate memory for an image
	 *
	 * The memory is also bound to the image. If allocation or binding fails,
	 * allocation.vmaAlloc is left as VK_NULL_HANDLE.
	 *
	 * @param image The image to allocate memory for
	 * @param usage The intended memory usage pattern
	 * @param[out] allocation Output allocation info
	 * @return true on success
	 */
	bool allocateImageMemory(vk::Image image, MemoryUsage usage, VulkanAllocation& allocation);

	/**
	 * @brief Free a previous allocation
	 *
	 * Ignored for invalid allocations. A still-mapped allocation is unmapped
	 * first, and the allocation struct is reset to its empty state afterwards.
	 *
	 * @param allocation The allocation to free
	 */
	void freeAllocation(VulkanAllocation& allocation);

	/**
	 * @brief Map memory for CPU access
	 *
	 * If the allocation already carries a mapped pointer, that pointer is
	 * returned without mapping again.
	 *
	 * @param allocation The allocation to map
	 * @return Pointer to mapped memory, or nullptr on failure
	 */
	void* mapMemory(VulkanAllocation& allocation);

	/**
	 * @brief Unmap previously mapped memory
	 *
	 * Ignored if the allocation is not currently mapped; clears
	 * allocation.mappedPtr otherwise.
	 *
	 * @param allocation The allocation to unmap
	 */
	void unmapMemory(VulkanAllocation& allocation);

	/**
	 * @brief Flush mapped memory to make writes visible to GPU
	 * @param allocation The allocation containing the range to flush
	 * @param offset Offset within the allocation
	 * @param size Size of the range to flush (VK_WHOLE_SIZE for entire allocation)
	 */
	void flushMemory(const VulkanAllocation& allocation, vk::DeviceSize offset, vk::DeviceSize size);

	/**
	 * @brief Invalidate mapped memory to make GPU writes visible to CPU
	 * @param allocation The allocation containing the range to invalidate
	 * @param offset Offset within the allocation
	 * @param size Size of the range to invalidate (VK_WHOLE_SIZE for entire allocation)
	 */
	void invalidateMemory(const VulkanAllocation& allocation, vk::DeviceSize offset, vk::DeviceSize size);

	/**
	 * @brief Get memory statistics
	 *
	 * The count is the number of live allocations made through this manager;
	 * the byte total is the sum of their VMA-reported sizes.
	 */
	size_t getAllocationCount() const { return m_allocationCount; }
	size_t getTotalAllocatedBytes() const { return m_totalAllocatedBytes; }

private:
	// Translate the engine's MemoryUsage enum into the VMA equivalent
	static VmaMemoryUsage toVmaUsage(MemoryUsage usage);

	// VMA allocator handle; null until init() succeeds
	VmaAllocator m_allocator = VK_NULL_HANDLE;

	// Statistics, updated on every successful allocate/free
	size_t m_allocationCount = 0;
	size_t m_totalAllocatedBytes = 0;

	bool m_initialized = false;
};
graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPipeline.cpp b/code/graphics/vulkan/VulkanPipeline.cpp new file mode 100644 index 00000000000..90efc5258ad --- /dev/null +++ b/code/graphics/vulkan/VulkanPipeline.cpp @@ -0,0 +1,461 @@ +#include "VulkanPipeline.h" +#include "VulkanConvert.h" + +#include "cfile/cfile.h" + + +namespace graphics::vulkan { + +// Global pipeline manager pointer +static VulkanPipelineManager* g_pipelineManager = nullptr; + +VulkanPipelineManager* getPipelineManager() +{ + Assertion(g_pipelineManager != nullptr, "Vulkan PipelineManager not initialized!"); + return g_pipelineManager; +} + +void setPipelineManager(VulkanPipelineManager* manager) +{ + g_pipelineManager = manager; +} + +bool PipelineConfig::operator==(const PipelineConfig& other) const +{ + return shaderType == other.shaderType && + shaderFlags == other.shaderFlags && + vertexLayoutHash == other.vertexLayoutHash && + primitiveType == other.primitiveType && + depthMode == other.depthMode && + blendMode == other.blendMode && + cullEnabled == other.cullEnabled && + frontFaceCW == other.frontFaceCW && + depthWriteEnabled == other.depthWriteEnabled && + stencilEnabled == other.stencilEnabled && + stencilFunc == other.stencilFunc && + stencilMask == other.stencilMask && + frontStencilOp.stencilFailOperation == other.frontStencilOp.stencilFailOperation && + frontStencilOp.depthFailOperation == other.frontStencilOp.depthFailOperation && + frontStencilOp.successOperation == other.frontStencilOp.successOperation && + backStencilOp.stencilFailOperation == other.backStencilOp.stencilFailOperation && + backStencilOp.depthFailOperation == other.backStencilOp.depthFailOperation && + backStencilOp.successOperation == other.backStencilOp.successOperation && + colorWriteMask.x == other.colorWriteMask.x && + colorWriteMask.y == other.colorWriteMask.y && + colorWriteMask.z == other.colorWriteMask.z && + colorWriteMask.w == other.colorWriteMask.w && + fillMode == other.fillMode && + 
depthBiasEnabled == other.depthBiasEnabled && + renderPass == other.renderPass && + subpass == other.subpass && + colorAttachmentCount == other.colorAttachmentCount && + sampleCount == other.sampleCount && + perAttachmentBlendEnabled == other.perAttachmentBlendEnabled && + [&]() { + if (!perAttachmentBlendEnabled) return true; + for (uint32_t i = 0; i < colorAttachmentCount; ++i) { + if (attachmentBlends[i].blendMode != other.attachmentBlends[i].blendMode || + attachmentBlends[i].writeMask.x != other.attachmentBlends[i].writeMask.x || + attachmentBlends[i].writeMask.y != other.attachmentBlends[i].writeMask.y || + attachmentBlends[i].writeMask.z != other.attachmentBlends[i].writeMask.z || + attachmentBlends[i].writeMask.w != other.attachmentBlends[i].writeMask.w) + return false; + } + return true; + }(); +} + +uint64_t PipelineConfig::hash() const +{ + uint64_t h = 0; + + // Combine all fields into hash + h ^= std::hash()(static_cast(shaderType)) << 0; + h ^= std::hash()(shaderFlags) << 4; + h ^= std::hash()(vertexLayoutHash) << 8; + h ^= std::hash()(static_cast(primitiveType)) << 12; + h ^= std::hash()(static_cast(depthMode)) << 16; + h ^= std::hash()(static_cast(blendMode)) << 20; + h ^= std::hash()(cullEnabled) << 24; + h ^= std::hash()(frontFaceCW) << 25; + h ^= std::hash()(depthWriteEnabled) << 26; + h ^= std::hash()(stencilEnabled) << 27; + h ^= std::hash()(static_cast(stencilFunc)) << 28; + h ^= std::hash()(stencilMask) << 31; + h ^= static_cast(std::hash()(static_cast(frontStencilOp.stencilFailOperation))) << 33; + h ^= static_cast(std::hash()(static_cast(frontStencilOp.depthFailOperation))) << 35; + h ^= static_cast(std::hash()(static_cast(frontStencilOp.successOperation))) << 37; + h ^= static_cast(std::hash()(static_cast(backStencilOp.stencilFailOperation))) << 39; + h ^= static_cast(std::hash()(static_cast(backStencilOp.depthFailOperation))) << 41; + h ^= static_cast(std::hash()(static_cast(backStencilOp.successOperation))) << 43; + h ^= 
static_cast(std::hash()((colorWriteMask.x ? 1 : 0) | (colorWriteMask.y ? 2 : 0) | + (colorWriteMask.z ? 4 : 0) | (colorWriteMask.w ? 8 : 0))) << 44; + h ^= static_cast(std::hash()(fillMode)) << 45; + h ^= static_cast(std::hash()(depthBiasEnabled)) << 46; + h ^= static_cast(std::hash()(reinterpret_cast + (reinterpret_cast + (static_cast(renderPass))))) << 47; + h ^= static_cast(std::hash()(subpass)) << 51; + h ^= static_cast(std::hash()(colorAttachmentCount)) << 55; + h ^= static_cast(std::hash()(static_cast(sampleCount))) << 56; + h ^= static_cast(std::hash()(perAttachmentBlendEnabled)) << 57; + if (perAttachmentBlendEnabled) { + for (uint32_t i = 0; i < colorAttachmentCount; ++i) { + h ^= static_cast(std::hash()(static_cast(attachmentBlends[i].blendMode))) << (i * 3 + 2); + h ^= static_cast(std::hash()((attachmentBlends[i].writeMask.x ? 1 : 0) | + (attachmentBlends[i].writeMask.y ? 2 : 0) | + (attachmentBlends[i].writeMask.z ? 4 : 0) | + (attachmentBlends[i].writeMask.w ? 8 : 0))) << (i * 3 + 5); + } + } + + return h; +} + +bool VulkanPipelineManager::init(vk::Device device, VulkanShaderManager* shaderManager, + VulkanDescriptorManager* descriptorManager) +{ + if (m_initialized) { + return true; + } + + m_device = device; + m_shaderManager = shaderManager; + m_descriptorManager = descriptorManager; + + // Create empty pipeline cache + vk::PipelineCacheCreateInfo cacheInfo; + m_pipelineCache = m_device.createPipelineCacheUnique(cacheInfo); + + // Create common pipeline layout + createPipelineLayout(); + + m_initialized = true; + mprintf(("VulkanPipelineManager: Initialized\n")); + return true; +} + +void VulkanPipelineManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Wait for device idle + m_device.waitIdle(); + + // Clear all pipelines + m_pipelines.clear(); + m_pipelineLayout.reset(); + m_pipelineCache.reset(); + m_vertexFormatCache.clear(); + + m_initialized = false; + mprintf(("VulkanPipelineManager: Shutdown complete\n")); +} + +vk::Pipeline 
VulkanPipelineManager::getPipeline(const PipelineConfig& config, const vertex_layout& vertexLayout) +{ + Assertion(m_initialized, "VulkanPipelineManager::getPipeline called before initialization!"); + + // Update vertex layout hash in config + PipelineConfig fullConfig = config; + fullConfig.vertexLayoutHash = vertexLayout.hash(); + + // Check cache + auto it = m_pipelines.find(fullConfig); + if (it != m_pipelines.end()) { + return it->second.get(); + } + + // Create new pipeline + auto pipeline = createPipeline(fullConfig, vertexLayout); + if (!pipeline) { + return {}; + } + + vk::Pipeline result = pipeline.get(); + m_pipelines[fullConfig] = std::move(pipeline); + + nprintf(("Vulkan", "VulkanPipelineManager: Created pipeline for shader type %d (hash 0x%" PRIx64 ")\n", + static_cast(config.shaderType), fullConfig.hash())); + + return result; +} + +bool VulkanPipelineManager::loadPipelineCache(const SCP_string& filename) +{ + // Try to load cache file + CFILE* fp = cfopen(filename.c_str(), "rb", CF_TYPE_CACHE); + if (!fp) { + nprintf(("Vulkan", "VulkanPipelineManager: No pipeline cache file found: %s\n", filename.c_str())); + return false; + } + + // Get file size + int fileSize = cfilelength(fp); + if (fileSize <= 0) { + cfclose(fp); + return false; + } + + // Read cache data + SCP_vector cacheData(fileSize); + if (cfread(cacheData.data(), 1, fileSize, fp) != fileSize) { + cfclose(fp); + return false; + } + cfclose(fp); + + // Create new pipeline cache with data + vk::PipelineCacheCreateInfo cacheInfo; + cacheInfo.initialDataSize = cacheData.size(); + cacheInfo.pInitialData = cacheData.data(); + + try { + auto newCache = m_device.createPipelineCacheUnique(cacheInfo); + m_pipelineCache = std::move(newCache); + mprintf(("VulkanPipelineManager: Loaded pipeline cache: %s (%d bytes)\n", + filename.c_str(), fileSize)); + return true; + } catch (const vk::SystemError& e) { + mprintf(("VulkanPipelineManager: Failed to load pipeline cache: %s\n", e.what())); + return false; 
+ } +} + +bool VulkanPipelineManager::savePipelineCache(const SCP_string& filename) +{ + if (!m_pipelineCache) { + return false; + } + + // Get cache data + auto cacheData = m_device.getPipelineCacheData(m_pipelineCache.get()); + if (cacheData.empty()) { + return false; + } + + // Write to file + CFILE* fp = cfopen(filename.c_str(), "wb", CF_TYPE_CACHE); + if (!fp) { + mprintf(("VulkanPipelineManager: Could not create cache file: %s\n", filename.c_str())); + return false; + } + + bool success = (cfwrite(cacheData.data(), 1, static_cast(cacheData.size()), fp) == + static_cast(cacheData.size())); + cfclose(fp); + + if (success) { + mprintf(("VulkanPipelineManager: Saved pipeline cache: %s (%zu bytes)\n", + filename.c_str(), cacheData.size())); + } + + return success; +} + +bool VulkanPipelineManager::needsFallbackAttribute(const vertex_layout& vertexLayout, shader_type shaderType, + VertexAttributeLocation location) +{ + // Empty layouts (fullscreen triangle etc.) don't use fallbacks + if (vertexLayout.get_num_vertex_components() == 0) return false; + + const VertexInputConfig& config = m_vertexFormatCache.getVertexInputConfig(vertexLayout); + uint32_t bit = 1u << location; + + // Layout natively provides this attribute — no fallback needed + if (config.providedInputMask & bit) return false; + + // Fallback needed only if the shader actually consumes this attribute + const VulkanShaderModule* shader = m_shaderManager->getShaderByType(shaderType); + if (shader && shader->vertexInputMask != 0) { + return (shader->vertexInputMask & bit) != 0; + } + return true; +} + +void VulkanPipelineManager::createPipelineLayout() +{ + // Get descriptor set layouts from descriptor manager + const auto& uniqueLayouts = m_descriptorManager->getAllSetLayouts(); + std::array(DescriptorSetIndex::Count)> setLayouts; + for (size_t i = 0; i < uniqueLayouts.size(); ++i) { + setLayouts[i] = uniqueLayouts[i].get(); + } + + vk::PipelineLayoutCreateInfo layoutInfo; + layoutInfo.setLayoutCount = 
static_cast(setLayouts.size()); + layoutInfo.pSetLayouts = setLayouts.data(); + layoutInfo.pushConstantRangeCount = 0; + layoutInfo.pPushConstantRanges = nullptr; + + m_pipelineLayout = m_device.createPipelineLayoutUnique(layoutInfo); + + mprintf(("VulkanPipelineManager: Created pipeline layout with %zu descriptor sets\n", + setLayouts.size())); +} + +vk::UniquePipeline VulkanPipelineManager::createPipeline(const PipelineConfig& config, + const vertex_layout& vertexLayout) +{ + // Ensure shader variant is loaded (lazy creation on first use) + int shaderHandle = m_shaderManager->maybeCreateShader(config.shaderType, config.shaderFlags); + + // Get shader modules + const VulkanShaderModule* shader = (shaderHandle >= 0) ? m_shaderManager->getShader(shaderHandle) : nullptr; + if (!shader || !shader->valid) { + mprintf(("VulkanPipelineManager: Shader not available for type %d\n", + static_cast(config.shaderType))); + return {}; + } + + // Debug: Log which shader and vertex layout is being used + mprintf(("VulkanPipelineManager: Creating pipeline for shader type %d (%s)\n", + static_cast(config.shaderType), shader->description.c_str())); + mprintf((" Vertex layout has %zu components:\n", vertexLayout.get_num_vertex_components())); + for (size_t i = 0; i < vertexLayout.get_num_vertex_components(); ++i) { + const vertex_format_data* comp = vertexLayout.get_vertex_component(i); + mprintf((" [%zu] format=%d offset=%zu stride=%zu\n", i, + static_cast(comp->format_type), comp->offset, comp->stride)); + } + + // Shader stages + SCP_vector shaderStages; + + vk::PipelineShaderStageCreateInfo vertStage; + vertStage.stage = vk::ShaderStageFlagBits::eVertex; + vertStage.module = shader->vertexModule.get(); + vertStage.pName = "main"; + shaderStages.push_back(vertStage); + + vk::PipelineShaderStageCreateInfo fragStage; + fragStage.stage = vk::ShaderStageFlagBits::eFragment; + fragStage.module = shader->fragmentModule.get(); + fragStage.pName = "main"; + 
shaderStages.push_back(fragStage); + + // Vertex input state — filter out attributes the shader doesn't consume. + // The vertex format cache may add fallback color/texcoord attributes that + // shaders like NanoVG don't declare; the SPIR-V compiler strips unused + // inputs, so we must match the pipeline to the actual shader inputs. + VertexInputConfig vertexInputConfig = m_vertexFormatCache.getVertexInputConfig(vertexLayout); + if (shader->vertexInputMask != 0) { + uint32_t mask = shader->vertexInputMask; + auto& attrs = vertexInputConfig.attributes; + SCP_unordered_set usedBindings; + + // Remove attributes at locations the shader doesn't use + attrs.erase(std::remove_if(attrs.begin(), attrs.end(), + [mask](const vk::VertexInputAttributeDescription& a) { + return (mask & (1u << a.location)) == 0; + }), attrs.end()); + + // Collect bindings still referenced by remaining attributes + for (auto& a : attrs) { + usedBindings.insert(a.binding); + } + + // Remove orphaned bindings + auto& binds = vertexInputConfig.bindings; + binds.erase(std::remove_if(binds.begin(), binds.end(), + [&usedBindings](const vk::VertexInputBindingDescription& b) { + return usedBindings.count(b.binding) == 0; + }), binds.end()); + + vertexInputConfig.updatePointers(); + } + + // Input assembly + vk::PipelineInputAssemblyStateCreateInfo inputAssembly; + inputAssembly.topology = convertPrimitiveType(config.primitiveType); + inputAssembly.primitiveRestartEnable = VK_FALSE; + + // Viewport state (dynamic) + vk::PipelineViewportStateCreateInfo viewportState; + viewportState.viewportCount = 1; + viewportState.pViewports = nullptr; // Dynamic + viewportState.scissorCount = 1; + viewportState.pScissors = nullptr; // Dynamic + + // Rasterization state + vk::PipelineRasterizationStateCreateInfo rasterizer = createRasterizationState( + config.cullEnabled, config.fillMode, config.frontFaceCW, config.depthBiasEnabled); + + // Multisample state + vk::PipelineMultisampleStateCreateInfo multisampling; + 
multisampling.rasterizationSamples = config.sampleCount; + multisampling.sampleShadingEnable = VK_FALSE; + + // Depth stencil state + vk::PipelineDepthStencilStateCreateInfo depthStencil = createDepthStencilState( + config.depthMode, + config.stencilEnabled, + config.stencilFunc, + config.stencilEnabled ? &config.frontStencilOp : nullptr, + config.stencilEnabled ? &config.backStencilOp : nullptr, + config.stencilMask); + + // Override depth write if specified + if (!config.depthWriteEnabled) { + depthStencil.depthWriteEnable = VK_FALSE; + } + + // Color blend state + SCP_vector colorBlendAttachments; + for (uint32_t i = 0; i < config.colorAttachmentCount; ++i) { + if (config.perAttachmentBlendEnabled) { + colorBlendAttachments.push_back(createColorBlendAttachment( + config.attachmentBlends[i].blendMode, config.attachmentBlends[i].writeMask)); + } else { + colorBlendAttachments.push_back(createColorBlendAttachment(config.blendMode, config.colorWriteMask)); + } + } + + vk::PipelineColorBlendStateCreateInfo colorBlending; + colorBlending.logicOpEnable = VK_FALSE; + colorBlending.attachmentCount = static_cast(colorBlendAttachments.size()); + colorBlending.pAttachments = colorBlendAttachments.data(); + + // Dynamic state + std::array dynamicStates = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + vk::DynamicState::eLineWidth, + vk::DynamicState::eDepthBias, + vk::DynamicState::eStencilReference, + }; + + vk::PipelineDynamicStateCreateInfo dynamicState; + dynamicState.dynamicStateCount = static_cast(dynamicStates.size()); + dynamicState.pDynamicStates = dynamicStates.data(); + + // Create pipeline + vk::GraphicsPipelineCreateInfo pipelineInfo; + pipelineInfo.stageCount = static_cast(shaderStages.size()); + pipelineInfo.pStages = shaderStages.data(); + pipelineInfo.pVertexInputState = &vertexInputConfig.createInfo; + pipelineInfo.pInputAssemblyState = &inputAssembly; + pipelineInfo.pViewportState = &viewportState; + pipelineInfo.pRasterizationState = 
&rasterizer; + pipelineInfo.pMultisampleState = &multisampling; + pipelineInfo.pDepthStencilState = &depthStencil; + pipelineInfo.pColorBlendState = &colorBlending; + pipelineInfo.pDynamicState = &dynamicState; + pipelineInfo.layout = m_pipelineLayout.get(); + pipelineInfo.renderPass = config.renderPass; + pipelineInfo.subpass = config.subpass; + pipelineInfo.basePipelineHandle = nullptr; + pipelineInfo.basePipelineIndex = -1; + + try { + auto result = m_device.createGraphicsPipelineUnique(m_pipelineCache.get(), pipelineInfo); + return std::move(result.value); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPipelineManager: Failed to create pipeline: %s\n", e.what())); + return {}; + } +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanPipeline.h b/code/graphics/vulkan/VulkanPipeline.h new file mode 100644 index 00000000000..92882225876 --- /dev/null +++ b/code/graphics/vulkan/VulkanPipeline.h @@ -0,0 +1,195 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "graphics/material.h" + +#include + +#include "VulkanShader.h" +#include "VulkanVertexFormat.h" +#include "VulkanDescriptorManager.h" + +#include + + +namespace graphics::vulkan { + +/** + * @brief Pipeline configuration key + * + * All state that affects pipeline creation. Two configurations with the + * same values will produce identical pipelines. 
+ */ +struct PipelineConfig { + // Shader identification + shader_type shaderType = SDR_TYPE_NONE; + unsigned int shaderFlags = 0; // SDR_FLAG_* bitmask for shader variant + + // Vertex format + size_t vertexLayoutHash = 0; + + // Render state + primitive_type primitiveType = PRIM_TYPE_TRIS; + gr_zbuffer_type depthMode = ZBUFFER_TYPE_NONE; + gr_alpha_blend blendMode = ALPHA_BLEND_NONE; + bool cullEnabled = true; + bool frontFaceCW = false; // Match OpenGL default (CCW); models override to CW + bool depthWriteEnabled = true; + + // Stencil state + bool stencilEnabled = false; + ComparisionFunction stencilFunc = ComparisionFunction::Always; + uint32_t stencilMask = 0xFF; + material::StencilOp frontStencilOp; + material::StencilOp backStencilOp; + + // Fill mode (0 = solid, 1 = wireframe) + int fillMode = 0; + + // Depth bias + bool depthBiasEnabled = false; + + // Color write mask + bvec4 colorWriteMask = {true, true, true, true}; + + // Render pass compatibility + vk::RenderPass renderPass; + uint32_t subpass = 0; + + // Color attachment count (for multiple render targets) + uint32_t colorAttachmentCount = 1; + + // MSAA sample count (default e1 = no multisampling) + vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1; + + // Per-attachment blend (used by decal rendering to write-mask unused G-buffer attachments) + bool perAttachmentBlendEnabled = false; + struct AttachmentBlend { + gr_alpha_blend blendMode = ALPHA_BLEND_NONE; + bvec4 writeMask = {true, true, true, true}; + }; + static constexpr uint32_t MAX_COLOR_ATTACHMENTS = 8; + std::array attachmentBlends; + + bool operator==(const PipelineConfig& other) const; + uint64_t hash() const; +}; + +struct PipelineConfigHasher { + uint64_t operator()(const PipelineConfig& config) const { + return config.hash(); + } +}; + +/** + * @brief Manages Vulkan graphics pipelines + * + * Creates and caches pipelines based on configuration. 
Uses VkPipelineCache + * for driver-level caching and an application-level cache for fast lookups. + */ +class VulkanPipelineManager { +public: + VulkanPipelineManager() = default; + ~VulkanPipelineManager() = default; + + // Non-copyable + VulkanPipelineManager(const VulkanPipelineManager&) = delete; + VulkanPipelineManager& operator=(const VulkanPipelineManager&) = delete; + + /** + * @brief Initialize the pipeline manager + * @param device Vulkan logical device + * @param shaderManager Shader manager for loading shader modules + * @param descriptorManager Descriptor manager for set layouts + * @return true on success + */ + bool init(vk::Device device, VulkanShaderManager* shaderManager, + VulkanDescriptorManager* descriptorManager); + + /** + * @brief Shutdown and release resources + */ + void shutdown(); + + /** + * @brief Get or create a pipeline for the given configuration + * @param config Pipeline configuration + * @param vertexLayout Vertex layout for the pipeline + * @return Pipeline handle, or null handle on failure + */ + vk::Pipeline getPipeline(const PipelineConfig& config, const vertex_layout& vertexLayout); + + /** + * @brief Get the common pipeline layout + * + * All pipelines share the same pipeline layout (descriptor set layouts + * and push constant ranges). 
+ */ + vk::PipelineLayout getPipelineLayout() const { return m_pipelineLayout.get(); } + + /** + * @brief Load pipeline cache from file + * @param filename Cache file path + * @return true if cache was loaded + */ + bool loadPipelineCache(const SCP_string& filename); + + /** + * @brief Save pipeline cache to file + * @param filename Cache file path + * @return true if cache was saved + */ + bool savePipelineCache(const SCP_string& filename); + + /** + * @brief Get number of cached pipelines + */ + size_t getPipelineCount() const { return m_pipelines.size(); } + + /** + * @brief Check if a draw needs a fallback buffer for a given vertex attribute + * @param vertexLayout The vertex layout to check + * @param shaderType The shader being used (checked against vertexInputMask) + * @param location The vertex attribute location to check + * @return true if the layout doesn't provide this attribute AND the shader consumes it + */ + bool needsFallbackAttribute(const vertex_layout& vertexLayout, shader_type shaderType, + VertexAttributeLocation location); + +private: + /** + * @brief Create the common pipeline layout + */ + void createPipelineLayout(); + + /** + * @brief Create a new pipeline + */ + vk::UniquePipeline createPipeline(const PipelineConfig& config, const vertex_layout& vertexLayout); + + vk::Device m_device; + VulkanShaderManager* m_shaderManager = nullptr; + VulkanDescriptorManager* m_descriptorManager = nullptr; + + // Common pipeline layout (shared by all pipelines) + vk::UniquePipelineLayout m_pipelineLayout; + + // Driver-level pipeline cache + vk::UniquePipelineCache m_pipelineCache; + + // Application-level pipeline cache: config -> pipeline + SCP_unordered_map m_pipelines; + + // Vertex format cache + VulkanVertexFormatCache m_vertexFormatCache; + + bool m_initialized = false; +}; + +// Global pipeline manager access +VulkanPipelineManager* getPipelineManager(); +void setPipelineManager(VulkanPipelineManager* manager); + +} // namespace graphics::vulkan 
+
diff --git a/code/graphics/vulkan/VulkanPostProcessing.cpp b/code/graphics/vulkan/VulkanPostProcessing.cpp
new file mode 100644
index 00000000000..d9d16f2f715
--- /dev/null
+++ b/code/graphics/vulkan/VulkanPostProcessing.cpp
@@ -0,0 +1,727 @@
+#include "VulkanPostProcessing.h"
+
+#include
+
+#include "cmdline/cmdline.h"
+#include "gr_vulkan.h"
+#include "VulkanRenderer.h"
+#include "VulkanBuffer.h"
+#include "VulkanPipeline.h"
+#include "VulkanState.h"
+#include "VulkanDescriptorManager.h"
+#include "graphics/util/uniform_structs.h"
+#include "graphics/post_processing.h"
+#include "graphics/grinternal.h"
+#include "graphics/2d.h"
+#include "lighting/lighting_profiles.h"
+#include "lighting/lighting.h"
+#include "math/vecmat.h"
+#include "nebula/neb.h"
+#include "nebula/volumetrics.h"
+#include "mission/missionparse.h"
+
+extern float Sun_spot;
+extern int Game_subspace_effect;
+extern SCP_vector Lights;
+extern int Num_lights;
+
+
+namespace graphics::vulkan {
+
+// Global post-processor pointer
+static VulkanPostProcessor* g_postProcessor = nullptr;
+
+// Returns the pointer last stored by setPostProcessor() (nullptr by default).
+VulkanPostProcessor* getPostProcessor()
+{
+	return g_postProcessor;
+}
+
+// Stores the pointer only; nothing is created or destroyed here.
+void setPostProcessor(VulkanPostProcessor* pp)
+{
+	g_postProcessor = pp;
+}
+
+/**
+ * @brief Create the scene-level post-processing resources
+ *
+ * In order: HDR scene color and depth targets, the effect and depth-copy
+ * snapshot images, the clearing and loading scene render passes, the scene
+ * framebuffer, the linear and mipmap samplers, the tonemapping UBO and the
+ * scratch UBO. Afterwards the optional subsystems (bloom, LDR targets,
+ * distortion, G-buffer, lighting/fog wiring, MSAA) are set up; their failures
+ * are logged and treated as non-fatal.
+ *
+ * @param device      Logical device stored in the shared context
+ * @param physDevice  Only used to query format support for HDR_COLOR_FORMAT
+ * @param memMgr      Memory manager stored in the shared context
+ * @param extent      Size used for every scene-sized image and the framebuffer
+ * @param depthFormat Format of the scene depth image and its copy
+ * @return true on success or if already initialized; false if a required
+ *         resource could not be created
+ */
+bool VulkanPostProcessor::init(vk::Device device, vk::PhysicalDevice physDevice,
+		VulkanMemoryManager* memMgr, vk::Extent2D extent,
+		vk::Format depthFormat)
+{
+	if (m_initialized) {
+		return true;
+	}
+
+	m_ctx.device = device;
+	m_ctx.memoryManager = memMgr;
+	m_ctx.sceneExtent = extent;
+	m_ctx.depthFormat = depthFormat;
+
+	// Verify RGBA16F support for color attachment + sampling
+	{
+		vk::FormatProperties props = physDevice.getFormatProperties(HDR_COLOR_FORMAT);
+		if (!(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eColorAttachment) ||
+			!(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eSampledImage)) {
+			mprintf(("VulkanPostProcessor: RGBA16F not supported for color attachment + sampling!\n"));
+			return false;
+		}
+	}
+
+	// Create HDR scene color target (RGBA16F)
+	// eTransferSrc needed for copy_effect_texture (mid-scene snapshot)
+	// eTransferDst needed for deferred_lighting_finish (emissive→color copy)
+	// This is the only failure path that does not call shutdown(): no resource has
+	// been created before this point (assumes createImage cleans up after itself
+	// on failure — TODO confirm).
+	if (!createImage(extent.width, extent.height, HDR_COLOR_FORMAT,
+			vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled
+			| vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst,
+			vk::ImageAspectFlagBits::eColor,
+			m_sceneColor.image, m_sceneColor.view, m_sceneColor.allocation)) {
+		mprintf(("VulkanPostProcessor: Failed to create scene color image!\n"));
+		return false;
+	}
+	m_sceneColor.format = HDR_COLOR_FORMAT;
+	m_sceneColor.width = extent.width;
+	m_sceneColor.height = extent.height;
+
+	// Create scene depth target
+	if (!createImage(extent.width, extent.height, depthFormat,
+			vk::ImageUsageFlagBits::eDepthStencilAttachment
+			| vk::ImageUsageFlagBits::eSampled
+			| vk::ImageUsageFlagBits::eTransferSrc,
+			vk::ImageAspectFlagBits::eDepth, // View uses depth-only aspect
+			m_sceneDepth.image, m_sceneDepth.view, m_sceneDepth.allocation)) {
+		mprintf(("VulkanPostProcessor: Failed to create scene depth image!\n"));
+		shutdown();
+		return false;
+	}
+	m_sceneDepth.format = depthFormat;
+	m_sceneDepth.width = extent.width;
+	m_sceneDepth.height = extent.height;
+
+	// Create effect/composite texture (RGBA16F, snapshot of scene color for distortion/soft particles)
+	if (!createImage(extent.width, extent.height, HDR_COLOR_FORMAT,
+			vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled,
+			vk::ImageAspectFlagBits::eColor,
+			m_sceneEffect.image, m_sceneEffect.view, m_sceneEffect.allocation)) {
+		mprintf(("VulkanPostProcessor: Failed to create scene effect image!\n"));
+		shutdown();
+		return false;
+	}
+	m_sceneEffect.format = HDR_COLOR_FORMAT;
+	m_sceneEffect.width = extent.width;
+	m_sceneEffect.height = extent.height;
+
+	// Create scene depth copy (samplable copy for soft particles)
+	// Same depth format, usage: eTransferDst (copy target) + eSampled (fragment shader reads)
+	if (!createImage(extent.width, extent.height, depthFormat,
+			vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled,
+			vk::ImageAspectFlagBits::eDepth,
+			m_sceneDepthCopy.image, m_sceneDepthCopy.view, m_sceneDepthCopy.allocation)) {
+		mprintf(("VulkanPostProcessor: Failed to create scene depth copy image!\n"));
+		shutdown();
+		return false;
+	}
+	m_sceneDepthCopy.format = depthFormat;
+	m_sceneDepthCopy.width = extent.width;
+	m_sceneDepthCopy.height = extent.height;
+
+	// Create HDR scene render pass
+	// Attachment 0: Color (RGBA16F)
+	//   loadOp=eClear: clear to black each frame
+	//   finalLayout=eShaderReadOnlyOptimal: ready for post-processing sampling
+	// Attachment 1: Depth
+	//   loadOp=eClear: clear to far plane
+	//   finalLayout=eDepthStencilAttachmentOptimal
+	{
+		std::array attachments;
+
+		// Color
+		attachments[0].format = HDR_COLOR_FORMAT;
+		attachments[0].samples = vk::SampleCountFlagBits::e1;
+		attachments[0].loadOp = vk::AttachmentLoadOp::eClear;
+		attachments[0].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[0].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		attachments[0].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		attachments[0].initialLayout = vk::ImageLayout::eUndefined;
+		attachments[0].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		// Depth — storeOp=eStore required for:
+		// 1. copy_effect_texture mid-scene interruption (depth must survive render pass end/resume)
+		// 2. lightshafts pass (samples scene depth after render pass ends)
+		attachments[1].format = depthFormat;
+		attachments[1].samples = vk::SampleCountFlagBits::e1;
+		attachments[1].loadOp = vk::AttachmentLoadOp::eClear;
+		attachments[1].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[1].stencilLoadOp = vk::AttachmentLoadOp::eClear;
+		attachments[1].stencilStoreOp = vk::AttachmentStoreOp::eStore;
+		attachments[1].initialLayout = vk::ImageLayout::eUndefined;
+		attachments[1].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::AttachmentReference colorRef;
+		colorRef.attachment = 0;
+		colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 1;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 1;
+		subpass.pColorAttachments = &colorRef;
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		// Dependency: external → subpass 0
+		// Includes eTransfer in srcStageMask so this render pass is compatible with
+		// m_sceneRenderPassLoad (which follows copy_effect_texture transfer ops).
+		// Vulkan requires render passes sharing a framebuffer to have identical dependencies.
+		// NOTE(review): srcAccessMask names only eTransferRead although srcStageMask also
+		// covers the color-output and early-fragment stages — confirm that earlier
+		// attachment/transfer writes are made available by explicit barriers elsewhere.
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests
+			| vk::PipelineStageFlagBits::eTransfer;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentRead;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_sceneRenderPass = m_ctx.device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create scene render pass: %s\n", e.what()));
+			shutdown();
+			return false;
+		}
+	}
+
+	// Create scene render pass with loadOp=eLoad (for resuming after copy_effect_texture)
+	// Compatible with m_sceneRenderPass (same formats/samples) so shares the same framebuffer
+	{
+		std::array attachments;
+
+		// Color — load existing content, keep final layout for post-processing
+		attachments[0].format = HDR_COLOR_FORMAT;
+		attachments[0].samples = vk::SampleCountFlagBits::e1;
+		attachments[0].loadOp = vk::AttachmentLoadOp::eLoad;
+		attachments[0].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[0].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		attachments[0].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		attachments[0].initialLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		attachments[0].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		// Depth — load existing content
+		attachments[1].format = depthFormat;
+		attachments[1].samples = vk::SampleCountFlagBits::e1;
+		attachments[1].loadOp = vk::AttachmentLoadOp::eLoad;
+		attachments[1].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[1].stencilLoadOp = vk::AttachmentLoadOp::eLoad;
+		attachments[1].stencilStoreOp = vk::AttachmentStoreOp::eStore;
+		attachments[1].initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+		attachments[1].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::AttachmentReference colorRef;
+		colorRef.attachment = 0;
+		colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 1;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 1;
+		subpass.pColorAttachments = &colorRef;
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		// Must match m_sceneRenderPass dependency exactly for render pass compatibility
+		// NOTE(review): the color attachment uses loadOp=eLoad here, but dstAccessMask
+		// carries no eColorAttachmentRead — confirm with synchronization validation.
+		// Any change must be mirrored in the m_sceneRenderPass dependency above.
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests
+			| vk::PipelineStageFlagBits::eTransfer;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentRead;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_sceneRenderPassLoad = m_ctx.device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create scene load render pass: %s\n", e.what()));
+			shutdown();
+			return false;
+		}
+	}
+
+	// Create scene framebuffer
+	// Built against m_sceneRenderPass; also used with m_sceneRenderPassLoad (see above).
+	{
+		std::array fbAttachments = {m_sceneColor.view, m_sceneDepth.view};
+
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_sceneRenderPass;
+		fbInfo.attachmentCount = static_cast(fbAttachments.size());
+		fbInfo.pAttachments = fbAttachments.data();
+		fbInfo.width = extent.width;
+		fbInfo.height = extent.height;
+		fbInfo.layers = 1;
+
+		try {
+			m_sceneFramebuffer = m_ctx.device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create scene framebuffer: %s\n", e.what()));
+			shutdown();
+			return false;
+		}
+	}
+
+	// Create linear sampler for post-processing texture reads
+	{
+		vk::SamplerCreateInfo samplerInfo;
+		samplerInfo.magFilter = vk::Filter::eLinear;
+		samplerInfo.minFilter = vk::Filter::eLinear;
+		samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear;
+		samplerInfo.addressModeU = vk::SamplerAddressMode::eClampToEdge;
+		samplerInfo.addressModeV = vk::SamplerAddressMode::eClampToEdge;
+		samplerInfo.addressModeW = vk::SamplerAddressMode::eClampToEdge;
+		samplerInfo.mipLodBias = 0.0f;
+		samplerInfo.anisotropyEnable = VK_FALSE;
+		samplerInfo.compareEnable = VK_FALSE;
+		samplerInfo.minLod = 0.0f;
+		samplerInfo.maxLod = 0.0f;
+		samplerInfo.borderColor = vk::BorderColor::eFloatOpaqueBlack;
+
+		try {
+			m_ctx.linearSampler = m_ctx.device.createSampler(samplerInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create sampler: %s\n", e.what()));
+			shutdown();
+			return false;
+		}
+	}
+
+	// Create mipmap sampler for bloom textures (supports textureLod)
+	// Identical to the linear sampler above except for maxLod.
+	{
+		vk::SamplerCreateInfo samplerInfo;
+		samplerInfo.magFilter = vk::Filter::eLinear;
+		samplerInfo.minFilter = vk::Filter::eLinear;
+		samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear;
+		samplerInfo.addressModeU = vk::SamplerAddressMode::eClampToEdge;
+		samplerInfo.addressModeV = vk::SamplerAddressMode::eClampToEdge;
+		samplerInfo.addressModeW = vk::SamplerAddressMode::eClampToEdge;
+		samplerInfo.mipLodBias = 0.0f;
+		samplerInfo.anisotropyEnable = VK_FALSE;
+		samplerInfo.compareEnable = VK_FALSE;
+		samplerInfo.minLod = 0.0f;
+		samplerInfo.maxLod = static_cast(VulkanBloom::MAX_MIP_BLUR_LEVELS);
+		samplerInfo.borderColor = vk::BorderColor::eFloatOpaqueBlack;
+
+		try {
+			m_ctx.mipmapSampler = m_ctx.device.createSampler(samplerInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create mipmap sampler: %s\n", e.what()));
+			shutdown();
+			return false;
+		}
+	}
+
+	// Create persistent UBO for tonemapping parameters
+	{
+		vk::BufferCreateInfo bufInfo;
+		bufInfo.size = sizeof(graphics::generic_data::tonemapping_data);
+		bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer;
+		bufInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		try {
+			m_tonemapUBO = m_ctx.device.createBuffer(bufInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create tonemap UBO: %s\n", e.what()));
+			shutdown();
+			return false;
+		}
+
+		if (!m_ctx.memoryManager->allocateBufferMemory(m_tonemapUBO, MemoryUsage::CpuToGpu, m_tonemapUBOAlloc)) {
+			mprintf(("VulkanPostProcessor: Failed to allocate tonemap UBO memory!\n"));
+			// The buffer is destroyed and nulled here, before shutdown() runs its own
+			// m_tonemapUBO check.
+			m_ctx.device.destroyBuffer(m_tonemapUBO);
+			m_tonemapUBO = nullptr;
+			shutdown();
+			return false;
+		}
+
+		// Write default passthrough tonemapping data (linear, exposure=1.0)
+		// A failed mapping is silently skipped; the buffer then keeps whatever
+		// contents the allocation started with.
+		auto* mapped = static_cast(m_ctx.memoryManager->mapMemory(m_tonemapUBOAlloc));
+		if (mapped) {
+			memset(mapped, 0, sizeof(graphics::generic_data::tonemapping_data));
+			mapped->exposure = 1.0f;
+			mapped->tonemapper = 0; // Linear
+			m_ctx.memoryManager->unmapMemory(m_tonemapUBOAlloc);
+		}
+	}
+
+	// Create the shared scratch UBO (used by every subsystem's
drawFullscreenTriangle)
+	if (!m_ctx.initScratchUBO()) {
+		mprintf(("VulkanPostProcessor: Failed to create scratch UBO!\n"));
+		shutdown();
+		return false;
+	}
+
+	// Initialize bloom resources (non-fatal if it fails)
+	if (!initBloom()) {
+		mprintf(("VulkanPostProcessor: Bloom initialization failed (non-fatal)\n"));
+	}
+
+	// Initialize LDR targets for tonemapping + FXAA (non-fatal if it fails)
+	if (!initLDRTargets()) {
+		mprintf(("VulkanPostProcessor: LDR target initialization failed (non-fatal)\n"));
+	}
+
+	// Initialize distortion ping-pong textures (non-fatal if it fails)
+	m_distortion.init(m_ctx);
+
+	// Initialize G-buffer for deferred lighting (non-fatal)
+	if (!initGBuffer()) {
+		mprintf(("VulkanPostProcessor: G-buffer initialization failed (non-fatal)\n"));
+	}
+
+	// Wire the deferred light accumulation + fog subsystems (resources are lazy).
+	m_lighting.init(m_ctx, m_sceneColor, m_deferred, m_shadow);
+	m_fog.init(m_ctx, m_sceneColor, m_sceneDepth, m_sceneDepthCopy, m_deferred);
+
+	// Initialize MSAA resources if MSAA is enabled and G-buffer is ready
+	if (m_deferred.isInitialized() && Cmdline_msaa_enabled > 0) {
+		if (!initMSAA()) {
+			mprintf(("VulkanPostProcessor: MSAA initialization failed (non-fatal, disabling MSAA)\n"));
+			Cmdline_msaa_enabled = 0;
+		}
+	}
+
+	m_initialized = true;
+	mprintf(("VulkanPostProcessor: Initialized (%ux%u, RGBA16F scene color)\n",
+		extent.width, extent.height));
+	return true;
+}
+
+// Releases everything init() created. init() also calls this on its failure
+// paths, so every handle destroyed directly here is checked first. Without a
+// device only the initialized flag is cleared.
+void VulkanPostProcessor::shutdown()
+{
+	if (!m_ctx.device) {
+		m_initialized = false;
+		return;
+	}
+
+	// Destroys one scene target in view, image, allocation order.
+	auto releaseTarget = [this](auto& target) {
+		if (target.view) {
+			m_ctx.device.destroyImageView(target.view);
+			target.view = nullptr;
+		}
+		if (target.image) {
+			m_ctx.device.destroyImage(target.image);
+			target.image = nullptr;
+		}
+		if (target.allocation.isValid()) {
+			m_ctx.memoryManager->freeAllocation(target.allocation);
+		}
+	};
+
+	// Wait for the device before destroying anything.
+	m_ctx.device.waitIdle();
+
+	// Subsystems go first, then the shared scratch UBO.
+	m_fog.shutdown();
+	shutdownShadowPass();
+	shutdownMSAA();
+	m_lighting.shutdown();
+	shutdownGBuffer();
+	shutdownLDRTargets();
+	shutdownBloom();
+
+	m_ctx.shutdownScratchUBO();
+
+	if (m_ctx.mipmapSampler) {
+		m_ctx.device.destroySampler(m_ctx.mipmapSampler);
+		m_ctx.mipmapSampler = nullptr;
+	}
+
+	// Tonemapping UBO: buffer handle first, then its memory.
+	if (m_tonemapUBO) {
+		m_ctx.device.destroyBuffer(m_tonemapUBO);
+		m_tonemapUBO = nullptr;
+	}
+	if (m_tonemapUBOAlloc.isValid()) {
+		m_ctx.memoryManager->freeAllocation(m_tonemapUBOAlloc);
+	}
+
+	if (m_ctx.linearSampler) {
+		m_ctx.device.destroySampler(m_ctx.linearSampler);
+		m_ctx.linearSampler = nullptr;
+	}
+
+	// Framebuffer before the two render passes.
+	if (m_sceneFramebuffer) {
+		m_ctx.device.destroyFramebuffer(m_sceneFramebuffer);
+		m_sceneFramebuffer = nullptr;
+	}
+	if (m_sceneRenderPassLoad) {
+		m_ctx.device.destroyRenderPass(m_sceneRenderPassLoad);
+		m_sceneRenderPassLoad = nullptr;
+	}
+	if (m_sceneRenderPass) {
+		m_ctx.device.destroyRenderPass(m_sceneRenderPass);
+		m_sceneRenderPass = nullptr;
+	}
+
+	// Scene targets: effect/composite, color, depth, depth copy.
+	releaseTarget(m_sceneEffect);
+	releaseTarget(m_sceneColor);
+	releaseTarget(m_sceneDepth);
+	releaseTarget(m_sceneDepthCopy);
+
+	// Distortion textures go last.
+	m_distortion.shutdown();
+
+	m_initialized = false;
+}
+
+// Copies the current lighting-profile exposure, tonemapper id and piecewise
+// curve intermediates into the tonemapping UBO. Does nothing when the UBO or
+// the memory manager is missing, or when the buffer cannot be mapped.
+void VulkanPostProcessor::updateTonemappingUBO()
+{
+	if (!m_tonemapUBO || !m_ctx.memoryManager) {
+		return;
+	}
+
+	namespace ltp = lighting_profiles;
+
+	auto* ubo = static_cast<graphics::generic_data::tonemapping_data*>(
+		m_ctx.memoryManager->mapMemory(m_tonemapUBOAlloc));
+	if (!ubo) {
+		return;
+	}
+
+	auto curve = ltp::current_piecewise_intermediates();
+	ubo->exposure = ltp::current_exposure();
+	ubo->tonemapper = static_cast<int>(ltp::current_tonemapper());
+	ubo->x0 = curve.x0;
+	ubo->y0 = curve.y0;
+	ubo->x1 = curve.x1;
+	ubo->toe_B = curve.toe_B;
+	ubo->toe_lnA = curve.toe_lnA;
+	ubo->sh_B = curve.sh_B;
+	ubo->sh_lnA = curve.sh_lnA;
+	ubo->sh_offsetX = curve.sh_offsetX;
+	ubo->sh_offsetY = curve.sh_offsetY;
+
+	m_ctx.memoryManager->unmapMemory(m_tonemapUBOAlloc);
+}
+
+void VulkanPostProcessor::copyEffectTexture(vk::CommandBuffer cmd) const
+{
+	// Runs mid-scene, outside a render pass: the scene render pass has ended, so
+	// scene color is in eShaderReadOnlyOptimal. The snapshot lands in the effect
+	// texture, which distortion/soft particle shaders sample.
+	copyImageToImage(
+		cmd,
+		m_sceneColor.image,
+		vk::ImageLayout::eShaderReadOnlyOptimal,
+		vk::ImageLayout::eColorAttachmentOptimal,
+		m_sceneEffect.image,
+		vk::ImageLayout::eUndefined,
+		vk::ImageLayout::eShaderReadOnlyOptimal,
+		m_ctx.sceneExtent);
+}
+
+void VulkanPostProcessor::copySceneDepth(vk::CommandBuffer cmd) const
+{
+	// Called mid-scene, outside a render pass.
+	// Copies scene depth → depth copy texture so soft particle shaders can sample it.
+	// Scene depth is in eDepthStencilAttachmentOptimal (from the ended scene render pass).
+	copyImageToImage(cmd,
+		m_sceneDepth.image, vk::ImageLayout::eDepthStencilAttachmentOptimal, vk::ImageLayout::eDepthStencilAttachmentOptimal,
+		m_sceneDepthCopy.image, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal,
+		m_ctx.sceneExtent,
+		imageAspectFromFormat(m_ctx.depthFormat));
+}
+
+/**
+ * @brief Draw the final image with the tonemapping shader as a fullscreen triangle
+ *
+ * The pipeline is built for the state tracker's current render pass, so this
+ * records into whatever pass is active on @p cmd (presumably the swap chain
+ * pass — confirm against the caller). Returns without drawing if any manager
+ * is unavailable or the pipeline cannot be obtained.
+ *
+ * @param cmd Command buffer that receives the draw call
+ */
+void VulkanPostProcessor::blitToSwapChain(vk::CommandBuffer cmd)
+{
+	// If LDR targets exist, executeTonemap()+executeFXAA() already ran.
+	// Blit from the latest post-processing result with passthrough settings.
+	// Otherwise, fall back to direct HDR→swap chain tonemapping.
+	bool useLdr = m_ldrInitialized;
+
+	if (!useLdr) {
+		// Update tonemapping parameters from engine lighting profile
+		updateTonemappingUBO();
+	}
+
+	auto* pipelineMgr = getPipelineManager();
+	auto* descriptorMgr = getDescriptorManager();
+	auto* stateTracker = getStateTracker();
+	auto* bufferMgr = getBufferManager();
+
+	// bufferMgr is only null-checked; it is not used again in this function.
+	if (!pipelineMgr || !descriptorMgr || !stateTracker || !bufferMgr) {
+		return;
+	}
+
+	// Build pipeline config for tonemapping (fullscreen, no depth, no blending)
+	PipelineConfig config;
+	config.shaderType = SDR_TYPE_POST_PROCESS_TONEMAPPING;
+	config.shaderFlags = useLdr ? SDR_FLAG_TONEMAPPING_LINEAR_OUT : 0;
+	config.vertexLayoutHash = 0; // Empty vertex layout
+	config.primitiveType = PRIM_TYPE_TRIS;
+	config.depthMode = ZBUFFER_TYPE_NONE;
+	config.blendMode = ALPHA_BLEND_NONE;
+	config.cullEnabled = false;
+	config.depthWriteEnabled = false;
+	config.renderPass = stateTracker->getCurrentRenderPass();
+
+	// Get or create the pipeline
+	vertex_layout emptyLayout; // No vertex components
+	vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+	if (!pipeline) {
+		mprintf(("VulkanPostProcessor: Failed to get tonemapping pipeline!\n"));
+		return;
+	}
+
+	vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout();
+	stateTracker->bindPipeline(pipeline, pipelineLayout);
+
+	// Set viewport (non-flipped for post-processing — textures are already
+	// in the correct Vulkan orientation, no Y-flip needed)
+	stateTracker->setViewport(0.0f, 0.0f,
+		static_cast(m_ctx.sceneExtent.width),
+		static_cast(m_ctx.sceneExtent.height));
+
+	stateTracker->applyDynamicState();
+
+	DescriptorWriter writer;
+	writer.reset(m_ctx.device, descriptorMgr->getFallbacks());
+
+	// Set 1: Material — source texture at array slot 0
+	vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+	Verify(materialSet);
+	writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material));
+	{
+		// Every slot starts as the fallback texture; only the sampler and view of
+		// slot 0 are replaced, so its remaining fields keep the fallback's values.
+		std::array texArrayInfos;
+		texArrayInfos.fill(descriptorMgr->getFallbacks().texture2D);
+		texArrayInfos[0].sampler = m_ctx.linearSampler;
+		// Source priority: luminance target if post effects were applied, else the
+		// LDR target when it exists, else the HDR scene color.
+		texArrayInfos[0].imageView = m_postEffectsApplied ? m_sceneLuminance.view
+			: useLdr ? m_sceneLdr.view
+			: m_sceneColor.view;
+		writer.setImageArray(MaterialBinding::TextureArray, texArrayInfos);
+	}
+
+	// Set 2: PerDraw — tonemapping UBO
+	vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+	writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw));
+
+	// When blitting LDR, use passthrough tonemapping (exposure=1, linear)
+	// NOTE(review): this overwrites m_tonemapUBO on the CPU at record time. If an
+	// earlier pass recorded for the same submission also binds m_tonemapUBO, it
+	// would read these passthrough values when executed — confirm that
+	// executeTonemap() sources its parameters from a different buffer.
+	if (useLdr) {
+		auto* mapped = static_cast(
+			m_ctx.memoryManager->mapMemory(m_tonemapUBOAlloc));
+		Verify(mapped);
+		memset(mapped, 0, sizeof(graphics::generic_data::tonemapping_data));
+		mapped->exposure = 1.0f;
+		mapped->tonemapper = 0; // Linear passthrough
+		m_ctx.memoryManager->unmapMemory(m_tonemapUBOAlloc);
+	}
+	writer.setBuffer(PerDrawBinding::GenericData, {m_tonemapUBO, 0,
+		sizeof(graphics::generic_data::tonemapping_data)});
+	writer.flush();
+	stateTracker->bindDescriptorSet(DescriptorSetIndex::Material, materialSet);
+	stateTracker->bindDescriptorSet(DescriptorSetIndex::PerDraw, perDrawSet);
+
+	// Draw fullscreen triangle (3 vertices from gl_VertexIndex, no vertex buffer)
+	cmd.draw(3, 1, 0, 0);
+}
+
+// No-op: In OpenGL, begin/end push/pop an FBO and run the post-processing
+// pipeline. In Vulkan, this is handled by vulkan_scene_texture_begin/end
+// which manage the HDR render pass and post-processing passes. These
+// functions are not actively called by the engine.
+void vulkan_post_process_begin() {}
+void vulkan_post_process_end() {}
+
+// No-op: In OpenGL, save/restore swap the depth attachment between
+// Scene_depth_texture and Cockpit_depth_texture to isolate cockpit
+// depth from the main scene. In Vulkan, the render pass loadOp=eClear
+// clears depth at the start of each scene pass, and separate cockpit
+// depth isolation is not yet implemented. Called from ship.cpp during
+// cockpit rendering but degrades gracefully as a no-op (cockpit just
+// shares the scene depth buffer).
+void vulkan_post_process_save_zbuffer() {}
+void vulkan_post_process_restore_zbuffer() {}
+
+// Applies a scripted post-processing value by effect name (case-insensitive).
+// "lightshafts" sets the lightshaft intensity to value/100 and switches it on
+// for any non-zero value. Any other name updates the first matching post
+// effect: intensity = value / div + add, and its rgb when a non-zero vector
+// is supplied. Ignored when post-processing is disabled, the manager is
+// missing, or name is null.
+void vulkan_post_process_set_effect(const char* name, int value, const vec3d* rgb)
+{
+	if (!Gr_post_processing_enabled || !graphics::Post_processing_manager || name == nullptr) {
+		return;
+	}
+
+	auto& lightshafts = graphics::Post_processing_manager->getLightshaftParams();
+	if (stricmp("lightshafts", name) == 0) {
+		lightshafts.intensity = value / 100.0f;
+		lightshafts.on = (value != 0);
+		return;
+	}
+
+	for (auto& effect : graphics::Post_processing_manager->getPostEffects()) {
+		if (stricmp(effect.name.c_str(), name) != 0) {
+			continue;
+		}
+
+		effect.intensity = (value / effect.div) + effect.add;
+
+		// A null pointer or an all-zero vector leaves the effect's rgb untouched.
+		const bool hasColor = (rgb != nullptr) && !(vmd_zero_vector == *rgb);
+		if (hasColor) {
+			effect.rgb = *rgb;
+		}
+		break;
+	}
+}
+
+// Resets every post effect's intensity to its default_intensity; does nothing
+// when there is no post-processing manager.
+void vulkan_post_process_set_defaults()
+{
+	if (graphics::Post_processing_manager) {
+		for (auto& effect : graphics::Post_processing_manager->getPostEffects()) {
+			effect.intensity = effect.default_intensity;
+		}
+	}
+}
+
+} // namespace graphics::vulkan
+
diff --git a/code/graphics/vulkan/VulkanPostProcessing.h b/code/graphics/vulkan/VulkanPostProcessing.h
new file mode 100644
index 00000000000..668d68d1178
--- /dev/null
+++ b/code/graphics/vulkan/VulkanPostProcessing.h
@@ -0,0 +1,883 @@
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "VulkanMemory.h"
+
+#include
+#include
+
+
+namespace graphics::vulkan {
+
+/**
+ * @brief Shared drawing infrastructure for post-processing subsystems
+ *
+ * Bundles the handles and helpers that every post-processing pass needs
+ * (logical device, memory manager, scene geometry, samplers, the per-draw
+ * scratch UBO ring, and the image/fullscreen-draw helpers). Owned by
+ * VulkanPostProcessor and intended to be passed by reference to the
+ * individual subsystem passes as they are extracted into their own types.
+ */
+struct PostProcessContext {
+	// Device, allocator, scene size and depth format; VulkanPostProcessor::init()
+	// assigns these before it creates any resource.
+	vk::Device device;
+	VulkanMemoryManager* memoryManager = nullptr;
+	vk::Extent2D sceneExtent;
+	vk::Format depthFormat = vk::Format::eUndefined;
+
+	// Shared samplers for post-processing texture reads
+	vk::Sampler linearSampler; // maxLod=0
+	vk::Sampler mipmapSampler; // mipmap support (bloom)
+
+	// Per-draw scratch UBO ring shared by all fullscreen effect passes.
+	// Each draw consumes one slot at scratchUBOCursor; callers reset the
+	// cursor when starting a fresh group of passes.
+	static constexpr size_t SCRATCH_UBO_SLOT_SIZE = 256; // >= minUniformBufferOffsetAlignment
+	static constexpr uint32_t SCRATCH_UBO_MAX_SLOTS = 24;
+	vk::Buffer scratchUBO;
+	VulkanAllocation scratchUBOAlloc;
+	void* scratchUBOMapped = nullptr;
+	uint32_t scratchUBOCursor = 0;
+
+	/**
+	 * @brief Create a single-mip 2D image + view backed by GPU-only memory
+	 */
+	bool createImage(uint32_t width, uint32_t height, vk::Format format,
+		vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect,
+		vk::Image& outImage, vk::ImageView& outView,
+		VulkanAllocation& outAllocation,
+		vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1)
+		const;
+
+	/**
+	 * @brief Draw a fullscreen triangle through the post-processing pipeline
+	 *
+	 * Begins/ends the given render pass and binds material + per-draw sets.
+	 * Optional UBO data is written into the next scratch UBO slot.
+	 */
+	void drawFullscreenTriangle(vk::CommandBuffer cmd, vk::RenderPass renderPass,
+		vk::Framebuffer framebuffer, vk::Extent2D extent,
+		int shaderType,
+		vk::ImageView textureView, vk::Sampler sampler,
+		const void* uboData, size_t uboSize,
+		int blendMode,
+		unsigned int shaderFlags = 0);
+
+	/**
+	 * @brief Generate a mip chain for an image (transitions mip 0 first)
+	 */
+	static void generateMipmaps(vk::CommandBuffer cmd, vk::Image image,
+		uint32_t width, uint32_t height, uint32_t mipLevels);
+
+	/**
+	 * @brief Create the shared scratch UBO used by drawFullscreenTriangle
+	 * @return false on failure (caller should treat as fatal)
+	 */
+	bool initScratchUBO();
+	// Counterpart of initScratchUBO(); VulkanPostProcessor::shutdown() calls it.
+	void shutdownScratchUBO();
+};
+
+/**
+ * @brief A single-mip 2D color/depth render target (image + view + allocation)
+ *
+ * Shared value type used by the post-processor and its subsystems.
+ */
+struct RenderTarget {
+	vk::Image image;
+	vk::ImageView view;
+	VulkanAllocation allocation;
+	// Bookkeeping only: the post-processor fills these in after the image is created.
+	vk::Format format = vk::Format::eUndefined;
+	uint32_t width = 0;
+	uint32_t height = 0;
+};
+
+/**
+ * @brief Distortion ping-pong textures (32x32 RGBA8)
+ *
+ * Self-contained subsystem: owns two ping-pong textures plus a LINEAR/REPEAT
+ * sampler, and scrolls/refreshes them on a ~30ms timer to match OpenGL's
+ * gr_opengl_update_distortion(). Thruster rendering samples the most recently
+ * written texture via getTextureInfo().
+ */
+class VulkanDistortion {
+public:
+	// The post-processor ignores init()'s result (distortion is optional there).
+	bool init(PostProcessContext& ctx);
+	void shutdown();
+
+	/**
+	 * @brief Advance the ping-pong textures (no-op until the ~30ms timer fires)
+	 * @param cmd Active command buffer (must be outside a render pass)
+	 * @param frametime Time since last frame in seconds
+	 */
+	void update(vk::CommandBuffer cmd, float frametime);
+
+	bool isInitialized() const { return m_initialized; }
+
+	/**
+	 * @brief DescriptorImageInfo for the current (most recently written) texture
+	 *
+	 * Returns a default-constructed info if not initialized.
+	 */
+	vk::DescriptorImageInfo getTextureInfo() const;
+
+private:
+	PostProcessContext* m_ctx = nullptr;
+	std::array m_tex;
+	int m_switch = 0; // Which texture is the current read source
+	float m_timer = 0.0f; // Accumulator for ~30ms update interval
+	vk::Sampler m_sampler; // LINEAR filter, REPEAT wrapping
+	bool m_initialized = false;
+	bool m_firstUpdate = true; // First update needs eUndefined old layout
+};
+
+/**
+ * @brief Cascaded shadow map (VSM) render target
+ *
+ * Self-contained, lazily-initialized subsystem: owns the layered color (VSM
+ * variance) and depth array images, their render pass, and the layered
+ * framebuffer. Sized from the current Shadow_quality on init.
+ */
+class VulkanShadowMap {
+public:
+	/**
+	 * @brief Lazily create shadow resources (sized from Shadow_quality)
+	 * @return false if shadows are disabled or creation failed
+	 */
+	bool init(PostProcessContext& ctx);
+	void shutdown();
+
+	// Plain accessors: each returns the corresponding private member by value.
+	bool isInitialized() const { return m_initialized; }
+	int textureSize() const { return m_textureSize; }
+	vk::ImageView colorView() const { return m_color.view; }
+	vk::Image colorImage() const { return m_color.image; }
+	vk::Image depthImage() const { return m_depth.image; }
+	vk::RenderPass renderPass() const { return m_renderPass; }
+	vk::Framebuffer framebuffer() const { return m_framebuffer; }
+
+private:
+	PostProcessContext* m_ctx = nullptr;
+	RenderTarget m_color; // RGBA16F, 2D array (MAX_SHADOW_CASCADES layers)
+	RenderTarget m_depth; // D32F, 2D array (MAX_SHADOW_CASCADES layers)
+	vk::RenderPass m_renderPass;
+	vk::Framebuffer m_framebuffer;
+	int m_textureSize = 0;
+	bool m_initialized = false;
+};
+
+/**
+ * @brief HDR bloom (bright-pass → mip blur → additive composite)
+ *
+ * Self-contained subsystem owning two half-resolution mip chains plus the
+ * bloom/composite render passes. Composites additively back into the scene
+ * color target, which is owned by the post-processor and referenced here.
+ */
+class VulkanBloom {
+public:
+	// Also used by VulkanPostProcessor::init() as maxLod of the shared mipmap sampler.
+	static constexpr int MAX_MIP_BLUR_LEVELS = 4;
+
+	/**
+	 * @brief Create bloom resources (sized to half the scene extent)
+	 * @param sceneColor Scene HDR color target to composite into (must outlive this)
+	 */
+	bool init(PostProcessContext& ctx, const RenderTarget& sceneColor);
+	void shutdown();
+
+	/**
+	 * @brief Run the full bloom chain and composite it onto the scene color
+	 * @param cmd Active command buffer (must be outside a render pass)
+	 */
+	void execute(vk::CommandBuffer cmd);
+
+	bool isInitialized() const { return m_initialized; }
+
+private:
+	// One bloom image together with its whole-chain view and per-mip views/framebuffers.
+	struct BloomTarget {
+		vk::Image image;
+		VulkanAllocation allocation;
+		vk::ImageView fullView; // All mips (textureLod sampling)
+		std::array mipViews = {}; // Per-mip views (framebuffer attachment)
+		std::array mipFramebuffers = {};
+	};
+
+	PostProcessContext* m_ctx = nullptr;
+	const RenderTarget* m_sceneColor = nullptr;
+	std::array m_tex; // Half-res RGBA16F, 4 mip levels
+	uint32_t m_width = 0; // Half of scene width
+	uint32_t m_height = 0; // Half of scene height
+	vk::RenderPass m_renderPass; // Color-only RGBA16F, loadOp=eDontCare
+	vk::RenderPass m_compositeRenderPass; // Color-only RGBA16F, loadOp=eLoad (additive to scene)
+	vk::Framebuffer m_sceneColorFB; // Scene color as attachment for bloom composite
+	bool m_initialized = false;
+};
+
+/**
+ * @brief Deferred geometry buffer (G-buffer) + optional MSAA G-buffer & resolve
+ *
+ * Cohesive subsystem owning the single-sample G-buffer targets (position, normal,
+ * specular, emissive, composite + a samplable normal copy) and, when MSAA is
+ * enabled, the multisample G-buffer that resolves into those single-sample
+ * targets. Also owns the render-pass / framebuffer builders shared by both paths.
+ * Renders into the scene color (attachment 0) and scene depth, both owned by the
+ * post-processor and referenced here.
+ */ +class VulkanDeferredGBuffer { +public: + // ===== G-Buffer Layout Constants ===== + // Full layout: [0]=color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=composite, [6]=depth + // MSAA layout: [0]=color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=depth (no composite) + static constexpr uint32_t GBUF_ATT_COLOR = 0; + static constexpr uint32_t GBUF_ATT_POSITION = 1; + static constexpr uint32_t GBUF_ATT_NORMAL = 2; + static constexpr uint32_t GBUF_ATT_SPECULAR = 3; + static constexpr uint32_t GBUF_ATT_EMISSIVE = 4; + static constexpr uint32_t GBUF_ATT_COMPOSITE = 5; // Full layout only + + static constexpr uint32_t GBUF_COLOR_ATTACHMENT_COUNT = 6; // Full layout (with composite) + static constexpr uint32_t MSAA_COLOR_ATTACHMENT_COUNT = 5; // MSAA layout (without composite) + + static constexpr vk::Format GBUF_FORMAT_COLOR = vk::Format::eR16G16B16A16Sfloat; + static constexpr vk::Format GBUF_FORMAT_POSITION = vk::Format::eR16G16B16A16Sfloat; + static constexpr vk::Format GBUF_FORMAT_NORMAL = vk::Format::eR16G16B16A16Sfloat; + static constexpr vk::Format GBUF_FORMAT_SPECULAR = vk::Format::eR8G8B8A8Unorm; + static constexpr vk::Format GBUF_FORMAT_EMISSIVE = vk::Format::eR16G16B16A16Sfloat; + static constexpr vk::Format GBUF_FORMAT_COMPOSITE = vk::Format::eR16G16B16A16Sfloat; + + /** + * @brief Create the single-sample G-buffer (sized to the scene extent) + * @param sceneColor Scene HDR color (attachment 0); must outlive this + * @param sceneDepth Scene depth attachment; must outlive this + */ + bool init(PostProcessContext& ctx, const RenderTarget& sceneColor, const RenderTarget& sceneDepth); + void shutdown(); + + /** + * @brief Create the multisample G-buffer + resolve resources (lazy) + * @return false if MSAA is disabled or creation failed + */ + bool initMsaa(); + void shutdownMsaa(); + + bool isInitialized() const { return m_gbufInitialized; } + bool isMsaaInitialized() const { return m_msaaInitialized; } + + // Mid-frame layout 
transitions / copies (all outside a render pass) + void transitionForResume(vk::CommandBuffer cmd); + void copyNormal(vk::CommandBuffer cmd); + void transitionMsaaForBegin(vk::CommandBuffer cmd); + void transitionMsaaForResume(vk::CommandBuffer cmd); + + // G-buffer accessors + vk::RenderPass renderPass() const { return m_gbufRenderPass; } + vk::RenderPass renderPassLoad() const { return m_gbufRenderPassLoad; } + vk::Framebuffer framebuffer() const { return m_gbufFramebuffer; } + vk::ImageView positionView() const { return m_gbufPosition.view; } + vk::ImageView normalView() const { return m_gbufNormal.view; } + vk::ImageView specularView() const { return m_gbufSpecular.view; } + vk::ImageView emissiveView() const { return m_gbufEmissive.view; } + vk::ImageView compositeView() const { return m_gbufComposite.view; } + vk::ImageView normalCopyView() const { return m_gbufNormalCopy.view; } + vk::Image emissiveImage() const { return m_gbufEmissive.image; } + vk::Image compositeImage() const { return m_gbufComposite.image; } + vk::Image normalImage() const { return m_gbufNormal.image; } + + // MSAA accessors + vk::RenderPass msaaRenderPass() const { return m_msaaGbufRenderPass; } + vk::RenderPass msaaRenderPassLoad() const { return m_msaaGbufRenderPassLoad; } + vk::Framebuffer msaaFramebuffer() const { return m_msaaGbufFramebuffer; } + vk::RenderPass msaaResolveRenderPass() const { return m_msaaResolveRenderPass; } + vk::Framebuffer msaaResolveFramebuffer() const { return m_msaaResolveFramebuffer; } + vk::RenderPass msaaEmissiveCopyRenderPass() const { return m_msaaEmissiveCopyRenderPass; } + vk::Framebuffer msaaEmissiveCopyFramebuffer() const { return m_msaaEmissiveCopyFramebuffer; } + vk::ImageView msaaColorView() const { return m_msaaColor.view; } + vk::ImageView msaaPositionView() const { return m_msaaPosition.view; } + vk::ImageView msaaNormalView() const { return m_msaaNormal.view; } + vk::ImageView msaaSpecularView() const { return m_msaaSpecular.view; } + 
vk::ImageView msaaEmissiveView() const { return m_msaaEmissive.view; } + vk::ImageView msaaDepthView() const { return m_msaaDepthView; } + vk::Image msaaColorImage() const { return m_msaaColor.image; } + vk::Image msaaPositionImage() const { return m_msaaPosition.image; } + vk::Image msaaNormalImage() const { return m_msaaNormal.image; } + vk::Image msaaSpecularImage() const { return m_msaaSpecular.image; } + vk::Image msaaEmissiveImage() const { return m_msaaEmissive.image; } + vk::Image msaaDepthImage() const { return m_msaaDepthImage; } + vk::Buffer msaaResolveUBO() const { return m_msaaResolveUBO; } + void* msaaResolveUBOMapped() const { return m_msaaResolveUBOMapped; } + +private: + struct GbufRenderPassConfig { + bool includeComposite; + vk::SampleCountFlagBits samples; + vk::AttachmentLoadOp colorLoadOp; + vk::AttachmentLoadOp depthLoadOp; + vk::ImageLayout colorInitialLayout; + vk::ImageLayout colorFinalLayout; + vk::ImageLayout depthInitialLayout; + bool useResolveDependency = false; + }; + vk::RenderPass createGbufRenderPass(const GbufRenderPassConfig& config); + vk::Framebuffer createGbufFramebuffer(vk::RenderPass renderPass, bool includeComposite, + bool useMsaaImages); + + PostProcessContext* m_ctx = nullptr; + const RenderTarget* m_sceneColor = nullptr; + const RenderTarget* m_sceneDepth = nullptr; + + // ---- G-Buffer (single-sample) ---- + RenderTarget m_gbufPosition; // RGBA16F - view-space position (xyz) + AO (w) + RenderTarget m_gbufNormal; // RGBA16F - view-space normal (xyz) + gloss (w) + RenderTarget m_gbufNormalCopy; // RGBA16F - samplable copy of G-buffer normal (for decals) + RenderTarget m_gbufSpecular; // RGBA8 - specular color (rgb) + fresnel (a) + RenderTarget m_gbufEmissive; // RGBA16F - emissive / pre-lit color + RenderTarget m_gbufComposite; // RGBA16F - light accumulation scratch buffer + vk::RenderPass m_gbufRenderPass; // loadOp=eClear (initial) + vk::RenderPass m_gbufRenderPassLoad; // loadOp=eLoad (resume after mid-pass copy) + 
vk::Framebuffer m_gbufFramebuffer; + bool m_gbufInitialized = false; + + // ---- MSAA G-buffer (multisample) + resolve ---- + RenderTarget m_msaaColor; // RGBA16F (MS) + RenderTarget m_msaaPosition; // RGBA16F (MS) + RenderTarget m_msaaNormal; // RGBA16F (MS) + RenderTarget m_msaaSpecular; // RGBA8 (MS) + RenderTarget m_msaaEmissive; // RGBA16F (MS) + vk::Image m_msaaDepthImage; + vk::ImageView m_msaaDepthView; + VulkanAllocation m_msaaDepthAlloc; + vk::RenderPass m_msaaGbufRenderPass; // eClear, 5 MS color + MS depth + vk::RenderPass m_msaaGbufRenderPassLoad; // eLoad (emissive preserved) + vk::Framebuffer m_msaaGbufFramebuffer; + vk::RenderPass m_msaaResolveRenderPass; // 5 non-MSAA color + depth (via gl_FragDepth) + vk::Framebuffer m_msaaResolveFramebuffer; + vk::RenderPass m_msaaEmissiveCopyRenderPass; // 1 MS color att (for upsample) + vk::Framebuffer m_msaaEmissiveCopyFramebuffer; + vk::Buffer m_msaaResolveUBO; // Per-frame {samples, fov}, persistently mapped + VulkanAllocation m_msaaResolveUBOAlloc; + void* m_msaaResolveUBOMapped = nullptr; + bool m_msaaInitialized = false; +}; + +/** + * @brief Deferred light accumulation (light volumes → G-buffer composite) + * + * Lazily-initialized subsystem owning the light-volume meshes (sphere/cylinder), + * the per-frame deferred light UBO, and the light-accumulation render pass that + * additively blends every scene light into the G-buffer composite. Consumes the + * G-buffer, the shadow map, and the scene color as read-only inputs. 
+ */ +class VulkanDeferredLighting { +public: + /** + * @brief Wire dependencies (GPU resources are created lazily on first render) + */ + void init(PostProcessContext& ctx, const RenderTarget& sceneColor, + const VulkanDeferredGBuffer& gbuffer, const VulkanShadowMap& shadow); + void shutdown(); + + /** + * @brief Accumulate all scene lights into the G-buffer composite + * @param cmd Active command buffer (must be outside a render pass) + */ + void render(vk::CommandBuffer cmd); + +private: + // Light volume meshes (sphere + cylinder for positional lights) + struct LightVolumeMesh { + vk::Buffer vbo; + VulkanAllocation vboAlloc; + vk::Buffer ibo; + VulkanAllocation iboAlloc; + uint32_t vertexCount = 0; + uint32_t indexCount = 0; + }; + + bool initLightVolumes(); + bool initLightAccumPass(); + + static constexpr uint32_t DEFERRED_UBO_SIZE = 256 * 1024; // 256KB for light data + + PostProcessContext* m_ctx = nullptr; + const RenderTarget* m_sceneColor = nullptr; + const VulkanDeferredGBuffer* m_gbuffer = nullptr; + const VulkanShadowMap* m_shadow = nullptr; + + LightVolumeMesh m_sphereMesh; + LightVolumeMesh m_cylinderMesh; + + // Per-frame UBO for deferred light data (lights + globals + matrices) + vk::Buffer m_deferredUBO; + VulkanAllocation m_deferredUBOAlloc; + + vk::RenderPass m_lightAccumRenderPass; // Single RGBA16F color, loadOp=eLoad, additive blend + vk::Framebuffer m_lightAccumFramebuffer; // Composite image as attachment 0 + bool m_lightVolumesInitialized = false; +}; + +/** + * @brief Scene fog + volumetric nebula passes + * + * Lazily-initialized subsystem owning the fog render pass/framebuffer (which + * targets the scene color) and the mipmapped emissive copy used for volumetric + * LOD sampling. Consumes the G-buffer composite/emissive and the scene depth + * copy, performs its own scene-depth copy, and writes into the scene color. 
+ */ +class VulkanFog { +public: + /** + * @brief Wire dependencies (GPU resources are created lazily on first render) + */ + void init(PostProcessContext& ctx, const RenderTarget& sceneColor, + const RenderTarget& sceneDepth, const RenderTarget& sceneDepthCopy, + const VulkanDeferredGBuffer& gbuffer); + void shutdown(); + + /** + * @brief Distance fog pass (composite + depth → scene color) + * @param cmd Active command buffer (must be outside a render pass) + */ + void renderScene(vk::CommandBuffer cmd); + + /** + * @brief Volumetric nebula pass (composite + emissive mips + depth + 3D vol → scene color) + * @param cmd Active command buffer (must be outside a render pass) + */ + void renderVolumetric(vk::CommandBuffer cmd); + +private: + bool initFogPass(); + void copySceneDepth(vk::CommandBuffer cmd); + + PostProcessContext* m_ctx = nullptr; + const RenderTarget* m_sceneColor = nullptr; + const RenderTarget* m_sceneDepth = nullptr; + const RenderTarget* m_sceneDepthCopy = nullptr; + const VulkanDeferredGBuffer* m_gbuffer = nullptr; + + vk::RenderPass m_fogRenderPass; // Color-only RGBA16F, loadOp=eDontCare, finalLayout=eColorAttachmentOptimal + vk::Framebuffer m_fogFramebuffer; // Scene color as color attachment + bool m_fogInitialized = false; + + // Mipmapped emissive copy for volumetric fog LOD sampling + RenderTarget m_emissiveMipmapped; // RGBA16F with full mip chain + uint32_t m_emissiveMipLevels = 0; + vk::ImageView m_emissiveMipmappedFullView; // View with all mip levels + bool m_emissiveMipmappedInitialized = false; +}; + +/** + * @brief Manages Vulkan post-processing pipeline + * + * Owns offscreen render targets (HDR scene color + depth), render passes, + * and executes post-processing passes (tonemapping, bloom, FXAA, etc.) + * between the 3D scene rendering and the final swap chain presentation. 
+ */ +class VulkanPostProcessor { +public: + VulkanPostProcessor() = default; + ~VulkanPostProcessor() = default; + + // Non-copyable + VulkanPostProcessor(const VulkanPostProcessor&) = delete; + VulkanPostProcessor& operator=(const VulkanPostProcessor&) = delete; + + /** + * @brief Initialize post-processing resources + * @param device Vulkan logical device + * @param physDevice Physical device (for format checks) + * @param memMgr Memory manager for allocations + * @param extent Scene rendering resolution + * @param depthFormat Depth format (matches main depth buffer) + * @return true on success + */ + bool init(vk::Device device, vk::PhysicalDevice physDevice, + VulkanMemoryManager* memMgr, vk::Extent2D extent, + vk::Format depthFormat); + + /** + * @brief Shutdown and free all post-processing resources + */ + void shutdown(); + + /** + * @brief Get the HDR scene render pass (for 3D scene rendering) + * + * This render pass has RGBA16F color + depth attachments with loadOp=eClear. + * Used between scene_texture_begin() and scene_texture_end(). + */ + vk::RenderPass getSceneRenderPass() const { return m_sceneRenderPass; } + + /** + * @brief Get the HDR scene render pass with loadOp=eLoad + * + * Compatible with getSceneRenderPass() (same formats/samples) so uses + * the same framebuffer. Used to resume scene rendering after + * copy_effect_texture interrupts the pass. + */ + vk::RenderPass getSceneRenderPassLoad() const { return m_sceneRenderPassLoad; } + + /** + * @brief Get the HDR scene framebuffer + */ + vk::Framebuffer getSceneFramebuffer() const { return m_sceneFramebuffer; } + + /** + * @brief Get the scene rendering extent + */ + vk::Extent2D getSceneExtent() const { return m_ctx.sceneExtent; } + + /** + * @brief Execute post-processing passes and draw result to swap chain + * + * Called after the HDR scene render pass ends and before the resumed + * swap chain render pass begins. Runs tonemapping (and later bloom, + * FXAA, etc.) 
then draws a fullscreen triangle to blit the result + * into the swap chain. + * + * The caller is responsible for: + * 1. Ending the HDR scene render pass before calling this + * 2. Beginning the resumed swap chain render pass before calling this + * (the blit draws INTO the resumed pass) + * + * @param cmd Active command buffer + */ + void blitToSwapChain(vk::CommandBuffer cmd); + + /** + * @brief Execute bloom post-processing passes + * + * Called after the HDR scene render pass ends and before the resumed + * swap chain render pass begins. Manages its own render passes internally. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeBloom(vk::CommandBuffer cmd) { m_bloom.execute(cmd); } + + /** + * @brief Execute tonemapping pass (HDR scene → LDR) + * + * Called after bloom and before FXAA. Renders to Scene_ldr (RGBA8). + * Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeTonemap(vk::CommandBuffer cmd); + + /** + * @brief Execute FXAA anti-aliasing passes + * + * Called after tonemapping. Runs prepass (LDR→luminance) then + * FXAA main pass (luminance→LDR). Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeFXAA(vk::CommandBuffer cmd); + + /** + * @brief Execute post-processing effects (saturation, brightness, etc.) + * + * Called after FXAA and before the final blit. Reads Scene_ldr, writes + * Scene_luminance (reused as temp target). Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + * @return true if effects were applied (blit should read Scene_luminance) + */ + bool executePostEffects(vk::CommandBuffer cmd); + + /** + * @brief Execute lightshafts (god rays) pass + * + * Called after FXAA and before post-effects. 
Additively blends god rays + * onto Scene_ldr based on sun position and depth buffer sampling. + * Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeLightshafts(vk::CommandBuffer cmd); + + /** + * @brief Update distortion ping-pong textures + * + * Called every frame from endSceneRendering(). Internally tracks a ~30ms + * timer. When triggered, scrolls old distortion data right by 1 pixel and + * injects random noise at the left edge (matching OpenGL's + * gr_opengl_update_distortion()). Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + * @param frametime Time since last frame in seconds + */ + void updateDistortion(vk::CommandBuffer cmd, float frametime) { m_distortion.update(cmd, frametime); } + + /** + * @brief Get a ready-to-use DescriptorImageInfo for the current distortion texture + * + * Returns the most recently written distortion texture (the one thrusters + * should read from). Returns a default-constructed info if not initialized. + */ + vk::DescriptorImageInfo getDistortionTextureInfo() const { return m_distortion.getTextureInfo(); } + + /** + * @brief Copy scene color to effect texture for distortion/soft particle sampling + * + * Must be called outside a render pass. Transitions scene color through + * eTransferSrcOptimal and back to eColorAttachmentOptimal (ready for resumed + * scene render pass). Transitions effect texture to eShaderReadOnlyOptimal. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void copyEffectTexture(vk::CommandBuffer cmd) const; + + /** + * @brief Copy scene depth to samplable depth copy for soft particle rendering + * + * Must be called outside a render pass. Transitions scene depth through + * eTransferSrcOptimal and back to eDepthStencilAttachmentOptimal. Transitions + * depth copy to eShaderReadOnlyOptimal for fragment shader sampling. 
+ * + * @param cmd Active command buffer (must be outside a render pass) + */ + void copySceneDepth(vk::CommandBuffer cmd) const; + + /** + * @brief Check if LDR targets are available (tonemapping + FXAA ready) + */ + bool hasLDRTargets() const { return m_ldrInitialized; } + + /** + * @brief Get the scene color image (for layout transitions outside post-processor) + */ + vk::Image getSceneColorImage() const { return m_sceneColor.image; } + + /** + * @brief Get the scene color image view (for post-processing texture binding) + */ + vk::ImageView getSceneColorView() const { return m_sceneColor.view; } + + /** + * @brief Get the scene color sampler + */ + vk::Sampler getSceneColorSampler() const { return m_ctx.linearSampler; } + + /** + * @brief Get a ready-to-use DescriptorImageInfo for the scene effect texture + * + * Available for sampling after copyEffectTexture() has been called. + * Used by distortion and soft particle shaders. + * Returns default-constructed info if the effect texture doesn't exist. + */ + vk::DescriptorImageInfo getSceneEffectTextureInfo() const { + if (!m_sceneEffect.view) return {}; + return {m_ctx.linearSampler, m_sceneEffect.view, vk::ImageLayout::eShaderReadOnlyOptimal}; + } + + /** + * @brief Get the scene depth copy view (for soft particle depth sampling) + * + * Available for sampling after copySceneDepth() has been called. + */ + vk::ImageView getSceneDepthCopyView() const { return m_sceneDepthCopy.view; } + + vk::Format getDepthFormat() const { return m_ctx.depthFormat; } + + /** + * @brief Check if post-processing is initialized + */ + bool isInitialized() const { return m_initialized; } + + // ========== Subsystem access ========== + // Consumers that drive the deferred / shadow passes operate directly on the + // owning subsystem (its resources, render passes, and mid-frame transitions) + // rather than through dozens of per-resource forwarding accessors. 
+ VulkanDeferredGBuffer& deferred() { return m_deferred; } + const VulkanDeferredGBuffer& deferred() const { return m_deferred; } + VulkanShadowMap& shadow() { return m_shadow; } + const VulkanShadowMap& shadow() const { return m_shadow; } + + // ========== Deferred Light Accumulation ========== + + /** + * @brief Render deferred lights into the composite buffer + * + * Reads G-buffer textures, renders light volumes (fullscreen, sphere, cylinder) + * with additive blending into the composite attachment. Manages its own render + * pass internally. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void renderDeferredLights(vk::CommandBuffer cmd) { m_lighting.render(cmd); } + + // ========== Shadow Map ========== + + /** + * @brief Initialize shadow map resources (lazy, called on first use) + * @return true on success + */ + bool initShadowPass() { return m_shadow.init(m_ctx); } + + /** + * @brief Shutdown shadow map resources + */ + void shutdownShadowPass() { m_shadow.shutdown(); } + + /** + * @brief Get a ready-to-use DescriptorImageInfo for the shadow map texture + */ + vk::DescriptorImageInfo getShadowTextureInfo() const { + return {m_ctx.linearSampler, m_shadow.colorView(), vk::ImageLayout::eShaderReadOnlyOptimal}; + } + + // ========== Fog / Volumetric Nebula ========== + + /** + * @brief Render scene fog into scene color + * + * Reads composite (lit result) + depth copy -> writes scene color. + * Must be called outside a render pass. After return, scene color + * is in eColorAttachmentOptimal. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void renderSceneFog(vk::CommandBuffer cmd) { m_fog.renderScene(cmd); } + + /** + * @brief Render volumetric nebula fog into scene color + * + * Reads composite + mipmapped emissive + depth copy + 3D volume textures + * -> writes scene color. Must be called outside a render pass. + * After return, scene color is in eColorAttachmentOptimal. 
+ * + * @param cmd Active command buffer (must be outside a render pass) + */ + void renderVolumetricFog(vk::CommandBuffer cmd) { m_fog.renderVolumetric(cmd); } + +private: + void updateTonemappingUBO(); + + bool createImage(uint32_t width, uint32_t height, vk::Format format, + vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect, + vk::Image& outImage, vk::ImageView& outView, + VulkanAllocation& outAllocation, + vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1) + const + { + return m_ctx.createImage(width, height, format, usage, aspect, + outImage, outView, outAllocation, sampleCount); + } + + // G-buffer / MSAA (forwards to the VulkanDeferredGBuffer subsystem) + bool initGBuffer() { return m_deferred.init(m_ctx, m_sceneColor, m_sceneDepth); } + void shutdownGBuffer() { m_deferred.shutdown(); } + bool initMSAA() { return m_deferred.initMsaa(); } + void shutdownMSAA() { m_deferred.shutdownMsaa(); } + + // LDR target methods + bool initLDRTargets(); + void shutdownLDRTargets(); + + // Bloom pipeline (forwards to the VulkanBloom subsystem) + bool initBloom() { return m_bloom.init(m_ctx, m_sceneColor); } + void shutdownBloom() { m_bloom.shutdown(); } + static void generateMipmaps(vk::CommandBuffer cmd, vk::Image image, + uint32_t width, uint32_t height, uint32_t mipLevels) + { + PostProcessContext::generateMipmaps(cmd, image, width, height, mipLevels); + } + void drawFullscreenTriangle(vk::CommandBuffer cmd, vk::RenderPass renderPass, + vk::Framebuffer framebuffer, vk::Extent2D extent, + int shaderType, + vk::ImageView textureView, vk::Sampler sampler, + const void* uboData, size_t uboSize, + int blendMode, + unsigned int shaderFlags = 0) + { + m_ctx.drawFullscreenTriangle(cmd, renderPass, framebuffer, extent, shaderType, + textureView, sampler, uboData, uboSize, blendMode, shaderFlags); + } + + RenderTarget m_sceneColor; // RGBA16F HDR scene color + RenderTarget m_sceneDepth; // Depth buffer for scene + RenderTarget m_sceneDepthCopy; // Samplable 
copy of scene depth (for soft particles) + RenderTarget m_sceneEffect; // RGBA16F effect/composite (snapshot of scene color) + + // Scene render pass and framebuffer + vk::RenderPass m_sceneRenderPass; // loadOp=eClear (initial scene begin) + vk::RenderPass m_sceneRenderPassLoad; // loadOp=eLoad (resume after copy_effect_texture) + vk::Framebuffer m_sceneFramebuffer; // Shared by both scene render passes (compatible) + + // Persistent UBO for tonemapping shader parameters + vk::Buffer m_tonemapUBO; + VulkanAllocation m_tonemapUBOAlloc; + + // ---- Bloom (self-contained subsystem) ---- + VulkanBloom m_bloom; + + // ---- LDR / FXAA resources ---- + RenderTarget m_sceneLdr; // RGBA8 LDR after tonemapping + RenderTarget m_sceneLuminance; // RGBA8 LDR with luma in alpha (for FXAA) + vk::RenderPass m_ldrRenderPass; // Color-only RGBA8, loadOp=eDontCare + vk::RenderPass m_ldrLoadRenderPass; // Color-only RGBA8, loadOp=eLoad (for additive blending) + vk::Framebuffer m_sceneLdrFB; + vk::Framebuffer m_sceneLuminanceFB; + bool m_ldrInitialized = false; + bool m_postEffectsApplied = false; // Set per-frame by executePostEffects + + // ---- Deferred G-buffer + MSAA (self-contained subsystem) ---- + VulkanDeferredGBuffer m_deferred; + + // ---- Deferred light accumulation (self-contained subsystem) ---- + VulkanDeferredLighting m_lighting; + + // ---- Shadow map (cascaded VSM, self-contained subsystem) ---- + VulkanShadowMap m_shadow; + + // ---- Fog / volumetric nebula (self-contained subsystem) ---- + VulkanFog m_fog; + + // ---- Distortion (ping-pong textures, self-contained subsystem) ---- + VulkanDistortion m_distortion; + + PostProcessContext m_ctx; + + bool m_initialized = false; +}; + +// Global post-processor access +VulkanPostProcessor* getPostProcessor(); +void setPostProcessor(VulkanPostProcessor* pp); + +// gr_screen function pointer implementations for post-processing +void vulkan_post_process_begin(); +void vulkan_post_process_end(); +void 
vulkan_post_process_save_zbuffer(); +void vulkan_post_process_restore_zbuffer(); +void vulkan_post_process_set_effect(const char* name, int value, const vec3d* rgb); +void vulkan_post_process_set_defaults(); + +/** + * @brief Copy one image to another with automatic barrier management + * + * Handles pre-barriers (src→eTransferSrcOptimal, dst→eTransferDstOptimal), + * the copy command, and post-barriers (eTransferSrc→srcNewLayout, eTransferDst→dstNewLayout). + * Access masks and pipeline stages are derived from the layouts automatically. + * + * Skip rule: if srcNewLayout == eTransferSrcOptimal, the src post-barrier is skipped + * (image stays in transfer source layout). Same for dst + eTransferDstOptimal. + * + * @param cmd Active command buffer (must be outside a render pass) + * @param src Source image + * @param srcOldLayout Current layout of source image + * @param srcNewLayout Desired layout of source image after copy + * @param dst Destination image + * @param dstOldLayout Current layout of destination image + * @param dstNewLayout Desired layout of destination image after copy + * @param extent Copy region (width x height) + * @param aspect Image aspect (eColor or eDepth) + * @param dstMipLevels Number of mip levels in dst subresource range (for pre-barrier) + */ +void copyImageToImage( + vk::CommandBuffer cmd, + vk::Image src, vk::ImageLayout srcOldLayout, vk::ImageLayout srcNewLayout, + vk::Image dst, vk::ImageLayout dstOldLayout, vk::ImageLayout dstNewLayout, + vk::Extent2D extent, + vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor, + uint32_t dstMipLevels = 1); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanPostProcessingBloom.cpp b/code/graphics/vulkan/VulkanPostProcessingBloom.cpp new file mode 100644 index 00000000000..59e1750fd2e --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingBloom.cpp @@ -0,0 +1,394 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "gr_vulkan.h" +#include 
"VulkanRenderer.h" +#include "VulkanDescriptorManager.h" +#include "graphics/util/uniform_structs.h" +#include "graphics/post_processing.h" +#include "graphics/grinternal.h" +#include "graphics/2d.h" + + +namespace graphics::vulkan { + + +// ===== Bloom Pipeline Implementation ===== + +// Local UBO struct for blur shader (matches OpenGL's blur_data layout). +struct BlurUBOData { + float texSize; + int level; + int pad[2]; +}; + +bool VulkanBloom::init(PostProcessContext& ctx, const RenderTarget& sceneColor) +{ + m_ctx = &ctx; + m_sceneColor = &sceneColor; + + m_width = m_ctx->sceneExtent.width / 2; + m_height = m_ctx->sceneExtent.height / 2; + + const uint32_t mipLevels = MAX_MIP_BLUR_LEVELS; + + // Create 2 bloom textures (RGBA16F, half-res, 4 mip levels each) + for (size_t i = 0; i < m_tex.size(); i++) { + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = vk::ImageType::e2D; + imageInfo.format = HDR_COLOR_FORMAT; + imageInfo.extent.width = m_width; + imageInfo.extent.height = m_height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = mipLevels; + imageInfo.arrayLayers = 1; + imageInfo.samples = vk::SampleCountFlagBits::e1; + imageInfo.tiling = vk::ImageTiling::eOptimal; + imageInfo.usage = vk::ImageUsageFlagBits::eColorAttachment + | vk::ImageUsageFlagBits::eSampled + | vk::ImageUsageFlagBits::eTransferSrc + | vk::ImageUsageFlagBits::eTransferDst; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + + try { + m_tex[i].image = m_ctx->device.createImage(imageInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanBloom: Failed to create bloom image %zu: %s\n", i, e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateImageMemory(m_tex[i].image, MemoryUsage::GpuOnly, m_tex[i].allocation)) { + mprintf(("VulkanBloom: Failed to allocate bloom image %zu memory!\n", i)); + return false; + } + + // Full image view (all mip levels, for textureLod sampling) + 
vk::ImageViewCreateInfo fullViewInfo; + fullViewInfo.image = m_tex[i].image; + fullViewInfo.viewType = vk::ImageViewType::e2D; + fullViewInfo.format = HDR_COLOR_FORMAT; + fullViewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + fullViewInfo.subresourceRange.baseMipLevel = 0; + fullViewInfo.subresourceRange.levelCount = mipLevels; + fullViewInfo.subresourceRange.baseArrayLayer = 0; + fullViewInfo.subresourceRange.layerCount = 1; + + try { + m_tex[i].fullView = m_ctx->device.createImageView(fullViewInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanBloom: Failed to create bloom %zu full view: %s\n", i, e.what())); + return false; + } + + // Per-mip image views (for framebuffer attachment) + for (uint32_t mip = 0; mip < mipLevels; mip++) { + vk::ImageViewCreateInfo mipViewInfo = fullViewInfo; + mipViewInfo.subresourceRange.baseMipLevel = mip; + mipViewInfo.subresourceRange.levelCount = 1; + + try { + m_tex[i].mipViews[mip] = m_ctx->device.createImageView(mipViewInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanBloom: Failed to create bloom %zu mip %u view: %s\n", i, mip, e.what())); + return false; + } + } + } + + // Create bloom render pass (color-only RGBA16F, loadOp=eDontCare for overwriting) + { + vk::AttachmentDescription att; + att.format = HDR_COLOR_FORMAT; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eDontCare; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eUndefined; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + 
+ vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_renderPass = m_ctx->device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanBloom: Failed to create bloom render pass: %s\n", e.what())); + return false; + } + } + + // Create bloom composite render pass (loadOp=eLoad for additive compositing onto scene color) + { + vk::AttachmentDescription att; + att.format = HDR_COLOR_FORMAT; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eLoad; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | 
vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_compositeRenderPass = m_ctx->device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanBloom: Failed to create bloom composite render pass: %s\n", e.what())); + return false; + } + } + + // Create per-mip framebuffers for bloom textures + for (size_t i = 0; i < m_tex.size(); i++) { + for (uint32_t mip = 0; mip < mipLevels; mip++) { + uint32_t mipW = std::max(1u, m_width >> mip); + uint32_t mipH = std::max(1u, m_height >> mip); + + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_renderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &m_tex[i].mipViews[mip]; + fbInfo.width = mipW; + fbInfo.height = mipH; + fbInfo.layers = 1; + + try { + m_tex[i].mipFramebuffers[mip] = m_ctx->device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanBloom: Failed to create bloom %zu mip %u framebuffer: %s\n", i, mip, e.what())); + return false; + } + } + } + + // Create scene color framebuffer for bloom composite (wraps the scene color as attachment) + { + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_compositeRenderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &m_sceneColor->view; + fbInfo.width = m_ctx->sceneExtent.width; + fbInfo.height = m_ctx->sceneExtent.height; + fbInfo.layers = 1; + + try { + m_sceneColorFB = m_ctx->device.createFramebuffer(fbInfo); + } catch (const 
vk::SystemError& e) { + mprintf(("VulkanBloom: Failed to create scene color bloom framebuffer: %s\n", e.what())); + return false; + } + } + + m_initialized = true; + mprintf(("VulkanBloom: Bloom initialized (%ux%u, %d mip levels)\n", + m_width, m_height, MAX_MIP_BLUR_LEVELS)); + return true; +} + +void VulkanBloom::shutdown() +{ + if (!m_initialized) { + return; + } + + if (m_sceneColorFB) { + m_ctx->device.destroyFramebuffer(m_sceneColorFB); + m_sceneColorFB = nullptr; + } + + for (auto& bt : m_tex) { + for (size_t mip = 0; mip < bt.mipFramebuffers.size(); mip++) { + if (bt.mipFramebuffers[mip]) { + m_ctx->device.destroyFramebuffer(bt.mipFramebuffers[mip]); + bt.mipFramebuffers[mip] = nullptr; + } + if (bt.mipViews[mip]) { + m_ctx->device.destroyImageView(bt.mipViews[mip]); + bt.mipViews[mip] = nullptr; + } + } + if (bt.fullView) { + m_ctx->device.destroyImageView(bt.fullView); + bt.fullView = nullptr; + } + if (bt.image) { + m_ctx->device.destroyImage(bt.image); + bt.image = nullptr; + } + if (bt.allocation.isValid()) { + m_ctx->memoryManager->freeAllocation(bt.allocation); + } + } + + if (m_compositeRenderPass) { + m_ctx->device.destroyRenderPass(m_compositeRenderPass); + m_compositeRenderPass = nullptr; + } + if (m_renderPass) { + m_ctx->device.destroyRenderPass(m_renderPass); + m_renderPass = nullptr; + } + + m_initialized = false; +} + +void VulkanBloom::execute(vk::CommandBuffer cmd) +{ + if (!m_initialized || gr_bloom_intensity() <= 0) { + return; + } + + // Map shared scratch UBO for writing per-draw data + m_ctx->scratchUBOMapped = m_ctx->memoryManager->mapMemory(m_ctx->scratchUBOAlloc); + if (!m_ctx->scratchUBOMapped) { + return; + } + m_ctx->scratchUBOCursor = 0; + + // 1. 
Bright pass: extract pixels brighter than 1.0 from scene color → bloom_tex[0] mip 0 + m_ctx->drawFullscreenTriangle(cmd, m_renderPass, + m_tex[0].mipFramebuffers[0], + vk::Extent2D(m_width, m_height), + SDR_TYPE_POST_PROCESS_BRIGHTPASS, + m_sceneColor->view, m_ctx->linearSampler, + nullptr, 0, // Brightpass has no UBO + ALPHA_BLEND_NONE); + + // 2. Generate mipmaps for bloom_tex[0] (fill mips 1-3 from mip 0) + PostProcessContext::generateMipmaps(cmd, m_tex[0].image, m_width, m_height, MAX_MIP_BLUR_LEVELS); + + // 3. Blur iterations (2 iterations of vertical + horizontal ping-pong) + for (int iteration = 0; iteration < 2; iteration++) { + for (int pass = 0; pass < 2; pass++) { + // pass 0 = vertical (tex[0] → tex[1]), pass 1 = horizontal (tex[1] → tex[0]) + int srcIdx = pass; + int dstIdx = 1 - pass; + bool isVertical = (pass == 0); + unsigned int blurFlags = isVertical ? SDR_FLAG_BLUR_VERTICAL : SDR_FLAG_BLUR_HORIZONTAL; + + for (int mip = 0; mip < MAX_MIP_BLUR_LEVELS; mip++) { + uint32_t mipW = std::max(1u, m_width >> mip); + uint32_t mipH = std::max(1u, m_height >> mip); + + BlurUBOData blurData; + blurData.texSize = isVertical ? 1.0f / static_cast(mipH) + : 1.0f / static_cast(mipW); + blurData.level = mip; + blurData.pad[0] = 0; + blurData.pad[1] = 0; + + m_ctx->drawFullscreenTriangle(cmd, m_renderPass, + m_tex[dstIdx].mipFramebuffers[mip], + vk::Extent2D(mipW, mipH), + SDR_TYPE_POST_PROCESS_BLUR, + m_tex[srcIdx].fullView, m_ctx->mipmapSampler, + &blurData, sizeof(blurData), + ALPHA_BLEND_NONE, + blurFlags); + } + } + } + + // 4. 
Transition scene color for bloom composite (eShaderReadOnlyOptimal → eColorAttachmentOptimal) + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = m_sceneColor->image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, {}, {}, barrier); + } + + // 5. Bloom composite: additively blend blurred bloom onto scene color + graphics::generic_data::bloom_composition_data compData; + compData.bloom_intensity = gr_bloom_intensity() / 100.0f; + compData.levels = MAX_MIP_BLUR_LEVELS; + compData.pad[0] = 0.0f; + compData.pad[1] = 0.0f; + + m_ctx->drawFullscreenTriangle(cmd, m_compositeRenderPass, + m_sceneColorFB, + m_ctx->sceneExtent, + SDR_TYPE_POST_PROCESS_BLOOM_COMP, + m_tex[0].fullView, m_ctx->mipmapSampler, + &compData, sizeof(compData), + ALPHA_BLEND_ADDITIVE); + + // Scene color is now in eShaderReadOnlyOptimal (from bloom composite render pass finalLayout) + + // Unmap shared scratch UBO + m_ctx->memoryManager->unmapMemory(m_ctx->scratchUBOAlloc); + m_ctx->scratchUBOMapped = nullptr; +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingCommon.cpp b/code/graphics/vulkan/VulkanPostProcessingCommon.cpp new file mode 100644 index 00000000000..61ec5db5c5d --- /dev/null +++ 
b/code/graphics/vulkan/VulkanPostProcessingCommon.cpp
@@ -0,0 +1,381 @@
+#include "VulkanPostProcessing.h"
+
+// NOTE(review): in this copy of the patch every <...> span appears to have
+// been stripped - the bare "#include" below, "SCP_vector Lights", and the
+// "static_cast(...)" / "std::array name;" forms further down are missing
+// their header name or template arguments. Restore them from the upstream
+// file before applying.
+#include
+
+#include "VulkanRenderer.h"
+#include "VulkanBuffer.h"
+#include "VulkanTexture.h"
+#include "VulkanPipeline.h"
+#include "VulkanDescriptorManager.h"
+#include "graphics/grinternal.h"
+#include "graphics/2d.h"
+#include "lighting/lighting_profiles.h"
+#include "lighting/lighting.h"
+#include "nebula/neb.h"
+#include "nebula/volumetrics.h"
+#include "mission/missionparse.h"
+
+extern float Sun_spot;
+extern int Game_subspace_effect;
+extern SCP_vector Lights;
+extern int Num_lights;
+
+
+namespace graphics::vulkan {
+
+
+// Creates the shared scratch uniform buffer
+// (SCRATCH_UBO_MAX_SLOTS * SCRATCH_UBO_SLOT_SIZE bytes) and backs it with
+// CpuToGpu memory. Returns false, leaving scratchUBO null, if either the
+// buffer or its memory cannot be created.
+bool PostProcessContext::initScratchUBO()
+{
+	vk::BufferCreateInfo bufInfo;
+	bufInfo.size = SCRATCH_UBO_MAX_SLOTS * SCRATCH_UBO_SLOT_SIZE;
+	bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer;
+	bufInfo.sharingMode = vk::SharingMode::eExclusive;
+
+	try {
+		scratchUBO = device.createBuffer(bufInfo);
+	} catch (const vk::SystemError& e) {
+		mprintf(("PostProcessContext: Failed to create scratch UBO: %s\n", e.what()));
+		return false;
+	}
+
+	if (!memoryManager->allocateBufferMemory(scratchUBO, MemoryUsage::CpuToGpu, scratchUBOAlloc)) {
+		mprintf(("PostProcessContext: Failed to allocate scratch UBO memory!\n"));
+		// Roll back the buffer so a failed init leaves no dangling handle.
+		device.destroyBuffer(scratchUBO);
+		scratchUBO = nullptr;
+		return false;
+	}
+
+	return true;
+}
+
+// Destroys the scratch uniform buffer and frees its allocation. Both steps
+// are skipped when the corresponding object does not exist.
+void PostProcessContext::shutdownScratchUBO()
+{
+	if (scratchUBO) {
+		device.destroyBuffer(scratchUBO);
+		scratchUBO = nullptr;
+	}
+	if (scratchUBOAlloc.isValid()) {
+		memoryManager->freeAllocation(scratchUBOAlloc);
+	}
+}
+
+// Moves mip 0 of image into eTransferSrcOptimal and then hands the image to
+// vulkan_generate_mipmap_chain() to fill the remaining mipLevels.
+// The barrier assumes mip 0 is currently in eShaderReadOnlyOptimal and was
+// last written as a color attachment.
+void PostProcessContext::generateMipmaps(vk::CommandBuffer cmd, vk::Image image,
+	uint32_t width, uint32_t height, uint32_t mipLevels)
+{
+	// Transition mip 0 from eShaderReadOnlyOptimal (after brightpass) to eTransferSrcOptimal
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead;
+		barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = image;
+		barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor;
+		barrier.subresourceRange.baseMipLevel = 0;
+		barrier.subresourceRange.levelCount = 1;
+		barrier.subresourceRange.baseArrayLayer = 0;
+		barrier.subresourceRange.layerCount = 1;
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			vk::PipelineStageFlagBits::eTransfer,
+			{}, {}, {}, barrier);
+	}
+
+	vulkan_generate_mipmap_chain(cmd, image, width, height, mipLevels);
+}
+
+// Records one complete render pass that draws a single 3-vertex triangle with
+// the given post-process shader.
+//   cmd                    command buffer to record into
+//   renderPass/framebuffer target of the pass; extent is used for the render
+//                          area, viewport and scissor
+//   shaderType/shaderFlags select the pipeline (no vertex layout, no depth,
+//                          no culling)
+//   textureView/sampler    written to element 0 of the material texture array;
+//                          the other elements get the fallback 2D texture
+//   uboData/uboSize        optional per-draw data copied into the next scratch
+//                          UBO slot; ignored when null, zero-sized, or when the
+//                          scratch UBO is not mapped
+//   blendMode              blend mode for the pipeline
+// Returns silently, without recording anything, if a manager or the pipeline
+// is unavailable.
+// NOTE(review): on those early returns the render pass never runs, so any
+// layout change the caller expects from the pass does not happen - confirm
+// callers tolerate that.
+void PostProcessContext::drawFullscreenTriangle(vk::CommandBuffer cmd, vk::RenderPass renderPass,
+	vk::Framebuffer framebuffer, vk::Extent2D extent,
+	int shaderType,
+	vk::ImageView textureView, vk::Sampler sampler,
+	const void* uboData, size_t uboSize,
+	int blendMode,
+	unsigned int shaderFlags)
+{
+	auto* pipelineMgr = getPipelineManager();
+	auto* descriptorMgr = getDescriptorManager();
+	auto* bufferMgr = getBufferManager();
+	auto* texMgr = getTextureManager();
+
+	// bufferMgr and texMgr are only checked for availability here; they are
+	// not used further in this function.
+	if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) {
+		return;
+	}
+
+	// Get/create pipeline for this shader + render pass combination
+	PipelineConfig config;
+	config.shaderType = static_cast(shaderType);
+	config.shaderFlags = shaderFlags;
+	config.vertexLayoutHash = 0;
+	config.primitiveType = PRIM_TYPE_TRIS;
+	config.depthMode = ZBUFFER_TYPE_NONE;
+	config.blendMode = static_cast(blendMode);
+	config.cullEnabled = false;
+	config.depthWriteEnabled = false;
+	config.renderPass = renderPass;
+
+	vertex_layout emptyLayout;
+	vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+	if (!pipeline) {
+		return;
+	}
+
+	vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout();
+
+	// Begin render pass
+	vk::RenderPassBeginInfo rpBegin;
+	rpBegin.renderPass = renderPass;
+	rpBegin.framebuffer = framebuffer;
+	rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+	rpBegin.renderArea.extent = extent;
+
+	cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+	cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+	// Set viewport and scissor
+	vk::Viewport viewport;
+	viewport.x = 0.0f;
+	viewport.y = 0.0f;
+	viewport.width = static_cast(extent.width);
+	viewport.height = static_cast(extent.height);
+	viewport.minDepth = 0.0f;
+	viewport.maxDepth = 1.0f;
+	cmd.setViewport(0, viewport);
+
+	vk::Rect2D scissor;
+	scissor.offset = vk::Offset2D(0, 0);
+	scissor.extent = extent;
+	cmd.setScissor(0, scissor);
+
+	DescriptorWriter writer;
+	writer.reset(device, descriptorMgr->getFallbacks());
+
+	// Set 1: Material
+	vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+	Verify(materialSet);
+	writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material));
+	{
+		// Every array element starts as the fallback texture; only element 0
+		// is replaced with the caller's view and sampler.
+		std::array texArrayInfos;
+		texArrayInfos.fill(descriptorMgr->getFallbacks().texture2D);
+		texArrayInfos[0].sampler = sampler;
+		texArrayInfos[0].imageView = textureView;
+		writer.setImageArray(MaterialBinding::TextureArray, texArrayInfos);
+	}
+
+	// Set 2: PerDraw
+	vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+	writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw));
+	if (uboData && uboSize > 0 && scratchUBOMapped) {
+		// NOTE(review): the slot index is only checked by Assertion and
+		// uboSize is never compared against SCRATCH_UBO_SLOT_SIZE, so the
+		// memcpy below has no runtime bound if Assertion is compiled out -
+		// consider an explicit guard.
+		Assertion(scratchUBOCursor < SCRATCH_UBO_MAX_SLOTS, "Scratch UBO slot overflow!");
+		uint32_t slotOffset = scratchUBOCursor * static_cast(SCRATCH_UBO_SLOT_SIZE);
+		memcpy(static_cast(scratchUBOMapped) + slotOffset, uboData, uboSize);
+		scratchUBOCursor++;
+		writer.setBuffer(PerDrawBinding::GenericData, {scratchUBO, slotOffset, SCRATCH_UBO_SLOT_SIZE});
+	}
+	writer.flush();
+
+	// Bind descriptor sets (Set 0 already bound from frame setup)
+	cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout,
+		static_cast(DescriptorSetIndex::Material),
+		{materialSet, perDrawSet}, {});
+
+	cmd.draw(3, 1, 0, 0);
+	cmd.endRenderPass();
+}
+
+// Creates a single-mip, single-layer, optimally tiled 2D image with GpuOnly
+// memory and a matching 2D view. Returns false on any failure after
+// destroying what was already created.
+bool PostProcessContext::createImage(uint32_t width, uint32_t height, vk::Format format,
+	vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect,
+	vk::Image& outImage, vk::ImageView& outView,
+	VulkanAllocation& outAllocation,
+	vk::SampleCountFlagBits sampleCount) const
+{
+	// Create image
+	vk::ImageCreateInfo imageInfo;
+	imageInfo.imageType = vk::ImageType::e2D;
+	imageInfo.format = format;
+	imageInfo.extent.width = width;
+	imageInfo.extent.height = height;
+	imageInfo.extent.depth = 1;
+	imageInfo.mipLevels = 1;
+	imageInfo.arrayLayers = 1;
+	imageInfo.samples = sampleCount;
+	imageInfo.tiling = vk::ImageTiling::eOptimal;
+	imageInfo.usage = usage;
+	imageInfo.sharingMode = vk::SharingMode::eExclusive;
+	imageInfo.initialLayout = vk::ImageLayout::eUndefined;
+
+	try {
+		outImage = device.createImage(imageInfo);
+	} catch (const vk::SystemError& e) {
+		mprintf(("VulkanPostProcessor: Failed to create image: %s\n", e.what()));
+		return false;
+	}
+
+	// Allocate memory
+	if (!memoryManager->allocateImageMemory(outImage, MemoryUsage::GpuOnly, outAllocation)) {
+		mprintf(("VulkanPostProcessor: Failed to allocate image memory!\n"));
+		device.destroyImage(outImage);
+		outImage = nullptr;
+		return false;
+	}
+
+	// Create image view (plain 2D, not array)
+	vk::ImageViewCreateInfo viewInfo;
+	viewInfo.image = outImage;
+	viewInfo.viewType = vk::ImageViewType::e2D;
+	viewInfo.format = format;
+	viewInfo.subresourceRange.aspectMask = aspect;
+	viewInfo.subresourceRange.baseMipLevel = 0;
+	viewInfo.subresourceRange.levelCount = 1;
+	viewInfo.subresourceRange.baseArrayLayer = 0;
+	viewInfo.subresourceRange.layerCount = 1;
+
+	try {
+		outView = device.createImageView(viewInfo);
+	} catch (const vk::SystemError& e) { +
mprintf(("VulkanPostProcessor: Failed to create image view: %s\n", e.what())); + device.destroyImage(outImage); + memoryManager->freeAllocation(outAllocation); + outImage = nullptr; + return false; + } + + return true; +} + +void copyImageToImage( + vk::CommandBuffer cmd, + vk::Image src, vk::ImageLayout srcOldLayout, vk::ImageLayout srcNewLayout, + vk::Image dst, vk::ImageLayout dstOldLayout, vk::ImageLayout dstNewLayout, + vk::Extent2D extent, + vk::ImageAspectFlags aspect, + uint32_t dstMipLevels) +{ + // Derive access mask and pipeline stage from a layout. + // 'leaving' = true for srcAccessMask (flushing writes before transition), + // false for dstAccessMask (making data available after transition). + auto layoutInfo = [](vk::ImageLayout layout, bool leaving) + -> std::pair { + switch (layout) { + case vk::ImageLayout::eUndefined: + return {{}, vk::PipelineStageFlagBits::eTopOfPipe}; + case vk::ImageLayout::eShaderReadOnlyOptimal: + return {leaving ? vk::AccessFlags{} : vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eFragmentShader}; + case vk::ImageLayout::eColorAttachmentOptimal: + return {leaving ? vk::AccessFlagBits::eColorAttachmentWrite + : (vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite), + vk::PipelineStageFlagBits::eColorAttachmentOutput}; + case vk::ImageLayout::eDepthStencilAttachmentOptimal: + return {leaving ? vk::AccessFlagBits::eDepthStencilAttachmentWrite + : (vk::AccessFlagBits::eDepthStencilAttachmentRead | vk::AccessFlagBits::eDepthStencilAttachmentWrite), + leaving ? 
vk::PipelineStageFlagBits::eLateFragmentTests + : vk::PipelineStageFlagBits::eEarlyFragmentTests}; + case vk::ImageLayout::eTransferSrcOptimal: + return {vk::AccessFlagBits::eTransferRead, vk::PipelineStageFlagBits::eTransfer}; + case vk::ImageLayout::eTransferDstOptimal: + return {vk::AccessFlagBits::eTransferWrite, vk::PipelineStageFlagBits::eTransfer}; + default: + Assertion(false, "copyImageToImage: unsupported layout %d", static_cast(layout)); + return {{}, vk::PipelineStageFlagBits::eAllCommands}; + } + }; + + // 1. Pre-barriers: transition src → eTransferSrcOptimal, dst → eTransferDstOptimal + { + auto [srcAccess, srcStage] = layoutInfo(srcOldLayout, true); + auto [dstAccess, dstStage] = layoutInfo(dstOldLayout, true); + + std::array barriers; + + barriers[0].srcAccessMask = srcAccess; + barriers[0].dstAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[0].oldLayout = srcOldLayout; + barriers[0].newLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = src; + barriers[0].subresourceRange = {aspect, 0, 1, 0, 1}; + + barriers[1].srcAccessMask = dstAccess; + barriers[1].dstAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[1].oldLayout = dstOldLayout; + barriers[1].newLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = dst; + barriers[1].subresourceRange = {aspect, 0, dstMipLevels, 0, 1}; + + cmd.pipelineBarrier( + srcStage | dstStage, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, barriers); + } + + // 2. 
Copy (always mip 0, layer 0) + { + vk::ImageCopy region; + region.srcSubresource = {aspect, 0, 0, 1}; + region.dstSubresource = {aspect, 0, 0, 1}; + region.extent = vk::Extent3D(extent.width, extent.height, 1); + + cmd.copyImage( + src, vk::ImageLayout::eTransferSrcOptimal, + dst, vk::ImageLayout::eTransferDstOptimal, + region); + } + + // 3. Post-barriers: transition src → srcNewLayout, dst → dstNewLayout + // Skip rule: if newLayout matches the transfer layout, skip that barrier + { + bool skipSrc = (srcNewLayout == vk::ImageLayout::eTransferSrcOptimal); + bool skipDst = (dstNewLayout == vk::ImageLayout::eTransferDstOptimal); + + if (skipSrc && skipDst) { + return; + } + + std::array barriers; + uint32_t count = 0; + vk::PipelineStageFlags postDstStage = {}; + + if (!skipSrc) { + auto [access, stage] = layoutInfo(srcNewLayout, false); + barriers[count].srcAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[count].dstAccessMask = access; + barriers[count].oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[count].newLayout = srcNewLayout; + barriers[count].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[count].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[count].image = src; + barriers[count].subresourceRange = {aspect, 0, 1, 0, 1}; + count++; + postDstStage |= stage; + } + + if (!skipDst) { + auto [access, stage] = layoutInfo(dstNewLayout, false); + barriers[count].srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[count].dstAccessMask = access; + barriers[count].oldLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[count].newLayout = dstNewLayout; + barriers[count].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[count].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[count].image = dst; + barriers[count].subresourceRange = {aspect, 0, dstMipLevels, 0, 1}; + count++; + postDstStage |= stage; + } + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + postDstStage, + {}, 
nullptr, nullptr, + vk::ArrayProxy(count, barriers.data())); + } +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingDistortion.cpp b/code/graphics/vulkan/VulkanPostProcessingDistortion.cpp new file mode 100644 index 00000000000..715df1cac57 --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingDistortion.cpp @@ -0,0 +1,272 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "VulkanConstants.h" +#include "VulkanDeletionQueue.h" +#include "VulkanRenderer.h" +#include "utils/Random.h" + + +namespace graphics::vulkan { + + +bool VulkanDistortion::init(PostProcessContext& ctx) +{ + m_ctx = &ctx; + + for (size_t i = 0; i < m_tex.size(); i++) { + if (!m_ctx->createImage(32, 32, LDR_COLOR_FORMAT, + vk::ImageUsageFlagBits::eTransferSrc + | vk::ImageUsageFlagBits::eTransferDst + | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eColor, + m_tex[i].image, m_tex[i].view, m_tex[i].allocation)) { + mprintf(("VulkanDistortion: Failed to create distortion texture %zu\n", i)); + return false; + } + m_tex[i].format = LDR_COLOR_FORMAT; + m_tex[i].width = 32; + m_tex[i].height = 32; + } + + // Create LINEAR/REPEAT sampler for distortion textures + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = vk::Filter::eLinear; + samplerInfo.minFilter = vk::Filter::eLinear; + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eNearest; + samplerInfo.addressModeU = vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeV = vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeW = vk::SamplerAddressMode::eRepeat; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.anisotropyEnable = VK_FALSE; + samplerInfo.compareEnable = VK_FALSE; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = 0.0f; + samplerInfo.borderColor = vk::BorderColor::eFloatOpaqueBlack; + + try { + m_sampler = m_ctx->device.createSampler(samplerInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanDistortion: Failed to create distortion 
sampler: %s\n", e.what())); + return false; + } + + m_initialized = true; + mprintf(("VulkanDistortion: Distortion textures initialized\n")); + return true; +} + +void VulkanDistortion::shutdown() +{ + if (!m_ctx) { + return; + } + + if (m_sampler) { + m_ctx->device.destroySampler(m_sampler); + m_sampler = nullptr; + } + for (auto& t : m_tex) { + if (t.view) { + m_ctx->device.destroyImageView(t.view); + t.view = nullptr; + } + if (t.image) { + m_ctx->device.destroyImage(t.image); + t.image = nullptr; + } + if (t.allocation.isValid()) { + m_ctx->memoryManager->freeAllocation(t.allocation); + } + } + m_initialized = false; +} + +void VulkanDistortion::update(vk::CommandBuffer cmd, float frametime) +{ + if (!m_initialized) { + return; + } + + m_timer += frametime; + if (m_timer < 0.03f) { + return; + } + m_timer = 0.0f; + + int dst = !m_switch; // Write target + int src = m_switch; // Read source + + // On first update, images are still in eUndefined layout + vk::ImageLayout srcOldLayout = m_firstUpdate + ? vk::ImageLayout::eUndefined : vk::ImageLayout::eShaderReadOnlyOptimal; + vk::AccessFlags srcOldAccess = m_firstUpdate + ? 
vk::AccessFlags{} : vk::AccessFlagBits::eShaderRead; + + // Transition both distortion textures for transfer operations + { + std::array barriers; + + // dst: eShaderReadOnlyOptimal (or eUndefined on first use) → eTransferDstOptimal + barriers[0].srcAccessMask = srcOldAccess; + barriers[0].dstAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[0].oldLayout = srcOldLayout; + barriers[0].newLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = m_tex[dst].image; + barriers[0].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + // src: eShaderReadOnlyOptimal (or eUndefined on first use) → eTransferSrcOptimal + barriers[1].srcAccessMask = srcOldAccess; + barriers[1].dstAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[1].oldLayout = srcOldLayout; + barriers[1].newLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = m_tex[src].image; + barriers[1].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barriers); + } + + // Clear dest to mid-gray (0.5, 0.5, 0.0, 1.0) = no distortion + { + vk::ClearColorValue clearColor; + clearColor.setFloat32({0.5f, 0.5f, 0.0f, 1.0f}); + vk::ImageSubresourceRange range(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + cmd.clearColorImage(m_tex[dst].image, + vk::ImageLayout::eTransferDstOptimal, clearColor, range); + } + + // Blit: scroll old data right by 1 pixel + // src columns 0-30 → dst columns 1-31 (with LINEAR filtering) + { + vk::ImageBlit blit; + blit.srcSubresource = vk::ImageSubresourceLayers( + vk::ImageAspectFlagBits::eColor, 0, 0, 1); 
+ blit.srcOffsets[0] = vk::Offset3D(0, 0, 0); + blit.srcOffsets[1] = vk::Offset3D(31, 32, 1); + blit.dstSubresource = vk::ImageSubresourceLayers( + vk::ImageAspectFlagBits::eColor, 0, 0, 1); + blit.dstOffsets[0] = vk::Offset3D(1, 0, 0); + blit.dstOffsets[1] = vk::Offset3D(32, 32, 1); + + cmd.blitImage( + m_tex[src].image, vk::ImageLayout::eTransferSrcOptimal, + m_tex[dst].image, vk::ImageLayout::eTransferDstOptimal, + blit, vk::Filter::eLinear); + } + + // Generate random noise and copy to column 0 of dst + // OpenGL draws 33 GL_POINTS at x=0 with random R,G values — we write 32 pixels + { + // Create a small host-visible staging buffer for 32 RGBA8 pixels (128 bytes) + vk::BufferCreateInfo bufInfo; + bufInfo.size = 32 * 4; + bufInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuf; + VulkanAllocation stagingAlloc; + try { + stagingBuf = m_ctx->device.createBuffer(bufInfo); + } catch (const vk::SystemError&) { + // Non-fatal: skip noise injection this frame + goto skip_noise; + } + + Verify(m_ctx->memoryManager->allocateBufferMemory(stagingBuf, MemoryUsage::CpuOnly, stagingAlloc)); + + { + auto* pixels = static_cast(m_ctx->memoryManager->mapMemory(stagingAlloc)); + Verify(pixels); + for (int i = 0; i < 32; i++) { + pixels[(i * 4) + 0] = static_cast(::util::Random::next(256)); // R + pixels[(i * 4) + 1] = static_cast(::util::Random::next(256)); // G + pixels[(i * 4) + 2] = 255; // B + pixels[(i * 4) + 3] = 255; // A + } + m_ctx->memoryManager->unmapMemory(stagingAlloc); + + // Copy staging buffer → column 0 of dst (1 pixel wide, 32 pixels tall) + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; // Tightly packed + region.bufferImageHeight = 0; + region.imageSubresource = vk::ImageSubresourceLayers( + vk::ImageAspectFlagBits::eColor, 0, 0, 1); + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(1, 32, 1); + + 
cmd.copyBufferToImage(stagingBuf, m_tex[dst].image, + vk::ImageLayout::eTransferDstOptimal, region); + } + + // Schedule staging buffer for deferred destruction (GPU may still be reading) + auto* delQueue = getDeletionQueue(); + if (delQueue) { + delQueue->queueBuffer(stagingBuf, stagingAlloc); + } else { + m_ctx->device.destroyBuffer(stagingBuf); + m_ctx->memoryManager->freeAllocation(stagingAlloc); + } + } + +skip_noise: + // Transition both textures back to eShaderReadOnlyOptimal + { + std::array barriers; + + // dst: eTransferDstOptimal → eShaderReadOnlyOptimal + barriers[0].srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[0].dstAccessMask = vk::AccessFlagBits::eShaderRead; + barriers[0].oldLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[0].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = m_tex[dst].image; + barriers[0].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + // src: eTransferSrcOptimal → eShaderReadOnlyOptimal + barriers[1].srcAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[1].dstAccessMask = vk::AccessFlagBits::eShaderRead; + barriers[1].oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[1].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = m_tex[src].image; + barriers[1].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, {}, {}, barriers); + } + + m_switch = !m_switch; + m_firstUpdate = false; +} + +vk::DescriptorImageInfo VulkanDistortion::getTextureInfo() const +{ + if (!m_initialized) { + return {}; + } + // 
Return the most recently written texture (matching OpenGL's + // Distortion_texture[!Distortion_switch] binding for thrusters). + // After update() toggles the switch, m_switch points to the old read + // source, which is the texture that was just written. + return {m_ctx->linearSampler, m_tex[m_switch].view, + vk::ImageLayout::eShaderReadOnlyOptimal}; +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingFog.cpp b/code/graphics/vulkan/VulkanPostProcessingFog.cpp new file mode 100644 index 00000000000..f570e94de2e --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingFog.cpp @@ -0,0 +1,623 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "gr_vulkan.h" +#include "VulkanRenderer.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" +#include "VulkanPipeline.h" +#include "VulkanDescriptorManager.h" +#include "graphics/util/uniform_structs.h" +#include "graphics/grinternal.h" +#include "graphics/matrix.h" +#include "graphics/2d.h" +#include "lighting/lighting_profiles.h" +#include "lighting/lighting.h" +#include "math/vecmat.h" +#include "render/3d.h" +#include "tracing/tracing.h" +#include "nebula/neb.h" +#include "nebula/volumetrics.h" +#include "mission/missionparse.h" + +extern float Sun_spot; +extern int Game_subspace_effect; +extern SCP_vector Lights; +extern int Num_lights; + + +namespace graphics::vulkan { + + +// ========== Fog / Volumetric Nebula ========== + +void VulkanFog::init(PostProcessContext& ctx, const RenderTarget& sceneColor, + const RenderTarget& sceneDepth, const RenderTarget& sceneDepthCopy, + const VulkanDeferredGBuffer& gbuffer) +{ + m_ctx = &ctx; + m_sceneColor = &sceneColor; + m_sceneDepth = &sceneDepth; + m_sceneDepthCopy = &sceneDepthCopy; + m_gbuffer = &gbuffer; +} + +void VulkanFog::copySceneDepth(vk::CommandBuffer cmd) +{ + // Copies scene depth → depth copy texture so the fog shaders can sample it. 
+ // Scene depth is in eDepthStencilAttachmentOptimal (from the ended scene render pass). + copyImageToImage(cmd, + m_sceneDepth->image, vk::ImageLayout::eDepthStencilAttachmentOptimal, vk::ImageLayout::eDepthStencilAttachmentOptimal, + m_sceneDepthCopy->image, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + m_ctx->sceneExtent, + imageAspectFromFormat(m_ctx->depthFormat)); +} + +bool VulkanFog::initFogPass() +{ + if (m_fogInitialized) { + return true; + } + + // Create fog render pass: 1 RGBA16F color attachment, loadOp=eDontCare (writing every pixel), + // initialLayout/finalLayout = eColorAttachmentOptimal (scene color stays as render target) + { + vk::AttachmentDescription att; + att.format = HDR_COLOR_FORMAT; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eDontCare; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal; + att.finalLayout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; 
+ rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_fogRenderPass = m_ctx->device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create fog render pass: %s\n", e.what())); + return false; + } + } + + // Create fog framebuffer (scene color as attachment) + { + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_fogRenderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &m_sceneColor->view; + fbInfo.width = m_ctx->sceneExtent.width; + fbInfo.height = m_ctx->sceneExtent.height; + fbInfo.layers = 1; + + try { + m_fogFramebuffer = m_ctx->device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create fog framebuffer: %s\n", e.what())); + return false; + } + } + + m_fogInitialized = true; + mprintf(("VulkanPostProcessor: Fog pass initialized\n")); + return true; +} + +void VulkanFog::shutdown() +{ + if (!m_ctx || !m_ctx->device) { + return; + } + if (m_emissiveMipmappedFullView) { + m_ctx->device.destroyImageView(m_emissiveMipmappedFullView); + m_emissiveMipmappedFullView = nullptr; + } + if (m_emissiveMipmapped.view) { + m_ctx->device.destroyImageView(m_emissiveMipmapped.view); + m_emissiveMipmapped.view = nullptr; + } + if (m_emissiveMipmapped.image) { + m_ctx->device.destroyImage(m_emissiveMipmapped.image); + m_emissiveMipmapped.image = nullptr; + } + if (m_emissiveMipmapped.allocation.isValid()) { + m_ctx->memoryManager->freeAllocation(m_emissiveMipmapped.allocation); + } + m_emissiveMipmappedInitialized = false; + + if (m_fogFramebuffer) { + m_ctx->device.destroyFramebuffer(m_fogFramebuffer); + m_fogFramebuffer = nullptr; + } + if (m_fogRenderPass) { + m_ctx->device.destroyRenderPass(m_fogRenderPass); + m_fogRenderPass = nullptr; + } + m_fogInitialized = false; +} + +void 
VulkanFog::renderScene(vk::CommandBuffer cmd)
+{
+	// Records the full-screen scene fog pass into cmd: copies scene depth,
+	// moves scene color to eColorAttachmentOptimal, uploads the fog UBO into a
+	// scratch-UBO slot and draws one triangle with the SDR_TYPE_SCENE_FOG pipeline.
+	// Returns without drawing if the fog pass, a manager or the pipeline is unavailable.
+	GR_DEBUG_SCOPE("Scene Fog");
+
+	if (!m_fogInitialized) {
+		if (!initFogPass()) {
+			return;
+		}
+	}
+
+	auto* pipelineMgr = getPipelineManager();
+	auto* descriptorMgr = getDescriptorManager();
+	auto* bufferMgr = getBufferManager();
+	auto* texMgr = getTextureManager();
+
+	if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) {
+		return;
+	}
+
+	// Copy scene depth for fog sampling
+	copySceneDepth(cmd);
+
+	// Transition scene color: eShaderReadOnlyOptimal -> eColorAttachmentOptimal
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = {};
+		barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = m_sceneColor->image;
+		barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eTopOfPipe,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			{}, nullptr, nullptr, barrier);
+	}
+
+	// Map bloom UBO for fog UBO data
+	m_ctx->scratchUBOMapped = m_ctx->memoryManager->mapMemory(m_ctx->scratchUBOAlloc);
+	Verify(m_ctx->scratchUBOMapped);
+
+	// Fill fog UBO
+	graphics::generic_data::fog_data fogData;
+	{
+		float fog_near, fog_density;
+		neb2_get_adjusted_fog_values(&fog_near, &fog_density);
+		unsigned char r, g, b;
+		neb2_get_fog_color(&r, &g, &b);
+
+		fogData.fog_start = fog_near;
+		fogData.fog_density = fog_density;
+		fogData.fog_color.xyz.x = r / 255.f;
+		fogData.fog_color.xyz.y = g / 255.f;
+		fogData.fog_color.xyz.z = b / 255.f;
+		fogData.zNear = Min_draw_distance;
+		fogData.zFar = Max_draw_distance;
+	}
+
+	// Custom descriptor writes to bind depth copy at binding 4
+	PipelineConfig config;
+	config.shaderType = SDR_TYPE_SCENE_FOG;
+	config.vertexLayoutHash = 0;
+	config.primitiveType = PRIM_TYPE_TRIS;
+	config.depthMode = ZBUFFER_TYPE_NONE;
+	config.blendMode = ALPHA_BLEND_NONE;
+	config.cullEnabled = false;
+	config.depthWriteEnabled = false;
+	config.renderPass = m_fogRenderPass;
+
+	vertex_layout emptyLayout;
+	vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+	if (!pipeline) {
+		// Undo the mapping made above before bailing out.
+		m_ctx->memoryManager->unmapMemory(m_ctx->scratchUBOAlloc);
+		m_ctx->scratchUBOMapped = nullptr;
+		return;
+	}
+
+	vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout();
+
+	// Begin render pass
+	vk::RenderPassBeginInfo rpBegin;
+	rpBegin.renderPass = m_fogRenderPass;
+	rpBegin.framebuffer = m_fogFramebuffer;
+	rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+	rpBegin.renderArea.extent = m_ctx->sceneExtent;
+
+	cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+	cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+	vk::Viewport viewport;
+	viewport.x = 0.0f;
+	viewport.y = 0.0f;
+	viewport.width = static_cast(m_ctx->sceneExtent.width);
+	viewport.height = static_cast(m_ctx->sceneExtent.height);
+	viewport.minDepth = 0.0f;
+	viewport.maxDepth = 1.0f;
+	cmd.setViewport(0, viewport);
+
+	vk::Rect2D scissor;
+	scissor.offset = vk::Offset2D(0, 0);
+	scissor.extent = m_ctx->sceneExtent;
+	cmd.setScissor(0, scissor);
+
+	DescriptorWriter writer;
+	writer.reset(m_ctx->device, descriptorMgr->getFallbacks());
+
+	// Set 1: Material
+	vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+	Verify(materialSet);
+	writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material));
+	{
+		std::array texArrayInfos;
+		texArrayInfos.fill(descriptorMgr->getFallbacks().texture2D);
+		texArrayInfos[0] = {m_ctx->linearSampler, m_gbuffer->compositeView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+		writer.setImageArray(MaterialBinding::TextureArray, texArrayInfos);
+	}
+	writer.setImage(MaterialBinding::DepthMap, {m_ctx->linearSampler, m_sceneDepthCopy->view, vk::ImageLayout::eShaderReadOnlyOptimal});
+
+	// Set 2: PerDraw — fog UBO
+	vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+	writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw));
+	{
+		// Copy the fog data into the next free scratch-UBO slot and bind that slot.
+		Assertion(m_ctx->scratchUBOCursor < PostProcessContext::SCRATCH_UBO_MAX_SLOTS, "Fog UBO slot overflow!");
+		uint32_t slotOffset = m_ctx->scratchUBOCursor * static_cast(PostProcessContext::SCRATCH_UBO_SLOT_SIZE);
+		memcpy(static_cast(m_ctx->scratchUBOMapped) + slotOffset, &fogData, sizeof(fogData));
+		m_ctx->scratchUBOCursor++;
+		writer.setBuffer(PerDrawBinding::GenericData, {m_ctx->scratchUBO, slotOffset, PostProcessContext::SCRATCH_UBO_SLOT_SIZE});
+	}
+	writer.flush();
+
+	// Bind descriptor sets and draw
+	cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout,
+		static_cast(DescriptorSetIndex::Material),
+		{materialSet, perDrawSet}, {});
+
+	cmd.draw(3, 1, 0, 0);
+	cmd.endRenderPass();
+
+	// Scene color is now in eColorAttachmentOptimal (fog render pass finalLayout)
+
+	m_ctx->memoryManager->unmapMemory(m_ctx->scratchUBOAlloc);
+	m_ctx->scratchUBOMapped = nullptr;
+}
+
+// Records the volumetric nebula pass into cmd: lazily creates the mipmapped
+// emissive image, copies the G-buffer emissive into it and generates mips,
+// copies scene depth, fills the volumetric fog UBO from the mission's nebula
+// and draws one triangle with the SDR_TYPE_VOLUMETRIC_FOG pipeline.
+// Returns without drawing if a required manager, texture, Vulkan object or
+// pipeline is unavailable.
+void VulkanFog::renderVolumetric(vk::CommandBuffer cmd)
+{
+	GR_DEBUG_SCOPE("Volumetric Nebulae");
+	TRACE_SCOPE(tracing::Volumetrics);
+
+	if (!m_fogInitialized) {
+		if (!initFogPass()) {
+			return;
+		}
+	}
+
+	auto* pipelineMgr = getPipelineManager();
+	auto* descriptorMgr = getDescriptorManager();
+	auto* bufferMgr = getBufferManager();
+	auto* texMgr = getTextureManager();
+
+	if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) {
+		return;
+	}
+
+	// NOTE(review): dereferenced without a check here — presumably the caller only
+	// invokes this when the mission has a volumetric nebula; confirm.
+	const volumetric_nebula& neb = *The_mission.volumetrics;
+	Assertion(neb.isVolumeBitmapValid(), "Volumetric nebula was not properly initialized!");
+
+	// Get 3D texture handles
+	int volHandle = neb.getVolumeBitmapHandle();
+	auto* volSlot = texMgr->getTextureSlot(volHandle);
+	if (!volSlot || !volSlot->imageView) {
+		mprintf(("VulkanFog::renderVolumetric: Volume texture not available\n"));
+		return;
+	}
+
+	bool noiseActive = neb.getNoiseActive();
+	tcache_slot_vulkan* noiseSlot = nullptr;
+	if (noiseActive) {
+		int noiseHandle = neb.getNoiseVolumeBitmapHandle();
+		noiseSlot = texMgr->getTextureSlot(noiseHandle);
+	}
+
+	// Prepare mipmapped emissive copy for LOD sampling
+	if (!m_emissiveMipmappedInitialized) {
+		// Mip count = 1 + number of halvings needed to bring the larger extent down to 1.
+		m_emissiveMipLevels = 1;
+		uint32_t dim = std::max(m_ctx->sceneExtent.width, m_ctx->sceneExtent.height);
+		while (dim > 1) {
+			dim >>= 1;
+			m_emissiveMipLevels++;
+		}
+
+		vk::ImageCreateInfo imgInfo;
+		imgInfo.imageType = vk::ImageType::e2D;
+		imgInfo.format = HDR_COLOR_FORMAT;
+		imgInfo.extent = vk::Extent3D(m_ctx->sceneExtent.width, m_ctx->sceneExtent.height, 1);
+		imgInfo.mipLevels = m_emissiveMipLevels;
+		imgInfo.arrayLayers = 1;
+		imgInfo.samples = vk::SampleCountFlagBits::e1;
+		imgInfo.tiling = vk::ImageTiling::eOptimal;
+		imgInfo.usage = vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst
+			| vk::ImageUsageFlagBits::eSampled;
+		imgInfo.sharingMode = vk::SharingMode::eExclusive;
+		imgInfo.initialLayout = vk::ImageLayout::eUndefined;
+
+		try {
+			m_emissiveMipmapped.image = m_ctx->device.createImage(imgInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create mipmapped emissive: %s\n", e.what()));
+			return;
+		}
+
+		Verify(m_ctx->memoryManager->allocateImageMemory(m_emissiveMipmapped.image, MemoryUsage::GpuOnly, m_emissiveMipmapped.allocation));
+
+		// Create full-mip-chain view for LOD sampling
+		vk::ImageViewCreateInfo viewInfo;
+		viewInfo.image = m_emissiveMipmapped.image;
+		viewInfo.viewType = vk::ImageViewType::e2D;
+		viewInfo.format = HDR_COLOR_FORMAT;
+		viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor;
+		viewInfo.subresourceRange.baseMipLevel = 0;
+		viewInfo.subresourceRange.levelCount = m_emissiveMipLevels;
+		viewInfo.subresourceRange.baseArrayLayer = 0;
+		viewInfo.subresourceRange.layerCount = 1;
+
+		try {
+			m_emissiveMipmappedFullView = m_ctx->device.createImageView(viewInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create mipmapped emissive view: %s\n", e.what()));
+			// The initialized flag stays false, so the next call runs this setup again.
+			// Release the image and its memory now (same calls as shutdown()) so the
+			// retry does not overwrite the handle and leak them.
+			m_ctx->device.destroyImage(m_emissiveMipmapped.image);
+			m_emissiveMipmapped.image = nullptr;
+			if (m_emissiveMipmapped.allocation.isValid()) {
+				m_ctx->memoryManager->freeAllocation(m_emissiveMipmapped.allocation);
+			}
+			return;
+		}
+
+		m_emissiveMipmapped.format = HDR_COLOR_FORMAT;
+		m_emissiveMipmapped.width = m_ctx->sceneExtent.width;
+		m_emissiveMipmapped.height = m_ctx->sceneExtent.height;
+		m_emissiveMipmappedInitialized = true;
+	}
+
+	// Copy G-buffer emissive (mip 0) to mipmapped emissive, then generate mips.
+	// dstMipLevels transitions ALL mip levels to eTransferDstOptimal in the pre-barrier.
+	// Skip dst post-barrier (stays in eTransferDstOptimal for generateMipmaps).
+	copyImageToImage(cmd,
+		m_gbuffer->emissiveImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal,
+		m_emissiveMipmapped.image, vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal,
+		m_ctx->sceneExtent,
+		vk::ImageAspectFlagBits::eColor,
+		m_emissiveMipLevels);
+
+	// Generate mipmaps via blit chain (expects dst in eTransferDstOptimal).
+	// After return, all mips are in eShaderReadOnlyOptimal.
+	PostProcessContext::generateMipmaps(cmd, m_emissiveMipmapped.image, m_ctx->sceneExtent.width, m_ctx->sceneExtent.height, m_emissiveMipLevels);
+
+	// Copy scene depth (if not already done by renderSceneFog)
+	// copySceneDepth is safe to call multiple times — but it re-transitions the depth buffer.
+	// The fog pass already called it if scene fog ran. For standalone volumetric, we need it.
+	copySceneDepth(cmd);
+
+	// Transition scene color → eColorAttachmentOptimal for the fog render pass.
+	// oldLayout=eUndefined is safe: render pass has loadOp=eDontCare (overwrites every pixel).
+	// Scene color may be in eShaderReadOnlyOptimal (volumetric-only) or
+	// eColorAttachmentOptimal (after scene fog + copySceneColorToComposite).
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = {};
+		barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.oldLayout = vk::ImageLayout::eUndefined;
+		barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = m_sceneColor->image;
+		barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eTopOfPipe,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			{}, nullptr, nullptr, barrier);
+	}
+
+	// Map bloom UBO for volumetric fog UBO data
+	m_ctx->scratchUBOMapped = m_ctx->memoryManager->mapMemory(m_ctx->scratchUBOAlloc);
+	Verify(m_ctx->scratchUBOMapped);
+
+	// Fill volumetric fog UBO
+	graphics::generic_data::volumetric_fog_data volData;
+	{
+		// Set the projection/view matrices only long enough to invert them.
+		gr_set_proj_matrix(Proj_fov, gr_screen.clip_aspect, Min_draw_distance, Max_draw_distance);
+		gr_set_view_matrix(&Eye_position, &Eye_matrix);
+		vm_inverse_matrix4(&volData.p_inv, &gr_projection_matrix);
+		vm_inverse_matrix4(&volData.v_inv, &gr_view_matrix);
+		gr_end_view_matrix();
+		gr_end_proj_matrix();
+
+		volData.zNear = Min_draw_distance;
+		volData.zFar = Max_draw_distance;
+		volData.cameraPos = Eye_position;
+
+		// Find first directional light for global light direction/color
+		vec3d global_light_dir = ZERO_VECTOR;
+		vec3d global_light_diffuse = ZERO_VECTOR;
+		for (const auto& l : Lights) {
+			if (l.type == Light_Type::Directional) {
+				global_light_dir = l.vec;
+				global_light_diffuse.xyz.x = l.r * l.intensity;
+				global_light_diffuse.xyz.y = l.g * l.intensity;
+				global_light_diffuse.xyz.z = l.b * l.intensity;
+				break;
+			}
+		}
+
+		volData.globalLightDirection = global_light_dir;
+		volData.globalLightDiffuse = global_light_diffuse;
+		volData.nebPos = neb.getPos();
+		volData.nebSize = neb.getSize();
+		volData.stepsize = neb.getStepsize();
+		volData.opacitydistance = neb.getOpacityDistance();
+		volData.alphalimit = neb.getAlphaLim();
+		auto nebColor = neb.getNebulaColor();
+		volData.nebColor[0] = std::get<0>(nebColor);
+		volData.nebColor[1] = std::get<1>(nebColor);
+		volData.nebColor[2] = std::get<2>(nebColor);
+		volData.udfScale = neb.getUDFScale();
+		volData.emissiveSpreadFactor = neb.getEmissiveSpread();
+		volData.emissiveIntensity = neb.getEmissiveIntensity();
+		volData.emissiveFalloff = neb.getEmissiveFalloff();
+		volData.henyeyGreensteinCoeff = neb.getHenyeyGreensteinCoeff();
+		volData.directionalLightSampleSteps = neb.getGlobalLightSteps();
+		volData.directionalLightStepSize = neb.getGlobalLightStepsize();
+		auto noiseColor = neb.getNoiseColor();
+		volData.noiseColor[0] = std::get<0>(noiseColor);
+		volData.noiseColor[1] = std::get<1>(noiseColor);
+		volData.noiseColor[2] = std::get<2>(noiseColor);
+		auto noiseScale = neb.getNoiseColorScale();
+		volData.noiseColorScale1 = std::get<0>(noiseScale);
+		volData.noiseColorScale2 = std::get<1>(noiseScale);
+		volData.noiseColorIntensity = neb.getNoiseColorIntensity();
+		volData.aspect = gr_screen.clip_aspect;
+		volData.fov = g3_get_hfov(Proj_fov);
+	}
+
+	// Compute shader flags for volumetric fog variants
+	unsigned int volFogFlags = 0;
+	if (neb.getEdgeSmoothing()) {
+		volFogFlags |= SDR_FLAG_VOLUMETRICS_DO_EDGE_SMOOTHING;
+	}
+	if (noiseActive) {
+		volFogFlags |= SDR_FLAG_VOLUMETRICS_NOISE;
+	}
+
+	// We need to use a custom descriptor write because the volumetric shader uses sampler3D
+	// at bindings 5 and 6, which differs from the default drawFullscreenTriangle fallbacks (sampler2D).
+	// So we replicate the drawFullscreenTriangle pattern but customize the material set.
+
+	PipelineConfig config;
+	config.shaderType = SDR_TYPE_VOLUMETRIC_FOG;
+	config.shaderFlags = volFogFlags;
+	config.vertexLayoutHash = 0;
+	config.primitiveType = PRIM_TYPE_TRIS;
+	config.depthMode = ZBUFFER_TYPE_NONE;
+	config.blendMode = ALPHA_BLEND_NONE;
+	config.cullEnabled = false;
+	config.depthWriteEnabled = false;
+	config.renderPass = m_fogRenderPass;
+
+	vertex_layout emptyLayout;
+	vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+	if (!pipeline) {
+		// Undo the mapping made above before bailing out.
+		m_ctx->memoryManager->unmapMemory(m_ctx->scratchUBOAlloc);
+		m_ctx->scratchUBOMapped = nullptr;
+		return;
+	}
+
+	vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout();
+
+	// Begin render pass
+	vk::RenderPassBeginInfo rpBegin;
+	rpBegin.renderPass = m_fogRenderPass;
+	rpBegin.framebuffer = m_fogFramebuffer;
+	rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+	rpBegin.renderArea.extent = m_ctx->sceneExtent;
+
+	cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+	cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+	vk::Viewport viewport;
+	viewport.x = 0.0f;
+	viewport.y = 0.0f;
+	viewport.width = static_cast(m_ctx->sceneExtent.width);
+	viewport.height = static_cast(m_ctx->sceneExtent.height);
+	viewport.minDepth = 0.0f;
+	viewport.maxDepth = 1.0f;
+	cmd.setViewport(0, viewport);
+
+	vk::Rect2D scissor;
+	scissor.offset = vk::Offset2D(0, 0);
+	scissor.extent = m_ctx->sceneExtent;
+	cmd.setScissor(0, scissor);
+
+	DescriptorWriter writer;
+	writer.reset(m_ctx->device, descriptorMgr->getFallbacks());
+
+	// Set 1: Material
+	vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+	Verify(materialSet);
+	writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material));
+	{
+		std::array texArrayInfos;
+		texArrayInfos.fill(descriptorMgr->getFallbacks().texture2D);
+		texArrayInfos[0] = {m_ctx->linearSampler, m_gbuffer->compositeView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+		texArrayInfos[1] = {m_ctx->mipmapSampler, m_emissiveMipmappedFullView, vk::ImageLayout::eShaderReadOnlyOptimal};
+		writer.setImageArray(MaterialBinding::TextureArray, texArrayInfos);
+	}
+	writer.setImage(MaterialBinding::DepthMap, {m_ctx->linearSampler, m_sceneDepthCopy->view, vk::ImageLayout::eShaderReadOnlyOptimal});
+	// Binding 5: 3D volume texture (reuses SceneColor slot)
+	writer.setImage(MaterialBinding::SceneColor, {m_ctx->linearSampler, volSlot->imageView, vk::ImageLayout::eShaderReadOnlyOptimal});
+	// Binding 6: 3D noise texture (or fallback 3D)
+	{
+		auto noiseInfo = descriptorMgr->getFallbacks().texture3D;
+		noiseInfo.sampler = m_ctx->linearSampler;
+		if (noiseSlot && noiseSlot->imageView) {
+			noiseInfo.imageView = noiseSlot->imageView;
+		}
+		writer.setImage(MaterialBinding::DistortionMap, noiseInfo);
+	}
+
+	// Set 2: PerDraw — volumetric fog UBO
+	vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+	writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw));
+	{
+		// Copy the volumetric data into the next free scratch-UBO slot and bind that slot.
+		Assertion(m_ctx->scratchUBOCursor < PostProcessContext::SCRATCH_UBO_MAX_SLOTS, "Fog UBO slot overflow!");
+		uint32_t slotOffset = m_ctx->scratchUBOCursor * static_cast(PostProcessContext::SCRATCH_UBO_SLOT_SIZE);
+		memcpy(static_cast(m_ctx->scratchUBOMapped) + slotOffset, &volData, sizeof(volData));
+		m_ctx->scratchUBOCursor++;
+		writer.setBuffer(PerDrawBinding::GenericData, {m_ctx->scratchUBO, slotOffset, PostProcessContext::SCRATCH_UBO_SLOT_SIZE});
+	}
+	writer.flush();
+
+	// Bind descriptor sets and draw
+	cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout,
+		static_cast(DescriptorSetIndex::Material),
+		{materialSet, perDrawSet}, {});
+
+	cmd.draw(3, 1, 0, 0);
+	cmd.endRenderPass();
+
+	// Scene color is now in eColorAttachmentOptimal (fog render pass finalLayout)
+
+	m_ctx->memoryManager->unmapMemory(m_ctx->scratchUBOAlloc);
+	m_ctx->scratchUBOMapped = nullptr;
+}
+ +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingGBuffer.cpp b/code/graphics/vulkan/VulkanPostProcessingGBuffer.cpp new file mode 100644 index 00000000000..64678db4a99 --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingGBuffer.cpp @@ -0,0 +1,349 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "VulkanRenderer.h" +#include "lighting/lighting_profiles.h" +#include "lighting/lighting.h" +#include "nebula/neb.h" +#include "nebula/volumetrics.h" +#include "mission/missionparse.h" + +extern float Sun_spot; +extern int Game_subspace_effect; +extern SCP_vector Lights; +extern int Num_lights; + + +namespace graphics::vulkan { + + +// ===== G-Buffer (Deferred Lighting) Implementation ===== + +vk::RenderPass VulkanDeferredGBuffer::createGbufRenderPass(const GbufRenderPassConfig& config) +{ + // All G-buffer variants share formats; without composite only the first 5 are used + static constexpr std::array COLOR_FORMATS = {{ + GBUF_FORMAT_COLOR, + GBUF_FORMAT_POSITION, + GBUF_FORMAT_NORMAL, + GBUF_FORMAT_SPECULAR, + GBUF_FORMAT_EMISSIVE, + GBUF_FORMAT_COMPOSITE, + }}; + + const uint32_t colorCount = config.includeComposite + ? 
GBUF_COLOR_ATTACHMENT_COUNT : MSAA_COLOR_ATTACHMENT_COUNT; + const uint32_t depthIndex = colorCount; + const uint32_t totalAttachments = colorCount + 1; + + // Max 6 color + 1 depth = 7 attachments + std::array attachments; + for (uint32_t i = 0; i < colorCount; ++i) { + attachments[i].format = COLOR_FORMATS[i]; + attachments[i].samples = config.samples; + attachments[i].loadOp = config.colorLoadOp; + attachments[i].storeOp = vk::AttachmentStoreOp::eStore; + attachments[i].stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + attachments[i].stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + attachments[i].initialLayout = config.colorInitialLayout; + attachments[i].finalLayout = config.colorFinalLayout; + } + + // Depth — stencil ops mirror the depth loadOp + attachments[depthIndex].format = m_ctx->depthFormat; + attachments[depthIndex].samples = config.samples; + attachments[depthIndex].loadOp = config.depthLoadOp; + attachments[depthIndex].storeOp = vk::AttachmentStoreOp::eStore; + attachments[depthIndex].stencilLoadOp = config.depthLoadOp; + attachments[depthIndex].stencilStoreOp = + (config.depthLoadOp == vk::AttachmentLoadOp::eDontCare) + ? 
vk::AttachmentStoreOp::eDontCare + : vk::AttachmentStoreOp::eStore; + attachments[depthIndex].initialLayout = config.depthInitialLayout; + attachments[depthIndex].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + std::array colorRefs; + for (uint32_t i = 0; i < colorCount; ++i) { + colorRefs[i].attachment = i; + colorRefs[i].layout = vk::ImageLayout::eColorAttachmentOptimal; + } + + vk::AttachmentReference depthRef; + depthRef.attachment = depthIndex; + depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = colorCount; + subpass.pColorAttachments = colorRefs.data(); + subpass.pDepthStencilAttachment = &depthRef; + + vk::SubpassDependency dependency; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + + if (config.useResolveDependency) { + // MSAA resolve: previous pass read these textures as shader inputs + dependency.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader; + dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.srcAccessMask = vk::AccessFlagBits::eShaderRead; + dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentWrite; + } else { + // Standard G-buffer: previous pass may have done transfers (copies) + dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests + | vk::PipelineStageFlagBits::eTransfer; + dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead + | vk::AccessFlagBits::eTransferWrite; + dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite + | 
vk::AccessFlagBits::eDepthStencilAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentRead; + } + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = totalAttachments; + rpInfo.pAttachments = attachments.data(); + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dependency; + + return m_ctx->device.createRenderPass(rpInfo); +} + +vk::Framebuffer VulkanDeferredGBuffer::createGbufFramebuffer( + vk::RenderPass renderPass, bool includeComposite, bool useMsaaImages) +{ + // Attachment order: color, position, normal, specular, emissive, [composite], depth + std::array views; + uint32_t count = 0; + + if (useMsaaImages) { + views[count++] = m_msaaColor.view; + views[count++] = m_msaaPosition.view; + views[count++] = m_msaaNormal.view; + views[count++] = m_msaaSpecular.view; + views[count++] = m_msaaEmissive.view; + } else { + views[count++] = m_sceneColor->view; + views[count++] = m_gbufPosition.view; + views[count++] = m_gbufNormal.view; + views[count++] = m_gbufSpecular.view; + views[count++] = m_gbufEmissive.view; + } + + if (includeComposite) { + views[count++] = m_gbufComposite.view; + } + + views[count++] = useMsaaImages ? 
m_msaaDepthView : m_sceneDepth->view; + + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = renderPass; + fbInfo.attachmentCount = count; + fbInfo.pAttachments = views.data(); + fbInfo.width = m_ctx->sceneExtent.width; + fbInfo.height = m_ctx->sceneExtent.height; + fbInfo.layers = 1; + + return m_ctx->device.createFramebuffer(fbInfo); +} + +bool VulkanDeferredGBuffer::init(PostProcessContext& ctx, const RenderTarget& sceneColor, + const RenderTarget& sceneDepth) +{ + m_ctx = &ctx; + m_sceneColor = &sceneColor; + m_sceneDepth = &sceneDepth; + + if (m_gbufInitialized) { + return true; + } + + const uint32_t w = m_ctx->sceneExtent.width; + const uint32_t h = m_ctx->sceneExtent.height; + const vk::ImageUsageFlags gbufUsage = + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled + | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst; + + // Create G-buffer images (position, normal, specular, emissive, composite) + struct GbufTarget { + RenderTarget* target; + vk::Format format; + const char* name; + }; + + std::array targets = {{ + {&m_gbufPosition, GBUF_FORMAT_POSITION, "position"}, + {&m_gbufNormal, GBUF_FORMAT_NORMAL, "normal"}, + {&m_gbufSpecular, GBUF_FORMAT_SPECULAR, "specular"}, + {&m_gbufEmissive, GBUF_FORMAT_EMISSIVE, "emissive"}, + {&m_gbufComposite, GBUF_FORMAT_COMPOSITE, "composite"}, + }}; + + for (auto& t : targets) { + if (!m_ctx->createImage(w, h, t.format, gbufUsage, vk::ImageAspectFlagBits::eColor, + t.target->image, t.target->view, t.target->allocation)) { + mprintf(("VulkanPostProcessor: Failed to create G-buffer %s image!\n", t.name)); + shutdown(); + return false; + } + t.target->format = t.format; + t.target->width = w; + t.target->height = h; + } + + // Create samplable copy of G-buffer normal (for decal angle rejection) + { + vk::ImageUsageFlags copyUsage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst; + if (!m_ctx->createImage(w, h, GBUF_FORMAT_NORMAL, copyUsage, 
+ vk::ImageAspectFlagBits::eColor, + m_gbufNormalCopy.image, m_gbufNormalCopy.view, m_gbufNormalCopy.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create G-buffer normal copy!\n")); + shutdown(); + return false; + } + m_gbufNormalCopy.format = GBUF_FORMAT_NORMAL; + m_gbufNormalCopy.width = w; + m_gbufNormalCopy.height = h; + } + + // Create G-buffer render pass (eClear) — 6 color + depth + try { + m_gbufRenderPass = createGbufRenderPass({ + true, vk::SampleCountFlagBits::e1, + vk::AttachmentLoadOp::eClear, vk::AttachmentLoadOp::eClear, + vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eUndefined, + }); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create G-buffer render pass: %s\n", e.what())); + shutdown(); + return false; + } + + // Create G-buffer render pass (eLoad) — for resuming after mid-pass copies + try { + m_gbufRenderPassLoad = createGbufRenderPass({ + true, vk::SampleCountFlagBits::e1, + vk::AttachmentLoadOp::eLoad, vk::AttachmentLoadOp::eLoad, + vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eDepthStencilAttachmentOptimal, + }); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create G-buffer load render pass: %s\n", e.what())); + shutdown(); + return false; + } + + // Create G-buffer framebuffer (6 color + depth) + try { + m_gbufFramebuffer = createGbufFramebuffer(m_gbufRenderPass, true, false); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create G-buffer framebuffer: %s\n", e.what())); + shutdown(); + return false; + } + + m_gbufInitialized = true; + mprintf(("VulkanPostProcessor: G-buffer initialized (%ux%u, 6 color + depth)\n", w, h)); + return true; +} + +void VulkanDeferredGBuffer::shutdown() +{ + if (!m_ctx || !m_ctx->device) { + return; + } + + if (m_gbufFramebuffer) { + m_ctx->device.destroyFramebuffer(m_gbufFramebuffer); + 
m_gbufFramebuffer = nullptr; + } + if (m_gbufRenderPassLoad) { + m_ctx->device.destroyRenderPass(m_gbufRenderPassLoad); + m_gbufRenderPassLoad = nullptr; + } + if (m_gbufRenderPass) { + m_ctx->device.destroyRenderPass(m_gbufRenderPass); + m_gbufRenderPass = nullptr; + } + + std::array gbufTargets = { + &m_gbufPosition, &m_gbufNormal, &m_gbufSpecular, + &m_gbufEmissive, &m_gbufComposite, &m_gbufNormalCopy, + }; + for (auto* rt : gbufTargets) { + if (rt->view) { + m_ctx->device.destroyImageView(rt->view); + rt->view = nullptr; + } + if (rt->image) { + m_ctx->device.destroyImage(rt->image); + rt->image = nullptr; + } + if (rt->allocation.isValid()) { + m_ctx->memoryManager->freeAllocation(rt->allocation); + } + } + + m_gbufInitialized = false; +} + +void VulkanDeferredGBuffer::transitionForResume(vk::CommandBuffer cmd) +{ + if (!m_gbufInitialized) { + return; + } + + // After ending the G-buffer render pass, color attachments 1-5 are in + // eShaderReadOnlyOptimal (from finalLayout). The eLoad pass expects + // eColorAttachmentOptimal. Transition them in a single barrier batch. 
+ std::array gbufImages = { + m_gbufPosition.image, + m_gbufNormal.image, + m_gbufSpecular.image, + m_gbufEmissive.image, + m_gbufComposite.image, + }; + + std::array barriers; + for (size_t i = 0; i < gbufImages.size(); ++i) { + barriers[i].srcAccessMask = {}; + barriers[i].dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + barriers[i].oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[i].newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[i].image = gbufImages[i]; + barriers[i].subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barriers[i].subresourceRange.baseMipLevel = 0; + barriers[i].subresourceRange.levelCount = 1; + barriers[i].subresourceRange.baseArrayLayer = 0; + barriers[i].subresourceRange.layerCount = 1; + } + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, nullptr, nullptr, barriers); +} + +void VulkanDeferredGBuffer::copyNormal(vk::CommandBuffer cmd) +{ + // Called mid-scene, outside a render pass. + // Copies G-buffer normal → normal copy so decal shader can sample it for angle rejection. + // G-buffer normal is in eShaderReadOnlyOptimal (from the ended G-buffer render pass). + // Normal goes back to eShaderReadOnlyOptimal (transitionGbufForResume handles the rest). 
+ copyImageToImage(cmd, + m_gbufNormal.image, vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal, + m_gbufNormalCopy.image, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + m_ctx->sceneExtent); +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingLDR.cpp b/code/graphics/vulkan/VulkanPostProcessingLDR.cpp new file mode 100644 index 00000000000..185596212cf --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingLDR.cpp @@ -0,0 +1,542 @@ +#include "VulkanPostProcessing.h" + + +#include "VulkanRenderer.h" +#include "VulkanDescriptorManager.h" +#include "graphics/util/uniform_structs.h" +#include "graphics/post_processing.h" +#include "graphics/grinternal.h" +#include "graphics/2d.h" +#include "io/timer.h" +#include "lighting/lighting_profiles.h" +#include "lighting/lighting.h" +#include "math/floating.h" +#include "math/vecmat.h" +#include "render/3d.h" +#include "nebula/neb.h" +#include "nebula/volumetrics.h" +#include "mission/missionparse.h" + +extern float Sun_spot; +extern int Game_subspace_effect; +extern SCP_vector Lights; +extern int Num_lights; + + +namespace graphics::vulkan { + + +// ===== LDR Targets + FXAA Pipeline Implementation ===== + +bool VulkanPostProcessor::initLDRTargets() +{ + // Create Scene_ldr (RGBA8, full resolution) — tonemapped LDR output + if (!createImage(m_ctx.sceneExtent.width, m_ctx.sceneExtent.height, LDR_COLOR_FORMAT, + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eColor, + m_sceneLdr.image, m_sceneLdr.view, m_sceneLdr.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create Scene_ldr image!\n")); + return false; + } + m_sceneLdr.format = LDR_COLOR_FORMAT; + m_sceneLdr.width = m_ctx.sceneExtent.width; + m_sceneLdr.height = m_ctx.sceneExtent.height; + + // Create Scene_luminance (RGBA8, full resolution) — LDR with luma in alpha for FXAA + if 
(!createImage(m_ctx.sceneExtent.width, m_ctx.sceneExtent.height, LDR_COLOR_FORMAT, + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eColor, + m_sceneLuminance.image, m_sceneLuminance.view, m_sceneLuminance.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create Scene_luminance image!\n")); + return false; + } + m_sceneLuminance.format = LDR_COLOR_FORMAT; + m_sceneLuminance.width = m_ctx.sceneExtent.width; + m_sceneLuminance.height = m_ctx.sceneExtent.height; + + // Create LDR render pass (color-only RGBA8, loadOp=eDontCare, finalLayout=eShaderReadOnlyOptimal) + { + vk::AttachmentDescription att; + att.format = LDR_COLOR_FORMAT; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eDontCare; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eUndefined; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + 
rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_ldrRenderPass = m_ctx.device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create LDR render pass: %s\n", e.what())); + return false; + } + } + + // Create framebuffers + { + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_ldrRenderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &m_sceneLdr.view; + fbInfo.width = m_ctx.sceneExtent.width; + fbInfo.height = m_ctx.sceneExtent.height; + fbInfo.layers = 1; + + try { + m_sceneLdrFB = m_ctx.device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create Scene_ldr framebuffer: %s\n", e.what())); + return false; + } + + fbInfo.pAttachments = &m_sceneLuminance.view; + try { + m_sceneLuminanceFB = m_ctx.device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create Scene_luminance framebuffer: %s\n", e.what())); + return false; + } + } + + // Create LDR load render pass (loadOp=eLoad for additive blending onto existing content) + { + vk::AttachmentDescription att; + att.format = LDR_COLOR_FORMAT; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eLoad; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + 
vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_ldrLoadRenderPass = m_ctx.device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create LDR load render pass: %s\n", e.what())); + return false; + } + } + + m_ldrInitialized = true; + mprintf(("VulkanPostProcessor: LDR targets initialized (%ux%u, RGBA8)\n", + m_ctx.sceneExtent.width, m_ctx.sceneExtent.height)); + return true; +} + +void VulkanPostProcessor::shutdownLDRTargets() +{ + if (!m_ldrInitialized) { + return; + } + + if (m_sceneLuminanceFB) { + m_ctx.device.destroyFramebuffer(m_sceneLuminanceFB); + m_sceneLuminanceFB = nullptr; + } + if (m_sceneLdrFB) { + m_ctx.device.destroyFramebuffer(m_sceneLdrFB); + m_sceneLdrFB = nullptr; + } + if (m_ldrLoadRenderPass) { + m_ctx.device.destroyRenderPass(m_ldrLoadRenderPass); + m_ldrLoadRenderPass = nullptr; + } + if (m_ldrRenderPass) { + m_ctx.device.destroyRenderPass(m_ldrRenderPass); + m_ldrRenderPass = nullptr; + } + + // Scene_luminance + if (m_sceneLuminance.view) { + m_ctx.device.destroyImageView(m_sceneLuminance.view); + m_sceneLuminance.view = nullptr; + } + if (m_sceneLuminance.image) { + m_ctx.device.destroyImage(m_sceneLuminance.image); + m_sceneLuminance.image = nullptr; + } + if 
(m_sceneLuminance.allocation.isValid()) { + m_ctx.memoryManager->freeAllocation(m_sceneLuminance.allocation); + } + + // Scene_ldr + if (m_sceneLdr.view) { + m_ctx.device.destroyImageView(m_sceneLdr.view); + m_sceneLdr.view = nullptr; + } + if (m_sceneLdr.image) { + m_ctx.device.destroyImage(m_sceneLdr.image); + m_sceneLdr.image = nullptr; + } + if (m_sceneLdr.allocation.isValid()) { + m_ctx.memoryManager->freeAllocation(m_sceneLdr.allocation); + } + + m_ldrInitialized = false; +} + +void VulkanPostProcessor::executeTonemap(vk::CommandBuffer cmd) +{ + if (!m_ldrInitialized) { + return; + } + + namespace ltp = lighting_profiles; + + // Map bloom UBO for the tonemapping draw's UBO slot + m_ctx.scratchUBOMapped = m_ctx.memoryManager->mapMemory(m_ctx.scratchUBOAlloc); + if (!m_ctx.scratchUBOMapped) { + return; + } + + // Reset cursor if bloom didn't run this frame (bloom resets to 0 when it runs) + if (gr_bloom_intensity() <= 0 || !m_bloom.isInitialized()) { + m_ctx.scratchUBOCursor = 0; + } + + // Build tonemapping data directly from lighting profiles + graphics::generic_data::tonemapping_data tmData; + memset(&tmData, 0, sizeof(tmData)); + auto ppc = ltp::current_piecewise_intermediates(); + tmData.exposure = ltp::current_exposure(); + tmData.tonemapper = static_cast(ltp::current_tonemapper()); + tmData.x0 = ppc.x0; + tmData.y0 = ppc.y0; + tmData.x1 = ppc.x1; + tmData.toe_B = ppc.toe_B; + tmData.toe_lnA = ppc.toe_lnA; + tmData.sh_B = ppc.sh_B; + tmData.sh_lnA = ppc.sh_lnA; + tmData.sh_offsetX = ppc.sh_offsetX; + tmData.sh_offsetY = ppc.sh_offsetY; + + // HDR scene → Scene_ldr via tonemapping shader + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLdrFB, m_ctx.sceneExtent, + SDR_TYPE_POST_PROCESS_TONEMAPPING, + m_sceneColor.view, m_ctx.linearSampler, + &tmData, sizeof(tmData), + ALPHA_BLEND_NONE); + + m_ctx.memoryManager->unmapMemory(m_ctx.scratchUBOAlloc); + m_ctx.scratchUBOMapped = nullptr; +} + +void VulkanPostProcessor::executeFXAA(vk::CommandBuffer cmd) +{ 
+ if (!m_ldrInitialized || !gr_is_fxaa_mode(Gr_aa_mode)) { + return; + } + + m_ctx.scratchUBOMapped = m_ctx.memoryManager->mapMemory(m_ctx.scratchUBOAlloc); + if (!m_ctx.scratchUBOMapped) { + return; + } + + // FXAA prepass: Scene_ldr → Scene_luminance (compute luma in alpha) + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLuminanceFB, m_ctx.sceneExtent, + SDR_TYPE_POST_PROCESS_FXAA_PREPASS, + m_sceneLdr.view, m_ctx.linearSampler, + nullptr, 0, + ALPHA_BLEND_NONE); + + // FXAA main pass: Scene_luminance → Scene_ldr + graphics::generic_data::fxaa_data fxaaData; + fxaaData.rt_w = static_cast(m_ctx.sceneExtent.width); + fxaaData.rt_h = static_cast(m_ctx.sceneExtent.height); + fxaaData.pad[0] = 0.0f; + fxaaData.pad[1] = 0.0f; + + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLdrFB, m_ctx.sceneExtent, + SDR_TYPE_POST_PROCESS_FXAA, + m_sceneLuminance.view, m_ctx.linearSampler, + &fxaaData, sizeof(fxaaData), + ALPHA_BLEND_NONE); + + m_ctx.memoryManager->unmapMemory(m_ctx.scratchUBOAlloc); + m_ctx.scratchUBOMapped = nullptr; +} + +bool VulkanPostProcessor::executePostEffects(vk::CommandBuffer cmd) +{ + m_postEffectsApplied = false; + + if (!m_ldrInitialized || !graphics::Post_processing_manager) { + return false; + } + + const auto& postEffects = graphics::Post_processing_manager->getPostEffects(); + if (postEffects.empty()) { + return false; + } + + // Compute effect flags from current state + int effectFlags = 0; + for (size_t idx = 0; idx < postEffects.size(); idx++) { + if (postEffects[idx].always_on || (postEffects[idx].intensity != postEffects[idx].default_intensity)) { + effectFlags |= (1 << idx); + } + } + + if (effectFlags == 0) { + return false; + } + + m_ctx.scratchUBOMapped = m_ctx.memoryManager->mapMemory(m_ctx.scratchUBOAlloc); + if (!m_ctx.scratchUBOMapped) { + return false; + } + + // Build the extended post_data UBO with effectFlags appended + struct PostEffectsUBOData { + graphics::generic_data::post_data base; + int effectFlags; + int 
pad[3]; + }; + + PostEffectsUBOData uboData; + memset(&uboData, 0, sizeof(uboData)); + uboData.base.timer = static_cast((timer_get_milliseconds() % 100) + 1); + uboData.effectFlags = effectFlags; + + // Fill effect parameters + for (size_t idx = 0; idx < postEffects.size(); idx++) { + if (!(effectFlags & (1 << idx))) { + continue; + } + float value = postEffects[idx].intensity; + switch (postEffects[idx].uniform_type) { + case graphics::PostEffectUniformType::NoiseAmount: + uboData.base.noise_amount = value; + break; + case graphics::PostEffectUniformType::Saturation: + uboData.base.saturation = value; + break; + case graphics::PostEffectUniformType::Brightness: + uboData.base.brightness = value; + break; + case graphics::PostEffectUniformType::Contrast: + uboData.base.contrast = value; + break; + case graphics::PostEffectUniformType::FilmGrain: + uboData.base.film_grain = value; + break; + case graphics::PostEffectUniformType::TvStripes: + uboData.base.tv_stripes = value; + break; + case graphics::PostEffectUniformType::Cutoff: + uboData.base.cutoff = value; + break; + case graphics::PostEffectUniformType::Dither: + uboData.base.dither = value; + break; + case graphics::PostEffectUniformType::Tint: + uboData.base.tint = postEffects[idx].rgb; + break; + case graphics::PostEffectUniformType::CustomEffectVEC3A: + uboData.base.custom_effect_vec3_a = postEffects[idx].rgb; + break; + case graphics::PostEffectUniformType::CustomEffectFloatA: + uboData.base.custom_effect_float_a = value; + break; + case graphics::PostEffectUniformType::CustomEffectVEC3B: + uboData.base.custom_effect_vec3_b = postEffects[idx].rgb; + break; + case graphics::PostEffectUniformType::CustomEffectFloatB: + uboData.base.custom_effect_float_b = value; + break; + default: + break; + } + } + + // Post-effects: Scene_ldr → Scene_luminance (reusing luminance target as temp) + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLuminanceFB, m_ctx.sceneExtent, + SDR_TYPE_POST_PROCESS_MAIN, + 
m_sceneLdr.view, m_ctx.linearSampler, + &uboData, sizeof(uboData), + ALPHA_BLEND_NONE, + static_cast(effectFlags)); + + m_ctx.memoryManager->unmapMemory(m_ctx.scratchUBOAlloc); + m_ctx.scratchUBOMapped = nullptr; + + m_postEffectsApplied = true; + return true; +} + +void VulkanPostProcessor::executeLightshafts(vk::CommandBuffer cmd) +{ + if (!m_ldrInitialized || !graphics::Post_processing_manager) { + return; + } + + if (Game_subspace_effect || !gr_sunglare_enabled() || !gr_lightshafts_enabled()) { + return; + } + + // Find a global light with glare facing the camera + int n_lights = light_get_global_count(); + float sun_x = 0.0f, sun_y = 0.0f; + bool found = false; + + for (int idx = 0; idx < n_lights; idx++) { + vec3d light_dir; + light_get_global_dir(&light_dir, idx); + + if (!light_has_glare(idx)) { + continue; + } + + float dot = vm_vec_dot(&light_dir, &Eye_matrix.vec.fvec); + if (dot > 0.7f) { + sun_x = asinf_safe(vm_vec_dot(&light_dir, &Eye_matrix.vec.rvec)) / PI * 1.5f + 0.5f; + sun_y = asinf_safe(vm_vec_dot(&light_dir, &Eye_matrix.vec.uvec)) / PI * 1.5f * gr_screen.clip_aspect + 0.5f; + found = true; + break; + } + } + + if (!found) { + return; + } + + // Transition scene depth from eDepthStencilAttachmentOptimal to eShaderReadOnlyOptimal for sampling + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.oldLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = m_sceneDepth.image; + barrier.subresourceRange.aspectMask = imageAspectFromFormat(m_ctx.depthFormat); + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; 
+ + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eFragmentShader, + {}, {}, {}, barrier); + } + + // Transition Scene_ldr to eColorAttachmentOptimal for loadOp=eLoad render pass + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = m_sceneLdr.image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, {}, {}, barrier); + } + + // Build lightshaft UBO data + auto& ls_params = graphics::Post_processing_manager->getLightshaftParams(); + + graphics::generic_data::lightshaft_data lsData; + lsData.sun_pos.x = sun_x; + lsData.sun_pos.y = sun_y; + lsData.density = ls_params.density; + lsData.weight = ls_params.weight; + lsData.falloff = ls_params.falloff; + lsData.intensity = Sun_spot * ls_params.intensity; + lsData.cp_intensity = Sun_spot * ls_params.cpintensity; + lsData.pad[0] = 0.0f; + + m_ctx.scratchUBOMapped = m_ctx.memoryManager->mapMemory(m_ctx.scratchUBOAlloc); + if (!m_ctx.scratchUBOMapped) { + return; + } + + // Additive blend lightshafts onto Scene_ldr + drawFullscreenTriangle(cmd, m_ldrLoadRenderPass, + m_sceneLdrFB, m_ctx.sceneExtent, + SDR_TYPE_POST_PROCESS_LIGHTSHAFTS, + m_sceneDepth.view, m_ctx.linearSampler, + &lsData, sizeof(lsData), + ALPHA_BLEND_ADDITIVE); + + 
m_ctx.memoryManager->unmapMemory(m_ctx.scratchUBOAlloc); + m_ctx.scratchUBOMapped = nullptr; +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingLighting.cpp b/code/graphics/vulkan/VulkanPostProcessingLighting.cpp new file mode 100644 index 00000000000..5f28145b79b --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingLighting.cpp @@ -0,0 +1,716 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "gr_vulkan.h" +#include "VulkanRenderer.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" +#include "VulkanPipeline.h" +#include "VulkanDescriptorManager.h" +#include "graphics/util/uniform_structs.h" +#include "graphics/util/primitives.h" +#include "graphics/grinternal.h" +#include "graphics/light.h" +#include "graphics/matrix.h" +#include "graphics/shadows.h" +#include "graphics/2d.h" +#include "bmpman/bmpman.h" +#include "lighting/lighting_profiles.h" +#include "lighting/lighting.h" +#include "math/vecmat.h" +#include "render/3d.h" +#include "tracing/tracing.h" +#include "nebula/neb.h" +#include "mission/missionparse.h" + +extern float Sun_spot; +extern int Game_subspace_effect; +extern SCP_vector Lights; +extern int Num_lights; + + +namespace graphics::vulkan { + + +// ===== Light Accumulation (Deferred Lighting) ===== + +void VulkanDeferredLighting::init(PostProcessContext& ctx, const RenderTarget& sceneColor, + const VulkanDeferredGBuffer& gbuffer, const VulkanShadowMap& shadow) +{ + m_ctx = &ctx; + m_sceneColor = &sceneColor; + m_gbuffer = &gbuffer; + m_shadow = &shadow; +} + +bool VulkanDeferredLighting::initLightVolumes() +{ + if (m_lightVolumesInitialized) { + return true; + } + + // Generate sphere mesh (16 rings x 16 segments) + { + auto mesh = graphics::util::generate_sphere_mesh(16, 16); + m_sphereMesh.vertexCount = mesh.vertex_count; + m_sphereMesh.indexCount = mesh.index_count; + + // Create VBO + vk::BufferCreateInfo vboInfo; + vboInfo.size = mesh.vertices.size() * sizeof(float); + 
vboInfo.usage = vk::BufferUsageFlagBits::eVertexBuffer; + vboInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_sphereMesh.vbo = m_ctx->device.createBuffer(vboInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create sphere VBO: %s\n", e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateBufferMemory(m_sphereMesh.vbo, MemoryUsage::CpuToGpu, m_sphereMesh.vboAlloc)) { + m_ctx->device.destroyBuffer(m_sphereMesh.vbo); + m_sphereMesh.vbo = nullptr; + return false; + } + + auto* mapped = m_ctx->memoryManager->mapMemory(m_sphereMesh.vboAlloc); + if (mapped) { + memcpy(mapped, mesh.vertices.data(), mesh.vertices.size() * sizeof(float)); + m_ctx->memoryManager->unmapMemory(m_sphereMesh.vboAlloc); + } + + // Create IBO + vk::BufferCreateInfo iboInfo; + iboInfo.size = mesh.indices.size() * sizeof(ushort); + iboInfo.usage = vk::BufferUsageFlagBits::eIndexBuffer; + iboInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_sphereMesh.ibo = m_ctx->device.createBuffer(iboInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create sphere IBO: %s\n", e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateBufferMemory(m_sphereMesh.ibo, MemoryUsage::CpuToGpu, m_sphereMesh.iboAlloc)) { + m_ctx->device.destroyBuffer(m_sphereMesh.ibo); + m_sphereMesh.ibo = nullptr; + return false; + } + + mapped = m_ctx->memoryManager->mapMemory(m_sphereMesh.iboAlloc); + if (mapped) { + memcpy(mapped, mesh.indices.data(), mesh.indices.size() * sizeof(ushort)); + m_ctx->memoryManager->unmapMemory(m_sphereMesh.iboAlloc); + } + } + + // Generate cylinder mesh (16 segments) + { + auto mesh = graphics::util::generate_cylinder_mesh(16); + m_cylinderMesh.vertexCount = mesh.vertex_count; + m_cylinderMesh.indexCount = mesh.index_count; + + vk::BufferCreateInfo vboInfo; + vboInfo.size = mesh.vertices.size() * sizeof(float); + vboInfo.usage = vk::BufferUsageFlagBits::eVertexBuffer; + 
vboInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_cylinderMesh.vbo = m_ctx->device.createBuffer(vboInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create cylinder VBO: %s\n", e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateBufferMemory(m_cylinderMesh.vbo, MemoryUsage::CpuToGpu, m_cylinderMesh.vboAlloc)) { + m_ctx->device.destroyBuffer(m_cylinderMesh.vbo); + m_cylinderMesh.vbo = nullptr; + return false; + } + + auto* mapped = m_ctx->memoryManager->mapMemory(m_cylinderMesh.vboAlloc); + if (mapped) { + memcpy(mapped, mesh.vertices.data(), mesh.vertices.size() * sizeof(float)); + m_ctx->memoryManager->unmapMemory(m_cylinderMesh.vboAlloc); + } + + vk::BufferCreateInfo iboInfo; + iboInfo.size = mesh.indices.size() * sizeof(ushort); + iboInfo.usage = vk::BufferUsageFlagBits::eIndexBuffer; + iboInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_cylinderMesh.ibo = m_ctx->device.createBuffer(iboInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create cylinder IBO: %s\n", e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateBufferMemory(m_cylinderMesh.ibo, MemoryUsage::CpuToGpu, m_cylinderMesh.iboAlloc)) { + m_ctx->device.destroyBuffer(m_cylinderMesh.ibo); + m_cylinderMesh.ibo = nullptr; + return false; + } + + mapped = m_ctx->memoryManager->mapMemory(m_cylinderMesh.iboAlloc); + if (mapped) { + memcpy(mapped, mesh.indices.data(), mesh.indices.size() * sizeof(ushort)); + m_ctx->memoryManager->unmapMemory(m_cylinderMesh.iboAlloc); + } + } + + // Create deferred UBO for light data (per-frame, host-visible) + { + vk::BufferCreateInfo bufInfo; + bufInfo.size = DEFERRED_UBO_SIZE; + bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer; + bufInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_deferredUBO = m_ctx->device.createBuffer(bufInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to 
create deferred UBO: %s\n", e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateBufferMemory(m_deferredUBO, MemoryUsage::CpuToGpu, m_deferredUBOAlloc)) { + m_ctx->device.destroyBuffer(m_deferredUBO); + m_deferredUBO = nullptr; + return false; + } + } + + m_lightVolumesInitialized = true; + mprintf(("VulkanPostProcessor: Light volumes initialized (sphere: %u verts/%u idx, cylinder: %u verts/%u idx)\n", + m_sphereMesh.vertexCount, m_sphereMesh.indexCount, + m_cylinderMesh.vertexCount, m_cylinderMesh.indexCount)); + return true; +} + +void VulkanDeferredLighting::shutdown() +{ + if (!m_ctx || !m_ctx->device) { + return; + } + + auto destroyMesh = [&](LightVolumeMesh& mesh) { + if (mesh.vbo) { m_ctx->device.destroyBuffer(mesh.vbo); mesh.vbo = nullptr; } + if (mesh.vboAlloc.isValid()) { m_ctx->memoryManager->freeAllocation(mesh.vboAlloc); } + if (mesh.ibo) { m_ctx->device.destroyBuffer(mesh.ibo); mesh.ibo = nullptr; } + if (mesh.iboAlloc.isValid()) { m_ctx->memoryManager->freeAllocation(mesh.iboAlloc); } + mesh.vertexCount = 0; + mesh.indexCount = 0; + }; + + destroyMesh(m_sphereMesh); + destroyMesh(m_cylinderMesh); + + if (m_deferredUBO) { + m_ctx->device.destroyBuffer(m_deferredUBO); + m_deferredUBO = nullptr; + } + if (m_deferredUBOAlloc.isValid()) { + m_ctx->memoryManager->freeAllocation(m_deferredUBOAlloc); + } + + if (m_lightAccumFramebuffer) { + m_ctx->device.destroyFramebuffer(m_lightAccumFramebuffer); + m_lightAccumFramebuffer = nullptr; + } + if (m_lightAccumRenderPass) { + m_ctx->device.destroyRenderPass(m_lightAccumRenderPass); + m_lightAccumRenderPass = nullptr; + } + + m_lightVolumesInitialized = false; +} + +bool VulkanDeferredLighting::initLightAccumPass() +{ + // Light accumulation render pass: single RGBA16F color attachment + // loadOp=eLoad (preserves emissive copy), storeOp=eStore + // initialLayout=eColorAttachmentOptimal, finalLayout=eShaderReadOnlyOptimal + { + vk::AttachmentDescription att; + att.format = HDR_COLOR_FORMAT; + 
att.samples = vk::SampleCountFlagBits::e1;
+        att.loadOp = vk::AttachmentLoadOp::eLoad;
+        att.storeOp = vk::AttachmentStoreOp::eStore;
+        att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+        att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+        att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal;
+        att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+        vk::AttachmentReference colorRef;
+        colorRef.attachment = 0;
+        colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal;
+
+        vk::SubpassDescription subpass;
+        subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+        subpass.colorAttachmentCount = 1;
+        subpass.pColorAttachments = &colorRef;
+
+        // External -> subpass 0: earlier transfer writes and color-attachment writes
+        // must be visible before this pass reads or writes the attachment.
+        vk::SubpassDependency dep;
+        dep.srcSubpass = VK_SUBPASS_EXTERNAL;
+        dep.dstSubpass = 0;
+        dep.srcStageMask = vk::PipelineStageFlagBits::eTransfer
+                         | vk::PipelineStageFlagBits::eColorAttachmentOutput;
+        dep.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+                         | vk::PipelineStageFlagBits::eFragmentShader;
+        dep.srcAccessMask = vk::AccessFlagBits::eTransferWrite
+                          | vk::AccessFlagBits::eColorAttachmentWrite;
+        dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead
+                          | vk::AccessFlagBits::eColorAttachmentWrite
+                          | vk::AccessFlagBits::eShaderRead;
+
+        vk::RenderPassCreateInfo rpInfo;
+        rpInfo.attachmentCount = 1;
+        rpInfo.pAttachments = &att;
+        rpInfo.subpassCount = 1;
+        rpInfo.pSubpasses = &subpass;
+        rpInfo.dependencyCount = 1;
+        rpInfo.pDependencies = &dep;
+
+        try {
+            m_lightAccumRenderPass = m_ctx->device.createRenderPass(rpInfo);
+        } catch (const vk::SystemError& e) {
+            mprintf(("VulkanPostProcessor: Failed to create light accum render pass: %s\n", e.what()));
+            return false;
+        }
+    }
+
+    // Framebuffer using composite image as sole color attachment
+    {
+        std::array attachments = { m_gbuffer->compositeView() };
+
+        vk::FramebufferCreateInfo fbInfo;
+        fbInfo.renderPass = m_lightAccumRenderPass;
+        fbInfo.attachmentCount = static_cast(attachments.size());
+        fbInfo.pAttachments = attachments.data();
+        fbInfo.width = m_ctx->sceneExtent.width;
+        fbInfo.height = m_ctx->sceneExtent.height;
+        fbInfo.layers = 1;
+
+        try {
+            m_lightAccumFramebuffer = m_ctx->device.createFramebuffer(fbInfo);
+        } catch (const vk::SystemError& e) {
+            mprintf(("VulkanPostProcessor: Failed to create light accum framebuffer: %s\n", e.what()));
+            return false;
+        }
+    }
+
+    return true;
+}
+
+namespace ltp = lighting_profiles;
+
+// Zero-fills one deferred_light_data record at `dest` and writes the fields that
+// are common to every light type: type, diffuse color (r/g/b scaled by
+// intensity), shadows disabled, and source radius. In COCKPIT lighting mode the
+// intensity is first passed through the profile's cockpit intensity modifier.
+// Returns `dest` reinterpreted as the record so callers can fill in the
+// type-specific fields.
+// NOTE(review): the target types of the reinterpret_cast/static_cast below are
+// missing; they look lost in extraction — restore from the real source file.
+static graphics::deferred_light_data* prepare_light_uniforms(light& l, uint8_t* dest, const ltp::profile* lp)
+{
+    auto* light_data = reinterpret_cast(dest);
+    memset(light_data, 0, sizeof(graphics::deferred_light_data));
+
+    light_data->lightType = static_cast(l.type);
+
+    float intensity =
+        (Lighting_mode == lighting_mode::COCKPIT) ? lp->cockpit_light_intensity_modifier.handle(l.intensity) : l.intensity;
+
+    vec3d diffuse;
+    diffuse.xyz.x = l.r * intensity;
+    diffuse.xyz.y = l.g * intensity;
+    diffuse.xyz.z = l.b * intensity;
+
+    light_data->diffuseLightColor = diffuse;
+    light_data->enable_shadows = 0;
+    light_data->sourceRadius = l.source_radius;
+    return light_data;
+}
+
+void VulkanDeferredLighting::render(vk::CommandBuffer cmd)
+{
+    TRACE_SCOPE(tracing::ApplyLights);
+
+    if (!m_gbuffer->isInitialized()) {
+        return;
+    }
+
+    // Lazy-init light volumes and accumulation pass on first use
+    // NOTE(review): initLightVolumes() sets m_lightVolumesInitialized on success, so
+    // if initLightAccumPass() then fails, this block is skipped on later calls even
+    // though the accumulation render pass/framebuffer were never created — confirm.
+    if (!m_lightVolumesInitialized) {
+        if (!initLightVolumes() || !initLightAccumPass()) {
+            return;
+        }
+    }
+
+    auto* pipelineMgr = getPipelineManager();
+    auto* descriptorMgr = getDescriptorManager();
+    auto* bufferMgr = getBufferManager();
+    auto* texMgr = getTextureManager();
+
+    if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) {
+        return;
+    }
+
+    // Sort lights by type (same stable sort as OpenGL)
+    std::stable_sort(Lights.begin(), Lights.end(), light_compare_by_type);
+
+    // Categorize lights
+    SCP_vector full_frame_lights;
+    SCP_vector sphere_lights;
+    SCP_vector cylinder_lights;
+    for (auto& l : Lights) {
+        switch (l.type) {
+        case
Light_Type::Directional: + full_frame_lights.push_back(l); + break; + case Light_Type::Cone: + case Light_Type::Point: + sphere_lights.push_back(l); + break; + case Light_Type::Tube: + cylinder_lights.push_back(l); + break; + case Light_Type::Ambient: + break; + } + } + + // Add ambient light + { + light& l = full_frame_lights.emplace_back(); + memset(&l, 0, sizeof(light)); + vec3d ambient; + gr_get_ambient_light(&ambient); + l.r = ambient.xyz.x; + l.g = ambient.xyz.y; + l.b = ambient.xyz.z; + l.type = Light_Type::Ambient; + l.intensity = 1.f; + l.source_radius = 0.f; + } + + size_t total_lights = full_frame_lights.size() + sphere_lights.size() + cylinder_lights.size(); + if (total_lights == 0) { + return; + } + + // Map UBO and pack data + auto* uboMapped = static_cast(m_ctx->memoryManager->mapMemory(m_deferredUBOAlloc)); + if (!uboMapped) { + return; + } + + // Determine alignment requirement + uint32_t uboAlign = getRendererInstance()->getMinUniformBufferOffsetAlignment(); + auto alignUp = [uboAlign](uint32_t v) -> uint32_t { + return (v + uboAlign - 1) & ~(uboAlign - 1); + }; + + // Layout in UBO: + // [0]: deferred_global_data (header) + // [aligned offset 1..N]: deferred_light_data per light + // [aligned offset N+1..2N]: matrix_uniforms per light + uint32_t globalDataSize = alignUp(static_cast(sizeof(graphics::deferred_global_data))); + uint32_t lightDataSize = alignUp(static_cast(sizeof(graphics::deferred_light_data))); + uint32_t matrixDataSize = alignUp(static_cast(sizeof(graphics::matrix_uniforms))); + + uint32_t lightDataOffset = globalDataSize; + uint32_t matrixDataOffset = lightDataOffset + (static_cast(total_lights) * lightDataSize); + uint32_t totalUBOSize = matrixDataOffset + (static_cast(total_lights) * matrixDataSize); + + if (totalUBOSize > DEFERRED_UBO_SIZE) { + mprintf(("VulkanPostProcessor: Deferred UBO overflow (%u > %u), skipping lights\n", totalUBOSize, DEFERRED_UBO_SIZE)); + m_ctx->memoryManager->unmapMemory(m_deferredUBOAlloc); + return; 
+ } + + // Pack global header + auto lp = ltp::current(); + // Determine if environment maps are available + bool envMapAvailable = (ENVMAP > 0); + tcache_slot_vulkan* envMapSlot = nullptr; + tcache_slot_vulkan* irrMapSlot = nullptr; + if (envMapAvailable) { + envMapSlot = texMgr->getTextureSlot(ENVMAP); + if (!envMapSlot || !envMapSlot->imageView || !envMapSlot->isCubemap) { + envMapAvailable = false; + } + } + if (envMapAvailable && IRRMAP > 0) { + irrMapSlot = texMgr->getTextureSlot(IRRMAP); + if (!irrMapSlot || !irrMapSlot->imageView || !irrMapSlot->isCubemap) { + irrMapSlot = nullptr; // Fall back to fallback cube for irrmap + } + } + + { + auto* header = reinterpret_cast(uboMapped); + memset(header, 0, sizeof(graphics::deferred_global_data)); + header->invScreenWidth = 1.0f / gr_screen.max_w; + header->invScreenHeight = 1.0f / gr_screen.max_h; + header->nearPlane = gr_near_plane; + + if (m_shadow->isInitialized() && Shadow_quality != ShadowQuality::Disabled) { + header->shadow_mv_matrix = Shadow_view_matrix_light; + for (size_t i = 0; i < MAX_SHADOW_CASCADES; ++i) { + header->shadow_proj_matrix[i] = Shadow_proj_matrix[i]; + } + header->veryneardist = Shadow_cascade_distances[0]; + header->neardist = Shadow_cascade_distances[1]; + header->middist = Shadow_cascade_distances[2]; + header->fardist = Shadow_cascade_distances[3]; + vm_inverse_matrix4(&header->inv_view_matrix, &Shadow_view_matrix_render); + } + } + + // Pack per-light data + size_t lightIdx = 0; + bool first_directional = true; + + for (auto& l : full_frame_lights) { + auto* ld = prepare_light_uniforms(l, uboMapped + lightDataOffset + (lightIdx * lightDataSize), lp); + + if (l.type == Light_Type::Directional) { + if (m_shadow->isInitialized() && Shadow_quality != ShadowQuality::Disabled) { + ld->enable_shadows = first_directional ? 
1 : 0; + } + + if (first_directional) { + first_directional = false; + } + + vec4 light_dir; + light_dir.xyzw.x = -l.vec.xyz.x; + light_dir.xyzw.y = -l.vec.xyz.y; + light_dir.xyzw.z = -l.vec.xyz.z; + light_dir.xyzw.w = 0.0f; + vec4 view_dir; + vm_vec_transform(&view_dir, &light_dir, &gr_view_matrix); + ld->lightDir.xyz.x = view_dir.xyzw.x; + ld->lightDir.xyz.y = view_dir.xyzw.y; + ld->lightDir.xyz.z = view_dir.xyzw.z; + } + + // Matrix: env texture matrix for full-frame lights + auto* md = reinterpret_cast(uboMapped + matrixDataOffset + (lightIdx * matrixDataSize)); + memset(md, 0, sizeof(graphics::matrix_uniforms)); + md->modelViewMatrix = gr_env_texture_matrix; + ++lightIdx; + } + + for (auto& l : sphere_lights) { + auto* ld = prepare_light_uniforms(l, uboMapped + lightDataOffset + (lightIdx * lightDataSize), lp); + + if (l.type == Light_Type::Cone) { + ld->dualCone = (l.flags & LF_DUAL_CONE) ? 1.0f : 0.0f; + ld->coneAngle = l.cone_angle; + ld->coneInnerAngle = l.cone_inner_angle; + ld->coneDir = l.vec2; + } + float rad = (Lighting_mode == lighting_mode::COCKPIT) + ? lp->cockpit_light_radius_modifier.handle(MAX(l.rada, l.radb)) + : MAX(l.rada, l.radb); + ld->lightRadius = rad; + ld->scale.xyz.x = rad * 1.05f; + ld->scale.xyz.y = rad * 1.05f; + ld->scale.xyz.z = rad * 1.05f; + + // Matrix: model-view + projection for light volume + auto* md = reinterpret_cast(uboMapped + matrixDataOffset + (lightIdx * matrixDataSize)); + g3_start_instance_matrix(&l.vec, &vmd_identity_matrix, true); + md->modelViewMatrix = gr_model_view_matrix; + md->projMatrix = gr_projection_matrix; + g3_done_instance(true); + ++lightIdx; + } + + for (auto& l : cylinder_lights) { + auto* ld = prepare_light_uniforms(l, uboMapped + lightDataOffset + (lightIdx * lightDataSize), lp); + float rad = + (Lighting_mode == lighting_mode::COCKPIT) ? 
lp->cockpit_light_radius_modifier.handle(l.radb) : l.radb; + ld->lightRadius = rad; + ld->lightType = LT_TUBE; + + vec3d a; + vm_vec_sub(&a, &l.vec, &l.vec2); + auto length = vm_vec_mag(&a); + length += ld->lightRadius * 2.0f; + + ld->scale.xyz.x = rad * 1.05f; + ld->scale.xyz.y = rad * 1.05f; + ld->scale.xyz.z = length; + + // Matrix: oriented instance matrix for cylinder + auto* md = reinterpret_cast(uboMapped + matrixDataOffset + (lightIdx * matrixDataSize)); + vec3d dir, newPos; + matrix orient; + vm_vec_normalized_dir(&dir, &l.vec, &l.vec2); + vm_vector_2_matrix_norm(&orient, &dir, nullptr, nullptr); + vm_vec_scale_sub(&newPos, &l.vec2, &dir, l.radb); + + g3_start_instance_matrix(&newPos, &orient, true); + md->modelViewMatrix = gr_model_view_matrix; + md->projMatrix = gr_projection_matrix; + g3_done_instance(true); + ++lightIdx; + } + + m_ctx->memoryManager->unmapMemory(m_deferredUBOAlloc); + + // Both fullscreen and volume lights use the same vertex layout (POSITION3). + // For fullscreen lights the shader ignores vertex data and generates positions + // from gl_VertexIndex, but Vulkan requires all declared vertex inputs to have + // matching pipeline attributes and bound buffers. 
+ vertex_layout volLayout; + volLayout.add_vertex_component(vertex_format_data::POSITION3, sizeof(float) * 3, 0); + + PipelineConfig lightConfig; + lightConfig.shaderType = SDR_TYPE_DEFERRED_LIGHTING; + lightConfig.vertexLayoutHash = volLayout.hash(); + lightConfig.primitiveType = PRIM_TYPE_TRIS; + lightConfig.depthMode = ZBUFFER_TYPE_NONE; + lightConfig.blendMode = ALPHA_BLEND_ADDITIVE; + lightConfig.cullEnabled = false; + lightConfig.depthWriteEnabled = false; + lightConfig.renderPass = m_lightAccumRenderPass; + + vk::Pipeline lightPipeline = pipelineMgr->getPipeline(lightConfig, volLayout); + if (!lightPipeline) { + return; + } + + vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout(); + + // Begin light accumulation render pass + { + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = m_lightAccumRenderPass; + rpBegin.framebuffer = m_lightAccumFramebuffer; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_ctx->sceneExtent; + + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + } + + // Set viewport and scissor + vk::Viewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = static_cast(m_ctx->sceneExtent.width); + viewport.height = static_cast(m_ctx->sceneExtent.height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + cmd.setViewport(0, viewport); + + vk::Rect2D scissor; + scissor.offset = vk::Offset2D(0, 0); + scissor.extent = m_ctx->sceneExtent; + cmd.setScissor(0, scissor); + + // Pre-build G-buffer texture array (shared across all light draws) + const auto& fallbacks = descriptorMgr->getFallbacks(); + std::array gbufTexArray; + gbufTexArray.fill(fallbacks.texture2D); + gbufTexArray[0] = {m_ctx->linearSampler, m_sceneColor->view, vk::ImageLayout::eShaderReadOnlyOptimal}; + gbufTexArray[1] = {m_ctx->linearSampler, m_gbuffer->normalView(), vk::ImageLayout::eShaderReadOnlyOptimal}; + gbufTexArray[2] = {m_ctx->linearSampler, m_gbuffer->positionView(), 
vk::ImageLayout::eShaderReadOnlyOptimal}; + gbufTexArray[3] = {m_ctx->linearSampler, m_gbuffer->specularView(), vk::ImageLayout::eShaderReadOnlyOptimal}; + + // Pre-build shadow/env/irr image infos (shared across all light draws) + vk::DescriptorImageInfo shadowTexInfo; + if (m_shadow->isInitialized() && m_shadow->colorView()) { + shadowTexInfo = {m_ctx->linearSampler, m_shadow->colorView(), vk::ImageLayout::eShaderReadOnlyOptimal}; + } + vk::DescriptorImageInfo envTexInfo; + if (envMapAvailable && envMapSlot) { + envTexInfo = fallbacks.textureCube; + envTexInfo.imageView = envMapSlot->imageView; + } + vk::DescriptorImageInfo irrTexInfo; + if (envMapAvailable && irrMapSlot) { + irrTexInfo = fallbacks.textureCube; + irrTexInfo.imageView = irrMapSlot->imageView; + } + + // Helper lambda to allocate + write descriptor sets for a single light draw + auto bindLightDescriptors = [&](size_t li) { + DescriptorWriter writer; + writer.reset(m_ctx->device, fallbacks); + + // Set 0: Global + vk::DescriptorSet globalSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Global); + if (!globalSet) return false; + writer.writeSet(globalSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Global)); + writer.setBuffer(GlobalBinding::Lights, {m_deferredUBO, + lightDataOffset + (li * lightDataSize), sizeof(graphics::deferred_light_data)}); + writer.setBuffer(GlobalBinding::DeferredData, {m_deferredUBO, + 0, sizeof(graphics::deferred_global_data)}); + writer.setImage(GlobalBinding::ShadowMap, shadowTexInfo); + writer.setImage(GlobalBinding::EnvMap, envTexInfo); + writer.setImage(GlobalBinding::IrradianceMap, irrTexInfo); + + // Set 1: Material + vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material); + if (!materialSet) return false; + writer.writeSet(materialSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::Material)); + writer.setImageArray(MaterialBinding::TextureArray, gbufTexArray); + + // Set 2: PerDraw + 
vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw); + if (!perDrawSet) return false; + writer.writeSet(perDrawSet, VulkanDescriptorManager::getSetTemplate(DescriptorSetIndex::PerDraw)); + writer.setBuffer(PerDrawBinding::Matrices, {m_deferredUBO, + matrixDataOffset + (li * matrixDataSize), sizeof(graphics::matrix_uniforms)}); + writer.flush(); + + std::array sets = { globalSet, materialSet, perDrawSet }; + cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 0, sets, {}); + + return true; + }; + + cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, lightPipeline); + + // Draw full-frame lights (directional + ambient) + // Bind sphere VBO as dummy — shader ignores vertex data for these light types. + lightIdx = 0; + if (!full_frame_lights.empty()) { + cmd.bindVertexBuffers(0, m_sphereMesh.vbo, vk::DeviceSize(0)); + for (size_t i = 0; i < full_frame_lights.size(); ++i) { + if (bindLightDescriptors(lightIdx)) { + cmd.draw(3, 1, 0, 0); + } + ++lightIdx; + } + } + + // Draw sphere lights (point + cone) + if (!sphere_lights.empty()) { + cmd.bindVertexBuffers(0, m_sphereMesh.vbo, vk::DeviceSize(0)); + cmd.bindIndexBuffer(m_sphereMesh.ibo, 0, vk::IndexType::eUint16); + for (size_t i = 0; i < sphere_lights.size(); ++i) { + if (bindLightDescriptors(lightIdx)) { + cmd.drawIndexed(m_sphereMesh.indexCount, 1, 0, 0, 0); + } + ++lightIdx; + } + } + + // Draw cylinder lights (tube) + if (!cylinder_lights.empty()) { + cmd.bindVertexBuffers(0, m_cylinderMesh.vbo, vk::DeviceSize(0)); + cmd.bindIndexBuffer(m_cylinderMesh.ibo, 0, vk::IndexType::eUint16); + for (size_t i = 0; i < cylinder_lights.size(); ++i) { + if (bindLightDescriptors(lightIdx)) { + cmd.drawIndexed(m_cylinderMesh.indexCount, 1, 0, 0, 0); + } + ++lightIdx; + } + } + + // End render pass (composite → eShaderReadOnlyOptimal) + cmd.endRenderPass(); +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingMSAA.cpp 
b/code/graphics/vulkan/VulkanPostProcessingMSAA.cpp new file mode 100644 index 00000000000..730fa0caa71 --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingMSAA.cpp @@ -0,0 +1,402 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "cmdline/cmdline.h" +#include "gr_vulkan.h" +#include "VulkanRenderer.h" +#include "VulkanTexture.h" +#include "VulkanDescriptorManager.h" +#include "lighting/lighting_profiles.h" +#include "lighting/lighting.h" +#include "nebula/neb.h" +#include "mission/missionparse.h" + +extern float Sun_spot; +extern int Game_subspace_effect; +extern SCP_vector Lights; +extern int Num_lights; + + +namespace graphics::vulkan { + + +// ===== MSAA G-Buffer ===== + +bool VulkanDeferredGBuffer::initMsaa() +{ + if (m_msaaInitialized) { + return true; + } + + auto* renderer = getRendererInstance(); + vk::SampleCountFlagBits msaaSamples = renderer->getMsaaSampleCount(); + if (msaaSamples == vk::SampleCountFlagBits::e1) { + return false; + } + + const uint32_t w = m_ctx->sceneExtent.width; + const uint32_t h = m_ctx->sceneExtent.height; + const vk::ImageUsageFlags msaaUsage = + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled; + + // Create MSAA color images (5 total: color, position, normal, specular, emissive) + struct MsaaTarget { + RenderTarget* target; + vk::Format format; + const char* name; + }; + + std::array targets = {{ + {&m_msaaColor, GBUF_FORMAT_COLOR, "msaa-color"}, + {&m_msaaPosition, GBUF_FORMAT_POSITION, "msaa-position"}, + {&m_msaaNormal, GBUF_FORMAT_NORMAL, "msaa-normal"}, + {&m_msaaSpecular, GBUF_FORMAT_SPECULAR, "msaa-specular"}, + {&m_msaaEmissive, GBUF_FORMAT_EMISSIVE, "msaa-emissive"}, + }}; + + for (auto& t : targets) { + if (!m_ctx->createImage(w, h, t.format, msaaUsage, vk::ImageAspectFlagBits::eColor, + t.target->image, t.target->view, t.target->allocation, msaaSamples)) { + mprintf(("VulkanPostProcessor: Failed to create %s image!\n", t.name)); + shutdownMsaa(); + return false; + } + 
t.target->format = t.format; + t.target->width = w; + t.target->height = h; + } + + // Create MSAA depth image + { + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = vk::ImageType::e2D; + imageInfo.format = m_ctx->depthFormat; + imageInfo.extent = vk::Extent3D(w, h, 1); + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = 1; + imageInfo.samples = msaaSamples; + imageInfo.tiling = vk::ImageTiling::eOptimal; + imageInfo.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment | vk::ImageUsageFlagBits::eSampled; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + + try { + m_msaaDepthImage = m_ctx->device.createImage(imageInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA depth image: %s\n", e.what())); + shutdownMsaa(); + return false; + } + + if (!m_ctx->memoryManager->allocateImageMemory(m_msaaDepthImage, MemoryUsage::GpuOnly, m_msaaDepthAlloc)) { + mprintf(("VulkanPostProcessor: Failed to allocate MSAA depth memory!\n")); + m_ctx->device.destroyImage(m_msaaDepthImage); + m_msaaDepthImage = nullptr; + shutdownMsaa(); + return false; + } + + vk::ImageViewCreateInfo viewInfo; + viewInfo.image = m_msaaDepthImage; + viewInfo.viewType = vk::ImageViewType::e2D; + viewInfo.format = m_ctx->depthFormat; + viewInfo.subresourceRange.aspectMask = imageAspectFromFormat(m_ctx->depthFormat); + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = 1; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = 1; + + try { + m_msaaDepthView = m_ctx->device.createImageView(viewInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA depth view: %s\n", e.what())); + shutdownMsaa(); + return false; + } + } + + // MSAA G-buffer render pass (eClear) — 5 color + depth + try { + m_msaaGbufRenderPass = createGbufRenderPass({ + false, msaaSamples, + 
vk::AttachmentLoadOp::eClear, vk::AttachmentLoadOp::eClear, + vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eDepthStencilAttachmentOptimal, + }); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA G-buffer render pass: %s\n", e.what())); + shutdownMsaa(); + return false; + } + + // MSAA G-buffer render pass (eLoad) — emissive preserving variant + try { + m_msaaGbufRenderPassLoad = createGbufRenderPass({ + false, msaaSamples, + vk::AttachmentLoadOp::eLoad, vk::AttachmentLoadOp::eLoad, + vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eDepthStencilAttachmentOptimal, + }); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA G-buffer load render pass: %s\n", e.what())); + shutdownMsaa(); + return false; + } + + // MSAA G-buffer framebuffer (5 color + depth) + try { + m_msaaGbufFramebuffer = createGbufFramebuffer(m_msaaGbufRenderPass, false, true); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA G-buffer framebuffer: %s\n", e.what())); + shutdownMsaa(); + return false; + } + + // Emissive copy render pass — 1 MS color attachment for upsampling non-MSAA → MSAA + { + vk::AttachmentDescription att; + att.format = HDR_COLOR_FORMAT; + att.samples = msaaSamples; + att.loadOp = vk::AttachmentLoadOp::eDontCare; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eUndefined; + att.finalLayout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + 
subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dependency; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + dependency.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader; + dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; + dependency.srcAccessMask = vk::AccessFlagBits::eShaderRead; + dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dependency; + + try { + m_msaaEmissiveCopyRenderPass = m_ctx->device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA emissive copy render pass: %s\n", e.what())); + shutdownMsaa(); + return false; + } + } + + // Emissive copy framebuffer (MSAA emissive as sole attachment) + { + vk::ImageView att = m_msaaEmissive.view; + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_msaaEmissiveCopyRenderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &att; + fbInfo.width = w; + fbInfo.height = h; + fbInfo.layers = 1; + + try { + m_msaaEmissiveCopyFramebuffer = m_ctx->device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA emissive copy framebuffer: %s\n", e.what())); + shutdownMsaa(); + return false; + } + } + + // MSAA Resolve render pass — 5 non-MSAA color + depth (via gl_FragDepth) + // Writes to the non-MSAA G-buffer images. loadOp=eDontCare (fully overwritten). 
+ try { + m_msaaResolveRenderPass = createGbufRenderPass({ + false, vk::SampleCountFlagBits::e1, + vk::AttachmentLoadOp::eDontCare, vk::AttachmentLoadOp::eDontCare, + vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + vk::ImageLayout::eUndefined, + true, // useResolveDependency + }); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA resolve render pass: %s\n", e.what())); + shutdownMsaa(); + return false; + } + + // MSAA Resolve framebuffer — references non-MSAA G-buffer images + try { + m_msaaResolveFramebuffer = createGbufFramebuffer(m_msaaResolveRenderPass, false, false); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA resolve framebuffer: %s\n", e.what())); + shutdownMsaa(); + return false; + } + + // Create per-frame MSAA resolve UBO (persistently mapped) + // Two 256-byte slots (one per frame in flight) hold {int samples; float fov;} data. + { + vk::BufferCreateInfo bufInfo; + bufInfo.size = MAX_FRAMES_IN_FLIGHT * 256; + bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer; + bufInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_msaaResolveUBO = m_ctx->device.createBuffer(bufInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create MSAA resolve UBO: %s\n", e.what())); + shutdownMsaa(); + return false; + } + + if (!m_ctx->memoryManager->allocateBufferMemory(m_msaaResolveUBO, MemoryUsage::CpuToGpu, m_msaaResolveUBOAlloc)) { + mprintf(("VulkanPostProcessor: Failed to allocate MSAA resolve UBO memory!\n")); + m_ctx->device.destroyBuffer(m_msaaResolveUBO); + m_msaaResolveUBO = nullptr; + shutdownMsaa(); + return false; + } + + m_msaaResolveUBOMapped = m_ctx->memoryManager->mapMemory(m_msaaResolveUBOAlloc); + if (!m_msaaResolveUBOMapped) { + mprintf(("VulkanPostProcessor: Failed to map MSAA resolve UBO!\n")); + shutdownMsaa(); + return false; + } + } + + // Transition MSAA images to the render pass's 
initial layout at creation time. + // The validation layer tracks framebuffer attachment layouts from creation, + // so we must match the eClear render pass's initialLayout exactly. + { + auto* texMgr = getTextureManager(); + + std::array colorTargets = { + &m_msaaColor, &m_msaaPosition, &m_msaaNormal, + &m_msaaSpecular, &m_msaaEmissive, + }; + for (auto* t : colorTargets) { + texMgr->transitionImageLayout(t->image, t->format, + vk::ImageLayout::eUndefined, vk::ImageLayout::eColorAttachmentOptimal); + } + + texMgr->transitionImageLayout(m_msaaDepthImage, m_ctx->depthFormat, + vk::ImageLayout::eUndefined, vk::ImageLayout::eDepthStencilAttachmentOptimal); + } + + m_msaaInitialized = true; + mprintf(("VulkanPostProcessor: MSAA initialized (%ux%u, %dx samples, 5 color + depth)\n", + w, h, Cmdline_msaa_enabled)); + return true; +} + +void VulkanDeferredGBuffer::shutdownMsaa() +{ + if (!m_ctx || !m_ctx->device) { + return; + } + + // Destroy MSAA resolve UBO + if (m_msaaResolveUBOMapped) { + m_ctx->memoryManager->unmapMemory(m_msaaResolveUBOAlloc); + m_msaaResolveUBOMapped = nullptr; + } + if (m_msaaResolveUBO) { + m_ctx->device.destroyBuffer(m_msaaResolveUBO); + m_msaaResolveUBO = nullptr; + } + if (m_msaaResolveUBOAlloc.isValid()) { + m_ctx->memoryManager->freeAllocation(m_msaaResolveUBOAlloc); + } + + if (m_msaaResolveFramebuffer) { + m_ctx->device.destroyFramebuffer(m_msaaResolveFramebuffer); + m_msaaResolveFramebuffer = nullptr; + } + if (m_msaaResolveRenderPass) { + m_ctx->device.destroyRenderPass(m_msaaResolveRenderPass); + m_msaaResolveRenderPass = nullptr; + } + if (m_msaaEmissiveCopyFramebuffer) { + m_ctx->device.destroyFramebuffer(m_msaaEmissiveCopyFramebuffer); + m_msaaEmissiveCopyFramebuffer = nullptr; + } + if (m_msaaEmissiveCopyRenderPass) { + m_ctx->device.destroyRenderPass(m_msaaEmissiveCopyRenderPass); + m_msaaEmissiveCopyRenderPass = nullptr; + } + if (m_msaaGbufFramebuffer) { + m_ctx->device.destroyFramebuffer(m_msaaGbufFramebuffer); + 
m_msaaGbufFramebuffer = nullptr; + } + if (m_msaaGbufRenderPassLoad) { + m_ctx->device.destroyRenderPass(m_msaaGbufRenderPassLoad); + m_msaaGbufRenderPassLoad = nullptr; + } + if (m_msaaGbufRenderPass) { + m_ctx->device.destroyRenderPass(m_msaaGbufRenderPass); + m_msaaGbufRenderPass = nullptr; + } + + // Destroy MSAA depth + if (m_msaaDepthView) { + m_ctx->device.destroyImageView(m_msaaDepthView); + m_msaaDepthView = nullptr; + } + if (m_msaaDepthImage) { + m_ctx->device.destroyImage(m_msaaDepthImage); + m_msaaDepthImage = nullptr; + } + if (m_msaaDepthAlloc.isValid()) { + m_ctx->memoryManager->freeAllocation(m_msaaDepthAlloc); + } + + // Destroy MSAA color targets + std::array msaaTargets = { + &m_msaaColor, &m_msaaPosition, &m_msaaNormal, + &m_msaaSpecular, &m_msaaEmissive, + }; + for (auto* rt : msaaTargets) { + if (rt->view) { + m_ctx->device.destroyImageView(rt->view); + rt->view = nullptr; + } + if (rt->image) { + m_ctx->device.destroyImage(rt->image); + rt->image = nullptr; + } + if (rt->allocation.isValid()) { + m_ctx->memoryManager->freeAllocation(rt->allocation); + } + } + + m_msaaInitialized = false; +} + +void VulkanDeferredGBuffer::transitionMsaaForResume(vk::CommandBuffer /*cmd*/) +{ + // No-op: MSAA render passes use finalLayout == subpass layout (no implicit + // transition at endRenderPass), so color attachments remain in + // eColorAttachmentOptimal — exactly what the eLoad pass expects. +} + +void VulkanDeferredGBuffer::transitionMsaaForBegin(vk::CommandBuffer /*cmd*/) +{ + // No-op: MSAA images are always in eColorAttachmentOptimal / + // eDepthStencilAttachmentOptimal between frames. Init-time transitions + // set this layout, and the post-resolve barriers in + // vulkan_deferred_lighting_msaa restore it after each frame's resolve pass. 
+} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanPostProcessingShadow.cpp b/code/graphics/vulkan/VulkanPostProcessingShadow.cpp new file mode 100644 index 00000000000..d3690682e4d --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessingShadow.cpp @@ -0,0 +1,281 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "VulkanRenderer.h" +#include "VulkanDescriptorManager.h" +#include "graphics/shadows.h" +#include "lighting/lighting_profiles.h" +#include "lighting/lighting.h" +#include "nebula/neb.h" +#include "mission/missionparse.h" + +extern float Sun_spot; +extern int Game_subspace_effect; +extern SCP_vector Lights; +extern int Num_lights; + + +namespace graphics::vulkan { + + +// ===== Shadow Map Implementation ===== + +bool VulkanShadowMap::init(PostProcessContext& ctx) +{ + m_ctx = &ctx; + + if (m_initialized) { + return true; + } + + if (Shadow_quality == ShadowQuality::Disabled) { + return false; + } + + int size; + switch (Shadow_quality) { + case ShadowQuality::Low: size = 512; break; + case ShadowQuality::Medium: size = 1024; break; + case ShadowQuality::High: size = 2048; break; + case ShadowQuality::Ultra: size = 4096; break; + default: size = 512; break; + } + + mprintf(("VulkanPostProcessor: Creating %dx%d shadow map (%d cascades)\n", size, size, MAX_SHADOW_CASCADES)); + + const uint32_t layers = MAX_SHADOW_CASCADES; + + // Create shadow color image (RGBA16F, 2D array, MAX_SHADOW_CASCADES layers) + { + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = vk::ImageType::e2D; + imageInfo.format = HDR_COLOR_FORMAT; + imageInfo.extent = vk::Extent3D(static_cast(size), static_cast(size), 1); + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = layers; + imageInfo.samples = vk::SampleCountFlagBits::e1; + imageInfo.tiling = vk::ImageTiling::eOptimal; + imageInfo.usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + 
imageInfo.initialLayout = vk::ImageLayout::eUndefined; + + try { + m_color.image = m_ctx->device.createImage(imageInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create shadow color image: %s\n", e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateImageMemory(m_color.image, MemoryUsage::GpuOnly, m_color.allocation)) { + m_ctx->device.destroyImage(m_color.image); + m_color.image = nullptr; + return false; + } + + vk::ImageViewCreateInfo viewInfo; + viewInfo.image = m_color.image; + viewInfo.viewType = vk::ImageViewType::e2DArray; + viewInfo.format = HDR_COLOR_FORMAT; + viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = 1; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = layers; + + try { + m_color.view = m_ctx->device.createImageView(viewInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create shadow color view: %s\n", e.what())); + return false; + } + + m_color.format = HDR_COLOR_FORMAT; + m_color.width = static_cast(size); + m_color.height = static_cast(size); + } + + // Create shadow depth image (D32F, 2D array, MAX_SHADOW_CASCADES layers) + { + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = vk::ImageType::e2D; + imageInfo.format = SHADOW_DEPTH_FORMAT; + imageInfo.extent = vk::Extent3D(static_cast(size), static_cast(size), 1); + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = layers; + imageInfo.samples = vk::SampleCountFlagBits::e1; + imageInfo.tiling = vk::ImageTiling::eOptimal; + imageInfo.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + + try { + m_depth.image = m_ctx->device.createImage(imageInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create 
shadow depth image: %s\n", e.what())); + return false; + } + + if (!m_ctx->memoryManager->allocateImageMemory(m_depth.image, MemoryUsage::GpuOnly, m_depth.allocation)) { + m_ctx->device.destroyImage(m_depth.image); + m_depth.image = nullptr; + return false; + } + + vk::ImageViewCreateInfo viewInfo; + viewInfo.image = m_depth.image; + viewInfo.viewType = vk::ImageViewType::e2DArray; + viewInfo.format = SHADOW_DEPTH_FORMAT; + viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eDepth; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = 1; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = layers; + + try { + m_depth.view = m_ctx->device.createImageView(viewInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create shadow depth view: %s\n", e.what())); + return false; + } + + m_depth.format = SHADOW_DEPTH_FORMAT; + m_depth.width = static_cast(size); + m_depth.height = static_cast(size); + } + + // Create shadow render pass: 1 color (RGBA16F) + 1 depth (D32F), both eClear + { + std::array attachments; + + // Color attachment (RGBA16F) — stores VSM depth variance + attachments[0].format = HDR_COLOR_FORMAT; + attachments[0].samples = vk::SampleCountFlagBits::e1; + attachments[0].loadOp = vk::AttachmentLoadOp::eClear; + attachments[0].storeOp = vk::AttachmentStoreOp::eStore; + attachments[0].stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + attachments[0].stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + attachments[0].initialLayout = vk::ImageLayout::eUndefined; + attachments[0].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + // Depth attachment (D32F) + attachments[1].format = SHADOW_DEPTH_FORMAT; + attachments[1].samples = vk::SampleCountFlagBits::e1; + attachments[1].loadOp = vk::AttachmentLoadOp::eClear; + attachments[1].storeOp = vk::AttachmentStoreOp::eDontCare; + attachments[1].stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + 
attachments[1].stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + attachments[1].initialLayout = vk::ImageLayout::eUndefined; + attachments[1].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::AttachmentReference depthRef; + depthRef.attachment = 1; + depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + subpass.pDepthStencilAttachment = &depthRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dep.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dep.srcAccessMask = {}; + dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eDepthStencilAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = static_cast(attachments.size()); + rpInfo.pAttachments = attachments.data(); + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_renderPass = m_ctx->device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create shadow render pass: %s\n", e.what())); + return false; + } + } + + // Create layered framebuffer (all MAX_SHADOW_CASCADES layers at once) + { + std::array fbAttachments = { + m_color.view, + m_depth.view, + }; + + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_renderPass; + fbInfo.attachmentCount = static_cast(fbAttachments.size()); + fbInfo.pAttachments = fbAttachments.data(); + fbInfo.width = 
static_cast(size); + fbInfo.height = static_cast(size); + fbInfo.layers = layers; + + try { + m_framebuffer = m_ctx->device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create shadow framebuffer: %s\n", e.what())); + return false; + } + } + + m_textureSize = size; + m_initialized = true; + mprintf(("VulkanPostProcessor: Shadow map initialized (%dx%d, %d cascades)\n", size, size, MAX_SHADOW_CASCADES)); + return true; +} + +void VulkanShadowMap::shutdown() +{ + if (!m_initialized) { + return; + } + + if (m_framebuffer) { + m_ctx->device.destroyFramebuffer(m_framebuffer); + m_framebuffer = nullptr; + } + if (m_renderPass) { + m_ctx->device.destroyRenderPass(m_renderPass); + m_renderPass = nullptr; + } + + if (m_color.view) { + m_ctx->device.destroyImageView(m_color.view); + m_color.view = nullptr; + } + if (m_color.image) { + m_ctx->device.destroyImage(m_color.image); + m_color.image = nullptr; + } + if (m_color.allocation.isValid()) { + m_ctx->memoryManager->freeAllocation(m_color.allocation); + } + + if (m_depth.view) { + m_ctx->device.destroyImageView(m_depth.view); + m_depth.view = nullptr; + } + if (m_depth.image) { + m_ctx->device.destroyImage(m_depth.image); + m_depth.image = nullptr; + } + if (m_depth.allocation.isValid()) { + m_ctx->memoryManager->freeAllocation(m_depth.allocation); + } + + m_textureSize = 0; + m_initialized = false; +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanQuery.cpp b/code/graphics/vulkan/VulkanQuery.cpp new file mode 100644 index 00000000000..84afa7072fd --- /dev/null +++ b/code/graphics/vulkan/VulkanQuery.cpp @@ -0,0 +1,311 @@ + +#include "VulkanQuery.h" +#include "VulkanState.h" + + +namespace graphics::vulkan { + +static VulkanQueryManager* g_queryManager = nullptr; + +VulkanQueryManager* getQueryManager() +{ + return g_queryManager; +} + +void setQueryManager(VulkanQueryManager* mgr) +{ + g_queryManager = mgr; +} + +bool 
VulkanQueryManager::init(vk::Device device, float timestampPeriod, + vk::CommandPool commandPool, vk::Queue queue) +{ + m_device = device; + m_timestampPeriod = timestampPeriod; + + vk::QueryPoolCreateInfo poolInfo; + poolInfo.queryType = vk::QueryType::eTimestamp; + poolInfo.queryCount = POOL_CAPACITY; + + m_queryPool = m_device.createQueryPool(poolInfo); + if (!m_queryPool) { + mprintf(("Vulkan: Failed to create timestamp query pool!\n")); + return false; + } + + // Reset the entire pool via a one-shot command buffer so all queries + // start in the "unavailable" state required by the spec. + vk::CommandBufferAllocateInfo allocInfo; + allocInfo.commandPool = commandPool; + allocInfo.level = vk::CommandBufferLevel::ePrimary; + allocInfo.commandBufferCount = 1; + + auto cmdBuffers = m_device.allocateCommandBuffers(allocInfo); + auto cmd = cmdBuffers.front(); + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + cmd.begin(beginInfo); + cmd.resetQueryPool(m_queryPool, 0, POOL_CAPACITY); + cmd.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + queue.submit(submitInfo, nullptr); + queue.waitIdle(); + + m_device.freeCommandBuffers(commandPool, cmdBuffers); + + m_slots.clear(); + m_slots.resize(POOL_CAPACITY); + for (uint32_t idx = 0; idx < POOL_CAPACITY; ++idx) { + m_freeSlots.push(idx); + } + + m_resetList.clear(); + m_inflightResets.clear(); + m_pendingWrites.clear(); + m_deferredFreeSlots.clear(); + m_lastFrameSubmitted = true; + + mprintf(("Vulkan: Created timestamp query pool (capacity %u, period %.1f ns/tick)\n", + POOL_CAPACITY, m_timestampPeriod)); + + return true; +} + +void VulkanQueryManager::shutdown() +{ + if (m_device && m_queryPool) { + m_device.destroyQueryPool(m_queryPool); + m_queryPool = nullptr; + } + m_slots.clear(); + while (!m_freeSlots.empty()) { + m_freeSlots.pop(); + } + m_resetList.clear(); + m_inflightResets.clear(); + 
m_pendingWrites.clear(); + m_deferredFreeSlots.clear(); + m_device = nullptr; +} + +void VulkanQueryManager::beginFrame(vk::CommandBuffer commandBuffer) +{ + // If the previous frame's command buffer was abandoned (no flip/submit), + // the resets and writes we recorded never executed on the GPU. + if (!m_lastFrameSubmitted) { + // Orphaned writes: the vkCmdWriteTimestamp never executed, so the + // slot is still in its pre-write state. Mark as orphaned so that + // queryValueAvailable returns true and getQueryValue returns 0, + // letting the tracing drain proceed to deleteQueryObject. + for (auto idx : m_pendingWrites) { + m_slots[idx].orphaned = true; + m_slots[idx].wasReset = true; + } + m_pendingWrites.clear(); + // Orphaned resets: the vkCmdResetQueryPool never executed. Override + // wasReset back to false for slots whose reset was ALSO on the + // abandoned command buffer, and re-schedule the reset. + for (auto idx : m_inflightResets) { + m_slots[idx].wasReset = false; + m_resetList.push_back(idx); + } + } + m_inflightResets.clear(); + + // Record resets for this frame. Only slots returned via deleteQueryObject + // are in this list. Must happen outside render passes (vkCmdResetQueryPool). + for (auto idx : m_resetList) { + Assertion(!m_slots[idx].inUse, + "Query slot %u in resetList but inUse=true!", idx); + commandBuffer.resetQueryPool(m_queryPool, idx, 1); + m_slots[idx].wasReset = true; + m_inflightResets.push_back(idx); + } + m_resetList.clear(); + + // Slots that were deleted while awaiting reset can now return to the free pool. + // The vkCmdResetQueryPool recorded above makes them safe for new writes on + // this same command buffer. 
+ if (!m_deferredFreeSlots.empty()) { + for (auto idx : m_deferredFreeSlots) { + m_freeSlots.push(idx); + } + m_deferredFreeSlots.clear(); + } + + // Report and reset exhaustion counter from previous frame + if (m_exhaustionMessageCount > 0) { + mprintf(("Vulkan: Query pool exhaustion — %u queries dropped last frame (free: %u)\n", + m_exhaustionMessageCount, static_cast(m_freeSlots.size()))); + m_exhaustionMessageCount = 0; + } + + m_lastFrameSubmitted = false; +} + +void VulkanQueryManager::notifySubmission() +{ + m_lastFrameSubmitted = true; + m_inflightResets.clear(); + + // Confirm all pending writes were submitted to the GPU. + for (auto idx : m_pendingWrites) { + m_slots[idx].submitted = true; + } + m_pendingWrites.clear(); +} + +int VulkanQueryManager::createQueryObject() +{ + if (!m_freeSlots.empty()) { + auto idx = m_freeSlots.front(); + m_freeSlots.pop(); + m_slots[idx].inUse = true; + return static_cast(idx); + } else { + if (m_exhaustionMessageCount == 0) { + uint32_t inUseCount = 0, pendingResetCount = 0; + for (const auto& s : m_slots) { + if (s.inUse) inUseCount++; + } + pendingResetCount = static_cast(m_resetList.size() + m_inflightResets.size() + m_deferredFreeSlots.size()); + mprintf(("Vulkan: Query pool exhausted (%u slots: %u in-use, %u pending-reset, %u pending-write)\n", + POOL_CAPACITY, inUseCount, pendingResetCount, static_cast(m_pendingWrites.size()))); + } + m_exhaustionMessageCount++; + return -1; + } +} + +void VulkanQueryManager::queryValue(int obj, QueryType type) +{ + Assertion(obj >= 0 && obj < static_cast(m_slots.size()), + "Query object index %d is invalid!", obj); + auto& slot = m_slots[obj]; + + switch (type) { + case QueryType::Timestamp: { + // Slots must be reset by beginFrame before a new write. + Assertion(slot.wasReset, + "Query slot %d written before reset! 
wasReset=%d inUse=%d", + obj, (int)slot.wasReset, (int)slot.inUse); + + getStateTracker()->getCommandBuffer().writeTimestamp( + vk::PipelineStageFlagBits::eBottomOfPipe, + m_queryPool, static_cast(obj)); + + slot.submitted = false; + slot.wasReset = false; + m_pendingWrites.push_back(static_cast(obj)); + break; + } + default: + UNREACHABLE("Unhandled QueryType value!"); + break; + } +} + +bool VulkanQueryManager::queryValueAvailable(int obj) +{ + Assertion(obj >= 0 && obj < static_cast(m_slots.size()), + "Query object index %d is invalid!", obj); + auto& slot = m_slots[obj]; + + if (!slot.inUse || slot.orphaned) { + return true; + } + + // Written on current frame but not yet submitted (flip hasn't happened). + // Return false so process_gpu_events skips this and tries next frame. + if (!slot.submitted) { + return false; + } + + uint64_t dummy; + auto result = m_device.getQueryPoolResults( + m_queryPool, + static_cast(obj), 1, + sizeof(uint64_t), &dummy, sizeof(uint64_t), + vk::QueryResultFlagBits::e64); + + return (result == vk::Result::eSuccess); +} + +std::uint64_t VulkanQueryManager::getQueryValue(int obj) +{ + Assertion(obj >= 0 && obj < static_cast(m_slots.size()), + "Query object index %d is invalid!", obj); + auto& slot = m_slots[obj]; + + if (!slot.inUse || slot.orphaned) { + return 0; + } + + if (!slot.submitted) { + return 0; + } + + uint64_t ticks; + auto result = m_device.getQueryPoolResults( + m_queryPool, + static_cast(obj), 1, + sizeof(uint64_t), &ticks, sizeof(uint64_t), + vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait); + Assertion(result == vk::Result::eSuccess, "Failed to read query %d result!", obj); + + return static_cast(static_cast(ticks) * static_cast(m_timestampPeriod)); +} + +void VulkanQueryManager::deleteQueryObject(int obj) +{ + Assertion(obj >= 0 && obj < static_cast(m_slots.size()), + "Query object index %d is invalid!", obj); + auto& slot = m_slots[obj]; + + slot.inUse = false; + slot.orphaned = false; + + if 
(!slot.wasReset) { + m_resetList.push_back(static_cast(obj)); + m_deferredFreeSlots.push_back(static_cast(obj)); + } else { + m_freeSlots.push(static_cast(obj)); + } +} + +// Free function wrappers for gr_screen function pointers +int vulkan_create_query_object() +{ + return getQueryManager()->createQueryObject(); +} + +void vulkan_query_value(int obj, QueryType type) +{ + if (obj < 0) return; + getQueryManager()->queryValue(obj, type); +} + +bool vulkan_query_value_available(int obj) +{ + if (obj < 0) return true; + return getQueryManager()->queryValueAvailable(obj); +} + +std::uint64_t vulkan_get_query_value(int obj) +{ + if (obj < 0) return 0; + return getQueryManager()->getQueryValue(obj); +} + +void vulkan_delete_query_object(int obj) +{ + if (obj < 0) return; + getQueryManager()->deleteQueryObject(obj); +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanQuery.h b/code/graphics/vulkan/VulkanQuery.h new file mode 100644 index 00000000000..1526653e529 --- /dev/null +++ b/code/graphics/vulkan/VulkanQuery.h @@ -0,0 +1,59 @@ +#pragma once + +#include "graphics/2d.h" + +#include + + +namespace graphics::vulkan { + +class VulkanQueryManager { + public: + bool init(vk::Device device, float timestampPeriod, + vk::CommandPool commandPool, vk::Queue queue); + void shutdown(); + + void beginFrame(vk::CommandBuffer commandBuffer); + void notifySubmission(); + + int createQueryObject(); + void queryValue(int obj, QueryType type); + bool queryValueAvailable(int obj); + std::uint64_t getQueryValue(int obj); + void deleteQueryObject(int obj); + + private: + static const uint32_t POOL_CAPACITY = 4096; + + struct QuerySlot { + bool inUse = false; // true after createQueryObject, false after deleteQueryObject + bool submitted = false; // true after notifySubmission confirms the write was submitted + bool wasReset = true; // true after reset (init or beginFrame), false after write + bool orphaned = false; // true if write was on an abandoned command 
buffer + }; + + vk::Device m_device; + vk::QueryPool m_queryPool; + SCP_vector m_slots; + SCP_queue m_freeSlots; // available slot indices + SCP_vector m_resetList; // slots to reset in next beginFrame + SCP_vector m_inflightResets; // resets recorded but not yet confirmed submitted + SCP_vector m_pendingWrites; // writes recorded but not yet confirmed submitted + SCP_vector m_deferredFreeSlots; // deleted slots waiting for reset before returning to freeSlots + float m_timestampPeriod = 0.0f; + bool m_lastFrameSubmitted = true; // false after beginFrame, true after notifySubmission + uint32_t m_exhaustionMessageCount = 0; // throttle exhaustion log spam +}; + +VulkanQueryManager* getQueryManager(); +void setQueryManager(VulkanQueryManager* mgr); + +// Free functions for gr_screen function pointers +int vulkan_create_query_object(); +void vulkan_query_value(int obj, QueryType type); +bool vulkan_query_value_available(int obj); +std::uint64_t vulkan_get_query_value(int obj); +void vulkan_delete_query_object(int obj); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanRenderFrame.cpp b/code/graphics/vulkan/VulkanRenderFrame.cpp new file mode 100644 index 00000000000..b43c496f558 --- /dev/null +++ b/code/graphics/vulkan/VulkanRenderFrame.cpp @@ -0,0 +1,135 @@ + +#include "VulkanRenderFrame.h" + +namespace graphics::vulkan { + +VulkanRenderFrame::VulkanRenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue) + : m_device(device), m_swapChain(swapChain), m_graphicsQueue(graphicsQueue), m_presentQueue(presentQueue) +{ + constexpr vk::SemaphoreCreateInfo semaphoreCreateInfo; + constexpr vk::FenceCreateInfo fenceCreateInfo; + + m_imageAvailableSemaphore = device.createSemaphoreUnique(semaphoreCreateInfo); + m_renderingFinishedSemaphore = device.createSemaphoreUnique(semaphoreCreateInfo); + m_frameInFlightFence = device.createFenceUnique(fenceCreateInfo); +} +void VulkanRenderFrame::waitForFinish() 
+{ + if (!m_inFlight) { + return; + } + + // waitForFences can theoretically return a timeout, but as this passes the maximum uint64_t value in microseconds, + // this won't happen in practice, and the result can be ignored. + (void)m_device.waitForFences(m_frameInFlightFence.get(), true, std::numeric_limits::max()); + m_device.resetFences(m_frameInFlightFence.get()); + + // That frame is now definitely not in flight anymore so we can call the functions that depend on that + for (const auto& finishFunc : m_frameFinishedCallbacks) { + finishFunc(); + } + m_frameFinishedCallbacks.clear(); + + // Our fence has been signaled so we are no longer in flight and ready to be reused + m_inFlight = false; +} +void VulkanRenderFrame::onFrameFinished(std::function finishFunc) +{ + m_frameFinishedCallbacks.push_back(std::move(finishFunc)); +} +SwapChainStatus VulkanRenderFrame::acquireSwapchainImage(uint32_t& outImageIndex) +{ + Assertion(!m_inFlight, "Cannot acquire swapchain image when frame is still in flight."); + + // Initialized to a safe value: the pointer overload below only writes this + // on success, so it must never be left indeterminate if the acquire fails. + uint32_t imageIndex = 0; + vk::Result res = vk::Result::eErrorOutOfDateKHR; + try { + res = m_device.acquireNextImageKHR(m_swapChain, + std::numeric_limits::max(), + m_imageAvailableSemaphore.get(), + nullptr, + &imageIndex); + } catch (const vk::OutOfDateKHRError&) { + return SwapChainStatus::eOutOfDate; + } + + // IMPORTANT: this overload of acquireNextImageKHR takes a pImageIndex pointer + // and returns the raw vk::Result *without throwing* on error codes. The + // try/catch above therefore does NOT catch eErrorOutOfDateKHR (and other + // errors); they arrive here as a Result. We must inspect it explicitly, + // otherwise an error result would fall through as "success" while leaving + // imageIndex unwritten, producing a garbage swap chain index. 
+ if (res == vk::Result::eErrorOutOfDateKHR) { + return SwapChainStatus::eOutOfDate; + } + if (res != vk::Result::eSuccess && res != vk::Result::eSuboptimalKHR) { + // Surface lost, device lost, timeout, etc. No image was acquired, so the + // index is invalid. Force a swap chain recreation rather than indexing + // with a bogus value. + return SwapChainStatus::eOutOfDate; + } + + m_swapChainIdx = imageIndex; + outImageIndex = imageIndex; + + if (res == vk::Result::eSuboptimalKHR) { + return SwapChainStatus::eSuboptimal; + } + return SwapChainStatus::eSuccess; +} +SwapChainStatus VulkanRenderFrame::submitAndPresent(const SCP_vector& cmdBuffers) +{ + Assertion(!m_inFlight, "Cannot submit a frame for presentation when it is still in flight."); + + // Wait at color attachment output stage — the first use of the swap chain image + // is loadOp=eClear at the start of the render pass, which is a color attachment write. + const std::array waitStages = {vk::PipelineStageFlagBits::eColorAttachmentOutput}; + const std::array waitSemaphores = {m_imageAvailableSemaphore.get()}; + + vk::SubmitInfo submitInfo; + submitInfo.waitSemaphoreCount = 1; + submitInfo.pWaitDstStageMask = waitStages.data(); + submitInfo.pWaitSemaphores = waitSemaphores.data(); + + submitInfo.commandBufferCount = static_cast(cmdBuffers.size()); + submitInfo.pCommandBuffers = cmdBuffers.data(); + + const std::array signalSemaphores = {m_renderingFinishedSemaphore.get()}; + submitInfo.signalSemaphoreCount = 1; + submitInfo.pSignalSemaphores = signalSemaphores.data(); + + m_graphicsQueue.submit(submitInfo, m_frameInFlightFence.get()); + + // This frame is now officially in flight (fence pending even if present fails) + m_inFlight = true; + + vk::PresentInfoKHR presentInfo; + presentInfo.waitSemaphoreCount = 1; + presentInfo.pWaitSemaphores = signalSemaphores.data(); + + const std::array swapChains = {m_swapChain}; + presentInfo.swapchainCount = 1; + presentInfo.pSwapchains = swapChains.data(); + 
presentInfo.pImageIndices = &m_swapChainIdx; + presentInfo.pResults = nullptr; + + vk::Result res; + try { + res = m_presentQueue.presentKHR(presentInfo); + } catch (vk::OutOfDateKHRError&) { + return SwapChainStatus::eOutOfDate; + } + + if (res == vk::Result::eSuboptimalKHR) { + return SwapChainStatus::eSuboptimal; + } + return SwapChainStatus::eSuccess; +} +void VulkanRenderFrame::updateSwapChain(vk::SwapchainKHR swapChain) +{ + m_swapChain = swapChain; +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanRenderFrame.h b/code/graphics/vulkan/VulkanRenderFrame.h new file mode 100644 index 00000000000..c4661292747 --- /dev/null +++ b/code/graphics/vulkan/VulkanRenderFrame.h @@ -0,0 +1,45 @@ +#pragma once + +#include "globalincs/pstypes.h" + +#include + +namespace graphics::vulkan { + +enum class SwapChainStatus { + eSuccess, + eSuboptimal, // Swap chain works but should be recreated + eOutOfDate, // Must recreate before next use +}; + +class VulkanRenderFrame { + public: + VulkanRenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue); + + void waitForFinish(); + + SwapChainStatus acquireSwapchainImage(uint32_t& outImageIndex); + + void onFrameFinished(std::function finishFunc); + + SwapChainStatus submitAndPresent(const SCP_vector& cmdBuffers); + + void updateSwapChain(vk::SwapchainKHR swapChain); + + private: + vk::Device m_device; + vk::SwapchainKHR m_swapChain; + vk::Queue m_graphicsQueue; + vk::Queue m_presentQueue; + + vk::UniqueSemaphore m_imageAvailableSemaphore; + vk::UniqueSemaphore m_renderingFinishedSemaphore; + vk::UniqueFence m_frameInFlightFence; + SCP_vector> m_frameFinishedCallbacks; + + bool m_inFlight = false; + + uint32_t m_swapChainIdx = 0; +}; + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanRenderer.cpp b/code/graphics/vulkan/VulkanRenderer.cpp index 50b6da7e220..5fc0f3d5c33 100644 --- a/code/graphics/vulkan/VulkanRenderer.cpp +++ 
b/code/graphics/vulkan/VulkanRenderer.cpp @@ -1,645 +1,44 @@ #include "VulkanRenderer.h" +#include "VulkanMemory.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" -#include "globalincs/version.h" +#include "graphics/grinternal.h" +#include "graphics/post_processing.h" -#include "backends/imgui_impl_sdl.h" #include "backends/imgui_impl_vulkan.h" -#include "def_files/def_files.h" #include "graphics/2d.h" -#include "libs/renderdoc/renderdoc.h" +#include "lighting/lighting.h" #include "mod_table/mod_table.h" #if SDL_VERSION_ATLEAST(2, 0, 6) -#include #endif VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE -namespace graphics { -namespace vulkan { +extern float flFrametime; -namespace { -#if SDL_SUPPORTS_VULKAN -const char* EngineName = "FreeSpaceOpen"; +namespace graphics::vulkan { -const gameversion::version MinVulkanVersion(1, 1, 0, 0); - -VkBool32 VKAPI_PTR debugReportCallback( -#if VK_HEADER_VERSION >= 304 - vk::DebugReportFlagsEXT /*flags*/, - vk::DebugReportObjectTypeEXT /*objectType*/, -#else - VkDebugReportFlagsEXT /*flags*/, - VkDebugReportObjectTypeEXT /*objectType*/, -#endif - uint64_t /*object*/, - size_t /*location*/, - int32_t /*messageCode*/, - const char* pLayerPrefix, - const char* pMessage, - void* /*pUserData*/) -{ - mprintf(("Vulkan message: [%s]: %s\n", pLayerPrefix, pMessage)); - return VK_FALSE; -} -#endif - -const SCP_vector RequiredDeviceExtensions = { - VK_KHR_SWAPCHAIN_EXTENSION_NAME, -}; - -bool checkDeviceExtensionSupport(PhysicalDeviceValues& values) -{ - values.extensions = values.device.enumerateDeviceExtensionProperties(); - - std::set requiredExtensions(RequiredDeviceExtensions.cbegin(), RequiredDeviceExtensions.cend()); - for (const auto& extension : values.extensions) { - requiredExtensions.erase(extension.extensionName); - } - - return requiredExtensions.empty(); -} - -bool checkSwapChainSupport(PhysicalDeviceValues& values, const vk::UniqueSurfaceKHR& surface) -{ - values.surfaceCapabilities = 
values.device.getSurfaceCapabilitiesKHR(surface.get()); - values.surfaceFormats = values.device.getSurfaceFormatsKHR(surface.get()); - values.presentModes = values.device.getSurfacePresentModesKHR(surface.get()); - - return !values.surfaceFormats.empty() && !values.presentModes.empty(); -} - -bool isDeviceUnsuitable(PhysicalDeviceValues& values, const vk::UniqueSurfaceKHR& surface) -{ - // We need a GPU. Reject CPU or "other" types. - if (values.properties.deviceType != vk::PhysicalDeviceType::eDiscreteGpu && - values.properties.deviceType != vk::PhysicalDeviceType::eIntegratedGpu && - values.properties.deviceType != vk::PhysicalDeviceType::eVirtualGpu) { - mprintf(("Rejecting %s (%d) because the device type is unsuitable.\n", - values.properties.deviceName.data(), - values.properties.deviceID)); - return true; - } - - uint32_t i = 0; - for (const auto& queue : values.queueProperties) { - if (!values.graphicsQueueIndex.initialized && queue.queueFlags & vk::QueueFlagBits::eGraphics) { - values.graphicsQueueIndex.initialized = true; - values.graphicsQueueIndex.index = i; - } - if (!values.transferQueueIndex.initialized && queue.queueFlags & vk::QueueFlagBits::eTransfer) { - values.transferQueueIndex.initialized = true; - values.transferQueueIndex.index = i; - } else if (queue.queueFlags & vk::QueueFlagBits::eTransfer && - !(queue.queueFlags & vk::QueueFlagBits::eGraphics)) { - // Found a dedicated transfer queue and we prefer that - values.transferQueueIndex.initialized = true; - values.transferQueueIndex.index = i; - } - if (!values.presentQueueIndex.initialized && values.device.getSurfaceSupportKHR(i, surface.get())) { - values.presentQueueIndex.initialized = true; - values.presentQueueIndex.index = i; - } - - ++i; - } - - if (!values.graphicsQueueIndex.initialized) { - mprintf(("Rejecting %s (%d) because the device does not have a graphics queue.\n", - values.properties.deviceName.data(), - values.properties.deviceID)); - return true; - } - if 
(!values.transferQueueIndex.initialized) { - mprintf(("Rejecting %s (%d) because the device does not have a transfer queue.\n", - values.properties.deviceName.data(), - values.properties.deviceID)); - return true; - } - if (!values.presentQueueIndex.initialized) { - mprintf(("Rejecting %s (%d) because the device does not have a presentation queue.\n", - values.properties.deviceName.data(), - values.properties.deviceID)); - return true; - } - - if (!checkDeviceExtensionSupport(values)) { - mprintf(("Rejecting %s (%d) because the device does not support our required extensions.\n", - values.properties.deviceName.data(), - values.properties.deviceID)); - return true; - } - - if (!checkSwapChainSupport(values, surface)) { - mprintf(("Rejecting %s (%d) because the device swap chain was not compatible.\n", - values.properties.deviceName.data(), - values.properties.deviceID)); - return true; - } - - return false; -} - -uint32_t deviceTypeScore(vk::PhysicalDeviceType type) -{ - switch (type) { - case vk::PhysicalDeviceType::eIntegratedGpu: - return 1; - case vk::PhysicalDeviceType::eDiscreteGpu: - return 2; - case vk::PhysicalDeviceType::eVirtualGpu: - case vk::PhysicalDeviceType::eCpu: - case vk::PhysicalDeviceType::eOther: - default: - return 0; - } -} - -uint32_t scoreDevice(const PhysicalDeviceValues& device) -{ - uint32_t score = 0; - - score += deviceTypeScore(device.properties.deviceType) * 1000; - score += device.properties.apiVersion * 100; - - return score; -} - -bool compareDevices(const PhysicalDeviceValues& left, const PhysicalDeviceValues& right) -{ - return scoreDevice(left) < scoreDevice(right); -} - -void printPhysicalDevice(const PhysicalDeviceValues& values) -{ - mprintf((" Found %s (%d) of type %s. API version %d.%d.%d, Driver version %d.%d.%d. 
Scored as %d\n", - values.properties.deviceName.data(), - values.properties.deviceID, - to_string(values.properties.deviceType).c_str(), - VK_VERSION_MAJOR(values.properties.apiVersion), - VK_VERSION_MINOR(values.properties.apiVersion), - VK_VERSION_PATCH(values.properties.apiVersion), - VK_VERSION_MAJOR(values.properties.driverVersion), - VK_VERSION_MINOR(values.properties.driverVersion), - VK_VERSION_PATCH(values.properties.driverVersion), - scoreDevice(values))); -} - -vk::SurfaceFormatKHR chooseSurfaceFormat(const PhysicalDeviceValues& values) -{ - for (const auto& availableFormat : values.surfaceFormats) { - // Simple check is enough for now - if (availableFormat.format == vk::Format::eB8G8R8A8Srgb && - availableFormat.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear) { - return availableFormat; - } - } - - return values.surfaceFormats.front(); -} - -vk::PresentModeKHR choosePresentMode(const PhysicalDeviceValues& values) -{ - // Depending on if we want Vsync or not, choose the best mode - for (const auto& availablePresentMode : values.presentModes) { - if (Gr_enable_vsync) { - if (availablePresentMode == vk::PresentModeKHR::eMailbox) { - return availablePresentMode; - } - } else { - if (availablePresentMode == vk::PresentModeKHR::eImmediate) { - return availablePresentMode; - } - } - } - - // Guaranteed to be supported - return vk::PresentModeKHR::eFifo; -} - -vk::Extent2D chooseSwapChainExtent(const PhysicalDeviceValues& values, uint32_t width, uint32_t height) -{ - if (values.surfaceCapabilities.currentExtent.width != UINT32_MAX) { - return values.surfaceCapabilities.currentExtent; - } else { - VkExtent2D actualExtent = {width, height}; - - actualExtent.width = std::max(values.surfaceCapabilities.minImageExtent.width, - std::min(values.surfaceCapabilities.maxImageExtent.width, actualExtent.width)); - actualExtent.height = std::max(values.surfaceCapabilities.minImageExtent.height, - std::min(values.surfaceCapabilities.maxImageExtent.height, 
actualExtent.height)); - - return actualExtent; - } -} - -} // namespace VulkanRenderer::VulkanRenderer(std::unique_ptr graphicsOps) : m_graphicsOps(std::move(graphicsOps)) { } -bool VulkanRenderer::initialize() -{ - mprintf(("Initializing Vulkan graphics device at %ix%i with %i-bit color...\n", - gr_screen.max_w, - gr_screen.max_h, - gr_screen.bits_per_pixel)); - - // Load the RenderDoc API if available before doing anything with OpenGL - renderdoc::loadApi(); - - if (!initDisplayDevice()) { - return false; - } - - if (!initializeInstance()) { - mprintf(("Failed to create Vulkan instance!\n")); - return false; - } - - if (!initializeSurface()) { - mprintf(("Failed to create Vulkan surface!\n")); - return false; - } - - PhysicalDeviceValues deviceValues; - if (!pickPhysicalDevice(deviceValues)) { - mprintf(("Could not find suitable physical Vulkan device.\n")); - return false; - } - - if (!createLogicalDevice(deviceValues)) { - mprintf(("Failed to create logical device.\n")); - return false; - } - - if (!createSwapChain(deviceValues)) { - mprintf(("Failed to create swap chain.\n")); - return false; - } - - createRenderPass(); - createGraphicsPipeline(); - createFrameBuffers(); - createPresentSyncObjects(); - createCommandPool(deviceValues); - - // Prepare the rendering state by acquiring our first swap chain image - acquireNextSwapChainImage(); - - return true; -} - -bool VulkanRenderer::initDisplayDevice() const -{ - os::ViewPortProperties attrs; - attrs.enable_opengl = false; - attrs.enable_vulkan = true; - - attrs.display = os_config_read_uint("Video", "Display", 0); - attrs.width = static_cast(gr_screen.max_w); - attrs.height = static_cast(gr_screen.max_h); - - attrs.title = Osreg_title; - if (!Window_title.empty()) { - attrs.title = Window_title; - } - - if (Using_in_game_options) { - switch (Gr_configured_window_state) { - case os::ViewportState::Windowed: - // That's the default - break; - case os::ViewportState::Borderless: - 
attrs.flags.set(os::ViewPortFlags::Borderless); - break; - case os::ViewportState::Fullscreen: - attrs.flags.set(os::ViewPortFlags::Fullscreen); - break; - } - } else { - if (!Cmdline_window && !Cmdline_fullscreen_window) { - attrs.flags.set(os::ViewPortFlags::Fullscreen); - } else if (Cmdline_fullscreen_window) { - attrs.flags.set(os::ViewPortFlags::Borderless); - } - } - - if (Cmdline_capture_mouse) - attrs.flags.set(os::ViewPortFlags::Capture_Mouse); - - auto viewPort = m_graphicsOps->createViewport(attrs); - if (!viewPort) { - return false; - } - - const auto port = os::addViewport(std::move(viewPort)); - os::setMainViewPort(port); - - return true; -} -bool VulkanRenderer::initializeInstance() -{ -#if SDL_SUPPORTS_VULKAN - const auto vkGetInstanceProcAddr = - reinterpret_cast(SDL_Vulkan_GetVkGetInstanceProcAddr()); - - VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); - - const auto window = os::getSDLMainWindow(); - - unsigned int count; - if (!SDL_Vulkan_GetInstanceExtensions(window, &count, nullptr)) { - mprintf(("Error in first SDL_Vulkan_GetInstanceExtensions: %s\n", SDL_GetError())); - return false; - } - - std::vector extensions; - extensions.resize(count); - - if (!SDL_Vulkan_GetInstanceExtensions(window, &count, extensions.data())) { - mprintf(("Error in second SDL_Vulkan_GetInstanceExtensions: %s\n", SDL_GetError())); - return false; - } - - const auto instanceVersion = vk::enumerateInstanceVersion(); - gameversion::version vulkanVersion(VK_VERSION_MAJOR(instanceVersion), - VK_VERSION_MINOR(instanceVersion), - VK_VERSION_PATCH(instanceVersion), - 0); - mprintf(("Vulkan instance version %s\n", gameversion::format_version(vulkanVersion).c_str())); - - if (vulkanVersion < MinVulkanVersion) { - mprintf(("Vulkan version is less than the minimum which is %s.\n", - gameversion::format_version(MinVulkanVersion).c_str())); - return false; - } - - const auto supportedExtensions = vk::enumerateInstanceExtensionProperties(); - mprintf(("Instance 
extensions:\n")); - for (const auto& ext : supportedExtensions) { - mprintf((" Found support for %s version %" PRIu32 "\n", ext.extensionName.data(), ext.specVersion)); - if (FSO_DEBUG || Cmdline_graphics_debug_output) { - if (!stricmp(ext.extensionName, VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { - extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); - m_debugReportEnabled = true; - } - } - } - - std::vector layers; - const auto supportedLayers = vk::enumerateInstanceLayerProperties(); - mprintf(("Instance layers:\n")); - for (const auto& layer : supportedLayers) { - mprintf((" Found layer %s(%s). Spec version %d.%d.%d and implementation %" PRIu32 "\n", - layer.layerName.data(), - layer.description.data(), - VK_VERSION_MAJOR(layer.specVersion), - VK_VERSION_MINOR(layer.specVersion), - VK_VERSION_PATCH(layer.specVersion), - layer.implementationVersion)); - if (FSO_DEBUG || Cmdline_graphics_debug_output) { - if (!stricmp(layer.layerName, "VK_LAYER_LUNARG_core_validation")) { - layers.push_back("VK_LAYER_LUNARG_core_validation"); - } - } - } - - vk::ApplicationInfo appInfo(Window_title.c_str(), 1, EngineName, 1, VK_API_VERSION_1_1); - - // Now we can make the Vulkan instance - vk::InstanceCreateInfo createInfo(vk::Flags(), &appInfo); - createInfo.enabledExtensionCount = static_cast(extensions.size()); - createInfo.ppEnabledExtensionNames = extensions.data(); - createInfo.enabledLayerCount = static_cast(layers.size()); - createInfo.ppEnabledLayerNames = layers.data(); - - vk::DebugReportCallbackCreateInfoEXT createInstanceReportInfo(vk::DebugReportFlagBitsEXT::eError | - vk::DebugReportFlagBitsEXT::eWarning | - vk::DebugReportFlagBitsEXT::ePerformanceWarning); - createInstanceReportInfo.pfnCallback = debugReportCallback; - - vk::StructureChain createInstanceChain(createInfo, - createInstanceReportInfo); - - if (!m_debugReportEnabled) { - createInstanceChain.unlink(); - } - - vk::UniqueInstance instance = vk::createInstanceUnique(createInstanceChain.get(), nullptr); - 
if (!instance) { - return false; - } - - VULKAN_HPP_DEFAULT_DISPATCHER.init(instance.get()); - - if (m_debugReportEnabled) { - vk::DebugReportCallbackCreateInfoEXT reportCreateInfo(vk::DebugReportFlagBitsEXT::eError | - vk::DebugReportFlagBitsEXT::eWarning | - vk::DebugReportFlagBitsEXT::ePerformanceWarning); - reportCreateInfo.pfnCallback = debugReportCallback; - - m_debugReport = instance->createDebugReportCallbackEXTUnique(reportCreateInfo); - } - - m_vkInstance = std::move(instance); - return true; -#else - mprintf(("SDL does not support Vulkan in its current version.\n")); - return false; -#endif -} - -bool VulkanRenderer::initializeSurface() -{ -#if SDL_SUPPORTS_VULKAN - const auto window = os::getSDLMainWindow(); - - VkSurfaceKHR surface; - if (!SDL_Vulkan_CreateSurface(window, static_cast(*m_vkInstance), &surface)) { - mprintf(("Failed to create vulkan surface: %s\n", SDL_GetError())); - return false; - } - -#if VK_HEADER_VERSION >= 301 - const vk::detail::ObjectDestroy deleter(*m_vkInstance, -#else - const vk::ObjectDestroy deleter(*m_vkInstance, -#endif - nullptr, - VULKAN_HPP_DEFAULT_DISPATCHER); - m_vkSurface = vk::UniqueSurfaceKHR(vk::SurfaceKHR(surface), deleter); - return true; -#else - return false; -#endif -} - -bool VulkanRenderer::pickPhysicalDevice(PhysicalDeviceValues& deviceValues) -{ - const auto devices = m_vkInstance->enumeratePhysicalDevices(); - if (devices.empty()) { - return false; - } - - SCP_vector values; - std::transform(devices.cbegin(), devices.cend(), std::back_inserter(values), [](const vk::PhysicalDevice& dev) { - PhysicalDeviceValues vals; - vals.device = dev; - vals.properties = dev.getProperties2().properties; - vals.features = dev.getFeatures2().features; - vals.queueProperties = dev.getQueueFamilyProperties(); - return vals; - }); - - mprintf(("Physical Vulkan devices:\n")); - std::for_each(values.cbegin(), values.cend(), printPhysicalDevice); - - // Remove devices that do not have the features we need - 
values.erase(std::remove_if(values.begin(), - values.end(), - [this](PhysicalDeviceValues& value) { return isDeviceUnsuitable(value, m_vkSurface); }), - values.end()); - if (values.empty()) { - return false; - } - - // Sort the suitability of the devices in increasing order - std::sort(values.begin(), values.end(), compareDevices); - - deviceValues = values.back(); - mprintf(("Selected device %s (%d) as the primary Vulkan device.\n", - deviceValues.properties.deviceName.data(), - deviceValues.properties.deviceID)); - mprintf(("Device extensions:\n")); - for (const auto& extProp : deviceValues.extensions) { - mprintf((" Found support for %s version %" PRIu32 "\n", extProp.extensionName.data(), extProp.specVersion)); - } - - return true; -} - -bool VulkanRenderer::createLogicalDevice(const PhysicalDeviceValues& deviceValues) -{ - float queuePriority = 1.0f; - - std::vector queueInfos; - const std::set familyIndices{deviceValues.graphicsQueueIndex.index, - deviceValues.transferQueueIndex.index, - deviceValues.presentQueueIndex.index}; - - queueInfos.reserve(familyIndices.size()); - for (auto index : familyIndices) { - queueInfos.emplace_back(vk::DeviceQueueCreateFlags(), index, 1, &queuePriority); - } - - vk::DeviceCreateInfo deviceCreate; - deviceCreate.pQueueCreateInfos = queueInfos.data(); - deviceCreate.queueCreateInfoCount = static_cast(queueInfos.size()); - deviceCreate.pEnabledFeatures = &deviceValues.features; - - deviceCreate.ppEnabledExtensionNames = RequiredDeviceExtensions.data(); - deviceCreate.enabledExtensionCount = static_cast(RequiredDeviceExtensions.size()); - - m_device = deviceValues.device.createDeviceUnique(deviceCreate); - - // Create queues - m_graphicsQueue = m_device->getQueue(deviceValues.graphicsQueueIndex.index, 0); - m_transferQueue = m_device->getQueue(deviceValues.transferQueueIndex.index, 0); - m_presentQueue = m_device->getQueue(deviceValues.presentQueueIndex.index, 0); - - return true; -} -bool VulkanRenderer::createSwapChain(const 
PhysicalDeviceValues& deviceValues) -{ - // Choose one more than the minimum to avoid driver synchronization if it is not done with a thread yet - uint32_t imageCount = deviceValues.surfaceCapabilities.minImageCount + 1; - if (deviceValues.surfaceCapabilities.maxImageCount > 0 && - imageCount > deviceValues.surfaceCapabilities.maxImageCount) { - imageCount = deviceValues.surfaceCapabilities.maxImageCount; - } - - const auto surfaceFormat = chooseSurfaceFormat(deviceValues); - - vk::SwapchainCreateInfoKHR createInfo; - createInfo.surface = m_vkSurface.get(); - createInfo.minImageCount = imageCount; - createInfo.imageFormat = surfaceFormat.format; - createInfo.imageColorSpace = surfaceFormat.colorSpace; - createInfo.imageExtent = chooseSwapChainExtent(deviceValues, gr_screen.max_w, gr_screen.max_h); - createInfo.imageArrayLayers = 1; - createInfo.imageUsage = vk::ImageUsageFlagBits::eColorAttachment; - - const uint32_t queueFamilyIndices[] = {deviceValues.graphicsQueueIndex.index, deviceValues.presentQueueIndex.index}; - if (deviceValues.graphicsQueueIndex.index != deviceValues.presentQueueIndex.index) { - createInfo.imageSharingMode = vk::SharingMode::eConcurrent; - createInfo.queueFamilyIndexCount = 2; - createInfo.pQueueFamilyIndices = queueFamilyIndices; - } else { - createInfo.imageSharingMode = vk::SharingMode::eExclusive; - createInfo.queueFamilyIndexCount = 0; // Optional - createInfo.pQueueFamilyIndices = nullptr; // Optional - } - - createInfo.preTransform = deviceValues.surfaceCapabilities.currentTransform; - createInfo.compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eOpaque; - createInfo.presentMode = choosePresentMode(deviceValues); - createInfo.clipped = true; - createInfo.oldSwapchain = nullptr; - - m_swapChain = m_device->createSwapchainKHRUnique(createInfo); - - std::vector swapChainImages = m_device->getSwapchainImagesKHR(m_swapChain.get()); - m_swapChainImages = SCP_vector(swapChainImages.begin(), swapChainImages.end()); - m_swapChainImageFormat = 
surfaceFormat.format; - m_swapChainExtent = createInfo.imageExtent; - - m_swapChainImageViews.reserve(m_swapChainImages.size()); - for (const auto& image : m_swapChainImages) { - vk::ImageViewCreateInfo viewCreateInfo; - viewCreateInfo.image = image; - viewCreateInfo.viewType = vk::ImageViewType::e2D; - viewCreateInfo.format = m_swapChainImageFormat; - - viewCreateInfo.components.r = vk::ComponentSwizzle::eIdentity; - viewCreateInfo.components.g = vk::ComponentSwizzle::eIdentity; - viewCreateInfo.components.b = vk::ComponentSwizzle::eIdentity; - viewCreateInfo.components.a = vk::ComponentSwizzle::eIdentity; - - viewCreateInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; - viewCreateInfo.subresourceRange.baseMipLevel = 0; - viewCreateInfo.subresourceRange.levelCount = 1; - viewCreateInfo.subresourceRange.baseArrayLayer = 0; - viewCreateInfo.subresourceRange.layerCount = 1; - - m_swapChainImageViews.push_back(m_device->createImageViewUnique(viewCreateInfo)); - } - - return true; -} -vk::UniqueShaderModule VulkanRenderer::loadShader(const SCP_string& name) -{ - const auto def_file = defaults_get_file(name.c_str()); - - vk::ShaderModuleCreateInfo createInfo; - createInfo.codeSize = def_file.size; - createInfo.pCode = static_cast(def_file.data); - - return m_device->createShaderModuleUnique(createInfo); -} void VulkanRenderer::createFrameBuffers() { m_swapChainFramebuffers.reserve(m_swapChainImageViews.size()); for (const auto& imageView : m_swapChainImageViews) { + // Attachment 0: color, Attachment 1: depth (shared across all framebuffers) const vk::ImageView attachments[] = { imageView.get(), + m_depthImageView.get(), }; vk::FramebufferCreateInfo framebufferInfo; framebufferInfo.renderPass = m_renderPass.get(); - framebufferInfo.attachmentCount = 1; + framebufferInfo.attachmentCount = 2; framebufferInfo.pAttachments = attachments; framebufferInfo.width = m_swapChainExtent.width; framebufferInfo.height = m_swapChainExtent.height; @@ -648,168 +47,139 
@@ void VulkanRenderer::createFrameBuffers() m_swapChainFramebuffers.push_back(m_device->createFramebufferUnique(framebufferInfo)); } } +vk::Format VulkanRenderer::findDepthFormat() +{ + // Prefer D32_SFLOAT for best precision, fall back to D32_SFLOAT_S8 or D24_UNORM_S8 + const vk::Format candidates[] = { + vk::Format::eD32Sfloat, + vk::Format::eD32SfloatS8Uint, + vk::Format::eD24UnormS8Uint, + }; + + for (auto format : candidates) { + auto props = m_physicalDevice.getFormatProperties(format); + if (props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eDepthStencilAttachment) { + return format; + } + } + + // Should never happen on any real GPU + Error(LOCATION, "Failed to find supported depth format!"); + return vk::Format::eD32Sfloat; +} +void VulkanRenderer::createDepthResources() +{ + m_depthFormat = findDepthFormat(); + + // Create depth image + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = vk::ImageType::e2D; + imageInfo.format = m_depthFormat; + imageInfo.extent.width = m_swapChainExtent.width; + imageInfo.extent.height = m_swapChainExtent.height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = 1; + imageInfo.samples = vk::SampleCountFlagBits::e1; + imageInfo.tiling = vk::ImageTiling::eOptimal; + imageInfo.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + + m_depthImage = m_device->createImageUnique(imageInfo); + + // Allocate GPU memory for the depth image + m_memoryManager->allocateImageMemory(m_depthImage.get(), MemoryUsage::GpuOnly, m_depthImageMemory); + + // Create depth image view + vk::ImageViewCreateInfo viewInfo; + viewInfo.image = m_depthImage.get(); + viewInfo.viewType = vk::ImageViewType::e2D; + viewInfo.format = m_depthFormat; + viewInfo.subresourceRange.aspectMask = imageAspectFromFormat(m_depthFormat); + viewInfo.subresourceRange.baseMipLevel = 0; + 
viewInfo.subresourceRange.levelCount = 1; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = 1; + + m_depthImageView = m_device->createImageViewUnique(viewInfo); + + mprintf(("Vulkan: Created depth buffer (%dx%d, format %d)\n", + m_swapChainExtent.width, m_swapChainExtent.height, static_cast(m_depthFormat))); +} void VulkanRenderer::createRenderPass() { + // Attachment 0: Color - clear each frame + // UI screens draw their own full-screen backgrounds; 3D clears via scene_texture_begin. + // Popups that need previous frame content use gr_save_screen/gr_restore_screen. vk::AttachmentDescription colorAttachment; colorAttachment.format = m_swapChainImageFormat; colorAttachment.samples = vk::SampleCountFlagBits::e1; - colorAttachment.loadOp = vk::AttachmentLoadOp::eClear; colorAttachment.storeOp = vk::AttachmentStoreOp::eStore; - colorAttachment.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; colorAttachment.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; - colorAttachment.initialLayout = vk::ImageLayout::eUndefined; colorAttachment.finalLayout = vk::ImageLayout::ePresentSrcKHR; + // Attachment 1: Depth + vk::AttachmentDescription depthAttachment; + depthAttachment.format = m_depthFormat; + depthAttachment.samples = vk::SampleCountFlagBits::e1; + depthAttachment.loadOp = vk::AttachmentLoadOp::eClear; + depthAttachment.storeOp = vk::AttachmentStoreOp::eDontCare; + depthAttachment.stencilLoadOp = vk::AttachmentLoadOp::eClear; + depthAttachment.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + depthAttachment.initialLayout = vk::ImageLayout::eUndefined; + depthAttachment.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + vk::AttachmentReference colorAttachRef; colorAttachRef.attachment = 0; colorAttachRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + vk::AttachmentReference depthAttachRef; + depthAttachRef.attachment = 1; + depthAttachRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + 
vk::SubpassDescription subpass; subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorAttachRef; + subpass.pDepthStencilAttachment = &depthAttachRef; vk::SubpassDependency dependency; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; + dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentWrite; - dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; - - dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; - dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + std::array attachments = {colorAttachment, depthAttachment}; vk::RenderPassCreateInfo renderPassInfo; - renderPassInfo.attachmentCount = 1; - renderPassInfo.pAttachments = &colorAttachment; + renderPassInfo.attachmentCount = static_cast(attachments.size()); + renderPassInfo.pAttachments = attachments.data(); renderPassInfo.subpassCount = 1; renderPassInfo.pSubpasses = &subpass; renderPassInfo.dependencyCount = 1; renderPassInfo.pDependencies = &dependency; m_renderPass = m_device->createRenderPassUnique(renderPassInfo); -} -void VulkanRenderer::createGraphicsPipeline() -{ - auto vertShaderMod = loadShader("vulkan.vert.spv"); - vk::PipelineShaderStageCreateInfo vertStageCreate; - vertStageCreate.stage = vk::ShaderStageFlagBits::eVertex; - vertStageCreate.module = vertShaderMod.get(); - vertStageCreate.pName = "main"; - - auto fragShaderMod = loadShader("vulkan.frag.spv"); - vk::PipelineShaderStageCreateInfo fragStageCreate; - fragStageCreate.stage = vk::ShaderStageFlagBits::eFragment; - fragStageCreate.module = fragShaderMod.get(); - 
fragStageCreate.pName = "main"; - - std::array shaderStages = {vertStageCreate, fragStageCreate}; - - vk::PipelineVertexInputStateCreateInfo vertInCreate; - vertInCreate.vertexBindingDescriptionCount = 0; - vertInCreate.vertexAttributeDescriptionCount = 0; - - vk::PipelineInputAssemblyStateCreateInfo inputAssembly; - inputAssembly.topology = vk::PrimitiveTopology::eTriangleList; - inputAssembly.primitiveRestartEnable = false; - - vk::Viewport viewport; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = i2fl(gr_screen.max_w); - viewport.height = i2fl(gr_screen.max_h); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - - vk::Rect2D scissor; - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent = m_swapChainExtent; - - vk::PipelineViewportStateCreateInfo viewportState; - viewportState.viewportCount = 1; - viewportState.pViewports = &viewport; - viewportState.scissorCount = 1; - viewportState.pScissors = &scissor; - - vk::PipelineRasterizationStateCreateInfo rasterizer; - rasterizer.depthClampEnable = false; - rasterizer.rasterizerDiscardEnable = false; - rasterizer.polygonMode = vk::PolygonMode::eFill; - rasterizer.lineWidth = 1.0f; - rasterizer.cullMode |= vk::CullModeFlagBits::eBack; - rasterizer.frontFace = vk::FrontFace::eClockwise; - rasterizer.depthBiasEnable = false; - rasterizer.depthBiasConstantFactor = 0.0f; - rasterizer.depthBiasClamp = 0.0f; - rasterizer.depthBiasSlopeFactor = 0.0f; - - vk::PipelineMultisampleStateCreateInfo multisampling; - multisampling.sampleShadingEnable = false; - multisampling.rasterizationSamples = vk::SampleCountFlagBits::e1; - multisampling.minSampleShading = 1.0f; - multisampling.pSampleMask = nullptr; - multisampling.alphaToCoverageEnable = false; - multisampling.alphaToOneEnable = false; - - vk::PipelineColorBlendAttachmentState colorBlendAttachment; - colorBlendAttachment.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | 
vk::ColorComponentFlagBits::eA; - colorBlendAttachment.blendEnable = false; - colorBlendAttachment.srcColorBlendFactor = vk::BlendFactor::eOne; // Optional - colorBlendAttachment.dstColorBlendFactor = vk::BlendFactor::eZero; // Optional - colorBlendAttachment.colorBlendOp = vk::BlendOp::eAdd; // Optional - colorBlendAttachment.srcAlphaBlendFactor = vk::BlendFactor::eOne; // Optional - colorBlendAttachment.dstAlphaBlendFactor = vk::BlendFactor::eZero; // Optional - colorBlendAttachment.alphaBlendOp = vk::BlendOp::eAdd; // Optional - - vk::PipelineColorBlendStateCreateInfo colorBlending; - colorBlending.logicOpEnable = false; - colorBlending.logicOp = vk::LogicOp::eCopy; - colorBlending.attachmentCount = 1; - colorBlending.pAttachments = &colorBlendAttachment; - colorBlending.blendConstants[0] = 0.0f; - colorBlending.blendConstants[1] = 0.0f; - colorBlending.blendConstants[2] = 0.0f; - colorBlending.blendConstants[3] = 0.0f; - - vk::DynamicState dynamicStates[] = { - vk::DynamicState::eViewport, - vk::DynamicState::eLineWidth, - }; - vk::PipelineDynamicStateCreateInfo dynamicStateInfo; - dynamicStateInfo.dynamicStateCount = 2; - dynamicStateInfo.pDynamicStates = dynamicStates; - - vk::PipelineLayoutCreateInfo pipelineLayout; - pipelineLayout.setLayoutCount = 0; - pipelineLayout.pSetLayouts = nullptr; - pipelineLayout.pushConstantRangeCount = 0; - pipelineLayout.pPushConstantRanges = nullptr; - - m_pipelineLayout = m_device->createPipelineLayoutUnique(pipelineLayout); - - vk::GraphicsPipelineCreateInfo pipelineInfo; - pipelineInfo.stageCount = 2; - pipelineInfo.pStages = shaderStages.data(); - pipelineInfo.pVertexInputState = &vertInCreate; - pipelineInfo.pInputAssemblyState = &inputAssembly; - pipelineInfo.pViewportState = &viewportState; - pipelineInfo.pRasterizationState = &rasterizer; - pipelineInfo.pMultisampleState = &multisampling; - pipelineInfo.pDepthStencilState = nullptr; - pipelineInfo.pColorBlendState = &colorBlending; - pipelineInfo.pDynamicState = 
nullptr; - pipelineInfo.layout = m_pipelineLayout.get(); - pipelineInfo.renderPass = m_renderPass.get(); - pipelineInfo.subpass = 0; - pipelineInfo.basePipelineHandle = nullptr; - pipelineInfo.basePipelineIndex = -1; - - m_graphicsPipeline = m_device->createGraphicsPipelineUnique(nullptr, pipelineInfo).value; + // Create a second render pass with loadOp=eLoad for resuming the swap chain + // after post-processing. Same formats/samples = render-pass-compatible with m_renderPass. + colorAttachment.loadOp = vk::AttachmentLoadOp::eLoad; + colorAttachment.initialLayout = vk::ImageLayout::ePresentSrcKHR; + + depthAttachment.loadOp = vk::AttachmentLoadOp::eClear; + depthAttachment.initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + attachments = {colorAttachment, depthAttachment}; + + m_renderPassLoad = m_device->createRenderPassUnique(renderPassInfo); } void VulkanRenderer::createCommandPool(const PhysicalDeviceValues& values) { @@ -822,79 +192,258 @@ void VulkanRenderer::createCommandPool(const PhysicalDeviceValues& values) void VulkanRenderer::createPresentSyncObjects() { for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { - m_frames[i].reset(new RenderFrame(m_device.get(), m_swapChain.get(), m_graphicsQueue, m_presentQueue)); + m_frames[i].reset(new VulkanRenderFrame(m_device.get(), m_swapChain.get(), m_graphicsQueue, m_presentQueue)); } m_swapChainImageRenderImage.resize(m_swapChainImages.size(), nullptr); } -void VulkanRenderer::acquireNextSwapChainImage() + +bool VulkanRenderer::readbackFramebuffer(ubyte** outPixels, uint32_t* outWidth, uint32_t* outHeight) { - m_frames[m_currentFrame]->waitForFinish(); + *outPixels = nullptr; + *outWidth = 0; + *outHeight = 0; - m_currentSwapChainImage = m_frames[m_currentFrame]->acquireSwapchainImage(); + if (m_previousSwapChainImage == UINT32_MAX) { + mprintf(("VulkanRenderer::readbackFramebuffer - no previous frame available\n")); + return false; + } - // Ensure that this image is no longer in use - if 
(m_swapChainImageRenderImage[m_currentSwapChainImage]) { - m_swapChainImageRenderImage[m_currentSwapChainImage]->waitForFinish(); + if (!m_frameInProgress) { + mprintf(("VulkanRenderer::readbackFramebuffer - no frame in progress\n")); + return false; } - // Reserve the image as in use - m_swapChainImageRenderImage[m_currentSwapChainImage] = m_frames[m_currentFrame].get(); -} -void VulkanRenderer::drawScene(vk::Framebuffer destinationFb, vk::CommandBuffer cmdBuffer) -{ + + auto prevImage = m_swapChainImages[m_previousSwapChainImage]; + uint32_t w = m_swapChainExtent.width; + uint32_t h = m_swapChainExtent.height; + vk::DeviceSize bufferSize = static_cast(w) * h * 4; + + // End the current render pass so we can record transfer commands + m_currentCommandBuffer.endRenderPass(); + + // --- One-shot command buffer to copy previous frame to staging buffer --- + + vk::CommandBufferAllocateInfo cmdAlloc; + cmdAlloc.commandPool = m_graphicsCommandPool.get(); + cmdAlloc.level = vk::CommandBufferLevel::ePrimary; + cmdAlloc.commandBufferCount = 1; + + auto cmdBuffers = m_device->allocateCommandBuffers(cmdAlloc); + auto cmd = cmdBuffers.front(); + vk::CommandBufferBeginInfo beginInfo; - beginInfo.flags |= vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + cmd.begin(beginInfo); + + // Transition previous swap chain image for transfer read + vk::ImageMemoryBarrier preBarrier; + preBarrier.oldLayout = vk::ImageLayout::ePresentSrcKHR; + preBarrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; + preBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.image = prevImage; + preBarrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + preBarrier.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + preBarrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; + + cmd.pipelineBarrier( + 
vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, preBarrier); + + // Create staging buffer for readback + vk::BufferCreateInfo bufferCreateInfo; + bufferCreateInfo.size = bufferSize; + bufferCreateInfo.usage = vk::BufferUsageFlagBits::eTransferDst; + bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive; + + auto stagingBuffer = m_device->createBuffer(bufferCreateInfo); + + VulkanAllocation stagingAlloc{}; + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::GpuToCpu, stagingAlloc)) { + mprintf(("VulkanRenderer::readbackFramebuffer - failed to allocate staging buffer\n")); + m_device->destroyBuffer(stagingBuffer); + cmd.end(); + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + + // Re-begin render pass so the frame can continue + vk::RenderPassBeginInfo renderPassBegin; + renderPassBegin.renderPass = m_renderPass.get(); + renderPassBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); + renderPassBegin.renderArea.offset.x = 0; + renderPassBegin.renderArea.offset.y = 0; + renderPassBegin.renderArea.extent = m_swapChainExtent; + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + renderPassBegin.clearValueCount = static_cast(clearValues.size()); + renderPassBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_renderPass.get(), 0); + m_stateTracker->setViewport(0.0f, + static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); + return false; + } - cmdBuffer.begin(beginInfo); + // Copy image to staging buffer + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; // tightly packed + region.bufferImageHeight = 0; // tightly packed 
+ region.imageSubresource = {vk::ImageAspectFlagBits::eColor, 0, 0, 1}; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(w, h, 1); + + cmd.copyImageToBuffer(prevImage, vk::ImageLayout::eTransferSrcOptimal, stagingBuffer, region); + + // Transition previous swap chain image back + vk::ImageMemoryBarrier postBarrier; + postBarrier.oldLayout = vk::ImageLayout::eTransferSrcOptimal; + postBarrier.newLayout = vk::ImageLayout::ePresentSrcKHR; + postBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.image = prevImage; + postBarrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + postBarrier.srcAccessMask = vk::AccessFlagBits::eTransferRead; + postBarrier.dstAccessMask = {}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eBottomOfPipe, + {}, nullptr, nullptr, postBarrier); + + cmd.end(); + + // Submit one-shot command buffer and wait + auto fence = m_device->createFence({}); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + m_graphicsQueue.submit(submitInfo, fence); + + auto waitResult = m_device->waitForFences(fence, VK_TRUE, UINT64_MAX); + if (waitResult != vk::Result::eSuccess) { + mprintf(("VulkanRenderer::readbackFramebuffer - fence wait failed\n")); + } + + m_device->destroyFence(fence); + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + + // Read back pixels from staging buffer (raw BGRA matching swap chain format) + bool success = false; + auto* mappedPtr = static_cast(m_memoryManager->mapMemory(stagingAlloc)); + + if (mappedPtr) { + auto* pixels = static_cast(vm_malloc(static_cast(bufferSize))); + if (pixels) { + memcpy(pixels, mappedPtr, static_cast(bufferSize)); + *outPixels = pixels; + *outWidth = w; + *outHeight = h; + success = true; + } + m_memoryManager->unmapMemory(stagingAlloc); + } + // Free staging 
buffer + m_device->destroyBuffer(stagingBuffer); + m_memoryManager->freeAllocation(stagingAlloc); + + // Re-begin render pass on main command buffer vk::RenderPassBeginInfo renderPassBegin; renderPassBegin.renderPass = m_renderPass.get(); - renderPassBegin.framebuffer = destinationFb; + renderPassBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); renderPassBegin.renderArea.offset.x = 0; renderPassBegin.renderArea.offset.y = 0; renderPassBegin.renderArea.extent = m_swapChainExtent; - vk::ClearValue clearColor; - clearColor.color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); - renderPassBegin.clearValueCount = 1; - renderPassBegin.pClearValues = &clearColor; + renderPassBegin.clearValueCount = static_cast(clearValues.size()); + renderPassBegin.pClearValues = clearValues.data(); - cmdBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); + m_currentCommandBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); - cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, m_graphicsPipeline.get()); + m_stateTracker->setRenderPass(m_renderPass.get(), 0); + m_stateTracker->setViewport(0.0f, + static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); - cmdBuffer.draw(3, 1, 0, 0); + return success; +} - cmdBuffer.endRenderPass(); +uint32_t VulkanRenderer::getMinUniformBufferOffsetAlignment() const +{ + if (!m_physicalDevice) { + // Fallback to common value if device not initialized + return 256; + } - cmdBuffer.end(); + auto properties = m_physicalDevice.getProperties(); + return static_cast(properties.limits.minUniformBufferOffsetAlignment); } -void VulkanRenderer::flip() + +uint32_t VulkanRenderer::getMaxUniformBufferSize() const { - vk::CommandBufferAllocateInfo cmdBufferAlloc; - cmdBufferAlloc.commandPool = 
m_graphicsCommandPool.get(); - cmdBufferAlloc.level = vk::CommandBufferLevel::ePrimary; - cmdBufferAlloc.commandBufferCount = 1; + if (!m_physicalDevice) { + return 65536; + } - // Uses the non-unique version since we can't get the buffers into the lambda below otherwise. Only C++14 can do - // that - auto allocatedBuffers = m_device->allocateCommandBuffers(cmdBufferAlloc); - auto& cmdBuffer = allocatedBuffers.front(); + auto properties = m_physicalDevice.getProperties(); + return properties.limits.maxUniformBufferRange; +} - drawScene(m_swapChainFramebuffers[m_currentSwapChainImage].get(), cmdBuffer); - m_frames[m_currentFrame]->onFrameFinished([this, allocatedBuffers]() mutable { - m_device->freeCommandBuffers(m_graphicsCommandPool.get(), allocatedBuffers); - allocatedBuffers.clear(); - }); +float VulkanRenderer::getMaxAnisotropy() const +{ + if (!m_physicalDevice) { + return 1.0f; + } - m_frames[m_currentFrame]->submitAndPresent(allocatedBuffers); + auto properties = m_physicalDevice.getProperties(); + return properties.limits.maxSamplerAnisotropy; +} - // Advance counters to prepare for the next frame - m_currentFrame = (m_currentFrame + 1) % MAX_FRAMES_IN_FLIGHT; +bool VulkanRenderer::isTextureCompressionBCSupported() const +{ + if (!m_physicalDevice) { + return false; + } + + auto features = m_physicalDevice.getFeatures(); + return features.textureCompressionBC == VK_TRUE; +} - acquireNextSwapChainImage(); +void VulkanRenderer::waitIdle() +{ + if (m_device) { + m_device->waitIdle(); + } } + +void VulkanRenderer::waitForFrame(uint64_t frameNumber) +{ + // Fast path: if enough frames have elapsed, the work is definitely done + if (m_frameNumber >= frameNumber + MAX_FRAMES_IN_FLIGHT) { + return; + } + + // Wait on the specific frame's fence + auto frameIndex = static_cast(frameNumber % MAX_FRAMES_IN_FLIGHT); + m_frames[frameIndex]->waitForFinish(); +} + +VkCommandBuffer VulkanRenderer::getVkCurrentCommandBuffer() const +{ + return 
static_cast(m_currentCommandBuffer); +} + void VulkanRenderer::shutdown() { // Wait for all frames to complete to ensure no drawing is in progress when we destroy the device @@ -903,7 +452,91 @@ void VulkanRenderer::shutdown() } // For good measure, also wait until the device is idle m_device->waitIdle(); + + // Shutdown ImGui Vulkan backend before destroying any Vulkan objects + shutdownImGui(); + + // Shutdown managers in reverse order of initialization + if (m_queryManager) { + setQueryManager(nullptr); + m_queryManager->shutdown(); + m_queryManager.reset(); + } + + if (m_postProcessor) { + setPostProcessor(nullptr); + m_postProcessor->shutdown(); + m_postProcessor.reset(); + } + + // Clean up shared post-processing manager + if (graphics::Post_processing_manager) { + graphics::Post_processing_manager->clear(); + graphics::Post_processing_manager = nullptr; + } + + if (m_drawManager) { + setDrawManager(nullptr); + m_drawManager->shutdown(); + m_drawManager.reset(); + } + + if (m_stateTracker) { + setStateTracker(nullptr); + m_stateTracker->shutdown(); + m_stateTracker.reset(); + } + + if (m_pipelineManager) { + m_pipelineManager->savePipelineCache("vulkan_pipeline.cache"); + setPipelineManager(nullptr); + m_pipelineManager->shutdown(); + m_pipelineManager.reset(); + } + + if (m_descriptorManager) { + setDescriptorManager(nullptr); + m_descriptorManager->shutdown(); + m_descriptorManager.reset(); + } + + if (m_shaderManager) { + setShaderManager(nullptr); + m_shaderManager->shutdown(); + m_shaderManager.reset(); + } + + if (m_textureManager) { + setTextureManager(nullptr); + m_textureManager->shutdown(); + m_textureManager.reset(); + } + + if (m_bufferManager) { + setBufferManager(nullptr); + m_bufferManager->shutdown(); + m_bufferManager.reset(); + } + + // Destroy depth resources before memory manager + m_depthImageView.reset(); + m_depthImage.reset(); + if (m_memoryManager && m_depthImageMemory.isValid()) { + 
m_memoryManager->freeAllocation(m_depthImageMemory); + } + + // Deletion queue must be flushed before memory manager shutdown + if (m_deletionQueue) { + setDeletionQueue(nullptr); + m_deletionQueue->shutdown(); + m_deletionQueue.reset(); + } + + if (m_memoryManager) { + setMemoryManager(nullptr); + m_memoryManager->shutdown(); + m_memoryManager.reset(); + } } -} // namespace vulkan -} // namespace graphics +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanRenderer.h b/code/graphics/vulkan/VulkanRenderer.h index c2d53f21f42..2f8d103ea82 100644 --- a/code/graphics/vulkan/VulkanRenderer.h +++ b/code/graphics/vulkan/VulkanRenderer.h @@ -2,7 +2,18 @@ #include "osapi/osapi.h" -#include "RenderFrame.h" +#include "VulkanMemory.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" +#include "VulkanShader.h" +#include "VulkanDescriptorManager.h" +#include "VulkanPipeline.h" +#include "VulkanState.h" +#include "VulkanDraw.h" +#include "VulkanDeletionQueue.h" +#include "VulkanPostProcessing.h" +#include "VulkanQuery.h" +#include "VulkanRenderFrame.h" #include @@ -12,8 +23,7 @@ #define SDL_SUPPORTS_VULKAN 0 #endif -namespace graphics { -namespace vulkan { +namespace graphics::vulkan { struct QueueIndex { // Poor mans std::optional @@ -26,13 +36,13 @@ struct PhysicalDeviceValues { vk::PhysicalDeviceProperties properties; vk::PhysicalDeviceFeatures features; - std::vector extensions; + SCP_vector extensions; vk::SurfaceCapabilitiesKHR surfaceCapabilities; - std::vector surfaceFormats; - std::vector presentModes; + SCP_vector surfaceFormats; + SCP_vector presentModes; - std::vector queueProperties; + SCP_vector queueProperties; QueueIndex graphicsQueueIndex; QueueIndex transferQueueIndex; QueueIndex presentQueueIndex; @@ -44,13 +54,179 @@ class VulkanRenderer { bool initialize(); + /** + * @brief Setup for a new frame - begins command buffer and render pass + * Called at the START of each frame before any draw calls + */ + void setupFrame(); + + /** + * 
@brief End frame - ends render pass, submits, and presents + * Called at the END of each frame after all draw calls + */ void flip(); void shutdown(); - private: - static constexpr uint32_t MAX_FRAMES_IN_FLIGHT = 2; + /** + * @brief Read back the previous frame's framebuffer to CPU memory + * + * Copies the previously presented swap chain image to a vm_malloc'd pixel + * buffer (4 bytes per pixel) in the swap chain's own channel order; no swizzle + * is performed (typically raw BGRA for a B8G8R8A8 swap chain). Caller must vm_free the returned buffer. + * + * @param[out] outPixels Receives the vm_malloc'd pixel buffer (swap chain channel order, not swizzled) + * @param[out] outWidth Receives the image width + * @param[out] outHeight Receives the image height + * @return true on success, false on failure + */ + bool readbackFramebuffer(ubyte** outPixels, uint32_t* outWidth, uint32_t* outHeight); + + /** + * @brief Get the minimum uniform buffer offset alignment requirement + * @return The alignment in bytes (typically 64 or 256) + */ + uint32_t getMinUniformBufferOffsetAlignment() const; + + /** + * @brief Get the current frame number (total frames rendered) + */ + uint64_t getCurrentFrameNumber() const { return m_frameNumber; } + + /** + * @brief Wait for a specific frame's GPU work to complete + * + * Waits on that frame's fence rather than stalling the entire device. + * No-op if the frame has already completed. 
+ */ + void waitForFrame(uint64_t frameNumber); + + /** + * @brief Wait for all GPU work to complete + */ + void waitIdle(); + + /** + * @brief Get the current command buffer as a raw Vulkan handle (for ImGui) + */ + VkCommandBuffer getVkCurrentCommandBuffer() const; + + /** + * @brief Check if VK_EXT_debug_utils is enabled + */ + bool isDebugUtilsEnabled() const { return m_debugUtilsEnabled; } + + /** + * @brief Get the maximum uniform buffer range + */ + uint32_t getMaxUniformBufferSize() const; + + /** + * @brief Get the maximum sampler anisotropy + */ + float getMaxAnisotropy() const; + + /** + * @brief Check if BC texture compression is supported + */ + bool isTextureCompressionBCSupported() const; + + /** + * @brief Check if vertex shader layer output is supported (for shadow cascades) + */ + bool supportsShaderViewportLayerOutput() const { return m_supportsShaderViewportLayerOutput; } + + /** + * @brief Switch from swap chain pass to HDR scene pass + * + * Called by vulkan_scene_texture_begin(). Ends the current swap chain + * render pass and begins the HDR scene render pass. + */ + void beginSceneRendering(); + + void resumeSceneRendering(); + + /** + * @brief Switch from HDR scene pass back to swap chain + * + * Called by vulkan_scene_texture_end(). Ends the HDR scene render pass, + * runs post-processing, and begins the resumed swap chain render pass. + */ + void endSceneRendering(); + + /** + * @brief Copy scene color to effect texture mid-scene + * + * Called by vulkan_copy_effect_texture(). Ends the current scene render + * pass, copies scene color → effect texture, then resumes the scene + * render pass with loadOp=eLoad to preserve existing content. + */ + void copyEffectTexture(); + + /** + * @brief Copy scene depth mid-scene for soft particle sampling + * + * Called lazily from the first particle draw per frame. Ends the current + * scene render pass, copies depth → samplable copy, then resumes the + * scene render pass with loadOp=eLoad. 
No-op if already copied this frame. + */ + void copySceneDepthForParticles(); + + /** + * @brief Check if scene depth copy is available for sampling this frame + */ + bool isSceneDepthCopied() const { return m_sceneDepthCopiedThisFrame; } + + /** + * @brief Check if we're currently rendering to the HDR scene target + */ + bool isSceneRendering() const { return m_sceneRendering; } + + /** + * @brief Begin rendering to an off-screen render target + * + * Ends the current render pass (swap chain or previous RT face) and begins + * a new render pass targeting the given texture's framebuffer. + */ + void beginRenderTarget(tcache_slot_vulkan* ts, int face); + + /** + * @brief End render target and resume the swap chain pass + * + * Ends the current RT render pass and resumes the swap chain render pass + * with loadOp=eLoad to preserve existing content. + */ + void endRenderTarget(); + + /** + * @brief Resume the swap chain render pass with loadOp=eLoad + * + * Begins a new render pass targeting the current swap chain image, + * preserving existing content. Used after off-screen rendering + * (render targets, irradiance map generation) to return to swap chain. + */ + void resumeSwapChainPass(); + + /** + * @brief Check if we're currently rendering to an off-screen render target + */ + bool isRenderTargetActive() const { return m_renderTargetActive; } + + /** + * @brief Set whether the G-buffer render pass is active + * + * Called by deferred_lighting_finish() to switch from G-buffer to + * scene render pass mid-frame for forward transparent rendering. 
+ */ + void setUseGbufRenderPass(bool use) { m_useGbufRenderPass = use; } + bool isUsingGbufRenderPass() const { return m_useGbufRenderPass; } + + /** + * @brief Get the validated MSAA sample count for deferred lighting + */ + vk::SampleCountFlagBits getMsaaSampleCount() const { return m_msaaSampleCount; } + private: bool initDisplayDevice() const; bool initializeInstance(); @@ -61,24 +237,28 @@ class VulkanRenderer { bool createLogicalDevice(const PhysicalDeviceValues& deviceValues); - bool createSwapChain(const PhysicalDeviceValues& deviceValues); - - vk::UniqueShaderModule loadShader(const SCP_string& name); - - void createGraphicsPipeline(); + bool createSwapChain(const PhysicalDeviceValues& deviceValues, vk::SwapchainKHR oldSwapchain = nullptr); void createRenderPass(); void createFrameBuffers(); + void createDepthResources(); + + vk::Format findDepthFormat(); + void createCommandPool(const PhysicalDeviceValues& values); void createPresentSyncObjects(); - void drawScene(vk::Framebuffer destinationFb, vk::CommandBuffer cmdBuffer); - void acquireNextSwapChainImage(); + bool recreateSwapChain(); + + void createImGuiDescriptorPool(); + void initImGui(); + void shutdownImGui(); + std::unique_ptr m_graphicsOps; vk::UniqueInstance m_vkInstance; @@ -98,23 +278,74 @@ class VulkanRenderer { SCP_vector m_swapChainImages; SCP_vector m_swapChainImageViews; SCP_vector m_swapChainFramebuffers; - SCP_vector m_swapChainImageRenderImage; + SCP_vector m_swapChainImageRenderImage; uint32_t m_currentSwapChainImage = 0; + uint32_t m_previousSwapChainImage = UINT32_MAX; // For saveScreen() readback of previous frame + + // Depth buffer + vk::UniqueImage m_depthImage; + vk::UniqueImageView m_depthImageView; + VulkanAllocation m_depthImageMemory; + vk::Format m_depthFormat = vk::Format::eUndefined; - vk::UniqueRenderPass m_renderPass; - vk::UniquePipelineLayout m_pipelineLayout; - vk::UniquePipeline m_graphicsPipeline; + vk::UniqueRenderPass m_renderPass; // Swap chain pass with 
loadOp=eClear + vk::UniqueRenderPass m_renderPassLoad; // Swap chain pass with loadOp=eLoad (resumed after post-processing) + vk::UniqueDescriptorPool m_imguiDescriptorPool; uint32_t m_currentFrame = 0; - std::array, MAX_FRAMES_IN_FLIGHT> m_frames; + uint64_t m_frameNumber = 0; // Total frames rendered (for sync tracking) + std::array, MAX_FRAMES_IN_FLIGHT> m_frames; vk::UniqueCommandPool m_graphicsCommandPool; + // Current frame command buffer (valid between setupFrame and flip) + vk::CommandBuffer m_currentCommandBuffer; + SCP_vector m_currentCommandBuffers; // For cleanup + bool m_frameInProgress = false; + + // Swap chain recreation + bool m_swapChainNeedsRecreation = false; + + // Physical device info (needed for memory manager) + vk::PhysicalDevice m_physicalDevice; + uint32_t m_graphicsQueueFamilyIndex = 0; + uint32_t m_transferQueueFamilyIndex = 0; + uint32_t m_presentQueueFamilyIndex = 0; + + // Memory, buffer, and texture management + std::unique_ptr m_memoryManager; + std::unique_ptr m_bufferManager; + std::unique_ptr m_textureManager; + std::unique_ptr m_deletionQueue; + + // Shader, descriptor, and pipeline management + std::unique_ptr m_shaderManager; + std::unique_ptr m_descriptorManager; + std::unique_ptr m_pipelineManager; + + // State tracking and draw management + std::unique_ptr m_stateTracker; + std::unique_ptr m_drawManager; + + // Query management (GPU timestamp profiling) + std::unique_ptr m_queryManager; + + // Post-processing + std::unique_ptr m_postProcessor; + bool m_sceneRendering = false; + bool m_sceneDepthCopiedThisFrame = false; + bool m_useGbufRenderPass = false; // True when scene uses G-buffer (deferred lighting) + + bool m_supportsShaderViewportLayerOutput = false; // VK_EXT_shader_viewport_index_layer + vk::SampleCountFlagBits m_msaaSampleCount = vk::SampleCountFlagBits::e1; // Validated MSAA sample count + bool m_renderTargetActive = false; // True when rendering to off-screen RT (bm_set_render_target) + #if 
SDL_SUPPORTS_VULKAN bool m_debugReportEnabled = false; + bool m_debugUtilsEnabled = false; #endif + }; -} // namespace vulkan -} // namespace graphics +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanRendererImGui.cpp b/code/graphics/vulkan/VulkanRendererImGui.cpp new file mode 100644 index 00000000000..936b51a67d5 --- /dev/null +++ b/code/graphics/vulkan/VulkanRendererImGui.cpp @@ -0,0 +1,100 @@ + +#include "VulkanRenderer.h" + + +#include "backends/imgui_impl_vulkan.h" +#include "graphics/2d.h" +#include "lighting/lighting.h" +#include "mod_table/mod_table.h" + +#if SDL_VERSION_ATLEAST(2, 0, 6) +#endif + + +extern float flFrametime; + +namespace graphics::vulkan { + + +void VulkanRenderer::createImGuiDescriptorPool() +{ + vk::DescriptorPoolSize poolSize; + poolSize.type = vk::DescriptorType::eCombinedImageSampler; + poolSize.descriptorCount = 100; + + vk::DescriptorPoolCreateInfo poolInfo; + poolInfo.flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet; + poolInfo.maxSets = 100; + poolInfo.poolSizeCount = 1; + poolInfo.pPoolSizes = &poolSize; + + m_imguiDescriptorPool = m_device->createDescriptorPoolUnique(poolInfo); +} + +void VulkanRenderer::initImGui() +{ + createImGuiDescriptorPool(); + + // Load Vulkan function pointers for imgui (required with VK_NO_PROTOTYPES) + auto vkInstance = static_cast(*m_vkInstance); + ImGui_ImplVulkan_LoadFunctions([](const char* function_name, void* user_data) -> PFN_vkVoidFunction { + return VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr( + static_cast(user_data), function_name); + }, vkInstance); + + ImGui_ImplVulkan_InitInfo initInfo = {}; + initInfo.Instance = static_cast(*m_vkInstance); + initInfo.PhysicalDevice = static_cast(m_physicalDevice); + initInfo.Device = static_cast(*m_device); + initInfo.QueueFamily = m_graphicsQueueFamilyIndex; + initInfo.Queue = static_cast(m_graphicsQueue); + initInfo.PipelineCache = VK_NULL_HANDLE; + initInfo.DescriptorPool = 
static_cast(*m_imguiDescriptorPool); + initInfo.Subpass = 0; + initInfo.MinImageCount = 2; + initInfo.ImageCount = static_cast(m_swapChainImages.size()); + initInfo.MSAASamples = VK_SAMPLE_COUNT_1_BIT; + initInfo.Allocator = nullptr; + initInfo.CheckVkResultFn = nullptr; + + ImGui_ImplVulkan_Init(&initInfo, static_cast(*m_renderPass)); + + // Upload font textures via one-time command buffer + { + vk::CommandBufferAllocateInfo allocInfo; + allocInfo.commandPool = m_graphicsCommandPool.get(); + allocInfo.level = vk::CommandBufferLevel::ePrimary; + allocInfo.commandBufferCount = 1; + + auto cmdBuffers = m_device->allocateCommandBuffers(allocInfo); + auto cmd = cmdBuffers.front(); + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + cmd.begin(beginInfo); + + ImGui_ImplVulkan_CreateFontsTexture(static_cast(cmd)); + + cmd.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + m_graphicsQueue.submit(submitInfo, nullptr); + m_graphicsQueue.waitIdle(); + + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + ImGui_ImplVulkan_DestroyFontUploadObjects(); + } + + mprintf(("Vulkan: ImGui backend initialized successfully\n")); +} + +void VulkanRenderer::shutdownImGui() +{ + ImGui_ImplVulkan_Shutdown(); + m_imguiDescriptorPool.reset(); + mprintf(("Vulkan: ImGui backend shut down\n")); +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanRendererLoop.cpp b/code/graphics/vulkan/VulkanRendererLoop.cpp new file mode 100644 index 00000000000..98ca7eaba1a --- /dev/null +++ b/code/graphics/vulkan/VulkanRendererLoop.cpp @@ -0,0 +1,537 @@ + +#include "VulkanRenderer.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" + + +#include "backends/imgui_impl_vulkan.h" +#include "graphics/2d.h" +#include "lighting/lighting.h" +#include "mod_table/mod_table.h" + +#if SDL_VERSION_ATLEAST(2, 0, 6) +#endif + + +extern float 
flFrametime; + +namespace graphics::vulkan { + +void VulkanRenderer::acquireNextSwapChainImage() +{ + m_frames[m_currentFrame]->waitForFinish(); + + // Recreate swap chain if flagged from a previous frame + if (m_swapChainNeedsRecreation) { + // Wait for minimized window (0x0 extent) before recreating + while (true) { + if (recreateSwapChain()) { + break; + } + // Window is minimized — wait and pump events until surface is valid again + os_sleep(100); + SDL_PumpEvents(); + } + } + + uint32_t imageIndex = 0; + auto status = m_frames[m_currentFrame]->acquireSwapchainImage(imageIndex); + + if (status == SwapChainStatus::eOutOfDate) { + // Must recreate immediately and retry + while (true) { + if (recreateSwapChain()) { + break; + } + os_sleep(100); + SDL_PumpEvents(); + } + status = m_frames[m_currentFrame]->acquireSwapchainImage(imageIndex); + if (status == SwapChainStatus::eOutOfDate) { + // If still failing after recreation, flag for next frame + m_swapChainNeedsRecreation = true; + } + } + + if (status == SwapChainStatus::eSuboptimal) { + m_swapChainNeedsRecreation = true; + } + + m_currentSwapChainImage = imageIndex; + + // Ensure that this image is no longer in use + if (m_swapChainImageRenderImage[m_currentSwapChainImage]) { + m_swapChainImageRenderImage[m_currentSwapChainImage]->waitForFinish(); + } + // Reserve the image as in use + m_swapChainImageRenderImage[m_currentSwapChainImage] = m_frames[m_currentFrame].get(); +} +void VulkanRenderer::setupFrame() +{ + if (m_frameInProgress) { + Warning(LOCATION, "VulkanRenderer::setupFrame called while frame already in progress!"); + return; + } + + // Free completed texture upload command buffers + Assertion(m_textureManager, "Vulkan TextureManager not initialized in setupFrame!"); + m_textureManager->frameStart(); + + // Allocate command buffer for this frame + vk::CommandBufferAllocateInfo cmdBufferAlloc; + cmdBufferAlloc.commandPool = m_graphicsCommandPool.get(); + cmdBufferAlloc.level = 
vk::CommandBufferLevel::ePrimary; + cmdBufferAlloc.commandBufferCount = 1; + + auto cmdBufs = m_device->allocateCommandBuffers(cmdBufferAlloc); + m_currentCommandBuffers.assign(cmdBufs.begin(), cmdBufs.end()); + m_currentCommandBuffer = m_currentCommandBuffers.front(); + + // Begin command buffer + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags |= vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + m_currentCommandBuffer.begin(beginInfo); + + Assertion(m_descriptorManager, "Vulkan DescriptorManager not initialized in setupFrame!"); + m_descriptorManager->beginFrame(); + + Assertion(m_stateTracker, "Vulkan StateTracker not initialized in setupFrame!"); + m_stateTracker->beginFrame(m_currentCommandBuffer); + + // Reset timestamp queries that were written last frame (must be outside render pass) + if (m_queryManager) { + m_queryManager->beginFrame(m_currentCommandBuffer); + } + + // Reset per-frame flags + m_sceneDepthCopiedThisFrame = false; + + // Reset per-frame draw statistics + Assertion(m_drawManager, "Vulkan DrawManager not initialized in setupFrame!"); + m_drawManager->resetFrameStats(); + + // Begin render pass + vk::RenderPassBeginInfo renderPassBegin; + renderPassBegin.renderPass = m_renderPass.get(); + renderPassBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); + renderPassBegin.renderArea.offset.x = 0; + renderPassBegin.renderArea.offset.y = 0; + renderPassBegin.renderArea.extent = m_swapChainExtent; + + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); // Clear to black each frame + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); // Clear depth to far plane + + renderPassBegin.clearValueCount = static_cast(clearValues.size()); + renderPassBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); + + // Set up state tracker for FSO draws + m_stateTracker->setRenderPass(m_renderPass.get(), 0); + // 
Negative viewport height for OpenGL-compatible Y-up NDC (VK_KHR_maintenance1) + m_stateTracker->setViewport(0.0f, + static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); + + m_frameInProgress = true; +} + +void VulkanRenderer::flip() +{ + if (!m_frameInProgress) { + nprintf(("Vulkan", "VulkanRenderer::flip called without frame in progress, skipping\n")); + return; + } + + // Print per-frame diagnostic summary before ending + Assertion(m_drawManager, "Vulkan DrawManager not initialized in flip!"); + m_drawManager->printFrameStats(); + + // End render pass + m_currentCommandBuffer.endRenderPass(); + m_stateTracker->endFrame(); + m_descriptorManager->endFrame(); + + // End command buffer + m_currentCommandBuffer.end(); + + // Set up cleanup callback for command buffers + auto buffersToFree = m_currentCommandBuffers; + m_frames[m_currentFrame]->onFrameFinished([this, buffersToFree]() mutable { + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), buffersToFree); + }); + + // Submit and present + auto presentStatus = m_frames[m_currentFrame]->submitAndPresent(m_currentCommandBuffers); + + if (presentStatus == SwapChainStatus::eSuboptimal || presentStatus == SwapChainStatus::eOutOfDate) { + m_swapChainNeedsRecreation = true; + } + + // Notify query manager that this frame's command buffer was submitted + if (m_queryManager) { + m_queryManager->notifySubmission(); + } + + // Track which swap chain image was just presented so saveScreen() can read it + m_previousSwapChainImage = m_currentSwapChainImage; + + // Clear current command buffer reference + m_currentCommandBuffer = nullptr; + m_currentCommandBuffers.clear(); + m_frameInProgress = false; + + // Advance counters to prepare for the next frame + m_currentFrame = (m_currentFrame + 1) % MAX_FRAMES_IN_FLIGHT; + ++m_frameNumber; + + // Set the frame index for the buffer manager immediately after incrementing + // This ensures any buffer operations 
that happen before setupFrame() use the correct frame + m_bufferManager->setCurrentFrame(m_currentFrame); + + acquireNextSwapChainImage(); + + // Process deferred resource deletions AFTER the fence wait in + // acquireNextSwapChainImage, so we know the previous frame's commands + // (including async upload CBs) have completed before destroying resources. + m_deletionQueue->processDestructions(); +} + +void VulkanRenderer::beginSceneRendering() +{ + if (!m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + if (m_sceneRendering) { + return; + } + + // End the current swap chain render pass + m_currentCommandBuffer.endRenderPass(); + + // Use G-buffer render pass when deferred lighting is enabled and G-buffer is ready + m_useGbufRenderPass = m_postProcessor->deferred().isInitialized() && light_deferred_enabled(); + + // Begin the HDR scene render pass (or G-buffer render pass for deferred) + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_postProcessor->getSceneExtent(); + + if (m_useGbufRenderPass) { + rpBegin.renderPass = m_postProcessor->deferred().renderPass(); + rpBegin.framebuffer = m_postProcessor->deferred().framebuffer(); + + // Clear values: 6 color + depth + std::array clearValues{}; + clearValues[VulkanDeferredGBuffer::GBUF_ATT_COLOR].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_POSITION].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_NORMAL].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_SPECULAR].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_EMISSIVE].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_ATT_COMPOSITE].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT].depthStencil = 
vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->deferred().renderPass(), 0); + m_stateTracker->setColorAttachmentCount(VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT); + } else { + rpBegin.renderPass = m_postProcessor->getSceneRenderPass(); + rpBegin.framebuffer = m_postProcessor->getSceneFramebuffer(); + + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getSceneRenderPass(), 0); + m_stateTracker->setColorAttachmentCount(1); + } + + // Negative viewport height for Y-flip (same as swap chain pass) + auto extent = m_postProcessor->getSceneExtent(); + m_stateTracker->setViewport(0.0f, + static_cast(extent.height), + static_cast(extent.width), + -static_cast(extent.height)); + + m_sceneRendering = true; +} + +void VulkanRenderer::resumeSceneRendering() +{ + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_postProcessor->getSceneExtent(); + + rpBegin.renderPass = m_postProcessor->getSceneRenderPassLoad(); + rpBegin.framebuffer = m_postProcessor->getSceneFramebuffer(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getSceneRenderPassLoad(), 0); + m_stateTracker->setColorAttachmentCount(1); +} + +void VulkanRenderer::endSceneRendering() +{ + if (!m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + if (!m_sceneRendering) { + return; + } + + 
// End HDR scene render pass (transitions scene color to eShaderReadOnlyOptimal) + m_currentCommandBuffer.endRenderPass(); + + // Update distortion ping-pong textures (every ~30ms, matching OpenGL) + if (Gr_framebuffer_effects.any_set()) { + m_postProcessor->updateDistortion(m_currentCommandBuffer, flFrametime); + } + + // Execute post-processing passes (all between HDR scene pass and swap chain pass) + m_postProcessor->executeBloom(m_currentCommandBuffer); + m_postProcessor->executeTonemap(m_currentCommandBuffer); + m_postProcessor->executeFXAA(m_currentCommandBuffer); + m_postProcessor->executeLightshafts(m_currentCommandBuffer); + m_postProcessor->executePostEffects(m_currentCommandBuffer); + + // Begin the resumed swap chain render pass (loadOp=eLoad to preserve pre-scene content) + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = m_renderPassLoad.get(); + rpBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_swapChainExtent; + + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + + // Update state tracker for the resumed swap chain pass + m_stateTracker->setRenderPass(m_renderPassLoad.get(), 0); + m_stateTracker->setColorAttachmentCount(1); + // Non-flipped viewport for post-processing blit (HDR texture is already correct orientation) + m_stateTracker->setViewport(0.0f, 0.0f, + static_cast(m_swapChainExtent.width), + static_cast(m_swapChainExtent.height)); + + // Blit the HDR scene to swap chain through post-processing + m_postProcessor->blitToSwapChain(m_currentCommandBuffer); + + // Restore Y-flipped viewport for HUD rendering + 
m_stateTracker->setViewport(0.0f, + static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); + + m_sceneRendering = false; + m_useGbufRenderPass = false; +} + +void VulkanRenderer::copyEffectTexture() +{ + if (!m_sceneRendering || !m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + + // End the current scene render pass + // This transitions scene color to eShaderReadOnlyOptimal (the render pass's finalLayout) + // For G-buffer: all 6 color attachments transition to eShaderReadOnlyOptimal + m_currentCommandBuffer.endRenderPass(); + + // Copy scene color → effect texture (handles scene color transitions) + m_postProcessor->copyEffectTexture(m_currentCommandBuffer); + + // If G-buffer is active, transition attachments 1-5 for render pass resume + if (m_useGbufRenderPass) { + m_postProcessor->deferred().transitionForResume(m_currentCommandBuffer); + } + + // Resume the scene render pass with loadOp=eLoad to preserve existing content + // Scene color is now in eColorAttachmentOptimal (copyEffectTexture transitions it back) + // Depth is still in eDepthStencilAttachmentOptimal (untouched by the copy) + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_postProcessor->getSceneExtent(); + + if (m_useGbufRenderPass) { + rpBegin.renderPass = m_postProcessor->deferred().renderPassLoad(); + rpBegin.framebuffer = m_postProcessor->deferred().framebuffer(); + // Clear values ignored for eLoad but array must cover all attachments + std::array clearValues{}; + clearValues[VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->deferred().renderPassLoad(), 
0); + } else { + rpBegin.renderPass = m_postProcessor->getSceneRenderPassLoad(); + rpBegin.framebuffer = m_postProcessor->getSceneFramebuffer(); + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getSceneRenderPassLoad(), 0); + } + + // Restore Y-flipped viewport for scene rendering + auto extent = m_postProcessor->getSceneExtent(); + m_stateTracker->setViewport(0.0f, + static_cast(extent.height), + static_cast(extent.width), + -static_cast(extent.height)); +} + +void VulkanRenderer::copySceneDepthForParticles() +{ + if (m_sceneDepthCopiedThisFrame || !m_sceneRendering || !m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + + // End the current scene render pass + // This transitions: color → eShaderReadOnlyOptimal, depth → eDepthStencilAttachmentOptimal + // For G-buffer: all 6 color attachments → eShaderReadOnlyOptimal + m_currentCommandBuffer.endRenderPass(); + + // Copy scene depth → samplable depth copy (handles all depth image transitions) + m_postProcessor->copySceneDepth(m_currentCommandBuffer); + + // Transition scene color: eShaderReadOnlyOptimal → eColorAttachmentOptimal + // (needed for the resumed render pass with loadOp=eLoad, which expects + // initialLayout=eColorAttachmentOptimal; copySceneDepth only touches depth) + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + 
barrier.image = m_postProcessor->getSceneColorImage(); + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + m_currentCommandBuffer.pipelineBarrier( + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, {}, {}, barrier); + } + + // If G-buffer is active, transition attachments 1-5 for render pass resume + if (m_useGbufRenderPass) { + m_postProcessor->deferred().transitionForResume(m_currentCommandBuffer); + } + + // Resume the scene render pass with loadOp=eLoad + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_postProcessor->getSceneExtent(); + + if (m_useGbufRenderPass) { + rpBegin.renderPass = m_postProcessor->deferred().renderPassLoad(); + rpBegin.framebuffer = m_postProcessor->deferred().framebuffer(); + std::array clearValues{}; + clearValues[VulkanDeferredGBuffer::GBUF_COLOR_ATTACHMENT_COUNT].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->deferred().renderPassLoad(), 0); + } else { + rpBegin.renderPass = m_postProcessor->getSceneRenderPassLoad(); + rpBegin.framebuffer = m_postProcessor->getSceneFramebuffer(); + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + 
m_stateTracker->setRenderPass(m_postProcessor->getSceneRenderPassLoad(), 0); + } + + // Restore Y-flipped viewport for scene rendering + auto extent = m_postProcessor->getSceneExtent(); + m_stateTracker->setViewport(0.0f, + static_cast(extent.height), + static_cast(extent.width), + -static_cast(extent.height)); + + m_sceneDepthCopiedThisFrame = true; +} + +void VulkanRenderer::beginRenderTarget(tcache_slot_vulkan* ts, int face) +{ + // End current render pass (swap chain or previous RT face) + m_currentCommandBuffer.endRenderPass(); + + // Select the correct framebuffer for cubemap face or flat RT + vk::Framebuffer fb = (ts->isCubemap && face >= 0 && face < 6) + ? ts->cubeFaceFramebuffers[face] : ts->framebuffer; + + vk::ClearValue clearValue; + clearValue.color = m_stateTracker->getClearColor(); + + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = ts->renderPass; + rpBegin.framebuffer = fb; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = vk::Extent2D(ts->width, ts->height); + rpBegin.clearValueCount = 1; + rpBegin.pClearValues = &clearValue; + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + + m_stateTracker->setRenderPass(ts->renderPass, 0); + m_stateTracker->setColorAttachmentCount(1); + m_stateTracker->setCurrentSampleCount(vk::SampleCountFlagBits::e1); + m_renderTargetActive = true; +} + +void VulkanRenderer::endRenderTarget() +{ + if (!m_renderTargetActive) { + return; + } + + // End the RT render pass (finalLayout transitions to eShaderReadOnlyOptimal) + m_currentCommandBuffer.endRenderPass(); + m_renderTargetActive = false; + + resumeSwapChainPass(); +} + +void VulkanRenderer::resumeSwapChainPass() +{ + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = m_renderPassLoad.get(); + rpBegin.framebuffer = 
m_swapChainFramebuffers[m_currentSwapChainImage].get(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_swapChainExtent; + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + + m_stateTracker->setRenderPass(m_renderPassLoad.get(), 0); + m_stateTracker->setColorAttachmentCount(1); + m_stateTracker->setCurrentSampleCount(vk::SampleCountFlagBits::e1); +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanRendererSetup.cpp b/code/graphics/vulkan/VulkanRendererSetup.cpp new file mode 100644 index 00000000000..91ca7fdf307 --- /dev/null +++ b/code/graphics/vulkan/VulkanRendererSetup.cpp @@ -0,0 +1,931 @@ + +#include "VulkanRenderer.h" +#include "VulkanMemory.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" + +#include "cmdline/cmdline.h" +#include "globalincs/version.h" +#include "graphics/grinternal.h" +#include "graphics/post_processing.h" + +#include "backends/imgui_impl_vulkan.h" +#include "graphics/2d.h" +#include "lighting/lighting.h" +#include "libs/renderdoc/renderdoc.h" +#include "mod_table/mod_table.h" + +#if SDL_VERSION_ATLEAST(2, 0, 6) +#include +#endif + + +extern float flFrametime; + +namespace graphics::vulkan { + +namespace { +#if SDL_SUPPORTS_VULKAN +const char* EngineName = "FreeSpaceOpen"; + +const gameversion::version MinVulkanVersion(1, 1, 0, 0); + +VkBool32 VKAPI_PTR debugReportCallback( + vk::DebugReportFlagsEXT /*flags*/, + vk::DebugReportObjectTypeEXT /*objectType*/, + uint64_t /*object*/, + size_t /*location*/, + int32_t /*messageCode*/, + const char* pLayerPrefix, + const char* pMessage, + void* /*pUserData*/) +{ + mprintf(("Vulkan message: [%s]: %s\n", pLayerPrefix, pMessage)); + return VK_FALSE; +} +#endif + +const SCP_vector RequiredDeviceExtensions = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME, +}; + +bool 
checkDeviceExtensionSupport(PhysicalDeviceValues& values) +{ + auto exts = values.device.enumerateDeviceExtensionProperties(); + values.extensions.assign(exts.begin(), exts.end()); + + std::set requiredExtensions(RequiredDeviceExtensions.cbegin(), RequiredDeviceExtensions.cend()); + for (const auto& extension : values.extensions) { + requiredExtensions.erase(extension.extensionName); + } + + return requiredExtensions.empty(); +} + +bool checkSwapChainSupport(PhysicalDeviceValues& values, const vk::UniqueSurfaceKHR& surface) +{ + values.surfaceCapabilities = values.device.getSurfaceCapabilitiesKHR(surface.get()); + auto fmts = values.device.getSurfaceFormatsKHR(surface.get()); + values.surfaceFormats.assign(fmts.begin(), fmts.end()); + auto modes = values.device.getSurfacePresentModesKHR(surface.get()); + values.presentModes.assign(modes.begin(), modes.end()); + + return !values.surfaceFormats.empty() && !values.presentModes.empty(); +} + +bool isDeviceUnsuitable(PhysicalDeviceValues& values, const vk::UniqueSurfaceKHR& surface) +{ + // We need a GPU. Reject CPU or "other" types. 
+ if (values.properties.deviceType != vk::PhysicalDeviceType::eDiscreteGpu && + values.properties.deviceType != vk::PhysicalDeviceType::eIntegratedGpu && + values.properties.deviceType != vk::PhysicalDeviceType::eVirtualGpu) { + mprintf(("Rejecting %s (%d) because the device type is unsuitable.\n", + values.properties.deviceName.data(), + values.properties.deviceID)); + return true; + } + + uint32_t i = 0; + for (const auto& queue : values.queueProperties) { + if (queue.queueFlags & vk::QueueFlagBits::eGraphics) { + if (!values.graphicsQueueIndex.initialized) { + values.graphicsQueueIndex.initialized = true; + values.graphicsQueueIndex.index = i; + } + // "All commands that are allowed on a queue that supports transfer operations + // are also allowed on a queue that supports either graphics or compute operations + if (!values.transferQueueIndex.initialized) { + values.transferQueueIndex.initialized = true; + values.transferQueueIndex.index = i; + } + } + if (queue.queueFlags & vk::QueueFlagBits::eTransfer && + !(queue.queueFlags & vk::QueueFlagBits::eGraphics) && + !(queue.queueFlags & vk::QueueFlagBits::eCompute)) { + // Found a dedicated transfer queue and we prefer that + values.transferQueueIndex.initialized = true; + values.transferQueueIndex.index = i; + } + if (!values.presentQueueIndex.initialized && values.device.getSurfaceSupportKHR(i, surface.get())) { + values.presentQueueIndex.initialized = true; + values.presentQueueIndex.index = i; + } + + ++i; + } + + if (!values.graphicsQueueIndex.initialized) { + mprintf(("Rejecting %s (%d) because the device does not have a graphics queue.\n", + values.properties.deviceName.data(), + values.properties.deviceID)); + return true; + } + if (!values.transferQueueIndex.initialized) { + mprintf(("Rejecting %s (%d) because the device does not have a transfer queue.\n", + values.properties.deviceName.data(), + values.properties.deviceID)); + return true; + } + if (!values.presentQueueIndex.initialized) { + 
mprintf(("Rejecting %s (%d) because the device does not have a presentation queue.\n", + values.properties.deviceName.data(), + values.properties.deviceID)); + return true; + } + + if (!checkDeviceExtensionSupport(values)) { + mprintf(("Rejecting %s (%d) because the device does not support our required extensions.\n", + values.properties.deviceName.data(), + values.properties.deviceID)); + return true; + } + + if (!checkSwapChainSupport(values, surface)) { + mprintf(("Rejecting %s (%d) because the device swap chain was not compatible.\n", + values.properties.deviceName.data(), + values.properties.deviceID)); + return true; + } + + return false; +} + +uint32_t deviceTypeScore(vk::PhysicalDeviceType type) +{ + switch (type) { + case vk::PhysicalDeviceType::eIntegratedGpu: + return 1; + case vk::PhysicalDeviceType::eDiscreteGpu: + return 2; + case vk::PhysicalDeviceType::eVirtualGpu: + case vk::PhysicalDeviceType::eCpu: + case vk::PhysicalDeviceType::eOther: + default: + return 0; + } +} + +uint64_t scoreDevice(const PhysicalDeviceValues& device) +{ + // Device type is the primary selection criterion (a discrete GPU must always + // beat an integrated one). The API version is only a tiebreaker between + // devices of the same type. We pack the type into the high bits and the + // (already 32-bit) API version into the low bits so the type can never be + // overwhelmed by the magnitude of the packed apiVersion value. + uint64_t score = 0; + + score |= static_cast(deviceTypeScore(device.properties.deviceType)) << 32; + score |= static_cast(device.properties.apiVersion); + + return score; +} + +bool compareDevices(const PhysicalDeviceValues& left, const PhysicalDeviceValues& right) +{ + return scoreDevice(left) < scoreDevice(right); +} + +void printPhysicalDevice(const PhysicalDeviceValues& values) +{ + mprintf((" Found %s (%d) of type %s. API version %d.%d.%d, Driver version %d.%d.%d. 
Scored as %" PRIu64 "\n", + values.properties.deviceName.data(), + values.properties.deviceID, + to_string(values.properties.deviceType).c_str(), + VK_VERSION_MAJOR(values.properties.apiVersion), + VK_VERSION_MINOR(values.properties.apiVersion), + VK_VERSION_PATCH(values.properties.apiVersion), + VK_VERSION_MAJOR(values.properties.driverVersion), + VK_VERSION_MINOR(values.properties.driverVersion), + VK_VERSION_PATCH(values.properties.driverVersion), + scoreDevice(values))); +} + +vk::SurfaceFormatKHR chooseSurfaceFormat(const PhysicalDeviceValues& values) +{ + // Use a non-sRGB (UNORM) format to match OpenGL's default framebuffer behavior. + // The FSO shaders handle gamma correction manually in the fragment shader and + // post-processing pipeline, so hardware sRGB conversion would double-correct. + for (const auto& availableFormat : values.surfaceFormats) { + if (availableFormat.format == vk::Format::eB8G8R8A8Unorm && + availableFormat.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear) { + return availableFormat; + } + } + + return values.surfaceFormats.front(); +} + +vk::PresentModeKHR choosePresentMode(const PhysicalDeviceValues& values) +{ + vk::PresentModeKHR chosen = vk::PresentModeKHR::eFifo; // guaranteed to be supported + + // Depending on if we want Vsync or not, choose the best mode + for (const auto& availablePresentMode : values.presentModes) { + if (Gr_enable_vsync) { + if (availablePresentMode == vk::PresentModeKHR::eMailbox) { + chosen = availablePresentMode; + break; + } + } else { + if (availablePresentMode == vk::PresentModeKHR::eImmediate) { + chosen = availablePresentMode; + break; + } + } + } + + const char* name = "Unknown"; + switch (chosen) { + case vk::PresentModeKHR::eImmediate: name = "Immediate"; break; + case vk::PresentModeKHR::eMailbox: name = "Mailbox"; break; + case vk::PresentModeKHR::eFifo: name = "FIFO (vsync)"; break; + case vk::PresentModeKHR::eFifoRelaxed: name = "FIFO Relaxed"; break; + default: break; + } + 
mprintf(("Vulkan: Present mode: %s (Gr_enable_vsync=%d)\n", name, Gr_enable_vsync ? 1 : 0)); + + return chosen; +} + +vk::Extent2D chooseSwapChainExtent(const PhysicalDeviceValues& values, uint32_t width, uint32_t height) +{ + if (values.surfaceCapabilities.currentExtent.width != UINT32_MAX) { + return values.surfaceCapabilities.currentExtent; + } else { + VkExtent2D actualExtent = {width, height}; + + actualExtent.width = std::max(values.surfaceCapabilities.minImageExtent.width, + std::min(values.surfaceCapabilities.maxImageExtent.width, actualExtent.width)); + actualExtent.height = std::max(values.surfaceCapabilities.minImageExtent.height, + std::min(values.surfaceCapabilities.maxImageExtent.height, actualExtent.height)); + + return actualExtent; + } +} + +} // namespace +bool VulkanRenderer::initialize() +{ + mprintf(("Initializing Vulkan graphics device at %ix%i with %i-bit color...\n", + gr_screen.max_w, + gr_screen.max_h, + gr_screen.bits_per_pixel)); + + // Load the RenderDoc API if available before doing anything with OpenGL + renderdoc::loadApi(); + + if (!initDisplayDevice()) { + return false; + } + + if (!initializeInstance()) { + mprintf(("Failed to create Vulkan instance!\n")); + return false; + } + + if (!initializeSurface()) { + mprintf(("Failed to create Vulkan surface!\n")); + return false; + } + + PhysicalDeviceValues deviceValues; + if (!pickPhysicalDevice(deviceValues)) { + mprintf(("Could not find suitable physical Vulkan device.\n")); + return false; + } + + // Validate MSAA sample count against device limits + if (Cmdline_msaa_enabled > 0) { + auto limits = deviceValues.properties.limits; + vk::SampleCountFlags supported = limits.framebufferColorSampleCounts & limits.framebufferDepthSampleCounts; + + // Map requested count to vk::SampleCountFlagBits + vk::SampleCountFlagBits requested = vk::SampleCountFlagBits::e1; + switch (Cmdline_msaa_enabled) { + case 4: requested = vk::SampleCountFlagBits::e4; break; + case 8: requested = 
vk::SampleCountFlagBits::e8; break; + case 16: requested = vk::SampleCountFlagBits::e16; break; + default: + mprintf(("Vulkan: Unsupported MSAA count %d, disabling MSAA\n", Cmdline_msaa_enabled)); + Cmdline_msaa_enabled = 0; + break; + } + + if (Cmdline_msaa_enabled > 0) { + if (supported & requested) { + m_msaaSampleCount = requested; + mprintf(("Vulkan: MSAA enabled with %dx sample count\n", Cmdline_msaa_enabled)); + } else { + // Clamp down to highest supported + vk::SampleCountFlagBits fallback = vk::SampleCountFlagBits::e1; + int fallbackCount = 0; + if ((supported & vk::SampleCountFlagBits::e8) && Cmdline_msaa_enabled >= 8) { + fallback = vk::SampleCountFlagBits::e8; fallbackCount = 8; + } else if (supported & vk::SampleCountFlagBits::e4) { + fallback = vk::SampleCountFlagBits::e4; fallbackCount = 4; + } + + if (fallbackCount > 0) { + mprintf(("Vulkan: Requested MSAA %dx not supported, falling back to %dx\n", + Cmdline_msaa_enabled, fallbackCount)); + Cmdline_msaa_enabled = fallbackCount; + m_msaaSampleCount = fallback; + } else { + mprintf(("Vulkan: No suitable MSAA support, disabling MSAA\n")); + Cmdline_msaa_enabled = 0; + } + } + } + } + + if (!createLogicalDevice(deviceValues)) { + mprintf(("Failed to create logical device.\n")); + return false; + } + + createCommandPool(deviceValues); + + if (!createSwapChain(deviceValues)) { + mprintf(("Failed to create swap chain.\n")); + return false; + } + + createDepthResources(); + createRenderPass(); + createFrameBuffers(); + + createPresentSyncObjects(); + + // Initialize texture manager (needs command pool for uploads) + m_textureManager = std::unique_ptr(new VulkanTextureManager()); + if (!m_textureManager->init(m_device.get(), m_physicalDevice, m_memoryManager.get(), + m_graphicsCommandPool.get(), m_graphicsQueue)) { + mprintf(("Failed to initialize Vulkan texture manager!\n")); + return false; + } + setTextureManager(m_textureManager.get()); + + // Initialize shader manager + m_shaderManager = 
std::unique_ptr(new VulkanShaderManager()); + if (!m_shaderManager->init(m_device.get())) { + mprintf(("Failed to initialize Vulkan shader manager!\n")); + return false; + } + setShaderManager(m_shaderManager.get()); + + // Initialize descriptor manager + m_descriptorManager = std::unique_ptr(new VulkanDescriptorManager()); + if (!m_descriptorManager->init(m_device.get())) { + mprintf(("Failed to initialize Vulkan descriptor manager!\n")); + return false; + } + setDescriptorManager(m_descriptorManager.get()); + m_descriptorManager->buildFallbacks(m_bufferManager.get(), m_textureManager.get()); + + // Initialize pipeline manager + m_pipelineManager = std::unique_ptr(new VulkanPipelineManager()); + if (!m_pipelineManager->init(m_device.get(), m_shaderManager.get(), m_descriptorManager.get())) { + mprintf(("Failed to initialize Vulkan pipeline manager!\n")); + return false; + } + setPipelineManager(m_pipelineManager.get()); + m_pipelineManager->loadPipelineCache("vulkan_pipeline.cache"); + + // Initialize state tracker + m_stateTracker = std::unique_ptr(new VulkanStateTracker()); + if (!m_stateTracker->init(m_device.get())) { + mprintf(("Failed to initialize Vulkan state tracker!\n")); + return false; + } + setStateTracker(m_stateTracker.get()); + + // Initialize draw manager + m_drawManager = std::unique_ptr(new VulkanDrawManager()); + if (!m_drawManager->init(m_device.get())) { + mprintf(("Failed to initialize Vulkan draw manager!\n")); + return false; + } + setDrawManager(m_drawManager.get()); + + // Initialize post-processing + m_postProcessor = std::unique_ptr(new VulkanPostProcessor()); + if (!m_postProcessor->init(m_device.get(), m_physicalDevice, m_memoryManager.get(), + m_swapChainExtent, m_depthFormat)) { + mprintf(("Warning: Failed to initialize Vulkan post-processor, post-processing will be disabled\n")); + m_postProcessor.reset(); + } else { + setPostProcessor(m_postProcessor.get()); + } + + // Initialize shared post-processing manager (bloom/lightshaft 
settings, post-effect table) + // This is renderer-agnostic; OpenGL creates it in opengl_post_process_init(). + if (!graphics::Post_processing_manager) { + graphics::Post_processing_manager.reset(new graphics::PostProcessingManager()); + if (!graphics::Post_processing_manager->parse_table()) { + mprintf(("Warning: Unable to read post-processing table\n")); + } + } + + // Initialize query manager for GPU timestamp profiling + m_queryManager = std::unique_ptr(new VulkanQueryManager()); + if (!m_queryManager->init(m_device.get(), m_physicalDevice.getProperties().limits.timestampPeriod, + m_graphicsCommandPool.get(), m_graphicsQueue)) { + mprintf(("Warning: Failed to initialize Vulkan query manager, GPU profiling will be disabled\n")); + m_queryManager.reset(); + } else { + setQueryManager(m_queryManager.get()); + } + + // Prepare the rendering state by acquiring our first swap chain image + acquireNextSwapChainImage(); + + // Initialize ImGui Vulkan rendering backend + initImGui(); + + return true; +} + +bool VulkanRenderer::initDisplayDevice() const +{ + os::ViewPortProperties attrs; + attrs.enable_opengl = false; + attrs.enable_vulkan = true; + + attrs.display = os_config_read_uint("Video", "Display", 0); + attrs.width = static_cast(gr_screen.max_w); + attrs.height = static_cast(gr_screen.max_h); + + attrs.title = Osreg_title; + if (!Window_title.empty()) { + attrs.title = Window_title; + } + + if (Using_in_game_options) { + switch (Gr_configured_window_state) { + case os::ViewportState::Windowed: + // That's the default + break; + case os::ViewportState::Borderless: + attrs.flags.set(os::ViewPortFlags::Borderless); + break; + case os::ViewportState::Fullscreen: + attrs.flags.set(os::ViewPortFlags::Fullscreen); + break; + } + } else { + if (!Cmdline_window && !Cmdline_fullscreen_window) { + attrs.flags.set(os::ViewPortFlags::Fullscreen); + } else if (Cmdline_fullscreen_window) { + attrs.flags.set(os::ViewPortFlags::Borderless); + } + } + + if (Cmdline_capture_mouse) 
+ attrs.flags.set(os::ViewPortFlags::Capture_Mouse); + + auto viewPort = m_graphicsOps->createViewport(attrs); + if (!viewPort) { + return false; + } + + const auto port = os::addViewport(std::move(viewPort)); + os::setMainViewPort(port); + + return true; +} +bool VulkanRenderer::initializeInstance() +{ +#if SDL_SUPPORTS_VULKAN + const auto vkGetInstanceProcAddr = + reinterpret_cast(SDL_Vulkan_GetVkGetInstanceProcAddr()); + + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + + const auto window = os::getSDLMainWindow(); + + unsigned int count; + if (!SDL_Vulkan_GetInstanceExtensions(window, &count, nullptr)) { + mprintf(("Error in first SDL_Vulkan_GetInstanceExtensions: %s\n", SDL_GetError())); + return false; + } + + SCP_vector extensions; + extensions.resize(count); + + if (!SDL_Vulkan_GetInstanceExtensions(window, &count, extensions.data())) { + mprintf(("Error in second SDL_Vulkan_GetInstanceExtensions: %s\n", SDL_GetError())); + return false; + } + + const auto instanceVersion = vk::enumerateInstanceVersion(); + gameversion::version vulkanVersion(VK_VERSION_MAJOR(instanceVersion), + VK_VERSION_MINOR(instanceVersion), + VK_VERSION_PATCH(instanceVersion), + 0); + mprintf(("Vulkan instance version %s\n", gameversion::format_version(vulkanVersion).c_str())); + + if (vulkanVersion < MinVulkanVersion) { + mprintf(("Vulkan version is less than the minimum which is %s.\n", + gameversion::format_version(MinVulkanVersion).c_str())); + return false; + } + + const auto supportedExtensions = vk::enumerateInstanceExtensionProperties(); + mprintf(("Instance extensions:\n")); + for (const auto& ext : supportedExtensions) { + mprintf((" Found support for %s version %" PRIu32 "\n", ext.extensionName.data(), ext.specVersion)); + if (FSO_DEBUG || Cmdline_graphics_debug_output) { + if (!stricmp(ext.extensionName, VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) { + extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); + m_debugReportEnabled = true; + } + if 
(!stricmp(ext.extensionName, VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + m_debugUtilsEnabled = true; + } + } + } + + SCP_vector layers; + const auto supportedLayers = vk::enumerateInstanceLayerProperties(); + mprintf(("Instance layers:\n")); + for (const auto& layer : supportedLayers) { + mprintf((" Found layer %s(%s). Spec version %d.%d.%d and implementation %" PRIu32 "\n", + layer.layerName.data(), + layer.description.data(), + VK_VERSION_MAJOR(layer.specVersion), + VK_VERSION_MINOR(layer.specVersion), + VK_VERSION_PATCH(layer.specVersion), + layer.implementationVersion)); + if (FSO_DEBUG || Cmdline_graphics_debug_output) { + if (!stricmp(layer.layerName, "VK_LAYER_KHRONOS_validation")) { + layers.push_back("VK_LAYER_KHRONOS_validation"); + } else if (!stricmp(layer.layerName, "VK_LAYER_LUNARG_core_validation")) { + layers.push_back("VK_LAYER_LUNARG_core_validation"); + } + } + } + + vk::ApplicationInfo appInfo(Window_title.c_str(), 1, EngineName, 1, VK_API_VERSION_1_1); + + // Now we can make the Vulkan instance + vk::InstanceCreateInfo createInfo(vk::Flags(), &appInfo); + createInfo.enabledExtensionCount = static_cast(extensions.size()); + createInfo.ppEnabledExtensionNames = extensions.data(); + createInfo.enabledLayerCount = static_cast(layers.size()); + createInfo.ppEnabledLayerNames = layers.data(); + + vk::DebugReportCallbackCreateInfoEXT createInstanceReportInfo(vk::DebugReportFlagBitsEXT::eError | + vk::DebugReportFlagBitsEXT::eWarning | + vk::DebugReportFlagBitsEXT::ePerformanceWarning); + createInstanceReportInfo.pfnCallback = debugReportCallback; + + vk::StructureChain createInstanceChain(createInfo, + createInstanceReportInfo); + + if (!m_debugReportEnabled) { + createInstanceChain.unlink(); + } + + vk::UniqueInstance instance = vk::createInstanceUnique(createInstanceChain.get(), nullptr); + if (!instance) { + return false; + } + + VULKAN_HPP_DEFAULT_DISPATCHER.init(instance.get()); + + if 
(m_debugReportEnabled) { + vk::DebugReportCallbackCreateInfoEXT reportCreateInfo(vk::DebugReportFlagBitsEXT::eError | + vk::DebugReportFlagBitsEXT::eWarning | + vk::DebugReportFlagBitsEXT::ePerformanceWarning); + reportCreateInfo.pfnCallback = debugReportCallback; + + m_debugReport = instance->createDebugReportCallbackEXTUnique(reportCreateInfo); + } + + m_vkInstance = std::move(instance); + return true; +#else + mprintf(("SDL does not support Vulkan in its current version.\n")); + return false; +#endif +} + +bool VulkanRenderer::initializeSurface() +{ +#if SDL_SUPPORTS_VULKAN + const auto window = os::getSDLMainWindow(); + + VkSurfaceKHR surface; + if (!SDL_Vulkan_CreateSurface(window, static_cast(*m_vkInstance), &surface)) { + mprintf(("Failed to create vulkan surface: %s\n", SDL_GetError())); + return false; + } + + const vk::detail::ObjectDestroy deleter(*m_vkInstance, + nullptr, + VULKAN_HPP_DEFAULT_DISPATCHER); + m_vkSurface = vk::UniqueSurfaceKHR(vk::SurfaceKHR(surface), deleter); + return true; +#else + return false; +#endif +} + +bool VulkanRenderer::pickPhysicalDevice(PhysicalDeviceValues& deviceValues) +{ + const auto devices = m_vkInstance->enumeratePhysicalDevices(); + if (devices.empty()) { + return false; + } + + SCP_vector values; + std::transform(devices.cbegin(), devices.cend(), std::back_inserter(values), [](const vk::PhysicalDevice& dev) { + PhysicalDeviceValues vals; + vals.device = dev; + vals.properties = dev.getProperties2().properties; + vals.features = dev.getFeatures2().features; + auto qprops = dev.getQueueFamilyProperties(); + vals.queueProperties.assign(qprops.begin(), qprops.end()); + return vals; + }); + + mprintf(("Physical Vulkan devices:\n")); + std::for_each(values.cbegin(), values.cend(), printPhysicalDevice); + + // Remove devices that do not have the features we need + values.erase(std::remove_if(values.begin(), + values.end(), + [this](PhysicalDeviceValues& value) { return isDeviceUnsuitable(value, m_vkSurface); }), + 
values.end()); + if (values.empty()) { + return false; + } + + // Sort the suitability of the devices in increasing order + std::sort(values.begin(), values.end(), compareDevices); + + deviceValues = values.back(); + mprintf(("Selected device %s (%d) as the primary Vulkan device.\n", + deviceValues.properties.deviceName.data(), + deviceValues.properties.deviceID)); + mprintf(("Device extensions:\n")); + for (const auto& extProp : deviceValues.extensions) { + mprintf((" Found support for %s version %" PRIu32 "\n", extProp.extensionName.data(), extProp.specVersion)); + } + + return true; +} + +bool VulkanRenderer::createLogicalDevice(const PhysicalDeviceValues& deviceValues) +{ + float queuePriority = 1.0f; + + SCP_vector queueInfos; + const std::set familyIndices{deviceValues.graphicsQueueIndex.index, + deviceValues.transferQueueIndex.index, + deviceValues.presentQueueIndex.index}; + + queueInfos.reserve(familyIndices.size()); + for (auto index : familyIndices) { + queueInfos.emplace_back(vk::DeviceQueueCreateFlags(), index, 1, &queuePriority); + } + + // Build extension list: required + optional + SCP_vector enabledExtensions(RequiredDeviceExtensions.begin(), RequiredDeviceExtensions.end()); + + // Check for VK_EXT_shader_viewport_index_layer (needed for shadow cascade routing) + m_supportsShaderViewportLayerOutput = false; + for (const auto& ext : deviceValues.extensions) { + if (strcmp(ext.extensionName, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME) == 0) { + m_supportsShaderViewportLayerOutput = true; + enabledExtensions.push_back(VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME); + mprintf(("Vulkan: Enabling %s (shadow cascade support)\n", VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME)); + break; + } + } + + vk::DeviceCreateInfo deviceCreate; + deviceCreate.pQueueCreateInfos = queueInfos.data(); + deviceCreate.queueCreateInfoCount = static_cast(queueInfos.size()); + deviceCreate.pEnabledFeatures = &deviceValues.features; + + 
deviceCreate.ppEnabledExtensionNames = enabledExtensions.data(); + deviceCreate.enabledExtensionCount = static_cast(enabledExtensions.size()); + + m_device = deviceValues.device.createDeviceUnique(deviceCreate); + + // Load device-level function pointers for the dynamic dispatcher + VULKAN_HPP_DEFAULT_DISPATCHER.init(m_device.get()); + + // Create queues + m_graphicsQueue = m_device->getQueue(deviceValues.graphicsQueueIndex.index, 0); + m_transferQueue = m_device->getQueue(deviceValues.transferQueueIndex.index, 0); + m_presentQueue = m_device->getQueue(deviceValues.presentQueueIndex.index, 0); + + // Store physical device and queue family indices for later use + m_physicalDevice = deviceValues.device; + m_graphicsQueueFamilyIndex = deviceValues.graphicsQueueIndex.index; + m_transferQueueFamilyIndex = deviceValues.transferQueueIndex.index; + m_presentQueueFamilyIndex = deviceValues.presentQueueIndex.index; + + // Initialize memory manager + m_memoryManager = std::unique_ptr(new VulkanMemoryManager()); + if (!m_memoryManager->init(m_vkInstance.get(), m_physicalDevice, m_device.get())) { + mprintf(("Failed to initialize Vulkan memory manager!\n")); + return false; + } + setMemoryManager(m_memoryManager.get()); + + // Initialize deletion queue for deferred resource destruction + m_deletionQueue = std::unique_ptr(new VulkanDeletionQueue()); + m_deletionQueue->init(m_device.get(), m_memoryManager.get()); + setDeletionQueue(m_deletionQueue.get()); + + // Initialize buffer manager + m_bufferManager = std::unique_ptr(new VulkanBufferManager()); + if (!m_bufferManager->init(m_device.get(), m_memoryManager.get(), + m_graphicsQueueFamilyIndex, m_transferQueueFamilyIndex, + getMinUniformBufferOffsetAlignment())) { + mprintf(("Failed to initialize Vulkan buffer manager!\n")); + return false; + } + setBufferManager(m_bufferManager.get()); + // Set initial frame index for buffer manager + m_bufferManager->setCurrentFrame(m_currentFrame); + + return true; +} +bool 
VulkanRenderer::createSwapChain(const PhysicalDeviceValues& deviceValues, vk::SwapchainKHR oldSwapchain) +{ + // Choose one more than the minimum to avoid driver synchronization if it is not done with a thread yet + uint32_t imageCount = deviceValues.surfaceCapabilities.minImageCount + 1; + if (deviceValues.surfaceCapabilities.maxImageCount > 0 && + imageCount > deviceValues.surfaceCapabilities.maxImageCount) { + imageCount = deviceValues.surfaceCapabilities.maxImageCount; + } + + const auto surfaceFormat = chooseSurfaceFormat(deviceValues); + + vk::SwapchainCreateInfoKHR createInfo; + createInfo.surface = m_vkSurface.get(); + createInfo.minImageCount = imageCount; + createInfo.imageFormat = surfaceFormat.format; + createInfo.imageColorSpace = surfaceFormat.colorSpace; + createInfo.imageExtent = chooseSwapChainExtent(deviceValues, gr_screen.max_w, gr_screen.max_h); + createInfo.imageArrayLayers = 1; + createInfo.imageUsage = vk::ImageUsageFlagBits::eColorAttachment + | vk::ImageUsageFlagBits::eTransferSrc + | vk::ImageUsageFlagBits::eTransferDst; + + const uint32_t queueFamilyIndices[] = {deviceValues.graphicsQueueIndex.index, deviceValues.presentQueueIndex.index}; + if (deviceValues.graphicsQueueIndex.index != deviceValues.presentQueueIndex.index) { + createInfo.imageSharingMode = vk::SharingMode::eConcurrent; + createInfo.queueFamilyIndexCount = 2; + createInfo.pQueueFamilyIndices = queueFamilyIndices; + } else { + createInfo.imageSharingMode = vk::SharingMode::eExclusive; + } + + createInfo.preTransform = deviceValues.surfaceCapabilities.currentTransform; + createInfo.compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eOpaque; + createInfo.presentMode = choosePresentMode(deviceValues); + createInfo.clipped = true; + createInfo.oldSwapchain = oldSwapchain; + + auto newSwapChain = m_device->createSwapchainKHRUnique(createInfo); + + // Clear old resources before replacing the swap chain + m_swapChainFramebuffers.clear(); + m_swapChainImageViews.clear(); + + 
m_swapChain = std::move(newSwapChain); + + auto swapChainImages = m_device->getSwapchainImagesKHR(m_swapChain.get()); + m_swapChainImages.assign(swapChainImages.begin(), swapChainImages.end()); + m_swapChainImageFormat = surfaceFormat.format; + m_swapChainExtent = createInfo.imageExtent; + + m_swapChainImageViews.reserve(m_swapChainImages.size()); + for (const auto& image : m_swapChainImages) { + vk::ImageViewCreateInfo viewCreateInfo; + viewCreateInfo.image = image; + viewCreateInfo.viewType = vk::ImageViewType::e2D; + viewCreateInfo.format = m_swapChainImageFormat; + + viewCreateInfo.components.r = vk::ComponentSwizzle::eIdentity; + viewCreateInfo.components.g = vk::ComponentSwizzle::eIdentity; + viewCreateInfo.components.b = vk::ComponentSwizzle::eIdentity; + viewCreateInfo.components.a = vk::ComponentSwizzle::eIdentity; + + viewCreateInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + viewCreateInfo.subresourceRange.baseMipLevel = 0; + viewCreateInfo.subresourceRange.levelCount = 1; + viewCreateInfo.subresourceRange.baseArrayLayer = 0; + viewCreateInfo.subresourceRange.layerCount = 1; + + m_swapChainImageViews.push_back(m_device->createImageViewUnique(viewCreateInfo)); + } + + // Transition new images eUndefined → ePresentSrcKHR so the render pass + // can use initialLayout=ePresentSrcKHR from the start. 
+ { + vk::CommandBufferAllocateInfo allocInfo; + allocInfo.commandPool = m_graphicsCommandPool.get(); + allocInfo.level = vk::CommandBufferLevel::ePrimary; + allocInfo.commandBufferCount = 1; + + auto cmdBuffers = m_device->allocateCommandBuffers(allocInfo); + auto cmd = cmdBuffers.front(); + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + cmd.begin(beginInfo); + + for (auto& image : m_swapChainImages) { + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = vk::ImageLayout::eUndefined; + barrier.newLayout = vk::ImageLayout::ePresentSrcKHR; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = {}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eBottomOfPipe, + {}, nullptr, nullptr, barrier); + } + + cmd.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + m_graphicsQueue.submit(submitInfo, nullptr); + m_graphicsQueue.waitIdle(); + + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + } + + return true; +} + +bool VulkanRenderer::recreateSwapChain() +{ + mprintf(("Vulkan: Recreating swap chain...\n")); + + // Wait for all frames to finish so no resources are in use + for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + m_frames[i]->waitForFinish(); + } + m_device->waitIdle(); + + // Re-query surface state (may have changed due to resize/compositor) + PhysicalDeviceValues freshValues; + freshValues.device = m_physicalDevice; + freshValues.surfaceCapabilities = 
m_physicalDevice.getSurfaceCapabilitiesKHR(m_vkSurface.get()); + auto fmts = m_physicalDevice.getSurfaceFormatsKHR(m_vkSurface.get()); + freshValues.surfaceFormats.assign(fmts.begin(), fmts.end()); + auto modes = m_physicalDevice.getSurfacePresentModesKHR(m_vkSurface.get()); + freshValues.presentModes.assign(modes.begin(), modes.end()); + freshValues.graphicsQueueIndex = {true, m_graphicsQueueFamilyIndex}; + freshValues.presentQueueIndex = {true, m_presentQueueFamilyIndex}; + + // Check for 0x0 extent (minimized window) — caller should retry later + auto extent = chooseSwapChainExtent(freshValues, gr_screen.max_w, gr_screen.max_h); + if (extent.width == 0 || extent.height == 0) { + mprintf(("Vulkan: Surface extent is 0x0 (minimized), deferring swap chain recreation\n")); + return false; + } + + // Recreate swap chain, image views, and framebuffers + // (createSwapChain clears old resources and transitions new images internally) + createSwapChain(freshValues, m_swapChain.get()); + createFrameBuffers(); + + // Update VulkanRenderFrame handles to point to the new swap chain + for (auto& frame : m_frames) { + frame->updateSwapChain(m_swapChain.get()); + } + + // Reset swap chain image tracking + m_swapChainImageRenderImage.clear(); + m_swapChainImageRenderImage.resize(m_swapChainImages.size(), nullptr); + m_previousSwapChainImage = UINT32_MAX; + + m_swapChainNeedsRecreation = false; + + mprintf(("Vulkan: Swap chain recreated successfully (%ux%u, %zu images)\n", + m_swapChainExtent.width, m_swapChainExtent.height, m_swapChainImages.size())); + + return true; +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanShader.cpp b/code/graphics/vulkan/VulkanShader.cpp new file mode 100644 index 00000000000..2d66233b27e --- /dev/null +++ b/code/graphics/vulkan/VulkanShader.cpp @@ -0,0 +1,266 @@ +#include "VulkanShader.h" +#include "VulkanShaderCompiler.h" + +namespace graphics::vulkan { + +// Global shader manager pointer +static VulkanShaderManager* 
g_shaderManager = nullptr; + +VulkanShaderManager* getShaderManager() +{ + Assertion(g_shaderManager != nullptr, "Vulkan ShaderManager not initialized!"); + return g_shaderManager; +} + +void setShaderManager(VulkanShaderManager* manager) +{ + g_shaderManager = manager; +} + +// ========== gr_screen function pointer implementations ========== + +int vulkan_maybe_create_shader(shader_type shader_t, unsigned int flags) +{ + auto* shaderManager = getShaderManager(); + return shaderManager->maybeCreateShader(shader_t, flags); +} + +void vulkan_recompile_all_shaders(const std::function& progressCallback) +{ + auto* shaderManager = getShaderManager(); + shaderManager->recompileAllShaders(progressCallback); +} + +VulkanShaderManager::VulkanShaderManager() = default; +VulkanShaderManager::~VulkanShaderManager() = default; + +bool VulkanShaderManager::init(vk::Device device) +{ + if (m_initialized) { + return true; + } + + m_device = device; + + // Initialize runtime shader compiler + m_compiler = std::make_unique(); + if (!m_compiler->init()) { + mprintf(("VulkanShaderManager: Failed to initialize shader compiler!\n")); + return false; + } + + VulkanShaderCompiler::purgeOldCache(); + + m_initialized = true; + + mprintf(("VulkanShaderManager: Initialized with runtime shader compilation\n")); + return true; +} + +void VulkanShaderManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Clear all shaders (unique_ptrs will clean up) + m_shaders.clear(); + m_shaderMap.clear(); + m_freeSlots.clear(); + + if (m_compiler) { + m_compiler->shutdown(); + m_compiler.reset(); + } + + m_initialized = false; + mprintf(("VulkanShaderManager: Shutdown complete\n")); +} + +int VulkanShaderManager::maybeCreateShader(shader_type type, unsigned int flags) +{ + if (!m_initialized) { + return -1; + } + + shader_key_t key(static_cast(type), flags); + auto it = m_shaderMap.find(key); + if (it != m_shaderMap.end()) { + return static_cast(it->second); + } + + return loadShader(type, 
flags); +} + +void VulkanShaderManager::recompileAllShaders(const std::function& progressCallback) +{ + if (!m_initialized || !m_compiler) { + return; + } + + size_t total = m_shaders.size(); + size_t current = 0; + + for (auto& shader : m_shaders) { + if (shader.valid) { + shader_type type = shader.type; + unsigned int flags = shader.flags; + + // Release old modules + shader.vertexModule.reset(); + shader.fragmentModule.reset(); + shader.valid = false; + + const ShaderTypeInfo* typeInfo = shader_get_type_info(type); + if (typeInfo) { + SCP_string vertFile = typeInfo->vert; + auto vertSpirv = m_compiler->compile(vertFile, vk::ShaderStageFlagBits::eVertex, type, flags); + if (!vertSpirv.empty()) { + vk::ShaderModuleCreateInfo createInfo; + createInfo.codeSize = vertSpirv.size() * sizeof(uint32_t); + createInfo.pCode = vertSpirv.data(); + try { + shader.vertexModule = m_device.createShaderModuleUnique(createInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanShaderManager: Failed to create vertex module: %s\n", e.what())); + } + } + shader.vertexInputMask = 0; + for (auto attr : typeInfo->attributes) { + shader.vertexInputMask |= (1u << attr); + } + + SCP_string fragFile = typeInfo->frag; + auto fragSpirv = m_compiler->compile(fragFile, vk::ShaderStageFlagBits::eFragment, type, flags); + if (!fragSpirv.empty()) { + vk::ShaderModuleCreateInfo createInfo; + createInfo.codeSize = fragSpirv.size() * sizeof(uint32_t); + createInfo.pCode = fragSpirv.data(); + try { + shader.fragmentModule = m_device.createShaderModuleUnique(createInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanShaderManager: Failed to create fragment module: %s\n", e.what())); + } + } + + shader.valid = shader.vertexModule && shader.fragmentModule; + } + } + + ++current; + if (progressCallback) { + progressCallback(current, total); + } + } + + mprintf(("VulkanShaderManager: Recompiled %zu shaders\n", total)); +} + +const VulkanShaderModule* VulkanShaderManager::getShader(int 
handle) const +{ + if (handle < 0 || static_cast(handle) >= m_shaders.size()) { + return nullptr; + } + + const VulkanShaderModule& shader = m_shaders[handle]; + return shader.valid ? &shader : nullptr; +} + +const VulkanShaderModule* VulkanShaderManager::getShaderByType(shader_type type) const +{ + // Find the first shader of this type (any flags). + // Used for vertex input mask queries where the specific variant doesn't matter. + for (const auto& pair : m_shaderMap) { + if (pair.first.first == static_cast(type)) { + return getShader(static_cast(pair.second)); + } + } + return nullptr; +} + +int VulkanShaderManager::loadShader(shader_type type, unsigned int flags) +{ + const ShaderTypeInfo* typeInfo = shader_get_type_info(type); + if (!typeInfo) { + mprintf(("VulkanShaderManager: Unknown shader type: %d\n", static_cast(type))); + return -1; + } + + VulkanShaderModule shader; + shader.type = type; + shader.flags = flags; + shader.description = typeInfo->description; + if (flags != 0) { + shader.description += " (flags=0x"; + char buf[16]; + snprintf(buf, sizeof(buf), "%x", flags); + shader.description += buf; + shader.description += ")"; + } + + // Compile vertex shader + SCP_string vertFile = typeInfo->vert; + auto vertSpirv = m_compiler->compile(vertFile, vk::ShaderStageFlagBits::eVertex, type, flags); + if (!vertSpirv.empty()) { + vk::ShaderModuleCreateInfo createInfo; + createInfo.codeSize = vertSpirv.size() * sizeof(uint32_t); + createInfo.pCode = vertSpirv.data(); + try { + shader.vertexModule = m_device.createShaderModuleUnique(createInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanShaderManager: Failed to create vertex module for %s: %s\n", + vertFile.c_str(), e.what())); + } + } + shader.vertexInputMask = 0; + for (auto attr : typeInfo->attributes) { + shader.vertexInputMask |= (1u << attr); + } + + // Compile fragment shader + SCP_string fragFile = typeInfo->frag; + auto fragSpirv = m_compiler->compile(fragFile, 
vk::ShaderStageFlagBits::eFragment, type, flags); + if (!fragSpirv.empty()) { + vk::ShaderModuleCreateInfo createInfo; + createInfo.codeSize = fragSpirv.size() * sizeof(uint32_t); + createInfo.pCode = fragSpirv.data(); + try { + shader.fragmentModule = m_device.createShaderModuleUnique(createInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanShaderManager: Failed to create fragment module for %s: %s\n", + fragFile.c_str(), e.what())); + } + } + + // Check if essential modules loaded + shader.valid = shader.vertexModule && shader.fragmentModule; + + if (!shader.valid) { + mprintf(("VulkanShaderManager: Failed to load shader type %d (flags=0x%x)\n", + static_cast(type), flags)); + } + + // Find or allocate slot + size_t index; + if (!m_freeSlots.empty()) { + index = m_freeSlots.back(); + m_freeSlots.pop_back(); + m_shaders[index] = std::move(shader); + } else { + index = m_shaders.size(); + m_shaders.push_back(std::move(shader)); + } + + // Add to lookup map + shader_key_t key(static_cast(type), flags); + m_shaderMap[key] = index; + + if (m_shaders[index].valid) { + nprintf(("Vulkan", "VulkanShaderManager: Created shader %zu: %s\n", + index, m_shaders[index].description.c_str())); + } + + return static_cast(index); +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanShader.h b/code/graphics/vulkan/VulkanShader.h new file mode 100644 index 00000000000..0f3e13381cb --- /dev/null +++ b/code/graphics/vulkan/VulkanShader.h @@ -0,0 +1,157 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "graphics/shader_types.h" + +#include +#include +#include + + +namespace graphics::vulkan { + +class VulkanShaderCompiler; + +/** + * @brief Holds SPIR-V shader modules for a single shader program (or variant) + * + * Corresponds to an OpenGL shader program (vertex + fragment). 
+ * With runtime compilation, each unique (type, flags) pair produces a distinct + * VulkanShaderModule — matching OpenGL's compile-time variant system. + */ +struct VulkanShaderModule { + vk::UniqueShaderModule vertexModule; + vk::UniqueShaderModule fragmentModule; + + shader_type type = SDR_TYPE_NONE; + unsigned int flags = 0; // SDR_FLAG_* bitmask for this variant + + SCP_string description; + bool valid = false; + + // Bitmask of vertex input locations this shader declares (bit N = location N). + // Used at pipeline creation to filter out fallback attributes the shader + // doesn't consume. Computed from ShaderTypeInfo::attributes at load time. + uint32_t vertexInputMask = 0; +}; + +/** + * @brief Manages Vulkan shader modules with runtime GLSL→SPIR-V compilation + * + * Provides the implementation for gr_screen.gf_maybe_create_shader and + * gr_screen.gf_recompile_all_shaders function pointers. + * + * Shader variants are handled via compile-time #defines, matching OpenGL's + * system. Each unique (type, flags) pair produces a distinct compiled shader. + */ +class VulkanShaderManager { +public: + VulkanShaderManager(); + ~VulkanShaderManager(); + + // Non-copyable + VulkanShaderManager(const VulkanShaderManager&) = delete; + VulkanShaderManager& operator=(const VulkanShaderManager&) = delete; + + /** + * @brief Initialize the shader manager and runtime compiler + * @param device Vulkan logical device + * @return true on success + */ + bool init(vk::Device device); + + /** + * @brief Shutdown and release all shader modules + */ + void shutdown(); + + /** + * @brief Get or create a shader program variant + * + * Implements gr_screen.gf_maybe_create_shader. + * Each unique (type, flags) pair produces a distinct compiled shader. 
+ * + * @param type Shader type + * @param flags SDR_FLAG_* bitmask for variant selection + * @return Shader handle (index), or -1 on failure + */ + int maybeCreateShader(shader_type type, unsigned int flags); + + /** + * @brief Recompile all loaded shader variants + * + * Implements gr_screen.gf_recompile_all_shaders + * + * @param progressCallback Called with (current, total) progress + */ + void recompileAllShaders(const std::function& progressCallback); + + /** + * @brief Get a shader by handle + * @param handle Shader handle from maybeCreateShader + * @return Pointer to shader module, or nullptr if invalid + */ + const VulkanShaderModule* getShader(int handle) const; + + /** + * @brief Get a shader by handle (alias for getShader) + */ + const VulkanShaderModule* getShaderByHandle(int handle) const { return getShader(handle); } + + /** + * @brief Get any shader of the given type (ignoring flags). + * + * Used for vertex input mask queries where the specific variant doesn't matter. + * @param type Shader type + * @return Pointer to shader module, or nullptr if not found + */ + const VulkanShaderModule* getShaderByType(shader_type type) const; + + /** + * @brief Get total number of loaded shaders + */ + size_t getShaderCount() const { return m_shaders.size(); } + +private: + /** + * @brief Load and compile a shader variant + * @param type Shader type + * @param flags SDR_FLAG_* bitmask + * @return Index of new shader, or -1 on failure + */ + int loadShader(shader_type type, unsigned int flags); + + vk::Device m_device; + + // Shader lookup: (type, flags) -> index in m_shaders + typedef std::pair shader_key_t; + struct key_hasher { + size_t operator()(const shader_key_t& k) const { + return std::hash()(k.first) ^ (std::hash()(k.second) << 16); + } + }; + SCP_unordered_map m_shaderMap; + + // All loaded shaders + SCP_vector m_shaders; + + // Free list for shader slot reuse + SCP_vector m_freeSlots; + + // Runtime GLSL→SPIR-V compiler + std::unique_ptr m_compiler; + 
+ bool m_initialized = false; +}; + +// Global shader manager access +VulkanShaderManager* getShaderManager(); +void setShaderManager(VulkanShaderManager* manager); + +// ========== gr_screen function pointer implementations ========== + +int vulkan_maybe_create_shader(shader_type shader_t, unsigned int flags); +void vulkan_recompile_all_shaders(const std::function& progressCallback); + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanShaderCompiler.cpp b/code/graphics/vulkan/VulkanShaderCompiler.cpp new file mode 100644 index 00000000000..eb8f837e8e9 --- /dev/null +++ b/code/graphics/vulkan/VulkanShaderCompiler.cpp @@ -0,0 +1,351 @@ +#include "VulkanShaderCompiler.h" +#include "VulkanShader.h" + +#include "def_files/def_files.h" +#include "external_dll/externalcode.h" +#include "graphics/shader_preprocess.h" +#include "graphics/shader_types.h" +#include "globalincs/systemvars.h" +#include "graphics/post_processing.h" + +#include +#include + +namespace graphics::vulkan { + +// ========== VulkanShadercLibrary ========== + +class VulkanShadercLibrary : public SCP_ExternalCode { +public: + VulkanShadercLibrary(); + bool isLoaded() const { return m_loaded; } + + decltype(&shaderc_compiler_initialize) compiler_initialize = nullptr; + decltype(&shaderc_compiler_release) compiler_release = nullptr; + decltype(&shaderc_compile_options_initialize) compile_options_initialize = nullptr; + decltype(&shaderc_compile_options_release) compile_options_release = nullptr; + decltype(&shaderc_compile_options_set_target_env) compile_options_set_target_env = nullptr; + decltype(&shaderc_compile_options_set_optimization_level) compile_options_set_optimization_level = nullptr; + decltype(&shaderc_compile_options_set_generate_debug_info) compile_options_set_generate_debug_info = nullptr; + decltype(&shaderc_compile_into_spv) compile_into_spv = nullptr; + decltype(&shaderc_result_release) result_release = nullptr; + decltype(&shaderc_result_get_compilation_status) 
result_get_compilation_status = nullptr; + decltype(&shaderc_result_get_error_message) result_get_error_message = nullptr; + decltype(&shaderc_result_get_num_warnings) result_get_num_warnings = nullptr; + decltype(&shaderc_result_get_bytes) result_get_bytes = nullptr; + decltype(&shaderc_result_get_length) result_get_length = nullptr; + +private: + bool m_loaded = false; +}; + +VulkanShadercLibrary::VulkanShadercLibrary() +{ + // Try platform-specific library names +#if defined(_WIN32) + const char* names[] = {"shaderc_shared.dll", "shaderc.dll"}; +#elif defined(__APPLE__) + const char* names[] = {"libshaderc_shared.1.dylib", "libshaderc_shared.dylib", "libshaderc.dylib"}; +#else + const char* names[] = {"libshaderc_shared.so.1", "libshaderc_shared.so", "libshaderc.so.1", "libshaderc.so"}; +#endif + + auto base_path = SDL_GetBasePath(); + + bool loaded = false; + for (const auto* name : names) { + if (LoadExternal(name, base_path)) { + loaded = true; + mprintf(("VulkanShadercLibrary: Loaded '%s'\n", name)); + break; + } + } + + if (base_path) { + SDL_free(base_path); + } + + if (!loaded) { + mprintf(("VulkanShadercLibrary: Could not load shaderc shared library\n")); + return; + } + + // Load all required function pointers + compiler_initialize = LoadFunction("shaderc_compiler_initialize"); + compiler_release = LoadFunction("shaderc_compiler_release"); + compile_options_initialize = LoadFunction("shaderc_compile_options_initialize"); + compile_options_release = LoadFunction("shaderc_compile_options_release"); + compile_options_set_target_env = LoadFunction("shaderc_compile_options_set_target_env"); + compile_options_set_optimization_level = LoadFunction("shaderc_compile_options_set_optimization_level"); + compile_options_set_generate_debug_info = LoadFunction("shaderc_compile_options_set_generate_debug_info"); + compile_into_spv = LoadFunction("shaderc_compile_into_spv"); + result_release = LoadFunction("shaderc_result_release"); + result_get_compilation_status = 
LoadFunction("shaderc_result_get_compilation_status"); + result_get_error_message = LoadFunction("shaderc_result_get_error_message"); + result_get_num_warnings = LoadFunction("shaderc_result_get_num_warnings"); + result_get_bytes = LoadFunction("shaderc_result_get_bytes"); + result_get_length = LoadFunction("shaderc_result_get_length"); + + // Verify all required functions were loaded + m_loaded = compiler_initialize && compiler_release + && compile_options_initialize && compile_options_release + && compile_options_set_target_env && compile_options_set_optimization_level + && compile_options_set_generate_debug_info + && compile_into_spv + && result_release && result_get_compilation_status + && result_get_error_message && result_get_num_warnings + && result_get_bytes && result_get_length; + + if (!m_loaded) { + mprintf(("VulkanShadercLibrary: Library loaded but some functions are missing!\n")); + } +} + +// ========== VulkanShaderCompiler ========== + +VulkanShaderCompiler::VulkanShaderCompiler() = default; +VulkanShaderCompiler::~VulkanShaderCompiler() = default; + +bool VulkanShaderCompiler::init() +{ + if (m_initialized) { + return true; + } + + m_shaderc = std::make_unique(); + if (!m_shaderc->isLoaded()) { + mprintf(("VulkanShaderCompiler: shaderc library not available!\n" + " Install the Vulkan SDK or shaderc shared library:\n" + " Debian/Ubuntu: sudo apt install libshaderc-dev\n" + " Vulkan SDK: https://vulkan.lunarg.com/sdk/home\n")); + m_shaderc.reset(); + return false; + } + + m_initialized = true; + mprintf(("VulkanShaderCompiler: Initialized (runtime shaderc compilation)\n")); + return true; +} + +void VulkanShaderCompiler::shutdown() +{ + m_shaderc.reset(); + m_initialized = false; +} + +bool VulkanShaderCompiler::isAvailable() const +{ + return m_initialized && m_shaderc && m_shaderc->isLoaded(); +} + +SCP_string VulkanShaderCompiler::buildHeader(vk::ShaderStageFlagBits /*stage*/, shader_type sdrType, + unsigned int flags) const +{ + (void)this; + 
SCP_string header; + header.reserve(512); + + // Required for layout(location=N) on varyings in Vulkan GLSL. + // Injected here so individual shaders don't need it. + header += "#extension GL_ARB_separate_shader_objects : enable\n"; + + // shaderc automatically predefines VULKAN=100 when targeting Vulkan, + // so we do NOT define it here — doing so causes a "Macro redefined" error. + + // Blinn-Phong lighting model (matches OpenGL's opengl_shader_get_header) + if (Detail.lighting < 3) { + header += "#define FLAG_LIGHT_MODEL_BLINN_PHONG\n"; + } + + // Post-processing shaders need special header injection (matching OpenGL's + // opengl_post_shader_header). Effect indices map to #define names, and + // lightshafts needs the sample count. + if (sdrType == SDR_TYPE_POST_PROCESS_MAIN || sdrType == SDR_TYPE_POST_PROCESS_LIGHTSHAFTS) { + if (graphics::Post_processing_manager) { + if (sdrType == SDR_TYPE_POST_PROCESS_MAIN) { + const auto& postEffects = graphics::Post_processing_manager->getPostEffects(); + for (size_t idx = 0; idx < postEffects.size(); idx++) { + if (flags & (1 << idx)) { + header += "#define "; + header += postEffects[idx].define_name; + header += "\n"; + } + } + } else if (sdrType == SDR_TYPE_POST_PROCESS_LIGHTSHAFTS) { + const auto& ls_params = graphics::Post_processing_manager->getLightshaftParams(); + char temp[64]; + snprintf(temp, sizeof(temp), "#define SAMPLE_NUM %d\n", ls_params.samplenum); + header += temp; + } + } + } else if (sdrType == SDR_TYPE_POST_PROCESS_FXAA) { + // GLSL 450 always has textureGather + header += shader_get_fxaa_defines(Gr_aa_mode, true); + } else { + // Inject variant-specific #defines based on flags + header += shader_build_variant_defines(sdrType, flags); + } + + return header; +} + +SCP_string VulkanShaderCompiler::computeSourceHash(const SCP_string& header, const SCP_string& source) +{ + MD5 md5; + md5.update(header.c_str(), static_cast(header.size())); + md5.update(source.c_str(), static_cast(source.size())); + + // 
Include a version tag so cache is invalidated on engine updates + static const char VERSION_TAG[] = "vk_shader_v1"; + md5.update(VERSION_TAG, sizeof(VERSION_TAG) - 1); + + md5.finalize(); + return md5.hexdigest(); +} + +SCP_vector VulkanShaderCompiler::compile(const SCP_string& filename, + vk::ShaderStageFlagBits stage, + shader_type sdrType, + unsigned int flags) +{ + if (!m_initialized || !m_shaderc) { + mprintf(("VulkanShaderCompiler: Not initialized!\n")); + return {}; + } + + // Load and preprocess GLSL source + SCP_string source = shader_load_source(filename); + if (source.empty()) { + mprintf(("VulkanShaderCompiler: Failed to load GLSL source: %s\n", filename.c_str())); + return {}; + } + + source = shader_preprocess_includes(filename, source); + source = shader_preprocess_defines(filename, source); + + // Build preprocessor header with #defines for variant flags + SCP_string header = buildHeader(stage, sdrType, flags); + + // Compute hash for disk cache (uses preprocessed source) + SCP_string hash = computeSourceHash(header, source); + SCP_string cacheFilename = "vk_shader-" + hash + ".spv"; + + // Check disk cache + CFILE* cacheFile = cfopen(cacheFilename.c_str(), "rb", CF_TYPE_CACHE); + if (cacheFile != nullptr) { + int fileSize = cfilelength(cacheFile); + if (fileSize > 0 && (fileSize % 4) == 0) { + SCP_vector spirv(fileSize / 4); + if (cfread(spirv.data(), 1, fileSize, cacheFile) == fileSize) { + cfclose(cacheFile); + nprintf(("Vulkan", "VulkanShaderCompiler: Cache hit for %s (flags=0x%x)\n", + filename.c_str(), flags)); + return spirv; + } + } + cfclose(cacheFile); + } + + // Cache miss — compile with shaderc + mprintf(("VulkanShaderCompiler: Compiling %s (flags=0x%x)...\n", filename.c_str(), flags)); + + // Assemble: #version + header (extension + defines) + source + SCP_string fullSource; + fullSource.reserve(header.size() + source.size() + 32); + fullSource += "#version 450\n"; + fullSource += header; + fullSource += source; + + auto* sc = 
m_shaderc.get(); + + shaderc_compiler_t compiler = sc->compiler_initialize(); + if (!compiler) { + mprintf(("VulkanShaderCompiler: Failed to initialize shaderc compiler!\n")); + return {}; + } + + shaderc_compile_options_t opts = sc->compile_options_initialize(); + sc->compile_options_set_target_env(opts, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_0); + sc->compile_options_set_optimization_level(opts, shaderc_optimization_level_performance); + sc->compile_options_set_generate_debug_info(opts); + + shaderc_shader_kind kind; + if (stage == vk::ShaderStageFlagBits::eVertex) { + kind = shaderc_vertex_shader; + } else { + kind = shaderc_fragment_shader; + } + + shaderc_compilation_result_t result = sc->compile_into_spv( + compiler, fullSource.c_str(), fullSource.size(), kind, filename.c_str(), "main", opts); + + SCP_vector spirv; + auto status = sc->result_get_compilation_status(result); + + if (status != shaderc_compilation_status_success) { + const char* errMsg = sc->result_get_error_message(result); + mprintf(("VulkanShaderCompiler: COMPILATION FAILED for %s (flags=0x%x):\n%s\n", + filename.c_str(), flags, errMsg ? errMsg : "(no error message)")); + } else { + if (sc->result_get_num_warnings(result) > 0) { + const char* errMsg = sc->result_get_error_message(result); + mprintf(("VulkanShaderCompiler: Warnings for %s:\n%s\n", + filename.c_str(), errMsg ? 
errMsg : "")); + } + + size_t byteLen = sc->result_get_length(result); + const char* bytes = sc->result_get_bytes(result); + + if (bytes && byteLen > 0 && (byteLen % 4) == 0) { + spirv.resize(byteLen / 4); + std::memcpy(spirv.data(), bytes, byteLen); + + mprintf(("VulkanShaderCompiler: Compiled %s -> %zu bytes SPIR-V\n", + filename.c_str(), byteLen)); + + // Save to disk cache + cacheFile = cfopen(cacheFilename.c_str(), "wb", CF_TYPE_CACHE); + if (cacheFile != nullptr) { + cfwrite(spirv.data(), static_cast(spirv.size() * sizeof(uint32_t)), 1, cacheFile); + cfclose(cacheFile); + } + } + } + + sc->result_release(result); + sc->compile_options_release(opts); + sc->compiler_release(compiler); + + return spirv; +} + +void VulkanShaderCompiler::purgeOldCache() +{ + const SCP_string PREFIX = "vk_shader-"; + const auto TIMEOUT = 2.0 * 30.0 * 24.0 * 60.0 * 60.0; // ~2 months in seconds + + SCP_vector cache_files; + SCP_vector file_info; + cf_get_file_list(cache_files, CF_TYPE_CACHE, "*.spv", CF_SORT_NONE, &file_info, + CF_LOCATION_ROOT_USER | CF_LOCATION_ROOT_GAME | CF_LOCATION_TYPE_ROOT); + + Assertion(cache_files.size() == file_info.size(), + "cf_get_file_list returned different sizes for file names and file informations!"); + + auto now = std::time(nullptr); + for (size_t i = 0; i < cache_files.size(); ++i) { + auto& name = cache_files[i]; + + if (name.compare(0, PREFIX.size(), PREFIX) != 0) { + continue; // Not our cache file + } + + auto diff = std::difftime(now, file_info[i].write_time); + if (diff > TIMEOUT) { + auto full_name = name + ".spv"; + cf_delete(full_name.c_str(), CF_TYPE_CACHE); + } + } +} + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanShaderCompiler.h b/code/graphics/vulkan/VulkanShaderCompiler.h new file mode 100644 index 00000000000..ebc604eda65 --- /dev/null +++ b/code/graphics/vulkan/VulkanShaderCompiler.h @@ -0,0 +1,68 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" + +#include + +#include + 
+namespace graphics::vulkan { + +class VulkanShadercLibrary; + +/** + * @brief Runtime GLSL->SPIR-V compiler using shaderc + * + * Replaces build-time glslc compilation. Compiles GLSL to SPIR-V at runtime + * with disk caching, #define variant injection, and include resolution. + * The shaderc library is loaded dynamically. + */ +class VulkanShaderCompiler { +public: + VulkanShaderCompiler(); + ~VulkanShaderCompiler(); + + bool init(); + void shutdown(); + + /** + * @brief Check if the shaderc library was loaded successfully + */ + bool isAvailable() const; + + /** + * @brief Compile GLSL to SPIR-V with disk caching + * + * @param filename GLSL source filename, e.g. "main.frag" + * @param stage Vertex or fragment + * @param sdrType Shader type (for variant flag lookup) + * @param flags SDR_FLAG_* bitmask — matching flags injected as #defines + * @return SPIR-V words, or empty vector on failure + */ + SCP_vector compile(const SCP_string& filename, + vk::ShaderStageFlagBits stage, + shader_type sdrType, + unsigned int flags); + + /** + * @brief Delete stale cache files older than ~2 months + */ + static void purgeOldCache(); + +private: + /** + * @brief Build the GLSL header with #defines for variant flags + */ + SCP_string buildHeader(vk::ShaderStageFlagBits stage, shader_type sdrType, unsigned int flags) const; + + /** + * @brief Compute an MD5 hash of the header + source + version tag for disk caching + */ + static SCP_string computeSourceHash(const SCP_string& header, const SCP_string& source); + + std::unique_ptr m_shaderc; + bool m_initialized = false; +}; + +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/VulkanState.cpp b/code/graphics/vulkan/VulkanState.cpp new file mode 100644 index 00000000000..c45bd7b0f4b --- /dev/null +++ b/code/graphics/vulkan/VulkanState.cpp @@ -0,0 +1,346 @@ +#include "VulkanState.h" +#include "VulkanDraw.h" + + +namespace graphics::vulkan { + +// Global state tracker pointer +static VulkanStateTracker* 
g_stateTracker = nullptr; + +VulkanStateTracker* getStateTracker() +{ + Assertion(g_stateTracker != nullptr, "Vulkan StateTracker not initialized!"); + return g_stateTracker; +} + +void setStateTracker(VulkanStateTracker* tracker) +{ + g_stateTracker = tracker; +} + +bool VulkanStateTracker::init(vk::Device device) +{ + if (m_initialized) { + return true; + } + + m_device = device; + + // Initialize default viewport + m_viewport.x = 0.0f; + m_viewport.y = 0.0f; + m_viewport.width = static_cast(gr_screen.max_w); + m_viewport.height = static_cast(gr_screen.max_h); + m_viewport.minDepth = 0.0f; + m_viewport.maxDepth = 1.0f; + + // Initialize default scissor + m_scissor.offset.x = 0; + m_scissor.offset.y = 0; + m_scissor.extent.width = gr_screen.max_w; + m_scissor.extent.height = gr_screen.max_h; + + // Initialize clear color (dark blue for debugging - shows clears are working) + m_clearColor.float32[0] = 0.0f; + m_clearColor.float32[1] = 0.0f; + m_clearColor.float32[2] = 0.3f; + m_clearColor.float32[3] = 1.0f; + + m_initialized = true; + mprintf(("VulkanStateTracker: Initialized\n")); + return true; +} + +void VulkanStateTracker::shutdown() +{ + if (!m_initialized) { + return; + } + + m_cmdBuffer = nullptr; + m_currentPipeline = nullptr; + m_currentRenderPass = nullptr; + + m_initialized = false; + mprintf(("VulkanStateTracker: Shutdown complete\n")); +} + +void VulkanStateTracker::beginFrame(vk::CommandBuffer cmdBuffer) +{ + mprintf(("VulkanStateTracker::beginFrame - cmdBuffer=%p\n", + static_cast(static_cast(cmdBuffer)))); + + m_cmdBuffer = cmdBuffer; + + // Reset state for new frame + m_currentPipeline = nullptr; + m_currentRenderPass = nullptr; + + for (auto& set : m_boundDescriptorSets) { + set = nullptr; + } + + // Mark all dynamic state as dirty + m_viewportDirty = true; + m_scissorDirty = true; + m_depthBiasDirty = true; + m_stencilRefDirty = true; + m_lineWidthDirty = true; +} + +void VulkanStateTracker::endFrame() +{ + mprintf(("VulkanStateTracker::endFrame 
- clearing cmdBuffer (was %p)\n", + static_cast(static_cast(m_cmdBuffer)))); + m_cmdBuffer = nullptr; +} + +void VulkanStateTracker::setRenderPass(vk::RenderPass renderPass, uint32_t subpass) +{ + m_currentRenderPass = renderPass; + m_currentSubpass = subpass; + + // Pipeline needs to be rebound when render pass changes + m_currentPipeline = nullptr; + + // Dynamic state must be re-applied after a render pass change. + // Vulkan doesn't preserve dynamic state across render pass instances, + // and mid-frame render passes (e.g. light accumulation) may have set + // different viewport/scissor values directly on the command buffer. + m_viewportDirty = true; + m_scissorDirty = true; +} + +void VulkanStateTracker::setViewport(float x, float y, float width, float height, float minDepth, float maxDepth) +{ + if (m_viewport.x != x || m_viewport.y != y || + m_viewport.width != width || m_viewport.height != height || + m_viewport.minDepth != minDepth || m_viewport.maxDepth != maxDepth) { + m_viewport.x = x; + m_viewport.y = y; + m_viewport.width = width; + m_viewport.height = height; + m_viewport.minDepth = minDepth; + m_viewport.maxDepth = maxDepth; + m_viewportDirty = true; + + // When scissor is disabled, applyDynamicState derives the scissor rect + // from the viewport dimensions. So a viewport change invalidates that + // computed scissor and must trigger a re-flush. 
+        if (!m_scissorEnabled) {
+            m_scissorDirty = true;
+        }
+    }
+}
+
+void VulkanStateTracker::setScissor(int32_t x, int32_t y, uint32_t width, uint32_t height)
+{
+    if (m_scissor.offset.x != x || m_scissor.offset.y != y ||
+        m_scissor.extent.width != width || m_scissor.extent.height != height) {
+        m_scissor.offset.x = x;
+        m_scissor.offset.y = y;
+        m_scissor.extent.width = width;
+        m_scissor.extent.height = height;
+        m_scissorDirty = true;
+    }
+}
+
+void VulkanStateTracker::setScissorEnabled(bool enabled)
+{
+    if (m_scissorEnabled != enabled) {
+        m_scissorEnabled = enabled;
+        m_scissorDirty = true;
+    }
+}
+
+void VulkanStateTracker::setDepthBias(float constantFactor, float slopeFactor)
+{
+    if (m_depthBiasConstant != constantFactor || m_depthBiasSlope != slopeFactor) {
+        m_depthBiasConstant = constantFactor;
+        m_depthBiasSlope = slopeFactor;
+        m_depthBiasDirty = true;
+    }
+}
+
+void VulkanStateTracker::setStencilReference(uint32_t reference)
+{
+    if (m_stencilReference != reference) {
+        m_stencilReference = reference;
+        m_stencilRefDirty = true;
+    }
+}
+
+void VulkanStateTracker::setLineWidth(float width)
+{
+    if (m_lineWidth != width) {
+        m_lineWidth = width;
+        m_lineWidthDirty = true;
+    }
+}
+
+void VulkanStateTracker::bindPipeline(vk::Pipeline pipeline, vk::PipelineLayout layout)
+{
+    if (m_currentPipeline != pipeline && pipeline && m_cmdBuffer) {
+        m_cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+        m_currentPipeline = pipeline;
+        m_currentPipelineLayout = layout;
+
+        // After binding new pipeline, need to re-apply dynamic state
+        applyDynamicState();
+
+        // Clear bound descriptor sets since they need to be rebound with new layout
+        for (auto& set : m_boundDescriptorSets) {
+            set = nullptr;
+        }
+    }
+}
+
+// Binds one descriptor set at the given set index on the graphics bind point,
+// skipping the call when it would be redundant.
+//
+// A bind is redundant only when the same set is already bound at that index AND
+// no dynamic offsets are supplied. Dynamic offsets are passed with each bind
+// command and are not part of the set handle, so a call that carries offsets is
+// always recorded; otherwise a re-bind of the same set at a new offset would be
+// dropped and later draws would read the previous offset.
+//
+// @param setIndex       Descriptor set slot in the current pipeline layout
+// @param set            Descriptor set to bind; must not be null
+// @param dynamicOffsets Offsets for the set's dynamic bindings; may be empty
+void VulkanStateTracker::bindDescriptorSet(DescriptorSetIndex setIndex, vk::DescriptorSet set,
+    const SCP_vector<uint32_t>& dynamicOffsets)
+{
+    Assertion(m_cmdBuffer, "bindDescriptorSet called without active command buffer!");
+    Assertion(m_currentPipelineLayout, "bindDescriptorSet called without bound pipeline layout!");
+    Assertion(set, "bindDescriptorSet called with null descriptor set!");
+
+    const auto index = static_cast<uint32_t>(setIndex);
+    const bool hasDynamicOffsets = !dynamicOffsets.empty();
+
+    // The cached handle alone cannot prove redundancy once offsets are involved.
+    if (!hasDynamicOffsets && m_boundDescriptorSets[index] == set) {
+        return;
+    }
+
+    m_cmdBuffer.bindDescriptorSets(
+        vk::PipelineBindPoint::eGraphics,
+        m_currentPipelineLayout,
+        index,
+        1, &set,
+        static_cast<uint32_t>(dynamicOffsets.size()),
+        hasDynamicOffsets ? dynamicOffsets.data() : nullptr);
+
+    m_boundDescriptorSets[index] = set;
+}
+
+void VulkanStateTracker::bindVertexBuffer(uint32_t binding, vk::Buffer buffer, vk::DeviceSize offset)
+{
+    Assertion(m_cmdBuffer, "bindVertexBuffer called without active command buffer!");
+    Assertion(buffer, "bindVertexBuffer called with null buffer!");
+    m_cmdBuffer.bindVertexBuffers(binding, 1, &buffer, &offset);
+}
+
+void VulkanStateTracker::bindIndexBuffer(vk::Buffer buffer, vk::DeviceSize offset, vk::IndexType indexType)
+{
+    Assertion(m_cmdBuffer, "bindIndexBuffer called without active command buffer!");
+    Assertion(buffer, "bindIndexBuffer called with null buffer!");
+    m_cmdBuffer.bindIndexBuffer(buffer, offset, indexType);
+}
+
+void VulkanStateTracker::setClearColor(float r, float g, float b, float a)
+{
+    m_clearColor.float32[0] = r;
+    m_clearColor.float32[1] = g;
+    m_clearColor.float32[2] = b;
+    m_clearColor.float32[3] = a;
+}
+
+void VulkanStateTracker::applyDynamicState()
+{
+    Assertion(m_cmdBuffer, "applyDynamicState called without active command buffer!");
+
+    if (m_viewportDirty) {
+        m_cmdBuffer.setViewport(0, 1, &m_viewport);
+        m_viewportDirty = false;
+    }
+
+    if (m_scissorDirty) {
+        if (m_scissorEnabled) {
+            m_cmdBuffer.setScissor(0, 1, &m_scissor);
+        } else {
+            // Set scissor to full viewport when disabled.
+            // Handle negative viewport height (VK_KHR_maintenance1 Y-flip):
+            // when height < 0, the viewport covers [y+height, y] in framebuffer Y.
+ vk::Rect2D fullScissor; + float vy = m_viewport.y; + float vh = m_viewport.height; + if (vh < 0.0f) { + vy = vy + vh; + vh = -vh; + } + fullScissor.offset.x = static_cast(m_viewport.x); + fullScissor.offset.y = static_cast(vy); + fullScissor.extent.width = static_cast(m_viewport.width); + fullScissor.extent.height = static_cast(vh); + m_cmdBuffer.setScissor(0, 1, &fullScissor); + } + m_scissorDirty = false; + } + + if (m_depthBiasDirty) { + m_cmdBuffer.setDepthBias(m_depthBiasConstant, 0.0f, m_depthBiasSlope); + m_depthBiasDirty = false; + } + + if (m_stencilRefDirty) { + m_cmdBuffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, m_stencilReference); + m_stencilRefDirty = false; + } + + if (m_lineWidthDirty) { + m_cmdBuffer.setLineWidth(m_lineWidth); + m_lineWidthDirty = false; + } +} + +} // namespace graphics::vulkan + + + +namespace graphics::vulkan { + +// ========== gr_screen function pointer implementations ========== + +void vulkan_zbias(int bias) +{ + auto* stateTracker = getStateTracker(); + auto* drawManager = getDrawManager(); + + if (bias) { + drawManager->setDepthBiasEnabled(true); + if (bias < 0) { + stateTracker->setDepthBias(1.0f, static_cast(-bias)); + } else { + stateTracker->setDepthBias(0.0f, static_cast(-bias)); + } + } else { + drawManager->setDepthBiasEnabled(false); + stateTracker->setDepthBias(0.0f, 0.0f); + } +} + +int vulkan_alpha_mask_set(int mode, float alpha) +{ + if (mode) { + getStateTracker()->setAlphaThreshold(alpha); + } else { + getStateTracker()->setAlphaThreshold(0.0f); + } + return mode; +} + +void vulkan_set_viewport(int x, int y, int width, int height) +{ + auto* stateTracker = getStateTracker(); + if (gr_screen.rendering_to_texture == -1) { + // Screen rendering: use negative viewport height for OpenGL-compatible Y-up NDC + // (VK_KHR_maintenance1, core since Vulkan 1.1) + stateTracker->setViewport( + static_cast(x), + static_cast(gr_screen.max_h - y), + static_cast(width), + static_cast(-height)); + } else { 
+ // RTT: standard positive viewport (RTT projection matrix handles Y-flip) + stateTracker->setViewport( + static_cast(x), static_cast(y), + static_cast(width), static_cast(height)); + } +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanState.h b/code/graphics/vulkan/VulkanState.h new file mode 100644 index 00000000000..9f512377665 --- /dev/null +++ b/code/graphics/vulkan/VulkanState.h @@ -0,0 +1,261 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "graphics/material.h" + +#include "VulkanPipeline.h" +#include "VulkanDescriptorManager.h" + +#include + + +namespace graphics::vulkan { + +/** + * @brief Tracks current Vulkan render state + * + * Unlike OpenGL where state is set globally, Vulkan requires explicit + * command buffer recording. This class tracks what state has been set + * and what needs to be updated before draw calls. + */ +class VulkanStateTracker { +public: + VulkanStateTracker() = default; + ~VulkanStateTracker() = default; + + // Non-copyable + VulkanStateTracker(const VulkanStateTracker&) = delete; + VulkanStateTracker& operator=(const VulkanStateTracker&) = delete; + + /** + * @brief Initialize state tracker + */ + bool init(vk::Device device); + + /** + * @brief Shutdown and release resources + */ + void shutdown(); + + /** + * @brief Begin recording for a new frame + * @param cmdBuffer Command buffer to record to + */ + void beginFrame(vk::CommandBuffer cmdBuffer); + + /** + * @brief End frame recording + */ + void endFrame(); + + /** + * @brief Set the current render pass + */ + void setRenderPass(vk::RenderPass renderPass, uint32_t subpass = 0); + + /** + * @brief Get current render pass + */ + vk::RenderPass getCurrentRenderPass() const { return m_currentRenderPass; } + + // ========== Dynamic State ========== + + /** + * @brief Set viewport (dynamic state) + */ + void setViewport(float x, float y, float width, float height, float minDepth = 0.0f, float maxDepth = 1.0f); + 
+ /** + * @brief Set scissor rectangle (dynamic state) + */ + void setScissor(int32_t x, int32_t y, uint32_t width, uint32_t height); + + /** + * @brief Enable or disable scissor test + */ + void setScissorEnabled(bool enabled); + + /** + * @brief Set depth bias (dynamic state) + */ + void setDepthBias(float constantFactor, float slopeFactor); + + /** + * @brief Set stencil reference value (dynamic state) + */ + void setStencilReference(uint32_t reference); + + /** + * @brief Set line width (dynamic state) + */ + void setLineWidth(float width); + + // ========== Pipeline State ========== + + /** + * @brief Bind a pipeline + */ + void bindPipeline(vk::Pipeline pipeline, vk::PipelineLayout layout); + + /** + * @brief Get currently bound pipeline + */ + vk::Pipeline getCurrentPipeline() const { return m_currentPipeline; } + + /** + * @brief Get current pipeline layout + */ + vk::PipelineLayout getCurrentPipelineLayout() const { return m_currentPipelineLayout; } + + // ========== Descriptor State ========== + + /** + * @brief Bind descriptor set + */ + void bindDescriptorSet(DescriptorSetIndex setIndex, vk::DescriptorSet set, + const SCP_vector& dynamicOffsets = {}); + + // ========== Buffer Binding ========== + + /** + * @brief Bind vertex buffer + */ + void bindVertexBuffer(uint32_t binding, vk::Buffer buffer, vk::DeviceSize offset = 0); + + /** + * @brief Bind index buffer + */ + void bindIndexBuffer(vk::Buffer buffer, vk::DeviceSize offset, vk::IndexType indexType); + + // ========== State Queries ========== + + /** + * @brief Get current command buffer. + * Asserts if no command buffer is active — rendering outside a frame is always a bug. 
+ */ + vk::CommandBuffer getCommandBuffer() const { + Assertion(m_cmdBuffer, "No active command buffer — rendering outside a frame?"); + return m_cmdBuffer; + } + + /** + * @brief Check if scissor test is enabled + */ + bool isScissorEnabled() const { return m_scissorEnabled; } + + // ========== Clear Operations ========== + + /** + * @brief Set clear color for next clear operation + */ + void setClearColor(float r, float g, float b, float a); + + /** + * @brief Get current clear color + */ + const vk::ClearColorValue& getClearColor() const { return m_clearColor; } + + // ========== Render State Tracking ========== + + /** + * @brief Set current zbuffer mode (for tracking) + */ + void setZBufferMode(gr_zbuffer_type mode) { m_zbufferMode = mode; } + gr_zbuffer_type getZBufferMode() const { return m_zbufferMode; } + + /** + * @brief Set current stencil mode (for tracking) + */ + void setStencilMode(int mode) { m_stencilMode = mode; } + int getStencilMode() const { return m_stencilMode; } + + /** + * @brief Set current cull mode (for tracking) + */ + void setCullMode(bool enabled) { m_cullEnabled = enabled; } + bool getCullMode() const { return m_cullEnabled; } + + void setAlphaThreshold(float threshold) { m_alphaThreshold = threshold; } + float getAlphaThreshold() const { return m_alphaThreshold; } + + /** + * @brief Set color attachment count for current render pass + */ + void setColorAttachmentCount(uint32_t count) { m_colorAttachmentCount = count; } + uint32_t getColorAttachmentCount() const { return m_colorAttachmentCount; } + + /** + * @brief Set current MSAA sample count for pipeline creation + */ + void setCurrentSampleCount(vk::SampleCountFlagBits count) { m_currentSampleCount = count; } + vk::SampleCountFlagBits getCurrentSampleCount() const { return m_currentSampleCount; } + + /** + * @brief Apply pending dynamic state to command buffer + * + * Must be called before every draw command to ensure dirty dynamic state + * (viewport, scissor, depth bias, 
stencil ref, line width) is flushed. + * applyMaterial() sets depth bias/stencil AFTER bindPipeline(), so if + * the pipeline didn't change, those changes would be lost without this. + */ + void applyDynamicState(); + +private: + + vk::Device m_device; + vk::CommandBuffer m_cmdBuffer; + + // Current render pass state + vk::RenderPass m_currentRenderPass; + uint32_t m_currentSubpass = 0; + + // Current pipeline state + vk::Pipeline m_currentPipeline; + vk::PipelineLayout m_currentPipelineLayout; + + // Descriptor sets + std::array(DescriptorSetIndex::Count)> m_boundDescriptorSets; + + // Dynamic state + vk::Viewport m_viewport; + vk::Rect2D m_scissor; + bool m_scissorEnabled = false; + float m_depthBiasConstant = 0.0f; + float m_depthBiasSlope = 0.0f; + uint32_t m_stencilReference = 0; + float m_lineWidth = 1.0f; + + // Dirty flags for dynamic state + bool m_viewportDirty = true; + bool m_scissorDirty = true; + bool m_depthBiasDirty = false; + bool m_stencilRefDirty = false; + bool m_lineWidthDirty = false; + + // Clear values + vk::ClearColorValue m_clearColor; + + // Render state tracking (for FSO compatibility) + gr_zbuffer_type m_zbufferMode = ZBUFFER_TYPE_NONE; + int m_stencilMode = 0; + bool m_cullEnabled = true; + float m_alphaThreshold = 0.0f; + uint32_t m_colorAttachmentCount = 1; + vk::SampleCountFlagBits m_currentSampleCount = vk::SampleCountFlagBits::e1; + + bool m_initialized = false; +}; + +// Global state tracker access +VulkanStateTracker* getStateTracker(); +void setStateTracker(VulkanStateTracker* tracker); + +// ========== gr_screen function pointer implementations ========== + +void vulkan_zbias(int bias); +int vulkan_alpha_mask_set(int mode, float alpha); +void vulkan_set_viewport(int x, int y, int width, int height); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanTexture.cpp b/code/graphics/vulkan/VulkanTexture.cpp new file mode 100644 index 00000000000..e687403ae7e --- /dev/null +++ 
b/code/graphics/vulkan/VulkanTexture.cpp @@ -0,0 +1,2416 @@ +#include "VulkanTexture.h" + +#include +#include "VulkanBuffer.h" +#include "VulkanDeletionQueue.h" +#include "VulkanRenderer.h" +#include "gr_vulkan.h" + +#include "bmpman/bmpman.h" +#include "ddsutils/ddsutils.h" +#include "globalincs/systemvars.h" + + +namespace graphics::vulkan { + +namespace { +VulkanTextureManager* g_textureManager = nullptr; + +// Geometry/format description shared by every texture-upload path (single 2D, +// animation array, cubemap). Captures exactly what the staging-size, copy-region +// and per-mip math need, so those calculations live in one place instead of +// being copy-pasted into each upload function. +struct TextureUploadLayout { + uint32_t width = 0; + uint32_t height = 0; + uint32_t mipLevels = 1; + bool isCompressed = false; + size_t blockSize = 0; // compressed block size (DXT1=8, others=16) + size_t dstBytesPerPixel = 0; // uncompressed destination bpp (24bpp stored as 4) +}; + +// Byte size of a single mip level for one layer. +size_t mipLevelSize(const TextureUploadLayout& l, uint32_t mipW, uint32_t mipH) +{ + if (l.isCompressed) { + return dds_compressed_mip_size(static_cast(mipW), static_cast(mipH), + static_cast(l.blockSize)); + } + return static_cast(mipW) * mipH * l.dstBytesPerPixel; +} + +// Total bytes occupied by one layer (all mip levels). Matches the staging +// layout produced by appendLayerCopyRegions(). +size_t layerByteSize(const TextureUploadLayout& l) +{ + size_t total = 0; + uint32_t mipW = l.width; + uint32_t mipH = l.height; + for (uint32_t m = 0; m < l.mipLevels; ++m) { + total += mipLevelSize(l, mipW, mipH); + mipW = std::max(1u, mipW / 2); + mipH = std::max(1u, mipH / 2); + } + return total; +} + +// Append one vk::BufferImageCopy per mip level for a single array layer/face, +// starting at layerBufferOffset. Returns the number of bytes the layer occupies +// (so callers can advance their staging offset). 
The regions depend only on the +// layout, not on the pixel data, so this is safe to call even when a frame's +// data could not be locked. +size_t appendLayerCopyRegions(SCP_vector& regions, + const TextureUploadLayout& l, uint32_t layerIndex, size_t layerBufferOffset) +{ + uint32_t mipW = l.width; + uint32_t mipH = l.height; + size_t offset = layerBufferOffset; + for (uint32_t m = 0; m < l.mipLevels; ++m) { + vk::BufferImageCopy region; + region.bufferOffset = static_cast(offset); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = m; + region.imageSubresource.baseArrayLayer = layerIndex; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(mipW, mipH, 1); + regions.push_back(region); + + offset += mipLevelSize(l, mipW, mipH); + mipW = std::max(1u, mipW / 2); + mipH = std::max(1u, mipH / 2); + } + return offset - layerBufferOffset; +} + +// Expand 24bpp BGR pixel data to 32bpp BGRA (alpha forced to 255). Vulkan does +// not support 24bpp optimal-tiling formats, so every upload path widens. 
+void expandBgrToBgra(uint8_t* dst, const uint8_t* src, size_t pixelCount) +{ + for (size_t i = 0; i < pixelCount; ++i) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = 255; + src += 3; + dst += 4; + } +} +} // namespace + +VulkanTextureManager* getTextureManager() +{ + Assertion(g_textureManager != nullptr, "Vulkan TextureManager not initialized!"); + return g_textureManager; +} + +void setTextureManager(VulkanTextureManager* manager) +{ + g_textureManager = manager; +} + +// tcache_slot_vulkan implementation + +void tcache_slot_vulkan::reset() +{ + image = nullptr; + imageView = nullptr; + allocation = VulkanAllocation(); + format = vk::Format::eUndefined; + currentLayout = vk::ImageLayout::eUndefined; + width = 0; + height = 0; + mipLevels = 1; + arrayLayers = 1; + bpp = 0; + bitmapHandle = -1; + arrayIndex = 0; + used = false; + framebuffer = nullptr; + framebufferView = nullptr; + renderPass = nullptr; + isRenderTarget = false; + is3D = false; + depth = 1; + isCubemap = false; + for (auto& v : cubeFaceViews) v = nullptr; + for (auto& fb : cubeFaceFramebuffers) fb = nullptr; + cubeImageView = nullptr; + uScale = 1.0f; + vScale = 1.0f; +} + +// VulkanTextureManager implementation + +VulkanTextureManager::VulkanTextureManager() = default; + +VulkanTextureManager::~VulkanTextureManager() +{ + if (m_initialized) { + shutdown(); + } +} + +bool VulkanTextureManager::init(vk::Device device, vk::PhysicalDevice physicalDevice, + VulkanMemoryManager* memoryManager, + vk::CommandPool commandPool, vk::Queue graphicsQueue) +{ + if (m_initialized) { + mprintf(("VulkanTextureManager::init called when already initialized!\n")); + return false; + } + + m_device = device; + m_physicalDevice = physicalDevice; + m_memoryManager = memoryManager; + m_commandPool = commandPool; + m_graphicsQueue = graphicsQueue; + + // Query device limits + auto properties = physicalDevice.getProperties(); + m_maxTextureSize = properties.limits.maxImageDimension2D; + 
m_maxAnisotropy = properties.limits.maxSamplerAnisotropy; + + mprintf(("Vulkan Texture Manager initialized\n")); + mprintf((" Max texture size: %u\n", m_maxTextureSize)); + mprintf((" Max anisotropy: %.1f\n", m_maxAnisotropy)); + + // Create default sampler + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = vk::Filter::eLinear; + samplerInfo.minFilter = vk::Filter::eLinear; + // Use ClampToEdge by default to match OpenGL's behavior for UI/interface textures. + // OpenGL creates all textures with GL_CLAMP_TO_EDGE and only switches to GL_REPEAT + // for 3D model textures at bind time (excluding AABITMAP, INTERFACE, CUBEMAP types). + // Using eRepeat here causes visible 1-pixel seams on UI bitmaps where edge texels + // blend with the opposite edge via linear filtering. + samplerInfo.addressModeU = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeV = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeW = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.anisotropyEnable = (m_maxAnisotropy > 1.0f); + samplerInfo.maxAnisotropy = m_maxAnisotropy; + samplerInfo.borderColor = vk::BorderColor::eIntOpaqueBlack; + samplerInfo.unnormalizedCoordinates = false; + samplerInfo.compareEnable = false; + samplerInfo.compareOp = vk::CompareOp::eAlways; + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = VK_LOD_CLAMP_NONE; + + try { + m_defaultSampler = m_device.createSampler(samplerInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create default sampler: %s\n", e.what())); + return false; + } + + // Create 1x1 white fallback textures for unbound descriptor slots + if (!createFallbackTexture(m_fallback2DArrayTexture, m_fallback2DArrayAllocation, + m_fallback2DArrayView, ImageViewType::Array2D)) { + return false; + } + if (!createFallbackTexture(m_fallbackTexture2D, m_fallbackTexture2DAllocation, + m_fallbackTextureView2D, 
ImageViewType::Plain2D)) { + return false; + } + if (!createFallbackTexture(m_fallbackCubeTexture, m_fallbackCubeAllocation, + m_fallbackCubeView, ImageViewType::Cube, 6, true)) { + return false; + } + if (!createFallbackTexture(m_fallback3DTexture, m_fallback3DAllocation, + m_fallback3DView, ImageViewType::Volume3D, 1, false, vk::ImageType::e3D)) { + return false; + } + + m_initialized = true; + return true; +} + +void VulkanTextureManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Destroy fallback 3D texture + if (m_fallback3DView) { + m_device.destroyImageView(m_fallback3DView); + m_fallback3DView = nullptr; + } + if (m_fallback3DTexture) { + m_device.destroyImage(m_fallback3DTexture); + m_fallback3DTexture = nullptr; + } + if (m_fallback3DAllocation.isValid()) { + m_memoryManager->freeAllocation(m_fallback3DAllocation); + } + + // Destroy fallback cubemap + if (m_fallbackCubeView) { + m_device.destroyImageView(m_fallbackCubeView); + m_fallbackCubeView = nullptr; + } + if (m_fallbackCubeTexture) { + m_device.destroyImage(m_fallbackCubeTexture); + m_fallbackCubeTexture = nullptr; + } + if (m_fallbackCubeAllocation.isValid()) { + m_memoryManager->freeAllocation(m_fallbackCubeAllocation); + } + + // Destroy fallback textures + if (m_fallbackTextureView2D) { + m_device.destroyImageView(m_fallbackTextureView2D); + m_fallbackTextureView2D = nullptr; + } + if (m_fallbackTexture2D) { + m_device.destroyImage(m_fallbackTexture2D); + m_fallbackTexture2D = nullptr; + } + if (m_fallbackTexture2DAllocation.isValid()) { + m_memoryManager->freeAllocation(m_fallbackTexture2DAllocation); + } + if (m_fallback2DArrayView) { + m_device.destroyImageView(m_fallback2DArrayView); + m_fallback2DArrayView = nullptr; + } + if (m_fallback2DArrayTexture) { + m_device.destroyImage(m_fallback2DArrayTexture); + m_fallback2DArrayTexture = nullptr; + } + if (m_fallback2DArrayAllocation.isValid()) { + m_memoryManager->freeAllocation(m_fallback2DArrayAllocation); + } + + // Destroy 
samplers + if (m_defaultSampler) { + m_device.destroySampler(m_defaultSampler); + m_defaultSampler = nullptr; + } + + for (auto& pair : m_samplerCache) { + m_device.destroySampler(pair.second); + } + m_samplerCache.clear(); + + m_initialized = false; + mprintf(("Vulkan Texture Manager shutdown\n")); +} + +void VulkanTextureManager::flushTextures() const +{ + if (!m_initialized) { + return; + } + + int flushed = 0; + auto* deletionQueue = getDeletionQueue(); + + for (auto& block : bm_blocks) { + for (auto& slot : block) { + if (!slot.gr_info) { + continue; + } + + auto* ts = static_cast(slot.gr_info); + + if (!ts->image) { + continue; + } + + // Skip render targets — they are managed by the post-processor + // and scene texture system, not by the bitmap paging system + if (ts->isRenderTarget) { + continue; + } + + // For shared animation texture arrays: mark this frame as unused. + // Only destroy the actual image when no frame references it. + if (ts->arrayLayers > 1 && ts->bitmapHandle >= 0) { + ts->used = false; + + int baseFrame = ts->bitmapHandle - static_cast(ts->arrayIndex); + int numFrames = static_cast(ts->arrayLayers); + vk::Image sharedImage = ts->image; + + bool anyInUse = false; + for (int f = baseFrame; f < baseFrame + numFrames; f++) { + if (f == ts->bitmapHandle) { + continue; + } + auto* fSlot = bm_get_slot(f, true); + if (fSlot && fSlot->gr_info) { + auto* fTs = static_cast(fSlot->gr_info); + if (fTs->used && fTs->image == sharedImage) { + anyInUse = true; + break; + } + } + } + if (anyInUse) { + // Other frames still reference — just detach this slot + ts->image = nullptr; + ts->imageView = nullptr; + ts->allocation = VulkanAllocation{}; + ts->reset(); + continue; + } + // Last reference — fall through to destroy + } + + // Queue deferred destruction of GPU resources + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + } + if (ts->image) { + deletionQueue->queueImage(ts->image, ts->allocation); + } + + ts->reset(); + flushed++; 
+ } + } + + mprintf(("VulkanTextureManager: Flushed %d textures for level transition\n", flushed)); +} + +void VulkanTextureManager::bm_init(bitmap_slot* slot) const +{ + if (!m_initialized || !slot) { + return; + } + + // Allocate Vulkan-specific data + if (slot->gr_info == nullptr) { + slot->gr_info = new tcache_slot_vulkan(); + } else { + static_cast(slot->gr_info)->reset(); + } +} + +void VulkanTextureManager::bm_create(bitmap_slot* slot) const +{ + if (!m_initialized || !slot) { + return; + } + + // Ensure gr_info is allocated + if (slot->gr_info == nullptr) { + slot->gr_info = new tcache_slot_vulkan(); + } +} + +void VulkanTextureManager::bm_free_data(bitmap_slot* slot, bool release) const +{ + if (!m_initialized || !slot || !slot->gr_info) { + return; + } + + auto* ts = static_cast(slot->gr_info); + auto* deletionQueue = getDeletionQueue(); + + // For shared animation texture arrays: check if any other frame still needs the image. + // We compute base frame from slot data (bitmapHandle - arrayIndex) rather than calling + // bm_get_base_frame(), because during shutdown/mission-unload the bitmap entries may + // already be cleaned up, causing bm_get_base_frame() to return -1. That would skip + // ref-counting and every frame slot would independently queue the same shared resources + // for destruction (double-free). 
+ if (ts->arrayLayers > 1 && ts->bitmapHandle >= 0) { + ts->used = false; + + int baseFrame = ts->bitmapHandle - static_cast(ts->arrayIndex); + int numFrames = static_cast(ts->arrayLayers); + vk::Image sharedImage = ts->image; + + bool anyInUse = false; + for (int f = baseFrame; f < baseFrame + numFrames; f++) { + if (f == ts->bitmapHandle) { + continue; // skip self (already marked unused) + } + auto* fSlot = bm_get_slot(f, true); + if (fSlot && fSlot->gr_info) { + auto* fTs = static_cast(fSlot->gr_info); + if (fTs->used && fTs->image == sharedImage) { + anyInUse = true; + break; + } + } + } + if (anyInUse) { + // Other frames still use the shared image — just detach this slot + ts->image = nullptr; + ts->imageView = nullptr; + ts->allocation = VulkanAllocation{}; + ts->reset(); + if (release) { + delete ts; + slot->gr_info = nullptr; + } + return; + } + // No frames in use — fall through to destroy the shared image + } + + // Queue resources for deferred destruction to avoid destroying + // resources that may still be referenced by in-flight command buffers + + // Cubemap per-face framebuffers and views (must be before ts->framebuffer + // since framebuffer may alias cubeFaceFramebuffers[0]) + for (auto& fb : ts->cubeFaceFramebuffers) { + if (fb) { + deletionQueue->queueFramebuffer(fb); + fb = nullptr; + } + } + for (auto& v : ts->cubeFaceViews) { + if (v) { + deletionQueue->queueImageView(v); + v = nullptr; + } + } + if (ts->cubeImageView) { + deletionQueue->queueImageView(ts->cubeImageView); + ts->cubeImageView = nullptr; + } + // If framebuffer was aliased to cubeFaceFramebuffers[0], it's already cleaned up + if (ts->isCubemap) { + ts->framebuffer = nullptr; + } + + if (ts->framebuffer) { + deletionQueue->queueFramebuffer(ts->framebuffer); + ts->framebuffer = nullptr; + } + + if (ts->renderPass) { + deletionQueue->queueRenderPass(ts->renderPass); + ts->renderPass = nullptr; + } + + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + 
ts->imageView = nullptr; + } + + if (ts->framebufferView) { + deletionQueue->queueImageView(ts->framebufferView); + ts->framebufferView = nullptr; + } + + if (ts->image) { + deletionQueue->queueImage(ts->image, ts->allocation); + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; // Clear to prevent double-free + } + + ts->reset(); + + if (release) { + delete ts; + slot->gr_info = nullptr; + } +} + +bool VulkanTextureManager::uploadAnimationFrames(int handle, bitmap* bm, int compType, + int baseFrame, int numFrames) +{ + mprintf(("VulkanTexture: Uploading animation array: base=%d numFrames=%d triggered by handle=%d\n", + baseFrame, numFrames, handle)); + + // Get dimensions and format from the triggering frame's bitmap + auto width = static_cast(bm->w); + auto height = static_cast(bm->h); + auto arrayLayerCount = static_cast(numFrames); + + bool isCompressed = (compType == DDS_DXT1 || compType == DDS_DXT3 || + compType == DDS_DXT5 || compType == DDS_BC7); + + // Determine format + vk::Format format; + if (isCompressed) { + format = bppToVkFormat(bm->bpp, true, compType); + } else { + format = bppToVkFormat(bm->bpp); + } + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTexture: uploadAnimationFrames: unsupported format bpp=%d compType=%d\n", + bm->bpp, compType)); + return false; + } + + // Each animation frame is one array layer; they all share this layout. + uint32_t mipLevels = 1; + if (isCompressed) { + mipLevels = static_cast(bm_get_num_mipmaps(handle)); + mipLevels = std::max(mipLevels, 1); + } + + TextureUploadLayout layout; + layout.width = width; + layout.height = height; + layout.mipLevels = mipLevels; + layout.isCompressed = isCompressed; + layout.blockSize = isCompressed ? dds_block_size(compType) : 0; + layout.dstBytesPerPixel = (bm->bpp == 24) ? 
4 : (bm->bpp / 8); + + size_t layerDataSize = layerByteSize(layout); + size_t totalDataSize = layerDataSize * arrayLayerCount; + + // Create multi-layer image + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; + vk::Image image; + VulkanAllocation allocation; + + if (!createImage(width, height, mipLevels, format, vk::ImageTiling::eOptimal, + usage, MemoryUsage::GpuOnly, image, allocation, arrayLayerCount)) { + mprintf(("VulkanTexture: uploadAnimationFrames: failed to create %ux%u x%d array image\n", + width, height, numFrames)); + return false; + } + + // Create multi-layer image view + vk::ImageView imageView = createImageView(image, format, + vk::ImageAspectFlagBits::eColor, mipLevels, ImageViewType::Array2D, arrayLayerCount); + if (!imageView) { + mprintf(("VulkanTexture: uploadAnimationFrames: failed to create image view\n")); + m_device.destroyImage(image); + m_memoryManager->freeAllocation(allocation); + return false; + } + + // Create staging buffer for all layers + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + if (!createStagingBuffer(totalDataSize, stagingBuffer, stagingAllocation)) { + m_device.destroyImageView(imageView); + m_device.destroyImage(image); + m_memoryManager->freeAllocation(allocation); + return false; + } + + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + if (!mapped) { + m_memoryManager->freeAllocation(stagingAllocation); + m_device.destroyBuffer(stagingBuffer); + m_device.destroyImageView(imageView); + m_device.destroyImage(image); + m_memoryManager->freeAllocation(allocation); + return false; + } + + // Build per-layer copy regions and upload each frame's data + SCP_vector copyRegions; + + // Use the same lock parameters that were used for the triggering frame. + // bm->flags contains the lock flags (BMP_AABITMAP, BMP_TEX_OTHER, BMP_TEX_DXT*, etc.) + // bm->bpp contains the requested bpp. 
Using these ensures all frames are locked + // consistently (e.g., 8bpp for aabitmaps, 32bpp for RGBA textures). + auto lockBpp = bm->bpp; + auto lockFlags = bm->flags; + + // Set guard flag to make recursive bm_data calls no-ops + m_uploadingAnimation = true; + + for (int frame = baseFrame; frame < baseFrame + numFrames; frame++) { + auto layerIndex = static_cast(frame - baseFrame); + size_t layerOffset = layerIndex * layerDataSize; + uint8_t* dst = static_cast(mapped) + layerOffset; + + // Copy regions depend only on the layout, not the pixel data, so build + // them up front — even a frame that fails to lock still occupies its layer. + appendLayerCopyRegions(copyRegions, layout, layerIndex, layerOffset); + + bitmap* frameBm; + bool needUnlock = false; + + if (frame == handle) { + // This is the frame that triggered us — use the passed bitmap directly + frameBm = bm; + } else { + // Lock this frame to get its data + frameBm = bm_lock(frame, lockBpp, lockFlags); + if (!frameBm) { + mprintf(("VulkanTexture: uploadAnimationFrames: failed to lock frame %d\n", frame)); + // Fill with zeros to avoid undefined data + memset(dst, 0, layerDataSize); + continue; + } + needUnlock = true; + } + + // Copy frame data to staging buffer + if (!isCompressed && frameBm->bpp == 24) { + expandBgrToBgra(dst, reinterpret_cast(frameBm->data), + static_cast(width) * height); + } else { + memcpy(dst, reinterpret_cast(frameBm->data), layerDataSize); + } + + if (needUnlock) { + bm_unlock(frame); + } + } + + m_uploadingAnimation = false; + + // Flush staging buffer + m_memoryManager->flushMemory(stagingAllocation, 0, totalDataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, image, stagingBuffer, format, width, height, + mipLevels, vk::ImageLayout::eUndefined, false, copyRegions, + arrayLayerCount); + submitUploadAsync(cmd, stagingBuffer, 
stagingAllocation); + + // Store shared image in ALL frame slots + for (int frame = baseFrame; frame < baseFrame + numFrames; frame++) { + int layerIndex = frame - baseFrame; + auto* frameSlot = bm_get_slot(frame, true); + if (!frameSlot) { + continue; + } + if (!frameSlot->gr_info) { + bm_init(frameSlot); + } + auto* ts = static_cast(frameSlot->gr_info); + + // Defer destruction of any existing image in this slot + if (ts->image && ts->arrayLayers <= 1) { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + } + deletionQueue->queueImage(ts->image, ts->allocation); + } + + ts->image = image; + ts->imageView = imageView; + ts->allocation = allocation; + ts->width = width; + ts->height = height; + ts->format = format; + ts->mipLevels = mipLevels; + ts->bpp = bm->bpp; + ts->arrayLayers = arrayLayerCount; + ts->arrayIndex = static_cast(layerIndex); + ts->bitmapHandle = frame; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + } + + mprintf(("VulkanTexture: Animation array uploaded: %ux%u x%d layers, %zu bytes total\n", + width, height, numFrames, totalDataSize)); + return true; +} + +bool VulkanTextureManager::uploadCubemap(int handle, bitmap* bm, int compType) +{ + mprintf(("VulkanTexture: Uploading cubemap: handle=%d w=%d h=%d compType=%d\n", + handle, bm->w, bm->h, compType)); + + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return false; + } + if (!slot->gr_info) { + bm_init(slot); + } + auto* ts = static_cast(slot->gr_info); + + auto faceW = static_cast(bm->w); + auto faceH = static_cast(bm->h); + + // Map cubemap DDS compression types to base types + int baseCompType = compType; + if (compType == DDS_CUBEMAP_DXT1) baseCompType = DDS_DXT1; + else if (compType == DDS_CUBEMAP_DXT3) baseCompType = DDS_DXT3; + else if (compType == DDS_CUBEMAP_DXT5) baseCompType = DDS_DXT5; + + bool isCompressed = (baseCompType == 
DDS_DXT1 || baseCompType == DDS_DXT3 || + baseCompType == DDS_DXT5 || baseCompType == DDS_BC7); + + vk::Format format; + if (isCompressed) { + format = bppToVkFormat(bm->bpp, true, baseCompType); + } else { + format = bppToVkFormat(bm->bpp); + } + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTexture: uploadCubemap: unsupported format\n")); + return false; + } + + uint32_t mipLevels = 1; + size_t blockSize = 0; + + if (isCompressed) { + blockSize = dds_block_size(baseCompType); + mipLevels = static_cast(bm_get_num_mipmaps(handle)); + mipLevels = std::max(mipLevels, 1); + } + + // A cubemap is six array layers; each face has the same layout. + TextureUploadLayout layout; + layout.width = faceW; + layout.height = faceH; + layout.mipLevels = mipLevels; + layout.isCompressed = isCompressed; + layout.blockSize = blockSize; + layout.dstBytesPerPixel = (bm->bpp == 24) ? 4 : (bm->bpp / 8); + + size_t perFaceSize = layerByteSize(layout); + size_t totalDataSize = perFaceSize * 6; + + // Defer destruction of existing resources + if (ts->image) { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + ts->imageView = nullptr; + } + deletionQueue->queueImage(ts->image, ts->allocation); + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } + + // Create cubemap image (6 layers, eCubeCompatible) + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; + if (!createImage(faceW, faceH, mipLevels, format, vk::ImageTiling::eOptimal, + usage, MemoryUsage::GpuOnly, ts->image, ts->allocation, 6, true)) { + mprintf(("VulkanTexture: uploadCubemap: failed to create cubemap image\n")); + return false; + } + + // Create cubemap image view (samplerCube) + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, + mipLevels, ImageViewType::Cube, 6); + if (!ts->imageView) { + mprintf(("VulkanTexture: uploadCubemap: failed to create cube 
image view\n")); + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + // Create staging buffer + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + if (!createStagingBuffer(totalDataSize, stagingBuffer, stagingAllocation)) { + m_device.destroyImageView(ts->imageView); + ts->imageView = nullptr; + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + if (!mapped) { + m_memoryManager->freeAllocation(stagingAllocation); + m_device.destroyBuffer(stagingBuffer); + return false; + } + + // Copy data to staging buffer + // DDS cubemap data layout: face0[mip0..mipN], face1[mip0..mipN], ..., face5[mip0..mipN] + if (!isCompressed && bm->bpp == 24) { + // Convert BGR to BGRA for all 6 faces + expandBgrToBgra(static_cast(mapped), + reinterpret_cast(bm->data), static_cast(faceW) * faceH * 6); + } else { + memcpy(mapped, reinterpret_cast(bm->data), totalDataSize); + } + + // Build per-face, per-mip copy regions (each face is one array layer) + SCP_vector copyRegions; + size_t bufferOffset = 0; + for (uint32_t face = 0; face < 6; face++) { + bufferOffset += appendLayerCopyRegions(copyRegions, layout, face, bufferOffset); + } + + m_memoryManager->flushMemory(stagingAllocation, 0, totalDataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, ts->image, stagingBuffer, format, faceW, faceH, + mipLevels, vk::ImageLayout::eUndefined, false, copyRegions, 6); + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update slot info + ts->width = faceW; + ts->height = faceH; + ts->format = format; + ts->mipLevels = mipLevels; + ts->bpp = bm->bpp; + ts->arrayLayers = 6; + ts->bitmapHandle = handle; + 
ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->isCubemap = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + + mprintf(("VulkanTexture: Cubemap uploaded: %ux%u, %u mips, format=%d\n", + faceW, faceH, mipLevels, static_cast(format))); + return true; +} + +bool VulkanTextureManager::upload3DTexture(int handle, bitmap* bm, int texDepth) +{ + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return false; + } + + if (!slot->gr_info) { + bm_init(slot); + } + + auto* ts = static_cast(slot->gr_info); + + auto width = static_cast(bm->w); + auto height = static_cast(bm->h); + auto depth3D = static_cast(texDepth); + + // 3D textures are always 32bpp RGBA uncompressed, single mip + vk::Format format = vk::Format::eR8G8B8A8Unorm; + size_t dataSize = width * height * depth3D * 4; + + // Defer destruction of existing resources + if (ts->image) { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + ts->imageView = nullptr; + } + deletionQueue->queueImage(ts->image, ts->allocation); + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } + + // Create 3D image + if (!createImage(width, height, 1, format, vk::ImageTiling::eOptimal, + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, + MemoryUsage::GpuOnly, ts->image, ts->allocation, + 1, false, depth3D, vk::ImageType::e3D)) { + mprintf(("Failed to create 3D texture image!\n")); + return false; + } + + // Create 3D image view + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, 1, ImageViewType::Volume3D); + if (!ts->imageView) { + mprintf(("Failed to create 3D texture image view!\n")); + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + // Create staging buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = dataSize; + bufferInfo.usage = 
vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create staging buffer for 3D texture: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuOnly, stagingAllocation)) { + m_device.destroyBuffer(stagingBuffer); + return false; + } + + // Copy data to staging buffer + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + Verify(mapped); + memcpy(mapped, reinterpret_cast(bm->data), dataSize); + m_memoryManager->flushMemory(stagingAllocation, 0, dataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy and submit + vk::CommandBuffer cmd = beginSingleTimeCommands(); + + // Transition: eUndefined → eTransferDstOptimal + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.oldLayout = vk::ImageLayout::eUndefined; + barrier.newLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = ts->image; + barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, barrier); + + // Copy buffer to 3D image + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = 
vk::Extent3D(width, height, depth3D); + + cmd.copyBufferToImage(stagingBuffer, ts->image, vk::ImageLayout::eTransferDstOptimal, region); + + // Transition: eTransferDstOptimal → eShaderReadOnlyOptimal + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, nullptr, nullptr, barrier); + + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update slot info + ts->width = width; + ts->height = height; + ts->depth = depth3D; + ts->is3D = true; + ts->format = format; + ts->mipLevels = 1; + ts->bpp = 32; + ts->bitmapHandle = handle; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + + mprintf(("VulkanTexture: 3D texture uploaded: %ux%ux%u, format=%d\n", + width, height, depth3D, static_cast(format))); + return true; +} + +bool VulkanTextureManager::bm_data(int handle, bitmap* bm, int compType) +{ + static int callCount = 0; + if (callCount < 20) { + mprintf(("VulkanTextureManager::bm_data #%d: handle=%d bm=%p bm->data=%p compType=%d\n", + callCount++, handle, bm, bm ? 
reinterpret_cast(bm->data) : nullptr, compType)); + } + + if (!m_initialized || !bm || !bm->data) { + return false; + } + + // Guard: nested bm_lock→bm_data calls during animation upload are no-ops + if (m_uploadingAnimation) { + return true; + } + + // Detect animated texture arrays + int numFrames = 0; + int baseFrame = bm_get_base_frame(handle, &numFrames); + if (baseFrame < 0) { + return false; + } + + if (numFrames > 1) { + // Check if the shared image already exists (earlier frame created it) + auto* baseSlot = bm_get_slot(baseFrame, true); + if (baseSlot) { + if (!baseSlot->gr_info) { + bm_init(baseSlot); + } + auto* baseTs = static_cast(baseSlot->gr_info); + if (baseTs->image && baseTs->arrayLayers == static_cast(numFrames)) { + // Share existing image with this frame's slot + auto* slot = bm_get_slot(handle, true); + if (!slot->gr_info) { + bm_init(slot); + } + auto* ts = static_cast(slot->gr_info); + ts->image = baseTs->image; + ts->imageView = baseTs->imageView; + ts->allocation = baseTs->allocation; + ts->width = baseTs->width; + ts->height = baseTs->height; + ts->format = baseTs->format; + ts->mipLevels = baseTs->mipLevels; + ts->bpp = baseTs->bpp; + ts->arrayLayers = baseTs->arrayLayers; + ts->arrayIndex = static_cast(handle - baseFrame); + ts->bitmapHandle = handle; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + return true; + } + } + // First frame requested — create array and upload all frames + return uploadAnimationFrames(handle, bm, compType, baseFrame, numFrames); + } + + // Detect cubemap textures + bool isCubemapUpload = (bm->flags & BMP_TEX_CUBEMAP) != 0; + if (!isCubemapUpload) { + // Also check compression type for cubemap DDS variants + isCubemapUpload = (compType == DDS_CUBEMAP_DXT1 || compType == DDS_CUBEMAP_DXT3 || + compType == DDS_CUBEMAP_DXT5); + } + + if (isCubemapUpload) { + return uploadCubemap(handle, bm, compType); + } + + // Detect 3D textures (volumetric data) + if (bm->d > 1) { + return 
upload3DTexture(handle, bm, bm->d); + } + + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return false; + } + + // Ensure slot is initialized + if (!slot->gr_info) { + bm_init(slot); + } + + auto* ts = static_cast(slot->gr_info); + + auto width = static_cast(bm->w); + auto height = static_cast(bm->h); + uint32_t mipLevels = 1; + bool autoGenerateMips = false; + bool isCompressed = (compType == DDS_DXT1 || compType == DDS_DXT3 || + compType == DDS_DXT5 || compType == DDS_BC7); + + static int fmtLogCount = 0; + if (fmtLogCount < 30) { + mprintf(("VulkanTextureManager::bm_data: handle=%d w=%d h=%d bpp=%d true_bpp=%d flags=0x%x compType=%d\n", + handle, bm->w, bm->h, bm->bpp, bm->true_bpp, bm->flags, compType)); + fmtLogCount++; + } + + // Determine format and data size + vk::Format format; + size_t dataSize; + size_t blockSize = 0; + SCP_vector copyRegions; + + if (isCompressed) { + format = bppToVkFormat(bm->bpp, true, compType); + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTextureManager::bm_data: Unsupported compression type %d\n", compType)); + return false; + } + + blockSize = dds_block_size(compType); + + // Get pre-baked mipmap count from DDS file + mipLevels = static_cast(bm_get_num_mipmaps(handle)); + mipLevels = std::max(mipLevels, 1); + + // Calculate total data size for all mip levels and build copy regions + TextureUploadLayout layout; + layout.width = width; + layout.height = height; + layout.mipLevels = mipLevels; + layout.isCompressed = true; + layout.blockSize = blockSize; + dataSize = appendLayerCopyRegions(copyRegions, layout, 0, 0); + } else { + format = bppToVkFormat(bm->bpp); + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTextureManager::bm_data: Unsupported bpp %d\n", bm->bpp)); + return false; + } + + // 24bpp textures uploaded as 32bpp (Vulkan doesn't support 24bpp optimal tiling) + size_t dstBytesPerPixel = (bm->bpp == 24) ? 
4 : (bm->bpp / 8); + dataSize = width * height * dstBytesPerPixel; + + // Auto-generate mipmaps for textures whose files originally had them. + // This only triggers for uncompressed textures that were originally DDS + // with mipmaps but got decompressed by a non-DDS lock path. + if (width > 4 && height > 4) { + int numMipmaps = bm_get_num_mipmaps(handle); + if (numMipmaps > 1) { + vk::FormatProperties fmtProps = m_physicalDevice.getFormatProperties(format); + if ((fmtProps.optimalTilingFeatures & vk::FormatFeatureFlagBits::eSampledImageFilterLinear) && + (fmtProps.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitSrc) && + (fmtProps.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitDst)) { + mipLevels = calculateMipLevels(width, height); + autoGenerateMips = true; + } + } + } + } + + // If texture already exists with same dimensions, just update data + if (ts->image && ts->width == width && ts->height == height && ts->format == format) { + // Update existing texture - would use staging buffer + // For now, recreate + } + + // Defer destruction of existing resources — they may still be referenced + // by in-flight render or upload command buffers + if (ts->image) { + if (ts->arrayLayers > 1) { + // Shared animation image — just clear references, don't destroy + // (the image is shared with other frame slots) + ts->imageView = nullptr; + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } else { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + ts->imageView = nullptr; + } + deletionQueue->queueImage(ts->image, ts->allocation); + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } + } + + // Create image + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; + if (autoGenerateMips) { + usage |= vk::ImageUsageFlagBits::eTransferSrc; // Needed for vkCmdBlitImage mipmap generation + } + + if (!createImage(width, height, 
mipLevels, format, vk::ImageTiling::eOptimal, + usage, MemoryUsage::GpuOnly, ts->image, ts->allocation)) { + mprintf(("Failed to create texture image!\n")); + return false; + } + + // Create image view (sampler2DArray for regular textures) + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, mipLevels, ImageViewType::Array2D); + if (!ts->imageView) { + mprintf(("Failed to create texture image view!\n")); + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + // Create staging buffer + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + if (!createStagingBuffer(dataSize, stagingBuffer, stagingAllocation)) { + m_device.destroyImageView(ts->imageView); + ts->imageView = nullptr; + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + // Copy data to staging buffer + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + Verify(mapped); + if (isCompressed) { + // Compressed data: copy raw block data directly (includes all mip levels) + memcpy(mapped, reinterpret_cast(bm->data), dataSize); + } else if (bm->bpp == 24) { + expandBgrToBgra(static_cast(mapped), + reinterpret_cast(bm->data), static_cast(width) * height); + } else { + memcpy(mapped, reinterpret_cast(bm->data), dataSize); + } + m_memoryManager->flushMemory(stagingAllocation, 0, dataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy (+ optional mipmap generation) and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, ts->image, stagingBuffer, format, width, height, + mipLevels, vk::ImageLayout::eUndefined, autoGenerateMips, copyRegions); + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update slot info + ts->width = width; + ts->height = height; + ts->format = format; + ts->mipLevels = mipLevels; + 
ts->bpp = bm->bpp; + ts->bitmapHandle = handle; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + + return true; +} + +int VulkanTextureManager::bm_make_render_target(int handle, int* width, int* height, + int* bpp, int* mm_lvl, int flags) +{ + if (!m_initialized || !width || !height) { + return 0; + } + + // Clamp to max size + if (static_cast(*width) > m_maxTextureSize) { + *width = static_cast(m_maxTextureSize); + } + if (static_cast(*height) > m_maxTextureSize) { + *height = static_cast(m_maxTextureSize); + } + + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return 0; + } + + if (!slot->gr_info) { + bm_init(slot); + } + + auto* ts = static_cast(slot->gr_info); + + // Free any existing resources + bm_free_data(slot, false); + + auto w = static_cast(*width); + auto h = static_cast(*height); + uint32_t mipLevels = 1; + + if (flags & BMP_FLAG_RENDER_TARGET_MIPMAP) { + mipLevels = calculateMipLevels(w, h); + } + + bool isCubemapRT = (flags & BMP_FLAG_CUBEMAP) != 0; + uint32_t arrayLayers = isCubemapRT ? 
6 : 1; + vk::Format format = LDR_COLOR_FORMAT; + + // Create image for render target + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eColorAttachment | + vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eTransferSrc; + + if (flags & BMP_FLAG_RENDER_TARGET_MIPMAP) { + usage |= vk::ImageUsageFlagBits::eTransferDst; // For mipmap generation + } + + if (!createImage(w, h, mipLevels, format, vk::ImageTiling::eOptimal, + usage, MemoryUsage::GpuOnly, ts->image, ts->allocation, arrayLayers, isCubemapRT)) { + mprintf(("Failed to create render target image!\n")); + return 0; + } + + if (isCubemapRT) { + // Cubemap render target: create cube view for sampling + per-face 2D views for framebuffer + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, + mipLevels, ImageViewType::Cube, 6); + if (!ts->imageView) { + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + + // Create per-face 2D views for framebuffer attachments + for (size_t face = 0; face < ts->cubeFaceViews.size(); face++) { + ts->cubeFaceViews[face] = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, + 1, ImageViewType::Plain2D, 1, static_cast(face)); + if (!ts->cubeFaceViews[face]) { + mprintf(("Failed to create cubemap face %zu view!\n", face)); + // Clean up previously created views + for (size_t j = 0; j < face; j++) { + m_device.destroyImageView(ts->cubeFaceViews[j]); + ts->cubeFaceViews[j] = nullptr; + } + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; + ts->imageView = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + } + } else { + // Regular render target: array view for shader compatibility + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, mipLevels, ImageViewType::Array2D); + if (!ts->imageView) { + m_device.destroyImage(ts->image); + ts->image = 
nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + + // For mipmapped render targets, create a single-mip view for framebuffer use + // (framebuffer attachments must have levelCount == 1) + if (mipLevels > 1) { + ts->framebufferView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, 1, ImageViewType::Array2D); + if (!ts->framebufferView) { + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; + ts->imageView = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + } + } + + // Create render pass for this target + vk::AttachmentDescription colorAttachment; + colorAttachment.format = format; + colorAttachment.samples = vk::SampleCountFlagBits::e1; + colorAttachment.loadOp = vk::AttachmentLoadOp::eClear; + colorAttachment.storeOp = vk::AttachmentStoreOp::eStore; + colorAttachment.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + colorAttachment.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + colorAttachment.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + colorAttachment.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorAttachmentRef; + colorAttachmentRef.attachment = 0; + colorAttachmentRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorAttachmentRef; + + vk::RenderPassCreateInfo renderPassInfo; + renderPassInfo.attachmentCount = 1; + renderPassInfo.pAttachments = &colorAttachment; + renderPassInfo.subpassCount = 1; + renderPassInfo.pSubpasses = &subpass; + + try { + ts->renderPass = m_device.createRenderPass(renderPassInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create render pass: %s\n", e.what())); + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; 
+ ts->imageView = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + + if (isCubemapRT) { + // Create per-face framebuffers + for (size_t face = 0; face < ts->cubeFaceFramebuffers.size(); face++) { + vk::FramebufferCreateInfo framebufferInfo; + framebufferInfo.renderPass = ts->renderPass; + framebufferInfo.attachmentCount = 1; + framebufferInfo.pAttachments = &ts->cubeFaceViews[face]; + framebufferInfo.width = w; + framebufferInfo.height = h; + framebufferInfo.layers = 1; + + try { + ts->cubeFaceFramebuffers[face] = m_device.createFramebuffer(framebufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create cubemap face %zu framebuffer: %s\n", face, e.what())); + return 0; + } + } + // Default framebuffer points to face 0 + ts->framebuffer = ts->cubeFaceFramebuffers[0]; + } else { + // Create framebuffer + // Use framebufferView (single-mip) if available, otherwise imageView + vk::ImageView fbAttachment = ts->framebufferView ? ts->framebufferView : ts->imageView; + vk::FramebufferCreateInfo framebufferInfo; + framebufferInfo.renderPass = ts->renderPass; + framebufferInfo.attachmentCount = 1; + framebufferInfo.pAttachments = &fbAttachment; + framebufferInfo.width = w; + framebufferInfo.height = h; + framebufferInfo.layers = 1; + + try { + ts->framebuffer = m_device.createFramebuffer(framebufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create framebuffer: %s\n", e.what())); + m_device.destroyRenderPass(ts->renderPass); + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; + ts->imageView = nullptr; + ts->renderPass = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + } + + // Transition image to eShaderReadOnlyOptimal so it's in a valid layout + // if sampled before being rendered into (render pass expects this initial layout) + transitionImageLayout(ts->image, format, vk::ImageLayout::eUndefined, + 
vk::ImageLayout::eShaderReadOnlyOptimal, mipLevels, arrayLayers); + + // Update slot info + ts->width = w; + ts->height = h; + ts->format = format; + ts->mipLevels = mipLevels; + ts->bpp = 32; + ts->arrayLayers = arrayLayers; + ts->bitmapHandle = handle; + ts->isRenderTarget = true; + ts->isCubemap = isCubemapRT; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + if (bpp) { + *bpp = 32; + } + if (mm_lvl) { + *mm_lvl = static_cast(mipLevels); + } + + mprintf(("Created Vulkan render target: %ux%u\n", w, h)); + return 1; +} + +int VulkanTextureManager::bm_set_render_target(int handle, int face) +{ + if (!m_initialized) { + return 0; + } + + auto* renderer = getRendererInstance(); + + // handle < 0 means reset to default framebuffer + if (handle < 0) { + if (renderer->isRenderTargetActive()) { + renderer->endRenderTarget(); + } + m_currentRenderTarget = -1; + return 1; + } + + auto* slot = bm_get_slot(handle, true); + if (!slot || !slot->gr_info) { + return 0; + } + + auto* ts = static_cast(slot->gr_info); + if (!ts->isRenderTarget || !ts->framebuffer) { + return 0; + } + + renderer->beginRenderTarget(ts, face); + m_currentRenderTarget = handle; + + return 1; +} + +void VulkanTextureManager::update_texture(int bitmap_handle, int bpp, const ubyte* data, + int width, int height) +{ + if (!m_initialized || !data) { + return; + } + + auto* slot = bm_get_slot(bitmap_handle, true); + if (!slot || !slot->gr_info) { + return; + } + + auto* ts = static_cast(slot->gr_info); + if (!ts->image) { + return; + } + + auto w = static_cast(width); + auto h = static_cast(height); + + // Verify dimensions match existing texture + if (ts->width != w || ts->height != h) { + mprintf(("VulkanTextureManager::update_texture: Size mismatch (%ux%u vs %ux%u)\n", + w, h, ts->width, ts->height)); + return; + } + + // Use bppToVkFormat to determine format, matching how bm_data creates textures + vk::Format format = 
bppToVkFormat(bpp); + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTextureManager::update_texture: Unsupported bpp %d\n", bpp)); + return; + } + + // Calculate staging buffer size (24bpp is uploaded as 32bpp BGRA) + size_t srcBytesPerPixel = bpp / 8; + size_t dstBytesPerPixel = (bpp == 24) ? 4 : srcBytesPerPixel; + size_t dataSize = w * h * dstBytesPerPixel; + + // Create staging buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = dataSize; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanTextureManager::update_texture: Failed to create staging buffer: %s\n", e.what())); + return; + } + + Verify(m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuOnly, stagingAllocation)); + + // Copy data to staging buffer + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + Verify(mapped); + if (bpp == 24) { + // Convert BGR (3 bytes) to BGRA (4 bytes), adding alpha=255 + const uint8_t* src = data; + auto* dst = static_cast(mapped); + size_t pixelCount = w * h; + for (size_t i = 0; i < pixelCount; ++i) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = 255; + src += 3; + dst += 4; + } + } else { + memcpy(mapped, data, dataSize); + } + m_memoryManager->flushMemory(stagingAllocation, 0, dataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy into a single command buffer and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, ts->image, stagingBuffer, format, w, h, + ts->mipLevels, ts->currentLayout); + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update layout tracking + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; +} + +void 
VulkanTextureManager::get_bitmap_from_texture(void* data_out, int bitmap_num) const +{ + if (!m_initialized || !data_out) { + return; + } + + // TODO: Implement texture readback + (void)bitmap_num; +} + +vk::Sampler VulkanTextureManager::getSampler(vk::Filter magFilter, vk::Filter minFilter, + vk::SamplerAddressMode addressMode, + bool enableAnisotropy, float maxAnisotropy, + bool enableMipmaps) +{ + // Create a key from sampler state + uint64_t key = 0; + key |= static_cast(magFilter) << 0; + key |= static_cast(minFilter) << 4; + key |= static_cast(addressMode) << 8; + key |= static_cast(enableAnisotropy) << 16; + key |= static_cast(enableMipmaps) << 17; + key |= static_cast(maxAnisotropy * 10) << 24; + + auto it = m_samplerCache.find(key); + if (it != m_samplerCache.end()) { + return it->second; + } + + // Create new sampler + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = magFilter; + samplerInfo.minFilter = minFilter; + samplerInfo.addressModeU = addressMode; + samplerInfo.addressModeV = addressMode; + samplerInfo.addressModeW = addressMode; + samplerInfo.anisotropyEnable = enableAnisotropy && (m_maxAnisotropy > 1.0f); + samplerInfo.maxAnisotropy = std::max(1.0f, std::min(maxAnisotropy > 0.0f ? maxAnisotropy : m_maxAnisotropy, m_maxAnisotropy)); + samplerInfo.borderColor = vk::BorderColor::eIntOpaqueBlack; + samplerInfo.unnormalizedCoordinates = false; + samplerInfo.compareEnable = false; + samplerInfo.compareOp = vk::CompareOp::eAlways; + samplerInfo.mipmapMode = enableMipmaps ? vk::SamplerMipmapMode::eLinear : vk::SamplerMipmapMode::eNearest; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = enableMipmaps ? 
VK_LOD_CLAMP_NONE : 0.0f; + + try { + vk::Sampler sampler = m_device.createSampler(samplerInfo); + m_samplerCache[key] = sampler; + return sampler; + } catch (const vk::SystemError& e) { + mprintf(("Failed to create sampler: %s\n", e.what())); + return m_defaultSampler; + } +} + +vk::DescriptorImageInfo VulkanTextureManager::getFallbackTextureInfo2D() +{ + return {m_defaultSampler, m_fallbackTextureView2D, + vk::ImageLayout::eShaderReadOnlyOptimal}; +} + +vk::DescriptorImageInfo VulkanTextureManager::getFallbackTextureInfoCube() +{ + return {m_defaultSampler, m_fallbackCubeView, + vk::ImageLayout::eShaderReadOnlyOptimal}; +} + +vk::DescriptorImageInfo VulkanTextureManager::getFallbackTextureInfo2DArray() +{ + return {m_defaultSampler, m_fallback2DArrayView, + vk::ImageLayout::eShaderReadOnlyOptimal}; +} + +vk::DescriptorImageInfo VulkanTextureManager::getFallbackTextureInfo3D() +{ + return {m_defaultSampler, m_fallback3DView, + vk::ImageLayout::eShaderReadOnlyOptimal}; +} + +tcache_slot_vulkan* VulkanTextureManager::getTextureSlot(int handle) +{ + (void)this; + auto* slot = bm_get_slot(handle, true); + if (!slot || !slot->gr_info) { + return nullptr; + } + return static_cast(slot->gr_info); +} + +vk::Format VulkanTextureManager::bppToVkFormat(int bpp, bool compressed, int compressionType) +{ + if (compressed) { + // DDS compression types + switch (compressionType) { + case DDS_DXT1: + return vk::Format::eBc1RgbaUnormBlock; + case DDS_DXT3: + return vk::Format::eBc2UnormBlock; + case DDS_DXT5: + return vk::Format::eBc3UnormBlock; + case DDS_BC7: + return vk::Format::eBc7UnormBlock; + default: + return vk::Format::eUndefined; + } + } + + switch (bpp) { + case 8: + return vk::Format::eR8Unorm; + case 16: + // OpenGL uses GL_UNSIGNED_SHORT_1_5_5_5_REV with GL_BGRA (A1R5G5B5) + return vk::Format::eA1R5G5B5UnormPack16; + case 24: + // 24bpp (BGR) is almost never supported for optimal tiling in Vulkan. 
+ // We convert to 32bpp BGRA at upload time, so return the 32bpp format. + return vk::Format::eB8G8R8A8Unorm; + case 32: + // FSO uses BGRA format (BMP_AARRGGBB = BGRA in memory) + return vk::Format::eB8G8R8A8Unorm; + default: + return vk::Format::eUndefined; + } +} + +void VulkanTextureManager::transitionImageLayout(vk::Image image, vk::Format format, + vk::ImageLayout oldLayout, + vk::ImageLayout newLayout, + uint32_t mipLevels, + uint32_t arrayLayers) +{ + vk::CommandBuffer commandBuffer = beginSingleTimeCommands(); + + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = oldLayout; + barrier.newLayout = newLayout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = imageAspectFromFormat(format); + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + vk::PipelineStageFlags sourceStage; + vk::PipelineStageFlags destinationStage; + + if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eTransferDstOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eTransfer; + } else if (oldLayout == vk::ImageLayout::eTransferDstOptimal && + newLayout == vk::ImageLayout::eShaderReadOnlyOptimal) { + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + sourceStage = vk::PipelineStageFlagBits::eTransfer; + destinationStage = vk::PipelineStageFlagBits::eFragmentShader; + } else if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eShaderReadOnlyOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + 
sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eFragmentShader; + } else if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eColorAttachmentOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eColorAttachmentOutput; + } else if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eDepthStencilAttachmentOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite; + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eEarlyFragmentTests; + } else { + // Generic transition + barrier.srcAccessMask = vk::AccessFlagBits::eMemoryWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eMemoryRead; + sourceStage = vk::PipelineStageFlagBits::eAllCommands; + destinationStage = vk::PipelineStageFlagBits::eAllCommands; + } + + commandBuffer.pipelineBarrier(sourceStage, destinationStage, {}, + nullptr, nullptr, barrier); + + endSingleTimeCommands(commandBuffer); +} + +void vulkan_generate_mipmap_chain(vk::CommandBuffer cmd, vk::Image image, + uint32_t width, uint32_t height, + uint32_t mipLevels, uint32_t arrayLayers) +{ + if (mipLevels <= 1) { + return; + } + + // Generate each mip level via blit from the previous level + for (uint32_t i = 1; i < mipLevels; i++) { + uint32_t srcW = std::max(1u, width >> (i - 1)); + uint32_t srcH = std::max(1u, height >> (i - 1)); + uint32_t dstW = std::max(1u, width >> i); + uint32_t dstH = std::max(1u, height >> i); + + // Transition mip i from eUndefined to eTransferDstOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.oldLayout = vk::ImageLayout::eUndefined; + barrier.newLayout = 
vk::ImageLayout::eTransferDstOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = i; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barrier); + } + + // Blit from mip i-1 to mip i + vk::ImageBlit blit; + blit.srcSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + blit.srcSubresource.mipLevel = i - 1; + blit.srcSubresource.baseArrayLayer = 0; + blit.srcSubresource.layerCount = arrayLayers; + blit.srcOffsets[0] = vk::Offset3D(0, 0, 0); + blit.srcOffsets[1] = vk::Offset3D(static_cast(srcW), static_cast(srcH), 1); + + blit.dstSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + blit.dstSubresource.mipLevel = i; + blit.dstSubresource.baseArrayLayer = 0; + blit.dstSubresource.layerCount = arrayLayers; + blit.dstOffsets[0] = vk::Offset3D(0, 0, 0); + blit.dstOffsets[1] = vk::Offset3D(static_cast(dstW), static_cast(dstH), 1); + + cmd.blitImage(image, vk::ImageLayout::eTransferSrcOptimal, + image, vk::ImageLayout::eTransferDstOptimal, + blit, vk::Filter::eLinear); + + // Transition mip i to eTransferSrcOptimal (source for next blit) + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + 
barrier.subresourceRange.baseMipLevel = i; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barrier); + } + } + + // Final transition: all mips to eShaderReadOnlyOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferRead; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, {}, {}, barrier); + } +} + +void VulkanTextureManager::frameStart() +{ + processPendingCommandBuffers(); +} + +bool VulkanTextureManager::createImage(uint32_t width, uint32_t height, uint32_t mipLevels, + vk::Format format, vk::ImageTiling tiling, + vk::ImageUsageFlags usage, MemoryUsage memUsage, + vk::Image& image, VulkanAllocation& allocation, + uint32_t arrayLayers, bool cubemap, + uint32_t imageDepth, vk::ImageType imageType) +{ + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = imageType; + imageInfo.extent.width = width; + imageInfo.extent.height = height; + imageInfo.extent.depth = imageDepth; + imageInfo.mipLevels = mipLevels; + imageInfo.arrayLayers = arrayLayers; + imageInfo.format = format; + imageInfo.tiling = tiling; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + 
imageInfo.usage = usage; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.samples = vk::SampleCountFlagBits::e1; + + if (cubemap) { + imageInfo.flags |= vk::ImageCreateFlagBits::eCubeCompatible; + Assertion(arrayLayers == 6, "Cubemap images must have exactly 6 array layers!"); + } + + try { + image = m_device.createImage(imageInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create image: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateImageMemory(image, memUsage, allocation)) { + m_device.destroyImage(image); + image = nullptr; + return false; + } + + return true; +} + +vk::ImageView VulkanTextureManager::createImageView(vk::Image image, vk::Format format, + vk::ImageAspectFlags aspectFlags, + uint32_t mipLevels, + ImageViewType viewType, + uint32_t layerCount, + uint32_t baseArrayLayer) +{ + vk::ImageViewCreateInfo viewInfo; + viewInfo.image = image; + switch (viewType) { + case ImageViewType::Cube: + viewInfo.viewType = vk::ImageViewType::eCube; + break; + case ImageViewType::Array2D: + viewInfo.viewType = vk::ImageViewType::e2DArray; + break; + case ImageViewType::Volume3D: + viewInfo.viewType = vk::ImageViewType::e3D; + break; + case ImageViewType::Plain2D: + default: + viewInfo.viewType = vk::ImageViewType::e2D; + break; + } + viewInfo.format = format; + viewInfo.subresourceRange.aspectMask = aspectFlags; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = mipLevels; + viewInfo.subresourceRange.baseArrayLayer = baseArrayLayer; + viewInfo.subresourceRange.layerCount = layerCount; + + try { + return m_device.createImageView(viewInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create image view: %s\n", e.what())); + return nullptr; + } +} + +bool VulkanTextureManager::createFallbackTexture(vk::Image& outImage, VulkanAllocation& outAlloc, + vk::ImageView& outView, ImageViewType viewType, + uint32_t arrayLayers, bool cubemap, + vk::ImageType 
imageType) +{ + // Build a 1x1 RGBA8 image (one texel per array layer), create its view, then fill every layer with opaque white through a temporary staging buffer. + if (!createImage(1, 1, 1, vk::Format::eR8G8B8A8Unorm, vk::ImageTiling::eOptimal, + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, + MemoryUsage::GpuOnly, outImage, outAlloc, arrayLayers, cubemap, 1, imageType)) { + mprintf(("Failed to create fallback texture image!\n")); + return false; + } + + outView = createImageView(outImage, vk::Format::eR8G8B8A8Unorm, + vk::ImageAspectFlagBits::eColor, 1, viewType, arrayLayers); + if (!outView) { + mprintf(("Failed to create fallback texture view!\n")); + m_device.destroyImage(outImage); + m_memoryManager->freeAllocation(outAlloc); + return false; + } + + // Upload white pixels via staging buffer + SCP_vector whitePixels(arrayLayers, 0xFFFFFFFF); + vk::DeviceSize bufferSize = arrayLayers * sizeof(uint32_t); + + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = bufferSize; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAlloc; + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create fallback staging buffer: %s\n", e.what())); + m_device.destroyImageView(outView); + m_device.destroyImage(outImage); + m_memoryManager->freeAllocation(outAlloc); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuToGpu, stagingAlloc)) { + m_device.destroyBuffer(stagingBuffer); + m_device.destroyImageView(outView); + m_device.destroyImage(outImage); + m_memoryManager->freeAllocation(outAlloc); + return false; + } + + void* mapped = m_memoryManager->mapMemory(stagingAlloc); + if (!mapped) { + // NOTE(review): mapMemory's failure contract is not visible from here; never hand a null mapping to memcpy. Release everything created so far, like the other failure paths. + mprintf(("Failed to map fallback staging buffer!\n")); + m_device.destroyBuffer(stagingBuffer); + m_memoryManager->freeAllocation(stagingAlloc); + m_device.destroyImageView(outView); + m_device.destroyImage(outImage); + m_memoryManager->freeAllocation(outAlloc); + return false; + } + memcpy(mapped, whitePixels.data(), static_cast(bufferSize)); + m_memoryManager->unmapMemory(stagingAlloc); + + // One copy region per array layer, each reading its own 4-byte texel from the staging buffer + SCP_vector regions; + for (uint32_t i = 0; i < arrayLayers; i++) { + vk::BufferImageCopy region; + region.bufferOffset = i * sizeof(uint32_t); + region.bufferRowLength = 0; + 
region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = i; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(1, 1, 1); + regions.push_back(region); + } + + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, outImage, stagingBuffer, vk::Format::eR8G8B8A8Unorm, + 1, 1, 1, vk::ImageLayout::eUndefined, false, regions, arrayLayers); + endSingleTimeCommands(cmd); + + m_device.destroyBuffer(stagingBuffer); + m_memoryManager->freeAllocation(stagingAlloc); + + return true; +} + +bool VulkanTextureManager::createStagingBuffer(size_t size, vk::Buffer& outBuffer, + VulkanAllocation& outAllocation) +{ + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = size; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + outBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanTexture: failed to create staging buffer: %s\n", e.what())); + outBuffer = nullptr; + return false; + } + + if (!m_memoryManager->allocateBufferMemory(outBuffer, MemoryUsage::CpuOnly, outAllocation)) { + m_device.destroyBuffer(outBuffer); + outBuffer = nullptr; + return false; + } + return true; +} + +vk::CommandBuffer VulkanTextureManager::beginSingleTimeCommands() +{ + vk::CommandBufferAllocateInfo allocInfo; + allocInfo.level = vk::CommandBufferLevel::ePrimary; + allocInfo.commandPool = m_commandPool; + allocInfo.commandBufferCount = 1; + + vk::CommandBuffer commandBuffer = m_device.allocateCommandBuffers(allocInfo)[0]; + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + + commandBuffer.begin(beginInfo); + + return commandBuffer; +} + +void VulkanTextureManager::endSingleTimeCommands(vk::CommandBuffer 
commandBuffer) +{ + commandBuffer.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + m_graphicsQueue.submit(submitInfo, nullptr); + // Block until the queue is idle so the command buffer can be freed immediately below + m_graphicsQueue.waitIdle(); + + m_device.freeCommandBuffers(m_commandPool, commandBuffer); +} + +// Records, in order: a barrier moving all mips/layers from oldLayout to eTransferDstOptimal, the staging-buffer copy, and then either the mip-generation blit chain or a plain transition to eShaderReadOnlyOptimal. +void VulkanTextureManager::recordUploadCommands(vk::CommandBuffer cmd, vk::Image image, + vk::Buffer stagingBuffer, vk::Format format, + uint32_t width, uint32_t height, + uint32_t mipLevels, vk::ImageLayout oldLayout, + bool generateMips, + const SCP_vector& regions, + uint32_t arrayLayers) +{ + (void)format; // May be needed for depth/stencil transitions in the future + + // Barrier 1: oldLayout -> eTransferDstOptimal (all mip levels, all layers) + { + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = oldLayout; + barrier.newLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + if (oldLayout == vk::ImageLayout::eUndefined) { + // Coming from eUndefined there are no prior writes to make visible, so the source access mask is empty + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, barrier); + } else { + // Any other starting layout: wait for all earlier commands and their memory writes before the copy overwrites the image + barrier.srcAccessMask = vk::AccessFlagBits::eMemoryWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, barrier); + } + } + + if (!regions.empty()) { + // Caller-supplied regions: copy them all from the staging buffer in one call (e.g. one region per pre-baked mip level, or one per array layer as createFallbackTexture does) + 
cmd.copyBufferToImage(stagingBuffer, image, vk::ImageLayout::eTransferDstOptimal, + static_cast(regions.size()), regions.data()); + } else { + // Single mip-0 copy + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(width, height, 1); + + cmd.copyBufferToImage(stagingBuffer, image, vk::ImageLayout::eTransferDstOptimal, region); + } + + if (generateMips && mipLevels > 1 && regions.empty()) { + // Generate mipmaps via blit chain: upload mip 0, then downsample each level + + // Transition mip 0 from eTransferDstOptimal to eTransferSrcOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barrier); + } + + vulkan_generate_mipmap_chain(cmd, image, width, height, mipLevels, arrayLayers); + } else { + // Simple transition: all mips from eTransferDstOptimal to eShaderReadOnlyOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + 
barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, nullptr, nullptr, barrier); + } + } +} + +void VulkanTextureManager::submitUploadAsync(vk::CommandBuffer cmd, vk::Buffer stagingBuffer, + VulkanAllocation stagingAllocation) +{ + cmd.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + + m_graphicsQueue.submit(submitInfo, nullptr); + + // Defer staging buffer destruction (2 frames matches MAX_FRAMES_IN_FLIGHT) + auto* deletionQueue = getDeletionQueue(); + deletionQueue->queueBuffer(stagingBuffer, stagingAllocation); + + // Defer command buffer free + m_pendingCommandBuffers.push_back({cmd, VulkanDeletionQueue::FRAMES_TO_WAIT}); +} + +void VulkanTextureManager::processPendingCommandBuffers() +{ + auto it = m_pendingCommandBuffers.begin(); + while (it != m_pendingCommandBuffers.end()) { + if (it->framesRemaining == 0) { + m_device.freeCommandBuffers(m_commandPool, it->cb); + it = m_pendingCommandBuffers.erase(it); + } else { + it->framesRemaining--; + ++it; + } + } +} + +uint32_t VulkanTextureManager::calculateMipLevels(uint32_t width, uint32_t height) +{ + return static_cast(std::floor(std::log2(std::max(width, height)))) + 1; +} + +// ========== gr_screen function pointer implementations ========== + +int vulkan_preload(int bitmap_num, int /*is_aabitmap*/) +{ + 
auto* texManager = getTextureManager(); + + // Check if texture is already loaded + auto* slot = texManager->getTextureSlot(bitmap_num); + if (slot && slot->imageView) { + return 1; // Already loaded + } + + // Determine lock parameters based on compression type. + // For compressed DDS textures, lock with the matching DXT/BC7 flags to get + // raw compressed data with all pre-baked mipmap levels. + int compType = bm_is_compressed(bitmap_num); + int lockBpp = 32; + ubyte lockFlags = BMP_TEX_XPARENT; + + switch (compType) { + case DDS_DXT1: + lockBpp = 24; + lockFlags = BMP_TEX_DXT1; + break; + case DDS_DXT3: + lockBpp = 32; + lockFlags = BMP_TEX_DXT3; + break; + case DDS_DXT5: + lockBpp = 32; + lockFlags = BMP_TEX_DXT5; + break; + case DDS_BC7: + lockBpp = 32; + lockFlags = BMP_TEX_BC7; + break; + case DDS_CUBEMAP_DXT1: + lockBpp = 24; + lockFlags = BMP_TEX_CUBEMAP; + break; + case DDS_CUBEMAP_DXT3: + case DDS_CUBEMAP_DXT5: + lockBpp = 32; + lockFlags = BMP_TEX_CUBEMAP; + break; + default: + // Uncompressed — use 32bpp decompressed + compType = 0; + break; + } + + bitmap* bmp = bm_lock(bitmap_num, static_cast(lockBpp), lockFlags); + if (!bmp) { + static int warnCount = 0; + if (warnCount < 10) { + mprintf(("vulkan_preload: Failed to lock bitmap %d (compType=%d)\n", bitmap_num, compType)); + warnCount++; + } + return 0; + } + + // Upload the texture + bool success = texManager->bm_data(bitmap_num, bmp, compType); + + // Unlock bitmap + bm_unlock(bitmap_num); + + if (success) { + static int successCount = 0; + if (successCount < 10) { + mprintf(("vulkan_preload: Successfully uploaded texture %d (compressed=%d)\n", + bitmap_num, compType)); + successCount++; + } + } + + return success ? 
1 : 0; +} + +void vulkan_bm_create(bitmap_slot* slot) +{ + auto* texManager = getTextureManager(); + texManager->bm_create(slot); +} + +void vulkan_bm_free_data(bitmap_slot* slot, bool release) +{ + auto* texManager = getTextureManager(); + texManager->bm_free_data(slot, release); +} + +void vulkan_bm_init(bitmap_slot* slot) +{ + auto* texManager = getTextureManager(); + texManager->bm_init(slot); +} + +bool vulkan_bm_data(int handle, bitmap* bm) +{ + auto* texManager = getTextureManager(); + int compType = bm_is_compressed(handle); + return texManager->bm_data(handle, bm, compType); +} + +void vulkan_bm_page_in_start() +{ + // Flush all GPU texture resources so that textures not needed in the next + // mission are freed. Matches the OpenGL pattern (opengl_tcache_flush in + // opengl_preload_init, currently commented out there). Textures that ARE + // needed will be re-uploaded on demand during level load / first use. + // Without this, Vulkan VkImage/VMA allocations accumulate across missions + // because bm_unload_fast() only frees CPU-side pixel data. 
+ auto* texManager = getTextureManager(); + texManager->flushTextures(); +} + +int vulkan_bm_make_render_target(int handle, int* width, int* height, int* bpp, int* mm_lvl, int flags) +{ + auto* texManager = getTextureManager(); + return texManager->bm_make_render_target(handle, width, height, bpp, mm_lvl, flags); +} + +int vulkan_bm_set_render_target(int handle, int face) +{ + auto* texManager = getTextureManager(); + return texManager->bm_set_render_target(handle, face); +} + +void vulkan_update_texture(int bitmap_handle, int bpp, const ubyte* data, int width, int height) +{ + auto* texManager = getTextureManager(); + texManager->update_texture(bitmap_handle, bpp, data, width, height); +} + +void vulkan_get_bitmap_from_texture(void* data_out, int bitmap_num) +{ + auto* texManager = getTextureManager(); + texManager->get_bitmap_from_texture(data_out, bitmap_num); +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanTexture.h b/code/graphics/vulkan/VulkanTexture.h new file mode 100644 index 00000000000..5b94b7cd0ea --- /dev/null +++ b/code/graphics/vulkan/VulkanTexture.h @@ -0,0 +1,373 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "VulkanMemory.h" + +#define BMPMAN_INTERNAL +#include "bmpman/bm_internal.h" + +#include +#include + + +namespace graphics::vulkan { + +/** + * @brief Vulkan-specific texture data stored in bitmap slots + * + * Extends gr_bitmap_info to store Vulkan image handles and metadata. + * This is the Vulkan equivalent of tcache_slot_opengl. 
+ */ +class tcache_slot_vulkan : public gr_bitmap_info { +public: + vk::Image image; + vk::ImageView imageView; + VulkanAllocation allocation; + vk::Format format = vk::Format::eUndefined; + vk::ImageLayout currentLayout = vk::ImageLayout::eUndefined; + + uint32_t width = 0; + uint32_t height = 0; + uint32_t mipLevels = 1; + uint32_t arrayLayers = 1; + int bpp = 0; + + int bitmapHandle = -1; + uint32_t arrayIndex = 0; + bool used = false; + + // For render targets + vk::Framebuffer framebuffer; + vk::ImageView framebufferView; // Single-mip view for framebuffer (when mipLevels > 1) + vk::RenderPass renderPass; // Render pass compatible with this target + bool isRenderTarget = false; + + // 3D texture support + bool is3D = false; + uint32_t depth = 1; + + // Cubemap support + bool isCubemap = false; + std::array cubeFaceViews = {}; // Per-face 2D views for render-to-cubemap + std::array cubeFaceFramebuffers = {}; // Per-face framebuffers for render-to-cubemap + vk::ImageView cubeImageView; // Cube view for sampling (viewType=eCube, layerCount=6) + + // Texture scaling (for non-power-of-two handling) + float uScale = 1.0f; + float vScale = 1.0f; + + tcache_slot_vulkan() { reset(); } + ~tcache_slot_vulkan() override = default; + + void reset(); +}; + +/** + * @brief Manages Vulkan textures, samplers, and render targets + */ +class VulkanTextureManager { +public: + VulkanTextureManager(); + ~VulkanTextureManager(); + + // Non-copyable + VulkanTextureManager(const VulkanTextureManager&) = delete; + VulkanTextureManager& operator=(const VulkanTextureManager&) = delete; + + /** + * @brief Initialize the texture manager + */ + bool init(vk::Device device, vk::PhysicalDevice physicalDevice, + VulkanMemoryManager* memoryManager, + vk::CommandPool commandPool, vk::Queue graphicsQueue); + + /** + * @brief Shutdown and free all textures + */ + void shutdown(); + + /** + * @brief Flush all GPU texture resources from bitmap slots + * + * Called between missions (from 
vulkan_bm_page_in_start) to release + * VkImage/VMA allocations for textures that won't be needed. + * Textures are re-uploaded on demand when accessed again. + */ + void flushTextures() const; + + // Bitmap management functions (implement gr_screen function pointers) + + /** + * @brief Initialize a bitmap slot for Vulkan + */ + void bm_init(bitmap_slot* slot) const; + + /** + * @brief Create Vulkan resources for a bitmap slot + */ + void bm_create(bitmap_slot* slot) const; + + /** + * @brief Free Vulkan resources for a bitmap slot + */ + void bm_free_data(bitmap_slot* slot, bool release) const; + + /** + * @brief Upload bitmap data to GPU + * @param compType Compression type (DDS_DXT1/3/5, DDS_BC7) or 0 for uncompressed + */ + bool bm_data(int handle, bitmap* bm, int compType); + + /** + * @brief Create a render target + */ + int bm_make_render_target(int handle, int* width, int* height, int* bpp, int* mm_lvl, int flags); + + /** + * @brief Set active render target + */ + int bm_set_render_target(int handle, int face); + + /** + * @brief Update texture data + */ + void update_texture(int bitmap_handle, int bpp, const ubyte* data, int width, int height); + + /** + * @brief Read texture data back to CPU + */ + void get_bitmap_from_texture(void* data_out, int bitmap_num) const; + + // Sampler management + + /** + * @brief Get or create a sampler with specified parameters + */ + vk::Sampler getSampler(vk::Filter magFilter, vk::Filter minFilter, + vk::SamplerAddressMode addressMode, + bool enableAnisotropy, float maxAnisotropy, + bool enableMipmaps); + + /** + * @brief Get a ready-to-use DescriptorImageInfo for the 2D fallback texture + */ + vk::DescriptorImageInfo getFallbackTextureInfo2D(); + + /** + * @brief Get a ready-to-use DescriptorImageInfo for the cubemap fallback texture + */ + vk::DescriptorImageInfo getFallbackTextureInfoCube(); + + /** + * @brief Get a ready-to-use DescriptorImageInfo for the 2D array fallback texture + */ + vk::DescriptorImageInfo 
getFallbackTextureInfo2DArray(); + + /** + * @brief Get a ready-to-use DescriptorImageInfo for the 3D fallback texture + */ + vk::DescriptorImageInfo getFallbackTextureInfo3D(); + + // Texture access + + /** + * @brief Get texture slot data + */ + tcache_slot_vulkan* getTextureSlot(int handle); + + // Utility functions + + /** + * @brief Convert FSO bitmap format to Vulkan format + */ + static vk::Format bppToVkFormat(int bpp, bool compressed = false, int compressionType = 0); + + /** + * @brief Transition image layout + */ + void transitionImageLayout(vk::Image image, vk::Format format, + vk::ImageLayout oldLayout, vk::ImageLayout newLayout, + uint32_t mipLevels = 1, uint32_t arrayLayers = 1); + + /** + * @brief Called at start of frame; frees deferred upload command buffers whose wait has elapsed + */ + void frameStart(); + +private: + /** + * @brief Create a Vulkan image and allocate memory for it + * @param cubemap If true, sets eCubeCompatible flag (requires arrayLayers=6) + * @param imageDepth Depth of the image extent (1 for non-volume images) + * @param imageType Vulkan image type (e2D, e3D, etc.) + * @return true on success; false if image creation or memory allocation fails (on allocation failure the image is destroyed and reset to null) + */ + bool createImage(uint32_t width, uint32_t height, uint32_t mipLevels, + vk::Format format, vk::ImageTiling tiling, + vk::ImageUsageFlags usage, MemoryUsage memUsage, + vk::Image& image, VulkanAllocation& allocation, + uint32_t arrayLayers = 1, bool cubemap = false, + uint32_t imageDepth = 1, + vk::ImageType imageType = vk::ImageType::e2D); + + enum class ImageViewType { Array2D, Plain2D, Cube, Volume3D }; + + /** + * @brief Create an image view covering mip levels [0, mipLevels) and layers [baseArrayLayer, baseArrayLayer + layerCount) + * @param viewType Controls view type: Array2D=sampler2DArray, Plain2D=sampler2D, Cube=samplerCube, Volume3D=sampler3D + * @return The new view, or a null handle if creation fails + */ + vk::ImageView createImageView(vk::Image image, vk::Format format, + vk::ImageAspectFlags aspectFlags, + uint32_t mipLevels, + ImageViewType viewType = ImageViewType::Array2D, + uint32_t layerCount = 1, + uint32_t baseArrayLayer = 0); + + /** + * @brief Create a 1x1 white fallback texture (image + view + upload) + * @return true on success; false if any step fails + */ + bool createFallbackTexture(vk::Image& outImage, VulkanAllocation& outAlloc, + vk::ImageView& outView, ImageViewType viewType, + uint32_t arrayLayers = 1, 
bool cubemap = false, + vk::ImageType imageType = vk::ImageType::e2D); + + /** + * @brief Begin single-time command buffer + */ + vk::CommandBuffer beginSingleTimeCommands(); + + /** + * @brief End and submit single-time command buffer (synchronous, blocks on waitIdle) + */ + void endSingleTimeCommands(vk::CommandBuffer commandBuffer); + + /** + * @brief Create a host-visible staging buffer (eTransferSrc) sized for an upload + * + * Creates the buffer and allocates CpuOnly memory for it. On failure the + * buffer is destroyed and false is returned (outBuffer left null). Callers + * remain responsible for any image/view they allocated beforehand. + */ + bool createStagingBuffer(size_t size, vk::Buffer& outBuffer, VulkanAllocation& outAllocation); + + /** + * @brief Record layout transitions and buffer-to-image copy into a command buffer + */ + static void recordUploadCommands(vk::CommandBuffer cmd, vk::Image image, vk::Buffer stagingBuffer, + vk::Format format, uint32_t width, uint32_t height, + uint32_t mipLevels, vk::ImageLayout oldLayout, + bool generateMips = false, + const SCP_vector& regions = {}, + uint32_t arrayLayers = 1); + + /** + * @brief Submit an upload command buffer asynchronously and defer resource cleanup + * + * Submits without waitIdle. Queues staging buffer and command buffer for + * deferred destruction/free after enough frames have elapsed. 
+ */ + void submitUploadAsync(vk::CommandBuffer cmd, vk::Buffer stagingBuffer, + VulkanAllocation stagingAllocation); + + /** + * @brief Free command buffers whose GPU work has completed + */ + void processPendingCommandBuffers(); + + /** + * @brief Calculate number of mipmap levels + */ + static uint32_t calculateMipLevels(uint32_t width, uint32_t height); + + /** + * @brief Upload all frames of an animation as layers of a single texture array + */ + bool uploadAnimationFrames(int handle, bitmap* bm, int compType, + int baseFrame, int numFrames); + + /** + * @brief Upload a cubemap DDS texture (6 faces) as a single cubemap image + */ + bool uploadCubemap(int handle, bitmap* bm, int compType); + + /** + * @brief Upload a 3D texture (volumetric data) as a single 3D image + */ + bool upload3DTexture(int handle, bitmap* bm, int texDepth); + + // Guard flag to prevent recursion when bm_lock calls bm_data during animation upload + bool m_uploadingAnimation = false; + + // Deferred command buffer free list + struct PendingCommandBuffer { + vk::CommandBuffer cb; + uint32_t framesRemaining; + }; + SCP_vector m_pendingCommandBuffers; + + vk::Device m_device; + vk::PhysicalDevice m_physicalDevice; + VulkanMemoryManager* m_memoryManager = nullptr; + vk::CommandPool m_commandPool; + vk::Queue m_graphicsQueue; + + // Cached samplers (key: packed sampler state) + SCP_unordered_map m_samplerCache; + vk::Sampler m_defaultSampler; + + // Fallback 1x1 white textures for unbound texture slots + vk::Image m_fallback2DArrayTexture; + vk::ImageView m_fallback2DArrayView; // 2D_ARRAY view (for material texture arrays) + VulkanAllocation m_fallback2DArrayAllocation; + + vk::Image m_fallbackTexture2D; + vk::ImageView m_fallbackTextureView2D; // 2D view (for post-processing sampler2D) + VulkanAllocation m_fallbackTexture2DAllocation; + + // Fallback 1x1x6 white cubemap for unbound samplerCube slots + vk::Image m_fallbackCubeTexture; + vk::ImageView m_fallbackCubeView; // Cube view (for 
samplerCube) + VulkanAllocation m_fallbackCubeAllocation; + + // Fallback 1x1x1 white 3D texture for unbound sampler3D slots + vk::Image m_fallback3DTexture; + vk::ImageView m_fallback3DView; // 3D view (for sampler3D) + VulkanAllocation m_fallback3DAllocation; + + // Device limits + uint32_t m_maxTextureSize = 4096; + float m_maxAnisotropy = 1.0f; + + // Current render target state + int m_currentRenderTarget = -1; + + bool m_initialized = false; +}; + +// Global texture manager instance +VulkanTextureManager* getTextureManager(); +void setTextureManager(VulkanTextureManager* manager); + +/** + * @brief Generate mip levels 1..mipLevels-1 via blit chain from the previous level. + * + * Prerequisite: mip 0 must already be in eTransferSrcOptimal. + * Result: ALL mip levels transitioned to eShaderReadOnlyOptimal. + */ +void vulkan_generate_mipmap_chain(vk::CommandBuffer cmd, vk::Image image, + uint32_t width, uint32_t height, + uint32_t mipLevels, uint32_t arrayLayers = 1); + +// ========== gr_screen function pointer implementations ========== + +int vulkan_preload(int bitmap_num, int is_aabitmap); +void vulkan_bm_create(bitmap_slot* slot); +void vulkan_bm_free_data(bitmap_slot* slot, bool release); +void vulkan_bm_init(bitmap_slot* slot); +bool vulkan_bm_data(int handle, bitmap* bm); +void vulkan_bm_page_in_start(); +int vulkan_bm_make_render_target(int handle, int* width, int* height, int* bpp, int* mm_lvl, int flags); +int vulkan_bm_set_render_target(int handle, int face); +void vulkan_update_texture(int bitmap_handle, int bpp, const ubyte* data, int width, int height); +void vulkan_get_bitmap_from_texture(void* data_out, int bitmap_num); + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanVertexFormat.cpp b/code/graphics/vulkan/VulkanVertexFormat.cpp new file mode 100644 index 00000000000..8c7bf3712c0 --- /dev/null +++ b/code/graphics/vulkan/VulkanVertexFormat.cpp @@ -0,0 +1,184 @@ +#include "VulkanVertexFormat.h" + + +namespace 
graphics::vulkan { + +// Vertex format mapping table +// Maps FSO vertex_format_data::vertex_format to Vulkan formats +// Based on GL_array_binding_data in gropengltnl.cpp +const VertexFormatMapping VERTEX_FORMAT_MAPPINGS[] = { + // Position formats + { vertex_format_data::POSITION4, vk::Format::eR32G32B32A32Sfloat, VATTRIB_POSITION, 4, 16 }, + { vertex_format_data::POSITION3, vk::Format::eR32G32B32Sfloat, VATTRIB_POSITION, 3, 12 }, + { vertex_format_data::POSITION2, vk::Format::eR32G32Sfloat, VATTRIB_POSITION, 2, 8 }, + + // Color formats + { vertex_format_data::COLOR3, vk::Format::eR8G8B8Unorm, VATTRIB_COLOR, 3, 3 }, + { vertex_format_data::COLOR4, vk::Format::eR8G8B8A8Unorm, VATTRIB_COLOR, 4, 4 }, + { vertex_format_data::COLOR4F, vk::Format::eR32G32B32A32Sfloat, VATTRIB_COLOR, 4, 16 }, + + // Texture coordinate formats + { vertex_format_data::TEX_COORD2, vk::Format::eR32G32Sfloat, VATTRIB_TEXCOORD, 2, 8 }, + { vertex_format_data::TEX_COORD4, vk::Format::eR32G32B32A32Sfloat, VATTRIB_TEXCOORD, 4, 16 }, + + // Normal/tangent formats + { vertex_format_data::NORMAL, vk::Format::eR32G32B32Sfloat, VATTRIB_NORMAL, 3, 12 }, + { vertex_format_data::TANGENT, vk::Format::eR32G32B32A32Sfloat, VATTRIB_TANGENT, 4, 16 }, + + // Instance/particle formats + { vertex_format_data::MODEL_ID, vk::Format::eR32Sfloat, VATTRIB_MODELID, 1, 4 }, + { vertex_format_data::RADIUS, vk::Format::eR32Sfloat, VATTRIB_RADIUS, 1, 4 }, + { vertex_format_data::UVEC, vk::Format::eR32G32B32Sfloat, VATTRIB_UVEC, 3, 12 }, + + // Matrix format (mat4 = 4 vec4s, uses locations 8-11) + { vertex_format_data::MATRIX4, vk::Format::eR32G32B32A32Sfloat, VATTRIB_MODEL_MATRIX, 16, 64 }, +}; + +const size_t VERTEX_FORMAT_MAPPINGS_COUNT = sizeof(VERTEX_FORMAT_MAPPINGS) / sizeof(VERTEX_FORMAT_MAPPINGS[0]); + +const VertexFormatMapping* getVertexFormatMapping(vertex_format_data::vertex_format format) +{ + for (const auto & i : VERTEX_FORMAT_MAPPINGS) { + if (i.format == format) { + return &i; + } + } + return nullptr; 
+} + +void VertexInputConfig::updatePointers() +{ + createInfo.vertexBindingDescriptionCount = static_cast(bindings.size()); + createInfo.pVertexBindingDescriptions = bindings.empty() ? nullptr : bindings.data(); + createInfo.vertexAttributeDescriptionCount = static_cast(attributes.size()); + createInfo.pVertexAttributeDescriptions = attributes.empty() ? nullptr : attributes.data(); +} + +const VertexInputConfig& VulkanVertexFormatCache::getVertexInputConfig(const vertex_layout& layout) +{ + size_t hash = layout.hash(); + + auto it = m_cache.find(hash); + if (it != m_cache.end()) { + return it->second; + } + + // Create new configuration + auto result = m_cache.emplace(hash, createVertexInputConfig(layout)); + return result.first->second; +} + +void VulkanVertexFormatCache::clear() +{ + m_cache.clear(); +} + +VertexInputConfig VulkanVertexFormatCache::createVertexInputConfig(const vertex_layout& layout) +{ + VertexInputConfig config; + + // Track which bindings we've already added + SCP_unordered_map bufferBindings; // buffer_number -> binding index + + size_t numComponents = layout.get_num_vertex_components(); + + for (size_t i = 0; i < numComponents; ++i) { + const vertex_format_data* component = layout.get_vertex_component(i); + const VertexFormatMapping* mapping = getVertexFormatMapping(component->format_type); + + if (!mapping) { + mprintf(("VulkanVertexFormat: Unknown vertex format %d\n", static_cast(component->format_type))); + continue; + } + + // Track which locations the layout natively provides + auto loc = mapping->location; + config.providedInputMask |= (1u << loc); + + // Get or create binding for this buffer + uint32_t bindingIndex; + auto bindingIt = bufferBindings.find(component->buffer_number); + if (bindingIt == bufferBindings.end()) { + bindingIndex = static_cast(config.bindings.size()); + bufferBindings[component->buffer_number] = bindingIndex; + + vk::VertexInputBindingDescription binding; + binding.binding = bindingIndex; + binding.stride = 
static_cast(component->stride); + binding.inputRate = (component->divisor > 0) ? + vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex; + config.bindings.push_back(binding); + } else { + bindingIndex = bindingIt->second; + } + + // Handle MATRIX4 specially - it needs 4 attribute locations + if (component->format_type == vertex_format_data::MATRIX4) { + // mat4 requires 4 vec4 attributes at consecutive locations + for (uint32_t row = 0; row < 4; ++row) { + vk::VertexInputAttributeDescription attr; + attr.location = mapping->location + row; + attr.binding = bindingIndex; + attr.format = vk::Format::eR32G32B32A32Sfloat; + attr.offset = static_cast(component->offset) + (row * 16); + config.attributes.push_back(attr); + } + // Mark all 4 matrix locations as provided + config.providedInputMask |= (1u << (loc + 1)) | (1u << (loc + 2)) | (1u << (loc + 3)); + } else { + vk::VertexInputAttributeDescription attr; + attr.location = mapping->location; + attr.binding = bindingIndex; + attr.format = mapping->vkFormat; + attr.offset = static_cast(component->offset); + config.attributes.push_back(attr); + } + } + + // Only add fallback bindings when the layout has actual vertex components. + // Empty layouts (e.g. fullscreen triangles) generate vertices in the shader + // and don't need any vertex input bindings. 
+ uint32_t colorBit = 1u << VATTRIB_COLOR; + if (!(config.providedInputMask & colorBit) && numComponents > 0) { + // Add binding for fallback color buffer (instanced so one value applies to all vertices) + vk::VertexInputBindingDescription colorBinding; + colorBinding.binding = FALLBACK_COLOR_BINDING; + colorBinding.stride = 16; // vec4 = 16 bytes + colorBinding.inputRate = vk::VertexInputRate::eInstance; // Same color for all vertices + config.bindings.push_back(colorBinding); + + vk::VertexInputAttributeDescription colorAttr; + colorAttr.location = VATTRIB_COLOR; + colorAttr.binding = FALLBACK_COLOR_BINDING; + colorAttr.format = vk::Format::eR32G32B32A32Sfloat; + colorAttr.offset = 0; + config.attributes.push_back(colorAttr); + } + + // If no texcoord attribute, add a fallback providing (0,0,0,0) + // In OpenGL, missing vertex attributes default to (0,0,0,1); Vulkan requires explicit input + uint32_t texCoordBit = 1u << VATTRIB_TEXCOORD; + if (!(config.providedInputMask & texCoordBit) && numComponents > 0) { + // Add binding for fallback texcoord buffer (instanced so one value applies to all vertices) + vk::VertexInputBindingDescription texCoordBinding; + texCoordBinding.binding = FALLBACK_TEXCOORD_BINDING; + texCoordBinding.stride = 16; // vec4 = 16 bytes + texCoordBinding.inputRate = vk::VertexInputRate::eInstance; + config.bindings.push_back(texCoordBinding); + + vk::VertexInputAttributeDescription texCoordAttr; + texCoordAttr.location = VATTRIB_TEXCOORD; + texCoordAttr.binding = FALLBACK_TEXCOORD_BINDING; + texCoordAttr.format = vk::Format::eR32G32B32A32Sfloat; + texCoordAttr.offset = 0; + config.attributes.push_back(texCoordAttr); + } + + // Update the createInfo pointers + config.updatePointers(); + + return config; +} + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/VulkanVertexFormat.h b/code/graphics/vulkan/VulkanVertexFormat.h new file mode 100644 index 00000000000..13b946af653 --- /dev/null +++ 
b/code/graphics/vulkan/VulkanVertexFormat.h @@ -0,0 +1,98 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "graphics/shader_types.h" + +#include + + +namespace graphics::vulkan { + +/** + * @brief Mapping from FSO vertex_format to Vulkan format and location + */ +struct VertexFormatMapping { + vertex_format_data::vertex_format format; + vk::Format vkFormat; + VertexAttributeLocation location; + uint32_t componentCount; + uint32_t sizeInBytes; +}; + +/** + * @brief Get the Vulkan format mapping for a given vertex format + * @param format The FSO vertex format type + * @return Pointer to mapping info, or nullptr if not found + */ +const VertexFormatMapping* getVertexFormatMapping(vertex_format_data::vertex_format format); + +// Reserved binding indices for fallback buffers when vertex data is missing attributes +static constexpr uint32_t FALLBACK_COLOR_BINDING = 15; +static constexpr uint32_t FALLBACK_TEXCOORD_BINDING = 14; + +/** + * @brief Cached vertex input configuration + */ +struct VertexInputConfig { + SCP_vector bindings; + SCP_vector attributes; + vk::PipelineVertexInputStateCreateInfo createInfo; + + // Bitmask of vertex input locations natively provided by the layout (bit N = location N). + // Does NOT include fallback attributes. Compare with shader's vertexInputMask to + // determine which fallbacks are actually needed: shaderMask & ~providedInputMask. + uint32_t providedInputMask = 0; + + // Update createInfo pointers after vector modifications + void updatePointers(); +}; + +/** + * @brief Manages vertex format to Vulkan vertex input state conversion + * + * Converts FSO vertex_layout objects to Vulkan VkPipelineVertexInputStateCreateInfo. + * Caches configurations to avoid repeated conversions. 
+ */ +class VulkanVertexFormatCache { +public: + VulkanVertexFormatCache() = default; + ~VulkanVertexFormatCache() = default; + + // Non-copyable + VulkanVertexFormatCache(const VulkanVertexFormatCache&) = delete; + VulkanVertexFormatCache& operator=(const VulkanVertexFormatCache&) = delete; + + /** + * @brief Get Vulkan vertex input state for a given layout + * @param layout The FSO vertex layout + * @return Reference to cached vertex input configuration + */ + const VertexInputConfig& getVertexInputConfig(const vertex_layout& layout); + + /** + * @brief Clear all cached configurations + */ + void clear(); + + /** + * @brief Get number of cached configurations + */ + size_t getCacheSize() const { return m_cache.size(); } + +private: + /** + * @brief Create a new vertex input configuration for a layout + */ + static VertexInputConfig createVertexInputConfig(const vertex_layout& layout); + + // Cache: layout hash -> vertex input config + SCP_unordered_map m_cache; +}; + +// Global vertex format mapping table +extern const VertexFormatMapping VERTEX_FORMAT_MAPPINGS[]; +extern const size_t VERTEX_FORMAT_MAPPINGS_COUNT; + +} // namespace graphics::vulkan + diff --git a/code/graphics/vulkan/gr_vulkan.cpp b/code/graphics/vulkan/gr_vulkan.cpp index 833fccd152f..4e25fdafa38 100644 --- a/code/graphics/vulkan/gr_vulkan.cpp +++ b/code/graphics/vulkan/gr_vulkan.cpp @@ -1,22 +1,478 @@ #include "gr_vulkan.h" - #include "VulkanRenderer.h" -#include "vulkan_stubs.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" +#include "VulkanShader.h" +#include "VulkanDescriptorManager.h" +#include "VulkanPipeline.h" +#include "VulkanQuery.h" +#include "VulkanState.h" +#include "VulkanDraw.h" +#include "VulkanDeferred.h" +#include "VulkanPostProcessing.h" #include "backends/imgui_impl_sdl.h" #include "backends/imgui_impl_vulkan.h" -#include "mod_table/mod_table.h" +#include "osapi/osapi.h" + +#include "bmpman/bmpman.h" +#include "cfile/cfile.h" +#include "cmdline/cmdline.h" +#include 
"graphics/2d.h" +#include "graphics/matrix.h" +#include "graphics/material.h" +#include "graphics/post_processing.h" +#include "graphics/grinternal.h" +#include "lighting/lighting.h" +#include "pngutils/pngutils.h" + +#if defined(_WIN32) +#include +#endif -namespace graphics { -namespace vulkan { +namespace graphics::vulkan { namespace { + std::unique_ptr renderer_instance; + +// Sync object for tracking frame completion +struct VulkanSyncObject { + uint64_t frameNumber; +}; + +// ========== Renderer-level functions ========== + +void vulkan_setup_frame() +{ + auto* renderer = getRendererInstance(); + renderer->setupFrame(); +} + +void vulkan_flip() +{ + renderer_instance->flip(); +} + +bool vulkan_is_capable(gr_capability capability) +{ + switch (capability) { + case gr_capability::CAPABILITY_ENVIRONMENT_MAP: + return true; + case gr_capability::CAPABILITY_NORMAL_MAP: + return Cmdline_normal != 0; + case gr_capability::CAPABILITY_HEIGHT_MAP: + return Cmdline_height != 0; + case gr_capability::CAPABILITY_SOFT_PARTICLES: + return Gr_post_processing_enabled; + case gr_capability::CAPABILITY_DISTORTION: + return Gr_post_processing_enabled; + case gr_capability::CAPABILITY_POST_PROCESSING: + return Gr_post_processing_enabled; + case gr_capability::CAPABILITY_DEFERRED_LIGHTING: + return light_deferred_enabled(); + case gr_capability::CAPABILITY_SHADOWS: + return getRendererInstance()->supportsShaderViewportLayerOutput(); + case gr_capability::CAPABILITY_THICK_OUTLINE: + return false; + case gr_capability::CAPABILITY_BATCHED_SUBMODELS: + return true; + case gr_capability::CAPABILITY_TIMESTAMP_QUERY: + return getQueryManager() != nullptr; + case gr_capability::CAPABILITY_SEPARATE_BLEND_FUNCTIONS: + // Vulkan supports per-attachment blend by spec + return true; + case gr_capability::CAPABILITY_PERSISTENT_BUFFER_MAPPING: + // Vulkan has persistently mappable host-visible memory + return true; + case gr_capability::CAPABILITY_BPTC: + return 
getRendererInstance()->isTextureCompressionBCSupported(); + case gr_capability::CAPABILITY_S3TC: + // Vulkan always supports BC1/BC2/BC3 (S3TC equivalent) as core features + return true; + case gr_capability::CAPABILITY_LARGE_SHADER: + return true; + case gr_capability::CAPABILITY_INSTANCED_RENDERING: + return true; + case gr_capability::CAPABILITY_QUERIES_REUSABLE: + // Vulkan queries require explicit reset between read and write. + // The backend manages this lifecycle internally via deleteQueryObject. + return false; + } + return false; +} + +bool vulkan_get_property(gr_property prop, void* dest) +{ + auto* renderer = getRendererInstance(); + + switch (prop) { + case gr_property::UNIFORM_BUFFER_OFFSET_ALIGNMENT: + *reinterpret_cast(dest) = static_cast(renderer->getMinUniformBufferOffsetAlignment()); + return true; + case gr_property::UNIFORM_BUFFER_MAX_SIZE: + *reinterpret_cast(dest) = static_cast(renderer->getMaxUniformBufferSize()); + return true; + case gr_property::MAX_ANISOTROPY: + *reinterpret_cast(dest) = renderer->getMaxAnisotropy(); + return true; + default: + return false; + } +} + +void vulkan_push_debug_group(const char* name) +{ + auto* renderer = getRendererInstance(); + if (!renderer->isDebugUtilsEnabled()) { + return; + } + + auto* stateTracker = getStateTracker(); + + vk::DebugUtilsLabelEXT label; + label.pLabelName = name; + label.color = {{ 1.0f, 1.0f, 1.0f, 1.0f }}; + stateTracker->getCommandBuffer().beginDebugUtilsLabelEXT(label); +} + +void vulkan_pop_debug_group() +{ + auto* renderer = getRendererInstance(); + if (!renderer->isDebugUtilsEnabled()) { + return; + } + + auto* stateTracker = getStateTracker(); + stateTracker->getCommandBuffer().endDebugUtilsLabelEXT(); +} + +void vulkan_imgui_new_frame() +{ + ImGui_ImplVulkan_NewFrame(); +} + +void vulkan_imgui_render_draw_data() +{ + auto* renderer = getRendererInstance(); + if (renderer) { + ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), renderer->getVkCurrentCommandBuffer()); + } +} 
+ +gr_sync vulkan_sync_fence() +{ + auto* renderer = getRendererInstance(); + auto* sync = new VulkanSyncObject(); + sync->frameNumber = renderer->getCurrentFrameNumber(); + return static_cast(sync); +} + +bool vulkan_sync_wait(gr_sync sync, uint64_t /*timeoutns*/) +{ + if (!sync) { + return true; + } + + auto* renderer = getRendererInstance(); + auto* syncObj = static_cast(sync); + + // Wait on the specific frame's fence (no-op if already complete) + renderer->waitForFrame(syncObj->frameNumber); + return true; +} + +void vulkan_sync_delete(gr_sync sync) +{ + if (sync) { + delete static_cast(sync); + } +} + +// ========== Screen capture (save/restore, screenshots) ========== + +ubyte* Vulkan_saved_screen = nullptr; +int Vulkan_saved_screen_id = -1; + +int vulkan_save_screen() +{ + if (Vulkan_saved_screen) { + // Already have a saved screen + return -1; + } + + ubyte* pixels = nullptr; + uint32_t w, h; + if (!renderer_instance->readbackFramebuffer(&pixels, &w, &h)) { + return -1; + } + + int bmpId = bm_create(32, static_cast(w), static_cast(h), pixels, 0); + if (bmpId < 0) { + vm_free(pixels); + return -1; + } + + Vulkan_saved_screen = pixels; + Vulkan_saved_screen_id = bmpId; + return Vulkan_saved_screen_id; +} + +void vulkan_restore_screen(int bmp_id) +{ + gr_reset_clip(); + + if (!Vulkan_saved_screen) { + gr_clear(); + return; + } + + Assert((bmp_id < 0) || (bmp_id == Vulkan_saved_screen_id)); + + if (Vulkan_saved_screen_id < 0) { + return; + } + + gr_set_bitmap(Vulkan_saved_screen_id); + gr_bitmap(0, 0, GR_RESIZE_NONE); +} + +void vulkan_free_screen(int bmp_id) +{ + if (!Vulkan_saved_screen) { + return; + } + + vm_free(Vulkan_saved_screen); + Vulkan_saved_screen = nullptr; + + Assert((bmp_id < 0) || (bmp_id == Vulkan_saved_screen_id)); + + if (Vulkan_saved_screen_id >= 0) { + bm_release(Vulkan_saved_screen_id); + Vulkan_saved_screen_id = -1; + } +} + +// Swizzle BGRA→RGBA in-place for PNG output (swap chain is B8G8R8A8) +void swizzle_bgra_to_rgba(ubyte* pixels, 
size_t pixelCount) +{ + for (size_t i = 0; i < pixelCount; i++) { + size_t off = i * 4; + std::swap(pixels[off + 0], pixels[off + 2]); + } +} + +void vulkan_print_screen(const char* filename) +{ + ubyte* pixels = nullptr; + uint32_t w, h; + if (!renderer_instance->readbackFramebuffer(&pixels, &w, &h)) { + return; + } + + swizzle_bgra_to_rgba(pixels, static_cast(w) * h); + + char tmp[MAX_PATH_LEN]; + snprintf(tmp, MAX_PATH_LEN - 1, "screenshots/%s.png", filename); + + _mkdir(os_get_config_path("screenshots").c_str()); + + if (!png_write_bitmap(os_get_config_path(tmp).c_str(), w, h, false, pixels)) { + ReleaseWarning(LOCATION, "Failed to write screenshot to \"%s\".", os_get_config_path(tmp).c_str()); + } + + vm_free(pixels); +} + +SCP_string vulkan_blob_screen() +{ + ubyte* pixels = nullptr; + uint32_t w, h; + if (!renderer_instance->readbackFramebuffer(&pixels, &w, &h)) { + return ""; + } + + swizzle_bgra_to_rgba(pixels, static_cast(w) * h); + + SCP_string result = png_b64_bitmap(w, h, false, pixels); + + vm_free(pixels); + + return "data:image/png;base64," + result; +} + +// get_region: intentional no-op. The only caller is neb2_pre_render() in +// NEB2_RENDER_POF mode, which renders a 32x32 background thumbnail into a +// CPU buffer that is never actually read — the pixel data, ex_scale, and +// ey_scale it computes have no consumers. Modern nebula rendering uses +// NEB2_RENDER_HTL (fog color + gr_clear) and doesn't need get_region at all. 
// ========== Function pointer table ==========
// Implementations are defined in their respective files:
// VulkanDraw.cpp, VulkanBuffer.cpp, VulkanTexture.cpp, VulkanShader.cpp, VulkanState.cpp

/**
 * @brief Install the Vulkan backend's entry points into the global gr_screen table.
 *
 * Every gf_* slot assigned here points either at a vulkan_* implementation or
 * at one of the stub_* placeholders defined in this file (envmap dump,
 * additional viewports, OpenXR). gf_flip is not assigned here; it is
 * installed separately by initialize().
 */
void init_function_pointers()
{
	// function pointers...
	gr_screen.gf_setup_frame = vulkan_setup_frame;
	gr_screen.gf_set_clip = vulkan_set_clip;
	gr_screen.gf_reset_clip = vulkan_reset_clip;

	gr_screen.gf_clear = vulkan_clear;

	gr_screen.gf_print_screen = vulkan_print_screen;
	gr_screen.gf_blob_screen = vulkan_blob_screen;

	gr_screen.gf_zbuffer_get = vulkan_zbuffer_get;
	gr_screen.gf_zbuffer_set = vulkan_zbuffer_set;
	gr_screen.gf_zbuffer_clear = vulkan_zbuffer_clear;

	gr_screen.gf_stencil_set = vulkan_stencil_set;
	gr_screen.gf_stencil_clear = vulkan_stencil_clear;

	gr_screen.gf_alpha_mask_set = vulkan_alpha_mask_set;

	gr_screen.gf_save_screen = vulkan_save_screen;
	gr_screen.gf_restore_screen = vulkan_restore_screen;
	gr_screen.gf_free_screen = vulkan_free_screen;

	// Intentional no-op in this backend
	gr_screen.gf_get_region = vulkan_get_region;

	// now for the bitmap functions
	gr_screen.gf_bm_free_data = vulkan_bm_free_data;
	gr_screen.gf_bm_create = vulkan_bm_create;
	gr_screen.gf_bm_init = vulkan_bm_init;
	gr_screen.gf_bm_page_in_start = vulkan_bm_page_in_start;
	gr_screen.gf_bm_data = vulkan_bm_data;
	gr_screen.gf_bm_make_render_target = vulkan_bm_make_render_target;
	gr_screen.gf_bm_set_render_target = vulkan_bm_set_render_target;

	gr_screen.gf_set_cull = vulkan_set_cull;
	gr_screen.gf_set_color_buffer = vulkan_set_color_buffer;

	gr_screen.gf_set_clear_color = vulkan_set_clear_color;

	gr_screen.gf_preload = vulkan_preload;

	gr_screen.gf_set_texture_addressing = vulkan_set_texture_addressing;
	gr_screen.gf_zbias = vulkan_zbias;
	gr_screen.gf_set_fill_mode = vulkan_set_fill_mode;

	// GPU buffer management
	gr_screen.gf_create_buffer = vulkan_create_buffer;
	gr_screen.gf_delete_buffer = vulkan_delete_buffer;

	gr_screen.gf_update_transform_buffer = vulkan_update_transform_buffer;
	gr_screen.gf_update_buffer_data = vulkan_update_buffer_data;
	gr_screen.gf_update_buffer_data_offset = vulkan_update_buffer_data_offset;
	gr_screen.gf_map_buffer = vulkan_map_buffer;
	gr_screen.gf_flush_mapped_buffer = vulkan_flush_mapped_buffer;

	// Post-processing
	gr_screen.gf_post_process_set_effect = vulkan_post_process_set_effect;
	gr_screen.gf_post_process_set_defaults = vulkan_post_process_set_defaults;

	gr_screen.gf_post_process_begin = vulkan_post_process_begin;
	gr_screen.gf_post_process_end = vulkan_post_process_end;
	gr_screen.gf_post_process_save_zbuffer = vulkan_post_process_save_zbuffer;
	gr_screen.gf_post_process_restore_zbuffer = vulkan_post_process_restore_zbuffer;

	gr_screen.gf_scene_texture_begin = vulkan_scene_texture_begin;
	gr_screen.gf_scene_texture_end = vulkan_scene_texture_end;
	gr_screen.gf_copy_effect_texture = vulkan_copy_effect_texture;

	// Deferred lighting
	gr_screen.gf_deferred_lighting_begin = vulkan_deferred_lighting_begin;
	gr_screen.gf_deferred_lighting_msaa = vulkan_deferred_lighting_msaa;
	gr_screen.gf_deferred_lighting_end = vulkan_deferred_lighting_end;
	gr_screen.gf_deferred_lighting_finish = vulkan_deferred_lighting_finish;

	gr_screen.gf_calculate_irrmap = vulkan_calculate_irrmap;
	// Placeholder: stub_dump_envmap has an empty body
	gr_screen.gf_dump_envmap = stub_dump_envmap;
	gr_screen.gf_override_fog = vulkan_override_fog;

	gr_screen.gf_imgui_new_frame = vulkan_imgui_new_frame;
	gr_screen.gf_imgui_render_draw_data = vulkan_imgui_render_draw_data;

	gr_screen.gf_set_line_width = vulkan_set_line_width;

	gr_screen.gf_sphere = vulkan_draw_sphere;

	gr_screen.gf_shadow_map_start = vulkan_shadow_map_start;
	gr_screen.gf_shadow_map_end = vulkan_shadow_map_end;

	gr_screen.gf_start_decal_pass = vulkan_start_decal_pass;
	gr_screen.gf_stop_decal_pass = vulkan_stop_decal_pass;
	gr_screen.gf_render_decals = vulkan_render_decals;

	gr_screen.gf_render_shield_impact = vulkan_render_shield_impact;

	gr_screen.gf_maybe_create_shader = vulkan_maybe_create_shader;
	gr_screen.gf_recompile_all_shaders = vulkan_recompile_all_shaders;

	gr_screen.gf_clear_states = vulkan_clear_states;

	gr_screen.gf_update_texture = vulkan_update_texture;
	gr_screen.gf_get_bitmap_from_texture = vulkan_get_bitmap_from_texture;

	// Draw entry points
	gr_screen.gf_render_model = vulkan_render_model;
	gr_screen.gf_render_primitives = vulkan_render_primitives;
	gr_screen.gf_render_primitives_particle = vulkan_render_primitives_particle;
	gr_screen.gf_render_primitives_distortion = vulkan_render_primitives_distortion;
	gr_screen.gf_render_movie = vulkan_render_movie;
	gr_screen.gf_render_nanovg = vulkan_render_nanovg;
	gr_screen.gf_render_primitives_batched = vulkan_render_primitives_batched;
	gr_screen.gf_render_rocket_primitives = vulkan_render_rocket_primitives;

	gr_screen.gf_is_capable = vulkan_is_capable;
	gr_screen.gf_get_property = vulkan_get_property;

	gr_screen.gf_push_debug_group = vulkan_push_debug_group;
	gr_screen.gf_pop_debug_group = vulkan_pop_debug_group;

	gr_screen.gf_create_query_object = vulkan_create_query_object;
	gr_screen.gf_query_value = vulkan_query_value;
	gr_screen.gf_query_value_available = vulkan_query_value_available;
	gr_screen.gf_get_query_value = vulkan_get_query_value;
	gr_screen.gf_delete_query_object = vulkan_delete_query_object;

	// Placeholders: stub_create_viewport returns an empty pointer and
	// stub_use_viewport does nothing
	gr_screen.gf_create_viewport = stub_create_viewport;
	gr_screen.gf_use_viewport = stub_use_viewport;

	gr_screen.gf_bind_uniform_buffer = vulkan_bind_uniform_buffer;

	gr_screen.gf_sync_fence = vulkan_sync_fence;
	gr_screen.gf_sync_wait = vulkan_sync_wait;
	gr_screen.gf_sync_delete = vulkan_sync_delete;

	gr_screen.gf_set_viewport = vulkan_set_viewport;

	// OpenXR placeholders: each stub reports failure / an empty result
	gr_screen.gf_openxr_get_extensions = stub_openxr_get_extensions;
	gr_screen.gf_openxr_test_capabilities = stub_openxr_test_capabilities;
	gr_screen.gf_openxr_create_session = stub_openxr_create_session;
	gr_screen.gf_openxr_get_swapchain_format = stub_openxr_get_swapchain_format;
	gr_screen.gf_openxr_acquire_swapchain_buffers = stub_openxr_acquire_swapchain_buffers;
	gr_screen.gf_openxr_flip = stub_openxr_flip;
}
+ gr_setup_frame(); - // Nothing else is finished so always fail here - mprintf(("Vulkan support is not finished yet so graphics initialization will always fail...\n")); + mprintf(("Vulkan: Initialization complete\n")); return true; } @@ -46,5 +517,4 @@ void cleanup() renderer_instance = nullptr; } -} // namespace vulkan -} // namespace graphics +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/gr_vulkan.h b/code/graphics/vulkan/gr_vulkan.h index e91451a7d5e..f3b8f1424e4 100644 --- a/code/graphics/vulkan/gr_vulkan.h +++ b/code/graphics/vulkan/gr_vulkan.h @@ -2,8 +2,7 @@ #include "osapi/osapi.h" -namespace graphics { -namespace vulkan { +namespace graphics::vulkan { class VulkanRenderer; @@ -14,5 +13,4 @@ VulkanRenderer* getRendererInstance(); void cleanup(); -} // namespace vulkan -} // namespace graphics +} // namespace graphics::vulkan diff --git a/code/graphics/vulkan/vulkan_stubs.cpp b/code/graphics/vulkan/vulkan_stubs.cpp deleted file mode 100644 index a757ed553ef..00000000000 --- a/code/graphics/vulkan/vulkan_stubs.cpp +++ /dev/null @@ -1,395 +0,0 @@ -#include "vulkan_stubs.h" - -#include "graphics/2d.h" - -#define BMPMAN_INTERNAL -#include "bmpman/bm_internal.h" - -namespace graphics { -namespace vulkan { - -namespace { - -gr_buffer_handle stub_create_buffer(BufferType, BufferUsageHint) -{ - return gr_buffer_handle::invalid(); -} - -void stub_setup_frame() {} - -void stub_delete_buffer(gr_buffer_handle /*handle*/) {} - -int stub_preload(int /*bitmap_num*/, int /*is_aabitmap*/) { return 0; } - -int stub_save_screen() { return 1; } - -int stub_zbuffer_get() { return 0; } - -int stub_zbuffer_set(int /*mode*/) { return 0; } - -void gr_set_fill_mode_stub(int /*mode*/) {} - -void stub_clear() {} - -void stub_free_screen(int /*id*/) {} - -void stub_get_region(int /*front*/, int /*w*/, int /*h*/, ubyte* /*data*/) {} - -void stub_print_screen(const char* /*filename*/) {} - -SCP_string stub_blob_screen() { return ""; } - -void stub_reset_clip() {} - 
-void stub_restore_screen(int /*id*/) {} - -void stub_update_buffer_data(gr_buffer_handle /*handle*/, size_t /*size*/, const void* /*data*/) {} - -void stub_update_buffer_data_offset(gr_buffer_handle /*handle*/, - size_t /*offset*/, - size_t /*size*/, - const void* /*data*/) -{ -} - -void stub_update_transform_buffer(void* /*data*/, size_t /*size*/) {} - -void stub_set_clear_color(int /*r*/, int /*g*/, int /*b*/) {} - -void stub_set_clip(int /*x*/, int /*y*/, int /*w*/, int /*h*/, int /*resize_mode*/) {} - -int stub_set_cull(int /*cull*/) { return 0; } - -int stub_set_color_buffer(int /*mode*/) { return 0; } - -void stub_set_texture_addressing(int /*mode*/) {} - -void stub_zbias_stub(int /*bias*/) {} - -void stub_zbuffer_clear(int /*mode*/) {} - -int stub_stencil_set(int /*mode*/) { return 0; } - -void stub_stencil_clear() {} - -int stub_alpha_mask_set(int /*mode*/, float /*alpha*/) { return 0; } - -void stub_post_process_set_effect(const char* /*name*/, int /*x*/, const vec3d* /*rgb*/) {} - -void stub_post_process_set_defaults() {} - -void stub_post_process_save_zbuffer() {} - -void stub_post_process_begin() {} - -void stub_post_process_end() {} - -void stub_scene_texture_begin() {} - -void stub_scene_texture_end() {} - -void stub_copy_effect_texture() {} - -void stub_deferred_lighting_begin(bool /*clearNonColorBufs*/) {} - -void stub_deferred_lighting_msaa() {} - -void stub_deferred_lighting_end() {} - -void stub_deferred_lighting_finish() {} - -void stub_set_line_width(float /*width*/) {} - -void stub_draw_sphere(material* /*material_def*/, float /*rad*/) {} - -void stub_clear_states() {} - -void stub_update_texture(int /*bitmap_handle*/, int /*bpp*/, const ubyte* /*data*/, int /*width*/, int /*height*/) {} - -void stub_get_bitmap_from_texture(void* /*data_out*/, int /*bitmap_num*/) {} - -int stub_bm_make_render_target(int /*n*/, int* /*width*/, int* /*height*/, int* /*bpp*/, int* /*mm_lvl*/, int /*flags*/) -{ - return 0; -} - -int stub_bm_set_render_target(int 
/*n*/, int /*face*/) { return 0; } - -void stub_bm_create(bitmap_slot* /*slot*/) {} - -void stub_bm_free_data(bitmap_slot* /*slot*/, bool /*release*/) {} - -void stub_bm_init(bitmap_slot* /*slot*/) {} - -void stub_bm_page_in_start() {} - -bool stub_bm_data(int /*n*/, bitmap* /*bm*/) { return true; } - -int stub_maybe_create_shader(shader_type /*shader_t*/, unsigned int /*flags*/) { return -1; } - -void stub_shadow_map_start(matrix4* /*shadow_view_matrix*/, const matrix* /*light_matrix*/, vec3d* /*eye_pos*/) {} - -void stub_shadow_map_end() {} - -void stub_start_decal_pass() {} -void stub_stop_decal_pass() {} -void stub_render_decals(decal_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*num_elements*/, - const indexed_vertex_source& /*buffers*/, - const gr_buffer_handle& /*instance_buffer*/, - int /*num_instances*/) {} - -void stub_render_shield_impact(shield_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - gr_buffer_handle /*buffer_handle*/, - int /*n_verts*/) -{ -} - -void stub_render_model(model_material* /*material_info*/, - indexed_vertex_source* /*vert_source*/, - vertex_buffer* /*bufferp*/, - size_t /*texi*/) -{ -} - -void stub_render_primitives(material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/, - size_t /*buffer_offset*/) -{ -} - -void stub_render_primitives_particle(particle_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} - -void stub_render_primitives_distortion(distortion_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} -void stub_render_movie(movie_material* /*material_info*/, - primitive_type /*prim_type*/, - 
vertex_layout* /*layout*/, - int /*n_verts*/, - gr_buffer_handle /*buffer*/, - size_t /*buffer_offset*/) -{ -} - -void stub_render_nanovg(nanovg_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} - -void stub_render_primitives_batched(batched_bitmap_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} - -void stub_render_rocket_primitives(interface_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*n_indices*/, - gr_buffer_handle /*vertex_buffer*/, - gr_buffer_handle /*index_buffer*/) -{ -} - -bool stub_is_capable(gr_capability /*capability*/) { return false; } -bool stub_get_property(gr_property p, void* dest) -{ - if (p == gr_property::UNIFORM_BUFFER_OFFSET_ALIGNMENT) { - // This is required by the startup code of the uniform buffer manager - *reinterpret_cast(dest) = 4; - return true; - } - return false; -}; - -void stub_push_debug_group(const char*) {} - -void stub_pop_debug_group() {} - -int stub_create_query_object() { return -1; } - -void stub_query_value(int /*obj*/, QueryType /*type*/) {} - -bool stub_query_value_available(int /*obj*/) { return false; } - -std::uint64_t stub_get_query_value(int /*obj*/) { return 0; } - -void stub_delete_query_object(int /*obj*/) {} - -SCP_vector stub_openxr_get_extensions() { return {}; } - -bool stub_openxr_test_capabilities() { return false; } - -bool stub_openxr_create_session() { return false; } - -int64_t stub_openxr_get_swapchain_format(const SCP_vector& /*allowed*/) { return 0; } - -bool stub_openxr_acquire_swapchain_buffers() { return false; } - -bool stub_openxr_flip() { return false; } - -} // namespace - -void init_stub_pointers() -{ - // function pointers... 
- gr_screen.gf_setup_frame = stub_setup_frame; - gr_screen.gf_set_clip = stub_set_clip; - gr_screen.gf_reset_clip = stub_reset_clip; - - gr_screen.gf_clear = stub_clear; - - gr_screen.gf_print_screen = stub_print_screen; - gr_screen.gf_blob_screen = stub_blob_screen; - - gr_screen.gf_zbuffer_get = stub_zbuffer_get; - gr_screen.gf_zbuffer_set = stub_zbuffer_set; - gr_screen.gf_zbuffer_clear = stub_zbuffer_clear; - - gr_screen.gf_stencil_set = stub_stencil_set; - gr_screen.gf_stencil_clear = stub_stencil_clear; - - gr_screen.gf_alpha_mask_set = stub_alpha_mask_set; - - gr_screen.gf_save_screen = stub_save_screen; - gr_screen.gf_restore_screen = stub_restore_screen; - gr_screen.gf_free_screen = stub_free_screen; - - gr_screen.gf_get_region = stub_get_region; - - // now for the bitmap functions - gr_screen.gf_bm_free_data = stub_bm_free_data; - gr_screen.gf_bm_create = stub_bm_create; - gr_screen.gf_bm_init = stub_bm_init; - gr_screen.gf_bm_page_in_start = stub_bm_page_in_start; - gr_screen.gf_bm_data = stub_bm_data; - gr_screen.gf_bm_make_render_target = stub_bm_make_render_target; - gr_screen.gf_bm_set_render_target = stub_bm_set_render_target; - - gr_screen.gf_set_cull = stub_set_cull; - gr_screen.gf_set_color_buffer = stub_set_color_buffer; - - gr_screen.gf_set_clear_color = stub_set_clear_color; - - gr_screen.gf_preload = stub_preload; - - gr_screen.gf_set_texture_addressing = stub_set_texture_addressing; - gr_screen.gf_zbias = stub_zbias_stub; - gr_screen.gf_set_fill_mode = gr_set_fill_mode_stub; - - gr_screen.gf_create_buffer = stub_create_buffer; - gr_screen.gf_delete_buffer = stub_delete_buffer; - - gr_screen.gf_update_transform_buffer = stub_update_transform_buffer; - gr_screen.gf_update_buffer_data = stub_update_buffer_data; - gr_screen.gf_update_buffer_data_offset = stub_update_buffer_data_offset; - gr_screen.gf_map_buffer = [](gr_buffer_handle) -> void* { return nullptr; }; - gr_screen.gf_flush_mapped_buffer = [](gr_buffer_handle, size_t, size_t) {}; - - 
gr_screen.gf_post_process_set_effect = stub_post_process_set_effect; - gr_screen.gf_post_process_set_defaults = stub_post_process_set_defaults; - - gr_screen.gf_post_process_begin = stub_post_process_begin; - gr_screen.gf_post_process_end = stub_post_process_end; - gr_screen.gf_post_process_save_zbuffer = stub_post_process_save_zbuffer; - gr_screen.gf_post_process_restore_zbuffer = []() {}; - - gr_screen.gf_scene_texture_begin = stub_scene_texture_begin; - gr_screen.gf_scene_texture_end = stub_scene_texture_end; - gr_screen.gf_copy_effect_texture = stub_copy_effect_texture; - - gr_screen.gf_deferred_lighting_begin = stub_deferred_lighting_begin; - gr_screen.gf_deferred_lighting_msaa = stub_deferred_lighting_msaa; - gr_screen.gf_deferred_lighting_end = stub_deferred_lighting_end; - gr_screen.gf_deferred_lighting_finish = stub_deferred_lighting_finish; - - gr_screen.gf_set_line_width = stub_set_line_width; - - gr_screen.gf_sphere = stub_draw_sphere; - - gr_screen.gf_shadow_map_start = stub_shadow_map_start; - gr_screen.gf_shadow_map_end = stub_shadow_map_end; - - gr_screen.gf_start_decal_pass = stub_start_decal_pass; - gr_screen.gf_stop_decal_pass = stub_stop_decal_pass; - gr_screen.gf_render_decals = stub_render_decals; - - gr_screen.gf_render_shield_impact = stub_render_shield_impact; - - gr_screen.gf_maybe_create_shader = stub_maybe_create_shader; - - gr_screen.gf_clear_states = stub_clear_states; - - gr_screen.gf_update_texture = stub_update_texture; - gr_screen.gf_get_bitmap_from_texture = stub_get_bitmap_from_texture; - - gr_screen.gf_render_model = stub_render_model; - gr_screen.gf_render_primitives = stub_render_primitives; - gr_screen.gf_render_primitives_particle = stub_render_primitives_particle; - gr_screen.gf_render_primitives_distortion = stub_render_primitives_distortion; - gr_screen.gf_render_movie = stub_render_movie; - gr_screen.gf_render_nanovg = stub_render_nanovg; - gr_screen.gf_render_primitives_batched = stub_render_primitives_batched; - 
gr_screen.gf_render_rocket_primitives = stub_render_rocket_primitives; - - gr_screen.gf_is_capable = stub_is_capable; - gr_screen.gf_get_property = stub_get_property; - - gr_screen.gf_push_debug_group = stub_push_debug_group; - gr_screen.gf_pop_debug_group = stub_pop_debug_group; - - gr_screen.gf_create_query_object = stub_create_query_object; - gr_screen.gf_query_value = stub_query_value; - gr_screen.gf_query_value_available = stub_query_value_available; - gr_screen.gf_get_query_value = stub_get_query_value; - gr_screen.gf_delete_query_object = stub_delete_query_object; - - gr_screen.gf_create_viewport = [](const os::ViewPortProperties&) { return std::unique_ptr(); }; - gr_screen.gf_use_viewport = [](os::Viewport*) {}; - - gr_screen.gf_bind_uniform_buffer = [](uniform_block_type, size_t, size_t, gr_buffer_handle) {}; - - gr_screen.gf_sync_fence = []() -> gr_sync { return nullptr; }; - gr_screen.gf_sync_wait = [](gr_sync /*sync*/, uint64_t /*timeoutns*/) { return true; }; - gr_screen.gf_sync_delete = [](gr_sync /*sync*/) {}; - - gr_screen.gf_set_viewport = [](int /*x*/, int /*y*/, int /*width*/, int /*height*/) {}; - - gr_screen.gf_openxr_get_extensions = stub_openxr_get_extensions; - gr_screen.gf_openxr_test_capabilities = stub_openxr_test_capabilities; - gr_screen.gf_openxr_create_session = stub_openxr_create_session; - gr_screen.gf_openxr_get_swapchain_format = stub_openxr_get_swapchain_format; - gr_screen.gf_openxr_acquire_swapchain_buffers = stub_openxr_acquire_swapchain_buffers; - gr_screen.gf_openxr_flip = stub_openxr_flip; -} - -} // namespace vulkan -} // namespace graphics diff --git a/code/graphics/vulkan/vulkan_stubs.h b/code/graphics/vulkan/vulkan_stubs.h deleted file mode 100644 index caff77f7655..00000000000 --- a/code/graphics/vulkan/vulkan_stubs.h +++ /dev/null @@ -1,8 +0,0 @@ - -namespace graphics { -namespace vulkan { - -void init_stub_pointers(); - -} -} // namespace graphics diff --git a/code/lab/labv2_internal.h b/code/lab/labv2_internal.h 
index 0a2042512f7..d237e7d86b2 100644 --- a/code/lab/labv2_internal.h +++ b/code/lab/labv2_internal.h @@ -2,7 +2,6 @@ #include "lab/manager/lab_manager.h" #include "imconfig.h" #include "imgui.h" -#include "backends/imgui_impl_opengl3.h" #include "backends/imgui_impl_sdl.h" #include "extensions/imgui_sugar.hpp" diff --git a/code/lab/manager/lab_manager.cpp b/code/lab/manager/lab_manager.cpp index ed585aa813c..d97d4459f2c 100644 --- a/code/lab/manager/lab_manager.cpp +++ b/code/lab/manager/lab_manager.cpp @@ -112,8 +112,7 @@ void LabManager::resetGraphicsSettings() { } void LabManager::onFrame(float frametime) { - if (gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_NewFrame(); + gr_imgui_new_frame(); ImGui_ImplSDL2_NewFrame(gr_screen.max_w, gr_screen.max_h); ImGui::NewFrame(); @@ -417,8 +416,7 @@ void LabManager::onFrame(float frametime) { if (Cmdline_show_imgui_debug) ImGui::ShowDemoWindow(); ImGui::Render(); - if (gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); + gr_imgui_render_draw_data(); if (CloseThis) close(); diff --git a/code/model/modelinterp.cpp b/code/model/modelinterp.cpp index e217e0e30f8..583f2fb9d29 100644 --- a/code/model/modelinterp.cpp +++ b/code/model/modelinterp.cpp @@ -2402,8 +2402,7 @@ void interp_create_transparency_index_buffer(polymodel *pm, int mn) SCP_vector &tex_buffers = pm->submodel[mn].buffer.tex_buf; uint current_tri[NUM_VERTS_PER_TRI]; - bool transparent_tri = false; - int num_tris = 0; + SCP_vector transparent_indices; for ( int i = 0; i < (int)tex_buffers.size(); ++i ) { buffer_data *tex_buf = &tex_buffers[i]; @@ -2433,37 +2432,33 @@ void interp_create_transparency_index_buffer(polymodel *pm, int mn) continue; } - SCP_vector transparent_indices; - - transparent_tri = false; - num_tris = 0; + bool transparent_tri = false; + transparent_indices.clear(); for ( size_t j = 0; j < tex_buf->n_verts; ++j ) { - uint index = indices[j]; + if ( j % NUM_VERTS_PER_TRI == 0 && j != 0 && transparent_tri ) 
{ + transparent_tri = false; + + // we have a triangle and it's transparent. + // shove index into the transparency buffer + transparent_indices.push_back(current_tri[0]); + transparent_indices.push_back(current_tri[1]); + transparent_indices.push_back(current_tri[2]); + } + + const uint index = indices[j]; // need the uv coords of the vert at this index float u = model_list->vert[index].texture_position.u; float v = model_list->vert[index].texture_position.v; if ( texture_lookup.get_channel_alpha(u, v) < 0.95f) { + // temporarily(?) reduced from 0.95f to 0.75f due to certain models (MVP 4.7.3 Triton) having the entire diffuse texture with alpha values less than 1.0f + // which ended up putting the entire geometry into the transparency pass. Somehow looked fine in OpenGL but we have to do this for Vulkan. For now? transparent_tri = true; } - current_tri[num_tris] = index; - num_tris++; - - if ( num_tris == NUM_VERTS_PER_TRI ) { - if ( transparent_tri ) { - // we have a triangle and it's transparent. 
- // shove index into the transparency buffer - transparent_indices.push_back(current_tri[0]); - transparent_indices.push_back(current_tri[1]); - transparent_indices.push_back(current_tri[2]); - } - - transparent_tri = false; - num_tris = 0; - } + current_tri[j % NUM_VERTS_PER_TRI] = index; } if ( transparent_indices.empty() ) { @@ -2478,6 +2473,10 @@ void interp_create_transparency_index_buffer(polymodel *pm, int mn) buffer_data &new_buff = trans_buffer->tex_buf.back(); new_buff.texture = tex_buf->texture; + if ( transparent_indices.size() > USHRT_MAX ) { + new_buff.flags |= VB_FLAG_LARGE_INDEX; + } + for ( int j = 0; j < (int)transparent_indices.size(); ++j ) { new_buff.assign(j, transparent_indices[j]); } diff --git a/code/model/modelrender.cpp b/code/model/modelrender.cpp index 681aac11e93..b7fcbc10d4a 100644 --- a/code/model/modelrender.cpp +++ b/code/model/modelrender.cpp @@ -1322,7 +1322,7 @@ void model_render_buffers(model_draw_list* scene, model_material *rendering_mate } rendering_material->set_depth_mode(depth_mode); - rendering_material->set_blend_mode(blend_mode); + rendering_material->set_blend_mode(depth_mode == ZBUFFER_TYPE_FULL ? ALPHA_BLEND_NONE : blend_mode); color clr = interp->get_color(); model_render_determine_color(&clr, alpha, blend_mode, no_texturing ? 
true : false, rendering_material->is_desaturated()); diff --git a/code/options/Ingame_Options_internal.h b/code/options/Ingame_Options_internal.h index 1d43d9b62a8..aa9982c5008 100644 --- a/code/options/Ingame_Options_internal.h +++ b/code/options/Ingame_Options_internal.h @@ -2,7 +2,6 @@ #include "options/manager/ingame_options_manager.h" #include "imconfig.h" #include "imgui.h" -#include "backends/imgui_impl_opengl3.h" #include "backends/imgui_impl_sdl.h" #include "extensions/imgui_sugar.hpp" diff --git a/code/options/manager/ingame_options_manager.cpp b/code/options/manager/ingame_options_manager.cpp index bd14f6dbb6e..320d22f8a2d 100644 --- a/code/options/manager/ingame_options_manager.cpp +++ b/code/options/manager/ingame_options_manager.cpp @@ -138,8 +138,7 @@ void OptConfigurator::offer_save_options_popup() // The main Imgui rendering happens here as well as any i/o checking void OptConfigurator::onFrame() { - if (gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_NewFrame(); + gr_imgui_new_frame(); ImGui_ImplSDL2_NewFrame(gr_screen.max_w, gr_screen.max_h); ImGui::NewFrame(); @@ -178,8 +177,7 @@ void OptConfigurator::onFrame() { if (Cmdline_show_imgui_debug) ImGui::ShowDemoWindow(); ImGui::Render(); - if (gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); + gr_imgui_render_draw_data(); if (CloseThis) { close(); diff --git a/code/shaders.cmake b/code/shaders.cmake index 4c7763a2e62..8cab87b63ed 100644 --- a/code/shaders.cmake +++ b/code/shaders.cmake @@ -1,37 +1,46 @@ -set(SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/graphics/shaders") -# This is the legacy location of shader code. 
To avoid duplicating included files, this is added as an include directory -set(LEGACY_SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/def_files/data/effects") +set(SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/def_files/data/effects") -set(SHADERS - ${SHADER_DIR}/default-material.frag - ${SHADER_DIR}/default-material.vert - ${SHADER_DIR}/vulkan.frag - ${SHADER_DIR}/vulkan.vert -) +# All Vulkan GLSL shaders live alongside OpenGL shaders in def_files/data/effects/*.sdr, +# unified with #ifdef VULKAN / #ifdef OPENGL guards. They are embedded into the executable +# via source_groups.cmake (target_embed_files) and compiled to SPIR-V at runtime via shaderc. -target_sources(code PRIVATE ${SHADERS}) -source_group("Graphics\\Shaders" FILES ${SHADERS}) +# Shaders that need C++ struct header generation from SPIR-V reflection. +# Generated structs are included via shader_structs.h for compile-time layout validation. +set(SHADERS_NEED_STRUCT_GEN + ${SHADER_DIR}/default-material-f.sdr + ${SHADER_DIR}/default-material-v.sdr +) +# Struct header generation via shadertool (SPIR-V reflection). +# When SHADERS_ENABLE_COMPILATION is ON, shaders in SHADERS_NEED_STRUCT_GEN are +# compiled to temporary SPIR-V with glslc, then shadertool generates C++ struct +# headers from the reflection data. The generated headers are checked into VCS +# so that builds without glslc/shadertool still work. 
set(_structHeaderList) +set(_shaderCompiledDir "${CMAKE_CURRENT_SOURCE_DIR}/graphics/shaders/compiled") -foreach (_shader ${SHADERS}) - if ("${_shader}" MATCHES "\\.glsl$") - # Ignore include files since they will only be used but not compiled - continue() - endif () - +foreach (_shader ${SHADERS_NEED_STRUCT_GEN}) get_filename_component(_fileName "${_shader}" NAME) + get_filename_component(_baseShaderName "${_shader}" NAME_WE) - # We write the compiled/generated shader files to the source directory so that they can be included in the VCS - # This way, it is not necessary to have the tools for compiling shaders when doing non-shader related work - set(_shaderOutputDir "${CMAKE_CURRENT_SOURCE_DIR}/graphics/shaders/compiled") - set(_spirvFile "${_shaderOutputDir}/${_fileName}.spv") + # Determine shader stage from filename convention: *-v.sdr = vertex, *-f.sdr = fragment + string(REGEX MATCH "-([vf])\\.sdr$" _match "${_fileName}") + if (CMAKE_MATCH_1 STREQUAL "v") + set(_stage "vertex") + set(_structSuffix ".vert.h") + else() + set(_stage "fragment") + set(_structSuffix ".frag.h") + endif() - get_filename_component(_baseShaderName "${_shader}" NAME_WE) - get_filename_component(_shaderExt "${_shader}" EXT) + # Map e.g. 
"default-material-f" to "default-material_structs.frag.h" + string(REGEX REPLACE "-[vf]$" "" _baseName "${_baseShaderName}") + set(_structOutput "${_shaderCompiledDir}/${_baseName}_structs${_structSuffix}") + list(APPEND _structHeaderList "${_structOutput}") if (TARGET glslc) + set(_spirvFile "${CMAKE_CURRENT_BINARY_DIR}/shaders/${_fileName}.spv") set(_depFileDir "${CMAKE_CURRENT_BINARY_DIR}/shaders") set(_depFile "${_depFileDir}/${_fileName}.spv.d") file(RELATIVE_PATH _relativeSpirvPath "${CMAKE_BINARY_DIR}" "${_spirvFile}") @@ -43,36 +52,20 @@ foreach (_shader ${SHADERS}) add_custom_command(OUTPUT "${_spirvFile}" COMMAND ${CMAKE_COMMAND} -E make_directory "${_depFileDir}" - COMMAND glslc "${_shader}" -o "${_spirvFile}" --target-env=vulkan1.0 -O -g "-I${SHADER_DIR}" - "-I${LEGACY_SHADER_DIR}" -MD -MF "${_depFile}" -MT "${_relativeSpirvPath}" -Werror -x glsl - MAIN_DEPENDENCY "${shader}" - COMMENT "Compiling shader ${_fileName}" + COMMAND glslc -x glsl -fshader-stage=${_stage} + "${_shader}" -o "${_spirvFile}" --target-env=vulkan1.0 -std=450 -O -g + "-I${SHADER_DIR}" + -MD -MF "${_depFile}" -MT "${_relativeSpirvPath}" -Werror + MAIN_DEPENDENCY "${_shader}" + COMMENT "Compiling shader ${_fileName} (for struct generation)" ${DEPFILE_PARAM} ) - target_embed_files(code FILES "${_spirvFile}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") - - set(_glslOutput "${_spirvFile}.glsl") - set(_structOutput "${_shaderOutputDir}/${_baseShaderName}_structs${_shaderExt}.h") - - list(APPEND _structHeaderList "${_structOutput}") - - add_custom_command(OUTPUT "${_glslOutput}" "${_structOutput}" - COMMAND shadertool --glsl "--glsl-output=${_glslOutput}" --structs "--structs-output=${_structOutput}" ${_spirvFile} + add_custom_command(OUTPUT "${_structOutput}" + COMMAND shadertool --structs "--structs-output=${_structOutput}" "${_spirvFile}" MAIN_DEPENDENCY "${_spirvFile}" - COMMENT "Processing shader ${_spirvFile}" + COMMENT "Generating struct header from 
${_fileName}" ) - - target_embed_files(code FILES "${_glslOutput}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") - else() - target_embed_files(code FILES "${_spirvFile}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") - - set(_glslOutput "${_spirvFile}.glsl") - set(_structOutput "${_shaderOutputDir}/${_baseShaderName}_structs${_shaderExt}.h") - - list(APPEND _structHeaderList "${_structOutput}") - - target_embed_files(code FILES "${_glslOutput}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") endif() endforeach () diff --git a/code/source_groups.cmake b/code/source_groups.cmake index d1483a75dbc..7f7ce3880dc 100644 --- a/code/source_groups.cmake +++ b/code/source_groups.cmake @@ -231,6 +231,8 @@ add_file_folder("Default files\\\\data\\\\effects" def_files/data/effects/brightpass-f.sdr def_files/data/effects/copy-f.sdr def_files/data/effects/decal-f.sdr + def_files/data/effects/default-material-f.sdr + def_files/data/effects/default-material-v.sdr def_files/data/effects/decal-v.sdr def_files/data/effects/deferred-clear-f.sdr def_files/data/effects/deferred-clear-v.sdr @@ -469,6 +471,10 @@ add_file_folder("Graphics" graphics/post_processing.h graphics/render.cpp graphics/render.h + graphics/shader_preprocess.cpp + graphics/shader_preprocess.h + graphics/shader_types.cpp + graphics/shader_types.h graphics/shadows.cpp graphics/shadows.h graphics/tmapper.h @@ -564,6 +570,8 @@ add_file_folder("Graphics\\\\SoftwareGr\\\\Font" add_file_folder("Graphics\\\\Util" graphics/util/GPUMemoryHeap.cpp graphics/util/GPUMemoryHeap.h + graphics/util/primitives.h + graphics/util/primitives.cpp graphics/util/uniform_structs.h graphics/util/UniformAligner.h graphics/util/UniformAligner.cpp @@ -577,12 +585,52 @@ if (FSO_BUILD_WITH_VULKAN) add_file_folder("Graphics\\\\Vulkan" graphics/vulkan/gr_vulkan.cpp graphics/vulkan/gr_vulkan.h - graphics/vulkan/RenderFrame.cpp - graphics/vulkan/RenderFrame.h - graphics/vulkan/vulkan_stubs.cpp 
- graphics/vulkan/vulkan_stubs.h + graphics/vulkan/VulkanBuffer.cpp + graphics/vulkan/VulkanBuffer.h + graphics/vulkan/VulkanConvert.cpp + graphics/vulkan/VulkanConvert.h + graphics/vulkan/VulkanDeferred.cpp + graphics/vulkan/VulkanDeferred.h + graphics/vulkan/VulkanDeletionQueue.cpp + graphics/vulkan/VulkanDeletionQueue.h + graphics/vulkan/VulkanDescriptorManager.cpp + graphics/vulkan/VulkanDescriptorManager.h + graphics/vulkan/VulkanDraw.cpp + graphics/vulkan/VulkanDraw.h + graphics/vulkan/VulkanMemory.cpp + graphics/vulkan/VulkanMemory.h + graphics/vulkan/VulkanPipeline.cpp + graphics/vulkan/VulkanPipeline.h + graphics/vulkan/VulkanPostProcessing.cpp + graphics/vulkan/VulkanPostProcessing.h + graphics/vulkan/VulkanPostProcessingBloom.cpp + graphics/vulkan/VulkanPostProcessingCommon.cpp + graphics/vulkan/VulkanPostProcessingDistortion.cpp + graphics/vulkan/VulkanPostProcessingFog.cpp + graphics/vulkan/VulkanPostProcessingGBuffer.cpp + graphics/vulkan/VulkanPostProcessingLDR.cpp + graphics/vulkan/VulkanPostProcessingLighting.cpp + graphics/vulkan/VulkanPostProcessingMSAA.cpp + graphics/vulkan/VulkanPostProcessingShadow.cpp + graphics/vulkan/VulkanQuery.cpp + graphics/vulkan/VulkanQuery.h graphics/vulkan/VulkanRenderer.cpp graphics/vulkan/VulkanRenderer.h + graphics/vulkan/VulkanRendererImGui.cpp + graphics/vulkan/VulkanRendererLoop.cpp + graphics/vulkan/VulkanRendererSetup.cpp + graphics/vulkan/VulkanRenderFrame.cpp + graphics/vulkan/VulkanRenderFrame.h + graphics/vulkan/VulkanShader.cpp + graphics/vulkan/VulkanShader.h + graphics/vulkan/VulkanShaderCompiler.cpp + graphics/vulkan/VulkanShaderCompiler.h + graphics/vulkan/VulkanState.cpp + graphics/vulkan/VulkanState.h + graphics/vulkan/VulkanTexture.cpp + graphics/vulkan/VulkanTexture.h + graphics/vulkan/VulkanVertexFormat.cpp + graphics/vulkan/VulkanVertexFormat.h ) endif() diff --git a/code/tracing/tracing.cpp b/code/tracing/tracing.cpp index e88c666c47d..71298784e14 100644 --- a/code/tracing/tracing.cpp +++ 
b/code/tracing/tracing.cpp @@ -1,6 +1,7 @@ #include "tracing/tracing.h" #include "graphics/2d.h" +#include "globalincs/systemvars.h" #include "parse/parselo.h" #include "io/timer.h" @@ -57,9 +58,12 @@ std::unique_ptr mainFrameTimer; std::unique_ptr frameProfiler; SCP_vector query_objects; -// The GPU timestamp queries use an internal free list to reduce the number of graphics API calls +// Free list for backends where queries are immediately reusable (OpenGL). +// When queries are NOT reusable (Vulkan), the free list is bypassed and +// handles are returned to the backend. SCP_queue free_query_objects; bool do_gpu_queries = true; +bool queries_reusable = true; int get_query_object() { if (!free_query_objects.empty()) { @@ -69,7 +73,12 @@ int get_query_object() { } auto id = gr_create_query_object(); - query_objects.push_back(id); + if (queries_reusable) { + // Track for bulk cleanup at shutdown. When not reusable, the backend + // owns the lifecycle — handles are returned via gr_delete_query_object + // and the backend's own shutdown destroys the pool. + query_objects.push_back(id); + } return id; } @@ -83,7 +92,12 @@ int get_gpu_timestamp_query() { } void free_query_object(int obj) { - free_query_objects.push(obj); + if (queries_reusable) { + free_query_objects.push(obj); + } else { + // Backend manages reset lifecycle internally — hand it back. + gr_delete_query_object(obj); + } } struct gpu_trace_event { @@ -231,6 +245,7 @@ void init() { } do_gpu_queries = gr_is_capable(gr_capability::CAPABILITY_TIMESTAMP_QUERY); + queries_reusable = gr_is_capable(gr_capability::CAPABILITY_QUERIES_REUSABLE); if (do_gpu_queries) { gpu_start_query = get_gpu_timestamp_query(); @@ -261,11 +276,23 @@ SCP_string get_frame_profile_output() { } void shutdown() { - while (!gpu_events.empty()) { - process_events(); + if (queries_reusable) { + while (!gpu_events.empty()) { + process_events(); - // Don't do busy waiting... - os_sleep(5); + // Don't do busy waiting... 
+ os_sleep(5); + } + } else { + // Discard remaining GPU events — no more frames will + // be submitted, so unsubmitted queries can never become + // available. + while (!gpu_events.empty()) { + auto& first = gpu_events.front(); + gr_delete_query_object(first.gpu_begin_query); + gr_delete_query_object(first.gpu_end_query); + gpu_events.pop(); + } } for (auto query : query_objects) { @@ -273,6 +300,10 @@ void shutdown() { } query_objects.clear(); + while (!free_query_objects.empty()) { + free_query_objects.pop(); + } + mainFrameTimer = nullptr; traceEventWriter = nullptr; diff --git a/freespace2/SDLGraphicsOperations.cpp b/freespace2/SDLGraphicsOperations.cpp index 6993b285fa2..8bfaced40d9 100644 --- a/freespace2/SDLGraphicsOperations.cpp +++ b/freespace2/SDLGraphicsOperations.cpp @@ -128,7 +128,15 @@ class SDLWindowViewPort: public os::Viewport { SDL_SetWindowBordered(_window, SDL_FALSE); break; case os::ViewportState::Fullscreen: - SDL_SetWindowFullscreen(_window, SDL_WINDOW_FULLSCREEN); + // Use desktop (borderless) fullscreen rather than exclusive + // fullscreen. Exclusive fullscreen performs a real video mode + // change which is unreliable with an active Vulkan surface + // (it can fail or invalidate the surface, stranding swap chain + // resources). Desktop fullscreen keeps the surface valid so the + // swap chain can simply be recreated. + if (SDL_SetWindowFullscreen(_window, SDL_WINDOW_FULLSCREEN_DESKTOP) != 0) { + mprintf(("Failed to enter fullscreen: %s\n", SDL_GetError())); + } break; default: UNREACHABLE("Invalid window state!"); @@ -192,7 +200,9 @@ std::unique_ptr SDLGraphicsOperations::createViewport(const os::Vi windowflags |= SDL_WINDOW_BORDERLESS; } if (props.flags[os::ViewPortFlags::Fullscreen]) { - windowflags |= SDL_WINDOW_FULLSCREEN; + // Desktop (borderless) fullscreen avoids an exclusive video mode change, + // which is unreliable with Vulkan surfaces. See setState() for details. 
+ windowflags |= SDL_WINDOW_FULLSCREEN_DESKTOP; } if (props.flags[os::ViewPortFlags::Resizeable]) { windowflags |= SDL_WINDOW_RESIZABLE; diff --git a/lib/VulkanMemoryAllocator/CMakeLists.txt b/lib/VulkanMemoryAllocator/CMakeLists.txt new file mode 100644 index 00000000000..eeb4e6947dd --- /dev/null +++ b/lib/VulkanMemoryAllocator/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(VulkanMemoryAllocator INTERFACE) +target_include_directories(VulkanMemoryAllocator SYSTEM INTERFACE + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) diff --git a/lib/VulkanMemoryAllocator/include/vk_mem_alloc.h b/lib/VulkanMemoryAllocator/include/vk_mem_alloc.h new file mode 100644 index 00000000000..6f71d5b4604 --- /dev/null +++ b/lib/VulkanMemoryAllocator/include/vk_mem_alloc.h @@ -0,0 +1,19111 @@ +// +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +#ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H +#define AMD_VULKAN_MEMORY_ALLOCATOR_H + +/** \mainpage Vulkan Memory Allocator + +Version 3.2.1 + +Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. \n +License: MIT \n +See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/), +[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) + + +API documentation divided into groups: [Topics](topics.html) + +General documentation chapters: + +- User guide + - \subpage quick_start + - [Project setup](@ref quick_start_project_setup) + - [Initialization](@ref quick_start_initialization) + - [Resource allocation](@ref quick_start_resource_allocation) + - \subpage choosing_memory_type + - [Usage](@ref choosing_memory_type_usage) + - [Required and preferred flags](@ref choosing_memory_type_required_preferred_flags) + - [Explicit memory types](@ref choosing_memory_type_explicit_memory_types) + - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools) + - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations) + - \subpage memory_mapping + - [Copy functions](@ref memory_mapping_copy_functions) + - [Mapping functions](@ref memory_mapping_mapping_functions) + - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) + - [Cache flush and invalidate](@ref memory_mapping_cache_control) + - \subpage staying_within_budget + - [Querying for budget](@ref staying_within_budget_querying_for_budget) + - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) + - \subpage resource_aliasing + - \subpage custom_memory_pools + - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) + - [When not to use custom pools](@ref custom_memory_pools_when_not_use) + - [Linear allocation algorithm](@ref linear_algorithm) + - [Free-at-once](@ref linear_algorithm_free_at_once) + - [Stack](@ref linear_algorithm_stack) + - [Double 
stack](@ref linear_algorithm_double_stack) + - [Ring buffer](@ref linear_algorithm_ring_buffer) + - \subpage defragmentation + - \subpage statistics + - [Numeric statistics](@ref statistics_numeric_statistics) + - [JSON dump](@ref statistics_json_dump) + - \subpage allocation_annotation + - [Allocation user data](@ref allocation_user_data) + - [Allocation names](@ref allocation_names) + - \subpage virtual_allocator + - \subpage debugging_memory_usage + - [Memory initialization](@ref debugging_memory_usage_initialization) + - [Margins](@ref debugging_memory_usage_margins) + - [Corruption detection](@ref debugging_memory_usage_corruption_detection) + - [Leak detection features](@ref debugging_memory_usage_leak_detection) + - \subpage other_api_interop +- \subpage usage_patterns + - [GPU-only resource](@ref usage_patterns_gpu_only) + - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) + - [Readback](@ref usage_patterns_readback) + - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) + - [Other use cases](@ref usage_patterns_other_use_cases) +- \subpage configuration + - [Pointers to Vulkan functions](@ref config_Vulkan_functions) + - [Custom host memory allocator](@ref custom_memory_allocator) + - [Device memory allocation callbacks](@ref allocation_callbacks) + - [Device heap memory limit](@ref heap_memory_limit) +- Extension support + - \subpage vk_khr_dedicated_allocation + - \subpage enabling_buffer_device_address + - \subpage vk_ext_memory_priority + - \subpage vk_amd_device_coherent_memory + - \subpage vk_khr_external_memory_win32 +- \subpage general_considerations + - [Thread safety](@ref general_considerations_thread_safety) + - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) + - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) + - [Allocation algorithm](@ref general_considerations_allocation_algorithm) + - [Features not supported](@ref 
general_considerations_features_not_supported) + +\defgroup group_init Library initialization + +\brief API elements related to the initialization and management of the entire library, especially #VmaAllocator object. + +\defgroup group_alloc Memory allocation + +\brief API elements related to the allocation, deallocation, and management of Vulkan memory, buffers, images. +Most basic ones being: vmaCreateBuffer(), vmaCreateImage(). + +\defgroup group_virtual Virtual allocator + +\brief API elements related to the mechanism of \ref virtual_allocator - using the core allocation algorithm +for user-defined purpose without allocating any real GPU memory. + +\defgroup group_stats Statistics + +\brief API elements that query current status of the allocator, from memory usage, budget, to full dump of the internal state in JSON format. +See documentation chapter: \ref statistics. +*/ + + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(VULKAN_H_) +#include +#endif + +#if !defined(VMA_VULKAN_VERSION) + #if defined(VK_VERSION_1_4) + #define VMA_VULKAN_VERSION 1004000 + #elif defined(VK_VERSION_1_3) + #define VMA_VULKAN_VERSION 1003000 + #elif defined(VK_VERSION_1_2) + #define VMA_VULKAN_VERSION 1002000 + #elif defined(VK_VERSION_1_1) + #define VMA_VULKAN_VERSION 1001000 + #else + #define VMA_VULKAN_VERSION 1000000 + #endif +#endif + +#if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS + extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; + extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; + extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; + extern PFN_vkAllocateMemory vkAllocateMemory; + extern PFN_vkFreeMemory vkFreeMemory; + extern PFN_vkMapMemory vkMapMemory; + extern PFN_vkUnmapMemory vkUnmapMemory; + extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges; + extern PFN_vkInvalidateMappedMemoryRanges 
vkInvalidateMappedMemoryRanges; + extern PFN_vkBindBufferMemory vkBindBufferMemory; + extern PFN_vkBindImageMemory vkBindImageMemory; + extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; + extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; + extern PFN_vkCreateBuffer vkCreateBuffer; + extern PFN_vkDestroyBuffer vkDestroyBuffer; + extern PFN_vkCreateImage vkCreateImage; + extern PFN_vkDestroyImage vkDestroyImage; + extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer; + #if VMA_VULKAN_VERSION >= 1001000 + extern PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2; + extern PFN_vkGetImageMemoryRequirements2 vkGetImageMemoryRequirements2; + extern PFN_vkBindBufferMemory2 vkBindBufferMemory2; + extern PFN_vkBindImageMemory2 vkBindImageMemory2; + extern PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2; + #endif // #if VMA_VULKAN_VERSION >= 1001000 +#endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES + +#if !defined(VMA_DEDICATED_ALLOCATION) + #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation + #define VMA_DEDICATED_ALLOCATION 1 + #else + #define VMA_DEDICATED_ALLOCATION 0 + #endif +#endif + +#if !defined(VMA_BIND_MEMORY2) + #if VK_KHR_bind_memory2 + #define VMA_BIND_MEMORY2 1 + #else + #define VMA_BIND_MEMORY2 0 + #endif +#endif + +#if !defined(VMA_MEMORY_BUDGET) + #if VK_EXT_memory_budget && (VK_KHR_get_physical_device_properties2 || VMA_VULKAN_VERSION >= 1001000) + #define VMA_MEMORY_BUDGET 1 + #else + #define VMA_MEMORY_BUDGET 0 + #endif +#endif + +// Defined to 1 when VK_KHR_buffer_device_address device extension or equivalent core Vulkan 1.2 feature is defined in its headers. 
+#if !defined(VMA_BUFFER_DEVICE_ADDRESS) + #if VK_KHR_buffer_device_address || VMA_VULKAN_VERSION >= 1002000 + #define VMA_BUFFER_DEVICE_ADDRESS 1 + #else + #define VMA_BUFFER_DEVICE_ADDRESS 0 + #endif +#endif + +// Defined to 1 when VK_EXT_memory_priority device extension is defined in Vulkan headers. +#if !defined(VMA_MEMORY_PRIORITY) + #if VK_EXT_memory_priority + #define VMA_MEMORY_PRIORITY 1 + #else + #define VMA_MEMORY_PRIORITY 0 + #endif +#endif + +// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE4) + #if VK_KHR_maintenance4 + #define VMA_KHR_MAINTENANCE4 1 + #else + #define VMA_KHR_MAINTENANCE4 0 + #endif +#endif + +// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE5) + #if VK_KHR_maintenance5 + #define VMA_KHR_MAINTENANCE5 1 + #else + #define VMA_KHR_MAINTENANCE5 0 + #endif +#endif + + +// Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY) + #if VK_KHR_external_memory + #define VMA_EXTERNAL_MEMORY 1 + #else + #define VMA_EXTERNAL_MEMORY 0 + #endif +#endif + +// Defined to 1 when VK_KHR_external_memory_win32 device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY_WIN32) + #if VK_KHR_external_memory_win32 + #define VMA_EXTERNAL_MEMORY_WIN32 1 + #else + #define VMA_EXTERNAL_MEMORY_WIN32 0 + #endif +#endif + +// Define these macros to decorate all public functions with additional code, +// before and after returned type, appropriately. This may be useful for +// exporting the functions when compiling VMA as a separate library. 
Example: +// #define VMA_CALL_PRE __declspec(dllexport) +// #define VMA_CALL_POST __cdecl +#ifndef VMA_CALL_PRE + #define VMA_CALL_PRE +#endif +#ifndef VMA_CALL_POST + #define VMA_CALL_POST +#endif + +// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan +// structure that will be extended via the pNext chain. +#ifndef VMA_EXTENDS_VK_STRUCT + #define VMA_EXTENDS_VK_STRUCT(vkStruct) +#endif + +// Define this macro to decorate pointers with an attribute specifying the +// length of the array they point to if they are not null. +// +// The length may be one of +// - The name of another parameter in the argument list where the pointer is declared +// - The name of another member in the struct where the pointer is declared +// - The name of a member of a struct type, meaning the value of that member in +// the context of the call. For example +// VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount"), +// this means the number of memory heaps available in the device associated +// with the VmaAllocator being dealt with. +#ifndef VMA_LEN_IF_NOT_NULL + #define VMA_LEN_IF_NOT_NULL(len) +#endif + +// The VMA_NULLABLE macro is defined to be _Nullable when compiling with Clang. +// see: https://clang.llvm.org/docs/AttributeReference.html#nullable +#ifndef VMA_NULLABLE + #ifdef __clang__ + #define VMA_NULLABLE _Nullable + #else + #define VMA_NULLABLE + #endif +#endif + +// The VMA_NOT_NULL macro is defined to be _Nonnull when compiling with Clang. 
+// see: https://clang.llvm.org/docs/AttributeReference.html#nonnull +#ifndef VMA_NOT_NULL + #ifdef __clang__ + #define VMA_NOT_NULL _Nonnull + #else + #define VMA_NOT_NULL + #endif +#endif + +// If non-dispatchable handles are represented as pointers then we can give +// them nullability annotations +#ifndef VMA_NOT_NULL_NON_DISPATCHABLE + #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) + #define VMA_NOT_NULL_NON_DISPATCHABLE VMA_NOT_NULL + #else + #define VMA_NOT_NULL_NON_DISPATCHABLE + #endif +#endif + +#ifndef VMA_NULLABLE_NON_DISPATCHABLE + #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) + #define VMA_NULLABLE_NON_DISPATCHABLE VMA_NULLABLE + #else + #define VMA_NULLABLE_NON_DISPATCHABLE + #endif +#endif + +#ifndef VMA_STATS_STRING_ENABLED + #define VMA_STATS_STRING_ENABLED 1 +#endif + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// INTERFACE +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// Sections for managing code placement in file, only for development purposes e.g. for convenient folding inside an IDE. +#ifndef _VMA_ENUM_DECLARATIONS + +/** +\addtogroup group_init +@{ +*/ + +/// Flags for created #VmaAllocator. +typedef enum VmaAllocatorCreateFlagBits +{ + /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you.
+ + Using this flag may increase performance because internal mutexes are not used. + */ + VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001, + /** \brief Enables usage of VK_KHR_dedicated_allocation extension. + + The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. + When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + + Using this extension will automatically allocate dedicated blocks of memory for + some buffers and images instead of suballocating place for them out of bigger + memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT + flag) when it is recommended by the driver. It may improve performance on some + GPUs. + + You may set this flag only if you found out that following device extensions are + supported, you enabled them while creating Vulkan device passed as + VmaAllocatorCreateInfo::device, and you want them to be used internally by this + library: + + - VK_KHR_get_memory_requirements2 (device extension) + - VK_KHR_dedicated_allocation (device extension) + + When this flag is set, you can experience following warnings reported by Vulkan + validation layer. You can ignore them. + + > vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer. + */ + VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002, + /** + Enables usage of VK_KHR_bind_memory2 extension. + + The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`. + When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1. + + You may set this flag only if you found out that this device extension is supported, + you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + and you want it to be used internally by this library. 
+ + The extension provides functions `vkBindBufferMemory2KHR` and `vkBindImageMemory2KHR`, + which allow to pass a chain of `pNext` structures while binding. + This flag is required if you use `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2(). + */ + VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004, + /** + Enables usage of VK_EXT_memory_budget extension. + + You may set this flag only if you found out that this device extension is supported, + you enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + and you want it to be used internally by this library, along with another instance extension + VK_KHR_get_physical_device_properties2, which is required by it (or Vulkan 1.1, where this extension is promoted). + + The extension provides query for current memory usage and budget, which will probably + be more accurate than an estimation used by the library otherwise. + */ + VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008, + /** + Enables usage of VK_AMD_device_coherent_memory extension. + + You may set this flag only if you: + + - found out that this device extension is supported and enabled it while creating Vulkan device passed as VmaAllocatorCreateInfo::device, + - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device, + - want it to be used internally by this library. + + The extension and accompanying device feature provide access to memory types with + `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. + They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crash/hang/TDR. + + When the extension is not enabled, such memory types are still enumerated, but their usage is illegal. 
+ To protect from this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such memory type, + returning `VK_ERROR_FEATURE_NOT_PRESENT`. + */ + VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010, + /** + Enables usage of "buffer device address" feature, which allows you to use function + `vkGetBufferDeviceAddress*` to get raw GPU pointer to a buffer and pass it for usage inside a shader. + + You may set this flag only if you: + + 1. (For Vulkan version < 1.2) Found as available and enabled device extension + VK_KHR_buffer_device_address. + This extension is promoted to core Vulkan 1.2. + 2. Found as available and enabled device feature `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress`. + + When this flag is set, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT` using VMA. + The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` to + allocated memory blocks wherever it might be needed. + + For more information, see documentation chapter \ref enabling_buffer_device_address. + */ + VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT = 0x00000020, + /** + Enables usage of VK_EXT_memory_priority extension in the library. + + You may set this flag only if you found available and enabled this device extension, + along with `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority == VK_TRUE`, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + + When this flag is used, VmaAllocationCreateInfo::priority and VmaPoolCreateInfo::priority + are used to set priorities of allocated Vulkan memory. Without it, these variables are ignored. + + A priority must be a floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. + Larger values are higher priority. The granularity of the priorities is implementation-dependent. 
+ It is automatically passed to every call to `vkAllocateMemory` done by the library using structure `VkMemoryPriorityAllocateInfoEXT`. + The value to be used for default priority is 0.5. + For more details, see the documentation of the VK_EXT_memory_priority extension. + */ + VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, + /** + Enables usage of VK_KHR_maintenance4 extension in the library. + + You may set this flag only if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080, + /** + Enables usage of VK_KHR_maintenance5 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, + + /** + Enables usage of VK_KHR_external_memory_win32 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + For more information, see \ref vk_khr_external_memory_win32. + */ + VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT = 0x00000200, + + VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. +typedef VkFlags VmaAllocatorCreateFlags; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/// \brief Intended usage of the allocated memory. +typedef enum VmaMemoryUsage +{ + /** No intended memory usage specified. + Use other members of VmaAllocationCreateInfo to specify your requirements. + */ + VMA_MEMORY_USAGE_UNKNOWN = 0, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_GPU_ONLY = 1, + /** + \deprecated Obsolete, preserved for backward compatibility. 
+ Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. + */ + VMA_MEMORY_USAGE_CPU_ONLY = 2, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_CPU_TO_GPU = 3, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. + */ + VMA_MEMORY_USAGE_GPU_TO_CPU = 4, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_CPU_COPY = 5, + /** + Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. + Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. + + Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. + + Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + */ + VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, + /** + Selects best memory type automatically. + This flag is recommended for most common use cases. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. 
+ */ + VMA_MEMORY_USAGE_AUTO = 7, + /** + Selects best memory type automatically with preference for GPU (device) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, + /** + Selects best memory type automatically with preference for CPU (host) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, + + VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF +} VmaMemoryUsage; + +/// Flags to be passed as VmaAllocationCreateInfo::flags. +typedef enum VmaAllocationCreateFlagBits +{ + /** \brief Set this flag if the allocation should have its own memory block. + + Use it for special, big resources, like fullscreen images used as attachments. + + If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo` + structure is applied if possible. 
+ */ + VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, + + /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. + + If new allocation cannot be placed in any of the existing blocks, allocation + fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + + You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and + #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. + */ + VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, + /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. + + Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. + + It is valid to use this flag for allocation made from memory type that is not + `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is + useful if you need an allocation that is efficient to use on GPU + (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that + support it (e.g. Intel GPU). + */ + VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, + /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + + Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a + null-terminated string. Instead of copying pointer value, a local copy of the + string is made and stored in allocation's `pName`. The string is automatically + freed together with the allocation. It is also used in vmaBuildStatsString(). + */ + VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, + /** Allocation will be created from upper stack in a double stack pool. + + This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. + */ + VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, + /** Create both buffer/image and allocation, but don't bind them together. 
+ It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. + The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). + Otherwise it is ignored. + + If you want to make sure the new buffer/image is not tied to the new memory allocation + through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, + use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. + */ + VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, + /** Create allocation only if additional device memory required for it, if any, won't exceed + memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + */ + VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, + /** \brief Set this flag if the allocated memory will have aliasing resources. + + Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. + Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. + */ + VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number, + never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + + \warning Violating this declaration may work correctly, but will likely be very slow. 
+ Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` + Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory can be read, written, and accessed in random order, + so a `HOST_CACHED` memory type is preferred. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, + /** + Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected + if it may improve performance. + + By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type + (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and + issue an explicit transfer to write/read your data. + To prepare for this possibility, don't forget to add appropriate flags like + `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, + /** Allocation strategy that chooses smallest possible free range for the allocation + to minimize memory usage and fragmentation, possibly at the expense of allocation time. 
+ */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = 0x00010000, + /** Allocation strategy that chooses first suitable free range for the allocation - + not necessarily in terms of the smallest offset but the one that is easiest and fastest to find + to minimize allocation time, possibly at the expense of allocation quality. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + Used internally by defragmentation, not recommended in typical usage. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = 0x00040000, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** A bit mask to extract only `STRATEGY` bits from entire set of flags. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MASK = + VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + + VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocationCreateFlagBits; +/// See #VmaAllocationCreateFlagBits. +typedef VkFlags VmaAllocationCreateFlags; + +/// Flags to be passed as VmaPoolCreateInfo::flags. +typedef enum VmaPoolCreateFlagBits +{ + /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored. + + This is an optional optimization flag. 
+ + If you always allocate using vmaCreateBuffer(), vmaCreateImage(), + vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator + knows exact type of your allocations so it can handle Buffer-Image Granularity + in the optimal way. + + If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(), + exact type of such allocations is not known, so allocator must be conservative + in handling Buffer-Image Granularity, which can lead to suboptimal allocation + (wasted memory). In that case, if you can make sure you always allocate only + buffers and linear images or only optimal images out of this pool, use this flag + to make allocator disregard Buffer-Image Granularity and so make allocations + faster and more optimal. + */ + VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002, + + /** \brief Enables alternative, linear allocation algorithm in this pool. + + Specify this flag to enable linear allocation algorithm, which always creates + new allocations after last one and doesn't reuse space from allocations freed in + between. It trades memory consumption for simplified algorithm and data + structure, which has better performance and uses less memory for metadata. + + By using this flag, you can achieve behavior of free-at-once, stack, + ring buffer, and double stack. + For details, see documentation chapter \ref linear_algorithm. + */ + VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004, + + /** Bit mask to extract only `ALGORITHM` bits from entire set of flags. + */ + VMA_POOL_CREATE_ALGORITHM_MASK = + VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT, + + VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaPoolCreateFlagBits; +/// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits. +typedef VkFlags VmaPoolCreateFlags; + +/// Flags to be passed as VmaDefragmentationInfo::flags. +typedef enum VmaDefragmentationFlagBits +{ + /* \brief Use simple but fast algorithm for defragmentation. 
+ May not achieve best results but will require least time to compute and least allocations to copy. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1, + /* \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified. + Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2, + /* \brief Perform full defragmentation of memory. + Can result in notably more time to compute and allocations to copy, but will achieve best memory packing. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4, + /** \brief Use the most robust algorithm at the cost of time to compute and number of copies to make. + Only available when bufferImageGranularity is greater than 1, since it aims to reduce + alignment issues between different types of resources. + Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT. + */ + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8, + + /// A bit mask to extract only `ALGORITHM` bits from entire set of flags. + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK = + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT | + VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT, + + VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaDefragmentationFlagBits; +/// See #VmaDefragmentationFlagBits. +typedef VkFlags VmaDefragmentationFlags; + +/// Operation performed on single defragmentation move. See structure #VmaDefragmentationMove. +typedef enum VmaDefragmentationMoveOperation +{ + /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass().
+ VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0, + /// Set this value if you cannot move the allocation. New place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged. + VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1, + /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. New place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed. + VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2, +} VmaDefragmentationMoveOperation; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. +typedef enum VmaVirtualBlockCreateFlagBits +{ + /** \brief Enables alternative, linear allocation algorithm in this virtual block. + + Specify this flag to enable linear allocation algorithm, which always creates + new allocations after last one and doesn't reuse space from allocations freed in + between. It trades memory consumption for simplified algorithm and data + structure, which has better performance and uses less memory for metadata. + + By using this flag, you can achieve behavior of free-at-once, stack, + ring buffer, and double stack. + For details, see documentation chapter \ref linear_algorithm. + */ + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001, + + /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags. + */ + VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK = + VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT, + + VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualBlockCreateFlagBits; +/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. See #VmaVirtualBlockCreateFlagBits. +typedef VkFlags VmaVirtualBlockCreateFlags; + +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. +typedef enum VmaVirtualAllocationCreateFlagBits +{ + /** \brief Allocation will be created from upper stack in a double stack pool. 
+ + This flag is only allowed for virtual blocks created with #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT flag. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT, + /** \brief Allocation strategy that tries to minimize memory usage. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, + /** \brief Allocation strategy that tries to minimize allocation time. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags. + + These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits. + */ + VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MASK, + + VMA_VIRTUAL_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaVirtualAllocationCreateFlagBits; +/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. See #VmaVirtualAllocationCreateFlagBits. +typedef VkFlags VmaVirtualAllocationCreateFlags; + +/** @} */ + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_DATA_TYPES_DECLARATIONS + +/** +\addtogroup group_init +@{ */ + +/** \struct VmaAllocator +\brief Represents main object of this library initialized. + +Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it. +Call function vmaDestroyAllocator() to destroy it. + +It is recommended to create just one object of this type per `VkDevice` object, +right after Vulkan is initialized and keep it alive until before Vulkan device is destroyed. 
+*/ +VK_DEFINE_HANDLE(VmaAllocator) + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \struct VmaPool +\brief Represents custom memory pool + +Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it. +Call function vmaDestroyPool() to destroy it. + +For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools). +*/ +VK_DEFINE_HANDLE(VmaPool) + +/** \struct VmaAllocation +\brief Represents single memory allocation. + +It may be either dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type +plus unique offset. + +There are multiple ways to create such object. +You need to fill structure VmaAllocationCreateInfo. +For more information see [Choosing memory type](@ref choosing_memory_type). + +Although the library provides convenience functions that create Vulkan buffer or image, +allocate memory for it and bind them together, +binding of the allocation to a buffer or an image is out of scope of the allocation itself. +Allocation object can exist without buffer/image bound, +binding can be done manually by the user, and destruction of it can be done +independently of destruction of the allocation. + +The object also remembers its size and some other information. +To retrieve this information, use function vmaGetAllocationInfo() and inspect +returned structure VmaAllocationInfo. +*/ +VK_DEFINE_HANDLE(VmaAllocation) + +/** \struct VmaDefragmentationContext +\brief An opaque object that represents started defragmentation process. + +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. +*/ +VK_DEFINE_HANDLE(VmaDefragmentationContext) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualAllocation +\brief Represents single memory allocation done inside VmaVirtualBlock. + +Use it as a unique identifier to virtual allocation within the single block. 
+ +Use value `VK_NULL_HANDLE` to represent a null/invalid allocation. +*/ +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualBlock +\brief Handle to a virtual block object that allows you to use the core allocation algorithm without allocating any real GPU memory. + +Fill in #VmaVirtualBlockCreateInfo structure and use vmaCreateVirtualBlock() to create it. Use vmaDestroyVirtualBlock() to destroy it. +For more information, see documentation chapter \ref virtual_allocator. + +This object is not thread-safe - should not be used from multiple threads simultaneously, must be synchronized externally. +*/ +VK_DEFINE_HANDLE(VmaVirtualBlock) + +/** @} */ + +/** +\addtogroup group_init +@{ +*/ + +/// Callback function called after successful vkAllocateMemory. +typedef void (VKAPI_PTR* PFN_vmaAllocateDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/// Callback function called before vkFreeMemory. +typedef void (VKAPI_PTR* PFN_vmaFreeDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. + +Provided for informative purpose, e.g. to gather statistics about number of +allocations or total amount of memory allocated in Vulkan. + +Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. +*/ +typedef struct VmaDeviceMemoryCallbacks +{ + /// Optional, can be null. + PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; + /// Optional, can be null. + PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; + /// Optional, can be null.
+ void* VMA_NULLABLE pUserData; +} VmaDeviceMemoryCallbacks; + +/** \brief Pointers to some Vulkan functions - a subset used by the library. + +Used in VmaAllocatorCreateInfo::pVulkanFunctions. +*/ +typedef struct VmaVulkanFunctions +{ + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + PFN_vkGetInstanceProcAddr VMA_NULLABLE vkGetInstanceProcAddr; + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + PFN_vkGetDeviceProcAddr VMA_NULLABLE vkGetDeviceProcAddr; + PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties; + PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory; + PFN_vkFreeMemory VMA_NULLABLE vkFreeMemory; + PFN_vkMapMemory VMA_NULLABLE vkMapMemory; + PFN_vkUnmapMemory VMA_NULLABLE vkUnmapMemory; + PFN_vkFlushMappedMemoryRanges VMA_NULLABLE vkFlushMappedMemoryRanges; + PFN_vkInvalidateMappedMemoryRanges VMA_NULLABLE vkInvalidateMappedMemoryRanges; + PFN_vkBindBufferMemory VMA_NULLABLE vkBindBufferMemory; + PFN_vkBindImageMemory VMA_NULLABLE vkBindImageMemory; + PFN_vkGetBufferMemoryRequirements VMA_NULLABLE vkGetBufferMemoryRequirements; + PFN_vkGetImageMemoryRequirements VMA_NULLABLE vkGetImageMemoryRequirements; + PFN_vkCreateBuffer VMA_NULLABLE vkCreateBuffer; + PFN_vkDestroyBuffer VMA_NULLABLE vkDestroyBuffer; + PFN_vkCreateImage VMA_NULLABLE vkCreateImage; + PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage; + PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer; +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR; + /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. 
+ PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR; +#endif +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. + PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR; + /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. + PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR; +#endif +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. + PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR; +#endif +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements; + /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements; +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 + PFN_vkGetMemoryWin32HandleKHR VMA_NULLABLE vkGetMemoryWin32HandleKHR; +#else + void* VMA_NULLABLE vkGetMemoryWin32HandleKHR; +#endif +} VmaVulkanFunctions; + +/// Description of a Allocator to be created. +typedef struct VmaAllocatorCreateInfo +{ + /// Flags for created allocator. Use #VmaAllocatorCreateFlagBits enum. + VmaAllocatorCreateFlags flags; + /// Vulkan physical device. 
+ /** It must be valid throughout whole lifetime of created allocator. */ + VkPhysicalDevice VMA_NOT_NULL physicalDevice; + /// Vulkan device. + /** It must be valid throughout whole lifetime of created allocator. */ + VkDevice VMA_NOT_NULL device; + /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps > 1 GiB. Optional. + /** Set to 0 to use default, which is currently 256 MiB. */ + VkDeviceSize preferredLargeHeapBlockSize; + /// Custom CPU memory allocation callbacks. Optional. + /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */ + const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; + /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional. + /** Optional, can be null. */ + const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks; + /** \brief Either null or a pointer to an array of limits on maximum number of bytes that can be allocated out of particular Vulkan memory heap. + + If not NULL, it must be a pointer to an array of + `VkPhysicalDeviceMemoryProperties::memoryHeapCount` elements, defining limit on + maximum number of bytes that can be allocated out of particular Vulkan memory + heap. + + Any of the elements may be equal to `VK_WHOLE_SIZE`, which means no limit on that + heap. This is also the default in case of `pHeapSizeLimit` = NULL. + + If there is a limit defined for a heap: + + - If user tries to allocate more memory from that heap using this allocator, + the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + - If the limit is smaller than heap size reported in `VkMemoryHeap::size`, the + value of this limit will be reported instead when using vmaGetMemoryProperties(). + + Warning! Using this feature may not be equivalent to installing a GPU with + smaller amount of memory, because graphics driver doesn't necessarily fail new + allocations with `VK_ERROR_OUT_OF_DEVICE_MEMORY` result when memory capacity is + exceeded.
It may return success and just silently migrate some device memory + blocks to system RAM. This driver behavior can also be controlled using + VK_AMD_memory_overallocation_behavior extension. + */ + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pHeapSizeLimit; + + /** \brief Pointers to Vulkan functions. Can be null. + + For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions). + */ + const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions; + /** \brief Handle to Vulkan instance object. + + Starting from version 3.0.0 this member is no longer optional, it must be set! + */ + VkInstance VMA_NOT_NULL instance; + /** \brief Optional. Vulkan version that the application uses. + + It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`. + The patch version number specified is ignored. Only the major and minor versions are considered. + Only versions 1.0...1.4 are supported by the current implementation. + Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`. + It must match the Vulkan version used by the application and supported on the selected physical device, + so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance` + and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used. + */ + uint32_t vulkanApiVersion; +#if VMA_EXTERNAL_MEMORY + /** \brief Either null or a pointer to an array of external memory handle types for each Vulkan memory type. + + If not NULL, it must be a pointer to an array of `VkPhysicalDeviceMemoryProperties::memoryTypeCount` + elements, defining external memory handle types of particular Vulkan memory type, + to be passed using `VkExportMemoryAllocateInfoKHR`. + + Any of the elements may be equal to 0, which means not to use `VkExportMemoryAllocateInfoKHR` on this memory type. 
+ This is also the default in case of `pTypeExternalMemoryHandleTypes` = NULL. + */ + const VkExternalMemoryHandleTypeFlagsKHR* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryTypeCount") pTypeExternalMemoryHandleTypes; +#endif // #if VMA_EXTERNAL_MEMORY +} VmaAllocatorCreateInfo; + +/// Information about existing #VmaAllocator object. +typedef struct VmaAllocatorInfo +{ + /** \brief Handle to Vulkan instance object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::instance. + */ + VkInstance VMA_NOT_NULL instance; + /** \brief Handle to Vulkan physical device object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::physicalDevice. + */ + VkPhysicalDevice VMA_NOT_NULL physicalDevice; + /** \brief Handle to Vulkan device object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::device. + */ + VkDevice VMA_NOT_NULL device; +} VmaAllocatorInfo; + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total. + +These are fast to calculate. +See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics(). +*/ +typedef struct VmaStatistics +{ + /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated. + */ + uint32_t blockCount; + /** \brief Number of #VmaAllocation objects allocated. + + Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`. + */ + uint32_t allocationCount; + /** \brief Number of bytes allocated in `VkDeviceMemory` blocks. + + \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object + (e.g. 
as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls + "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such block, usually for a single buffer or image. + */ + VkDeviceSize blockBytes; + /** \brief Total number of bytes occupied by all #VmaAllocation objects. + + Always less than or equal to `blockBytes`. + Difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan + but unused by any #VmaAllocation. + */ + VkDeviceSize allocationBytes; +} VmaStatistics; + +/** \brief More detailed statistics than #VmaStatistics. + +These are slower to calculate. Use for debugging purposes. +See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics(). + +Previous version of the statistics API provided averages, but they have been removed +because they can be easily calculated as: + +\code +VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount; +VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes; +VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount; +\endcode +*/ +typedef struct VmaDetailedStatistics +{ + /// Basic statistics. + VmaStatistics statistics; + /// Number of free ranges of memory between allocations. + uint32_t unusedRangeCount; + /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations. + VkDeviceSize allocationSizeMin; + /// Largest allocation size. 0 if there are 0 allocations. + VkDeviceSize allocationSizeMax; + /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMin; + /// Largest empty range size. 0 if there are 0 empty ranges. + VkDeviceSize unusedRangeSizeMax; +} VmaDetailedStatistics; + +/** \brief General statistics from current state of the Allocator - +total memory usage across all memory heaps and types.
+ +These are slower to calculate. Use for debugging purposes. +See function vmaCalculateStatistics(). +*/ +typedef struct VmaTotalStatistics +{ + VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES]; + VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS]; + VmaDetailedStatistics total; +} VmaTotalStatistics; + +/** \brief Statistics of current memory usage and available budget for a specific memory heap. + +These are fast to calculate. +See function vmaGetHeapBudgets(). +*/ +typedef struct VmaBudget +{ + /** \brief Statistics fetched from the library. + */ + VmaStatistics statistics; + /** \brief Estimated current memory usage of the program, in bytes. + + Fetched from system using VK_EXT_memory_budget extension if enabled. + + It might be different than `statistics.blockBytes` (usually higher) due to additional implicit objects + also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or + `VkDeviceMemory` blocks allocated outside of this library, if any. + */ + VkDeviceSize usage; + /** \brief Estimated amount of memory available to the program, in bytes. + + Fetched from system using VK_EXT_memory_budget extension if enabled. + + It might be different (most probably smaller) than `VkMemoryHeap::size[heapIndex]` due to factors + external to the program, decided by the operating system. + Difference `budget - usage` is the amount of additional memory that can probably + be allocated without problems. Exceeding the budget may result in various problems. + */ + VkDeviceSize budget; +} VmaBudget; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Parameters of new #VmaAllocation. + +To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others. +*/ +typedef struct VmaAllocationCreateInfo +{ + /// Use #VmaAllocationCreateFlagBits enum. + VmaAllocationCreateFlags flags; + /** \brief Intended usage of memory. 
+ + You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in other way. \n + If `pool` is not null, this member is ignored. + */ + VmaMemoryUsage usage; + /** \brief Flags that must be set in a Memory Type chosen for an allocation. + + Leave 0 if you specify memory requirements in other way. \n + If `pool` is not null, this member is ignored.*/ + VkMemoryPropertyFlags requiredFlags; + /** \brief Flags that preferably should be set in a memory type chosen for an allocation. + + Set to 0 if no additional flags are preferred. \n + If `pool` is not null, this member is ignored. */ + VkMemoryPropertyFlags preferredFlags; + /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation. + + Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if + it meets other requirements specified by this structure, with no further + restrictions on memory type index. \n + If `pool` is not null, this member is ignored. + */ + uint32_t memoryTypeBits; + /** \brief Pool that this allocation should be created in. + + Leave `VK_NULL_HANDLE` to allocate from default pool. If not null, members: + `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored. + */ + VmaPool VMA_NULLABLE pool; + /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData(). + + If #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is used, it must be either + null or pointer to a null-terminated string. The string will be then copied to + internal buffer, so it doesn't need to be valid after allocation call. + */ + void* VMA_NULLABLE pUserData; + /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. 
+ + It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object + and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + Otherwise, it has the priority of a memory block where it is placed and this variable is ignored. + */ + float priority; +} VmaAllocationCreateInfo; + +/// Describes parameters of created #VmaPool. +typedef struct VmaPoolCreateInfo +{ + /** \brief Vulkan memory type index to allocate this pool from. + */ + uint32_t memoryTypeIndex; + /** \brief Use combination of #VmaPoolCreateFlagBits. + */ + VmaPoolCreateFlags flags; + /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional. + + Specify nonzero to set explicit, constant size of memory blocks used by this + pool. + + Leave 0 to use default and let the library manage block sizes automatically. + Sizes of particular blocks may vary. + In this case, the pool will also support dedicated allocations. + */ + VkDeviceSize blockSize; + /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty. + + Set to 0 to have no preallocated blocks and allow the pool to be completely empty. + */ + size_t minBlockCount; + /** \brief Maximum number of blocks that can be allocated in this pool. Optional. + + Set to 0 to use default, which is `SIZE_MAX`, which means no limit. + + Set to same value as VmaPoolCreateInfo::minBlockCount to have fixed amount of memory allocated + throughout whole lifetime of this pool. + */ + size_t maxBlockCount; + /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations. + + It is used only when #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object. + Otherwise, this variable is ignored.
+ */ + float priority; + /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0. + + Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two. + It can be useful in cases where alignment returned by Vulkan by functions like `vkGetBufferMemoryRequirements` is not enough, + e.g. when doing interop with OpenGL. + */ + VkDeviceSize minAllocationAlignment; + /** \brief Additional `pNext` chain to be attached to `VkMemoryAllocateInfo` used for every allocation made by this pool. Optional. + + Optional, can be null. If not null, it must point to a `pNext` chain of structures that can be attached to `VkMemoryAllocateInfo`. + It can be useful for special needs such as adding `VkExportMemoryAllocateInfoKHR`. + Structures pointed by this member must remain alive and unchanged for the whole lifetime of the custom pool. + + Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`, + can be attached automatically by this library when using other, more convenient of its features. + */ + void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext; +} VmaPoolCreateInfo; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo(). + +There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2. +*/ +typedef struct VmaAllocationInfo +{ + /** \brief Memory type index that this allocation was allocated from. + + It never changes. + */ + uint32_t memoryType; + /** \brief Handle to Vulkan memory object. + + Same memory object can be shared by multiple allocations. + + It can change after the allocation is moved during \ref defragmentation. 
+ */ + VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory; + /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation. + + You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function + vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image, + not entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation + and apply this offset automatically. + + It can change after the allocation is moved during \ref defragmentation. + */ + VkDeviceSize offset; + /** \brief Size of this allocation, in bytes. + + It never changes. + + \note Allocation size returned in this variable may be greater than the size + requested for the resource e.g. as `VkBufferCreateInfo::size`. Whole size of the + allocation is accessible for operations on memory e.g. using a pointer after + mapping with vmaMapMemory(), but operations on the resource e.g. using + `vkCmdCopyBuffer` must be limited to the size of the resource. + */ + VkDeviceSize size; + /** \brief Pointer to the beginning of this allocation as mapped data. + + If the allocation hasn't been mapped using vmaMapMemory() and hasn't been + created with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null. + + It can change after call to vmaMapMemory(), vmaUnmapMemory(). + It can also change after the allocation is moved during \ref defragmentation. + */ + void* VMA_NULLABLE pMappedData; + /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). + + It can change after call to vmaSetAllocationUserData() for this allocation. + */ + void* VMA_NULLABLE pUserData; + /** \brief Custom allocation name that was set with vmaSetAllocationName(). 
+ + It can change after call to vmaSetAllocationName() for this allocation. + + Another way to set custom name is to pass it in VmaAllocationCreateInfo::pUserData with + additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED]. + */ + const char* VMA_NULLABLE pName; +} VmaAllocationInfo; + +/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2(). +typedef struct VmaAllocationInfo2 +{ + /** \brief Basic parameters of the allocation. + + If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead. + */ + VmaAllocationInfo allocationInfo; + /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to. + + In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`. + */ + VkDeviceSize blockSize; + /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block. + + When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation + (if VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled). + */ + VkBool32 dedicatedMemory; +} VmaAllocationInfo2; + +/** Callback function called during vmaBeginDefragmentation() to check custom criterion about ending current defragmentation pass. + +Should return true if the defragmentation needs to stop current pass. +*/ +typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData); + +/** \brief Parameters for defragmentation. + +To be used with function vmaBeginDefragmentation(). +*/ +typedef struct VmaDefragmentationInfo +{ + /// \brief Use combination of #VmaDefragmentationFlagBits. + VmaDefragmentationFlags flags; + /** \brief Custom pool to be defragmented. + + If null then default pools will undergo defragmentation process. 
+ */ + VmaPool VMA_NULLABLE pool; + /** \brief Maximum number of bytes that can be copied during single pass, while moving allocations to different places. + + `0` means no limit. + */ + VkDeviceSize maxBytesPerPass; + /** \brief Maximum number of allocations that can be moved during single pass to a different place. + + `0` means no limit. + */ + uint32_t maxAllocationsPerPass; + /** \brief Optional custom callback for stopping vmaBeginDefragmentation(). + + Must return true to break the current defragmentation pass. + */ + PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback; + /// \brief Optional data to pass to custom callback for stopping pass of defragmentation. + void* VMA_NULLABLE pBreakCallbackUserData; +} VmaDefragmentationInfo; + +/// Single move of an allocation to be done for defragmentation. +typedef struct VmaDefragmentationMove +{ + /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it. + VmaDefragmentationMoveOperation operation; + /// Allocation that should be moved. + VmaAllocation VMA_NOT_NULL srcAllocation; + /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`. + + \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass, + to be used for binding new buffer/image to the destination memory using e.g. vmaBindBufferMemory(). + vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory. + */ + VmaAllocation VMA_NOT_NULL dstTmpAllocation; +} VmaDefragmentationMove; + +/** \brief Parameters for incremental defragmentation steps. + +To be used with function vmaBeginDefragmentationPass(). +*/ +typedef struct VmaDefragmentationPassMoveInfo +{ + /// Number of elements in the `pMoves` array.
+ uint32_t moveCount; + /** \brief Array of moves to be performed by the user in the current defragmentation pass. + + Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass(). + + For each element, you should: + + 1. Create a new buffer/image in the place pointed by VmaDefragmentationMove::dstTmpAllocation. + 2. Copy data from the VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`. + 3. Make sure these commands finished executing on the GPU. + 4. Destroy the old buffer/image. + + Only then you can finish defragmentation pass by calling vmaEndDefragmentationPass(). + After this call, the allocation will point to the new place in memory. + + Alternatively, if you cannot move specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + + Alternatively, if you decide you want to completely remove the allocation: + + 1. Destroy its buffer/image. + 2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + + Then, after vmaEndDefragmentationPass() the allocation will be freed. + */ + VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves; +} VmaDefragmentationPassMoveInfo; + +/// Statistics returned for defragmentation process in function vmaEndDefragmentation(). +typedef struct VmaDefragmentationStats +{ + /// Total number of bytes that have been copied while moving allocations to different places. + VkDeviceSize bytesMoved; + /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects. + VkDeviceSize bytesFreed; + /// Number of allocations that have been moved to different places. + uint32_t allocationsMoved; + /// Number of empty `VkDeviceMemory` objects that have been released to the system.
+ uint32_t deviceMemoryBlocksFreed; +} VmaDefragmentationStats; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Parameters of created #VmaVirtualBlock object to be passed to vmaCreateVirtualBlock(). +typedef struct VmaVirtualBlockCreateInfo +{ + /** \brief Total size of the virtual block. + + Sizes can be expressed in bytes or any units you want as long as you are consistent in using them. + For example, if you allocate from some array of structures, 1 can mean single instance of entire structure. + */ + VkDeviceSize size; + + /** \brief Use combination of #VmaVirtualBlockCreateFlagBits. + */ + VmaVirtualBlockCreateFlags flags; + + /** \brief Custom CPU memory allocation callbacks. Optional. + + Optional, can be null. When specified, they will be used for all CPU-side memory allocations. + */ + const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; +} VmaVirtualBlockCreateInfo; + +/// Parameters of created virtual allocation to be passed to vmaVirtualAllocate(). +typedef struct VmaVirtualAllocationCreateInfo +{ + /** \brief Size of the allocation. + + Cannot be zero. + */ + VkDeviceSize size; + /** \brief Required alignment of the allocation. Optional. + + Must be power of two. Special value 0 has the same meaning as 1 - means no special alignment is required, so allocation can start at any offset. + */ + VkDeviceSize alignment; + /** \brief Use combination of #VmaVirtualAllocationCreateFlagBits. + */ + VmaVirtualAllocationCreateFlags flags; + /** \brief Custom pointer to be associated with the allocation. Optional. + + It can be any value and can be used for user-defined purposes. It can be fetched or changed later. + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationCreateInfo; + +/// Parameters of an existing virtual allocation, returned by vmaGetVirtualAllocationInfo(). +typedef struct VmaVirtualAllocationInfo +{ + /** \brief Offset of the allocation. + + Offset at which the allocation was made. 
+ */ + VkDeviceSize offset; + /** \brief Size of the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::size. + */ + VkDeviceSize size; + /** \brief Custom pointer associated with the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::pUserData or to vmaSetVirtualAllocationUserData(). + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationInfo; + +/** @} */ + +#endif // _VMA_DATA_TYPES_DECLARATIONS + +#ifndef _VMA_FUNCTION_HEADERS + +/** +\addtogroup group_init +@{ +*/ + +/// Creates #VmaAllocator object. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocator VMA_NULLABLE* VMA_NOT_NULL pAllocator); + +/// Destroys allocator object. +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator VMA_NULLABLE allocator); + +/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. + +It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to +`VkPhysicalDevice`, `VkDevice` etc. every time using this function. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); + +/** +PhysicalDeviceProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceProperties); + +/** +PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); + +/** +\brief Given Memory Type Index, returns Property Flags of this memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeIndex, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +/** \brief Sets index of the current frame. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t frameIndex); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics from current state of the Allocator. + +This function is called "calculate" not "get" because it has to traverse all +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaTotalStatistics* VMA_NOT_NULL pStats); + +/** \brief Retrieves information about current memory usage and budget for all memory heaps. + +\param allocator +\param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. + +This function is called "get" not "calculate" because it is very fast, suitable to be called +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( + VmaAllocator VMA_NOT_NULL allocator, + VmaBudget* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pBudgets); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +\brief Helps to find memoryTypeIndex, given memoryTypeBits and VmaAllocationCreateInfo. + +This algorithm tries to find a memory type that: + +- Is allowed by memoryTypeBits. +- Contains all the flags from pAllocationCreateInfo->requiredFlags. +- Matches intended usage. +- Has as many flags from pAllocationCreateInfo->preferredFlags as possible. + +\return Returns VK_ERROR_FEATURE_NOT_PRESENT if not found. Receiving such result +from this function or any other allocating function probably means that your +device doesn't support any memory type with requested features for the specific +type of resource you want to use it for. Please check parameters of your +resource, like image layout (OPTIMAL versus LINEAR) or mip level count. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkBufferCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy buffer that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkImageCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. 
to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy image that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** \brief Allocates Vulkan device memory and creates #VmaPool object. + +\param allocator Allocator object. +\param pCreateInfo Parameters of pool to create. +\param[out] pPool Handle to created pool. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator VMA_NOT_NULL allocator, + const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaPool VMA_NULLABLE* VMA_NOT_NULL pPool); + +/** \brief Destroys #VmaPool object and frees Vulkan device memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NULLABLE pool); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. + +Note that when using the pool from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaStatistics* VMA_NOT_NULL pPoolStats); + +/** \brief Retrieves detailed statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and the pool is created in memory type that is +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for specified pool. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. + `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool); + +/** \brief Retrieves name of a custom pool. + +After the call `ppName` is either null or points to an internally-owned null-terminated string +containing name of the pool that was previously set. The pointer becomes invalid when the pool is +destroyed or its name is changed using vmaSetPoolName(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + const char* VMA_NULLABLE* VMA_NOT_NULL ppName); + +/** \brief Sets name of a custom pool. + +`pName` can be either null or pointer to a null-terminated string with new name for the pool. +Function makes internal copy of the string, so it can be changed or freed immediately after this call. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + const char* VMA_NULLABLE pName); + +/** \brief General purpose memory allocation. + +\param allocator +\param pVkMemoryRequirements +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(), +vmaCreateBuffer(), vmaCreateImage() instead whenever possible. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( + VmaAllocator VMA_NOT_NULL allocator, + const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements, + const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief General purpose memory allocation for multiple allocation objects at once. + +\param allocator Allocator object. +\param pVkMemoryRequirements Memory requirements for each allocation. +\param pCreateInfo Creation parameters for each allocation. +\param allocationCount Number of allocations to make. +\param[out] pAllocations Pointer to array that will be filled with handles to created allocations. +\param[out] pAllocationInfo Optional. Pointer to array that will be filled with parameters of created allocations. + +You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages(). + +Word "pages" is just a suggestion to use this function to allocate pieces of memory needed for sparse binding. +It is just a general purpose allocation function able to make multiple allocations at once. +It may be internally optimized to be more efficient than calling vmaAllocateMemory() `allocationCount` times. 
+ +All allocations are made using same parameters. All of them are created out of the same memory pool and type. +If any allocation fails, all allocations already made within this function call are also freed, so that when +returned result is not `VK_SUCCESS`, `pAllocations` array is always entirely filled with `VK_NULL_HANDLE`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( + VmaAllocator VMA_NOT_NULL allocator, + const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements, + const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo, + size_t allocationCount, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations, + VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo); + +/** \brief Allocates memory suitable for given `VkBuffer`. + +\param allocator +\param buffer +\param pCreateInfo +\param[out] pAllocation Handle to allocated memory. +\param[out] pAllocationInfo Optional. Information about allocated memory. 
It can be later fetched using function vmaGetAllocationInfo(). + +It only creates #VmaAllocation. To bind the memory to the image, use vmaBindImageMemory(). + +This is a special-purpose function. In most cases you should use vmaCreateImage(). + +You must free the allocation using vmaFreeMemory() when no longer needed. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, + const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage(). + +Passing `VK_NULL_HANDLE` as `allocation` is valid. Such function call is just skipped. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( + VmaAllocator VMA_NOT_NULL allocator, + const VmaAllocation VMA_NULLABLE allocation); + +/** \brief Frees memory and destroys multiple allocations. + +Word "pages" is just a suggestion to use this function to free pieces of memory used for sparse binding. +It is just a general purpose function to free memory and destroy allocations made using e.g. vmaAllocateMemory(), +vmaAllocateMemoryPages() and other functions. +It may be internally optimized to be more efficient than calling vmaFreeMemory() `allocationCount` times. + +Allocations in `pAllocations` array can come from any memory pools and types. +Passing `VK_NULL_HANDLE` as elements of `pAllocations` array is valid. Such entries are just skipped. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( + VmaAllocator VMA_NOT_NULL allocator, + size_t allocationCount, + const VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations); + +/** \brief Returns current information about specified allocation. 
+ +Current parameters of given allocation are returned in `pAllocationInfo`. + +Although this function doesn't lock any mutex, so it should be quite efficient, +you should avoid calling it too often. +You can retrieve same VmaAllocationInfo structure while creating your resource, from function +vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure parameters don't change +(e.g. due to defragmentation). + +There is also a new function vmaGetAllocationInfo2() that offers extended information +about the allocation, returned using new structure #VmaAllocationInfo2. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo); + +/** \brief Returns extended information about specified allocation. + +Current parameters of given allocation are returned in `pAllocationInfo`. +Extended parameters in structure #VmaAllocationInfo2 include memory block size +and a flag telling whether the allocation has dedicated memory. +It can be useful e.g. for interop with OpenGL. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo); + +/** \brief Sets pUserData in given allocation to new value. + +The value of pointer `pUserData` is copied to allocation's `pUserData`. +It is opaque, so you can use it however you want - e.g. +as a pointer, ordinal number or some handle to your own data. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + void* VMA_NULLABLE pUserData); + +/** \brief Sets pName in given allocation to new value. + +`pName` must be either null, or pointer to a null-terminated string. The function +makes local copy of the string and sets it as allocation's `pName`. 
String +passed as pName doesn't need to be valid for whole lifetime of the allocation - +you can free it after this call. String previously pointed by allocation's +`pName` is freed from memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName); + +/** +\brief Given an allocation, returns Property Flags of its memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetAllocationInfo() + vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + + +#if VMA_EXTERNAL_MEMORY_WIN32 +/** +\brief Given an allocation, returns Win32 handle that may be imported by other processes or APIs. + +\param hTargetProcess Must be a valid handle to target process or null. If it's null, the function returns + handle for the current process. +\param[out] pHandle Output parameter that returns the handle. + +The function fills `pHandle` with handle that can be used in target process. +The handle is fetched using function `vkGetMemoryWin32HandleKHR`. +When no longer needed, you must close it using: + +\code +CloseHandle(handle); +\endcode + +You can close it any time, before or after destroying the allocation object. +It is reference-counted internally by Windows. + +Note the handle is returned for the entire `VkDeviceMemory` block that the allocation belongs to. +If the allocation is sub-allocated from a larger block, you may need to consider the offset of the allocation +(VmaAllocationInfo::offset). + +If the function fails with `VK_ERROR_FEATURE_NOT_PRESENT` error code, please double-check +that VmaVulkanFunctions::vkGetMemoryWin32HandleKHR function pointer is set, e.g. 
either by using `VMA_DYNAMIC_VULKAN_FUNCTIONS` +or by manually passing it through VmaAllocatorCreateInfo::pVulkanFunctions. + +For more information, see chapter \ref vk_khr_external_memory_win32. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle); +#endif // VMA_EXTERNAL_MEMORY_WIN32 + +/** \brief Maps memory represented by given allocation and returns pointer to it. + +Maps memory represented by given allocation to make it accessible to CPU code. +When succeeded, `*ppData` contains pointer to first byte of this memory. + +\warning +If the allocation is part of a bigger `VkDeviceMemory` block, returned pointer is +correctly offsetted to the beginning of region assigned to this particular allocation. +Unlike the result of `vkMapMemory`, it points to the allocation, not to the beginning of the whole block. +You should not add VmaAllocationInfo::offset to it! + +Mapping is internally reference-counted and synchronized, so despite raw Vulkan +function `vkMapMemory()` cannot be used to map same block of `VkDeviceMemory` +multiple times simultaneously, it is safe to call this function on allocations +assigned to the same memory block. Actual Vulkan memory will be mapped on first +mapping and unmapped on last unmapping. + +If the function succeeded, you must call vmaUnmapMemory() to unmap the +allocation when mapping is no longer needed or before freeing the allocation, at +the latest. + +It is also safe to call this function multiple times on the same allocation. You +must call vmaUnmapMemory() same number of times as you called vmaMapMemory(). + +It is also safe to call this function on allocation created with +#VMA_ALLOCATION_CREATE_MAPPED_BIT flag. Its memory stays mapped all the time. +You must still call vmaUnmapMemory() same number of times as you called +vmaMapMemory(). 
You must not call vmaUnmapMemory() additional time to free the +"0-th" mapping made automatically due to #VMA_ALLOCATION_CREATE_MAPPED_BIT flag. + +This function fails when used on allocation made in memory type that is not +`HOST_VISIBLE`. + +This function doesn't automatically flush or invalidate caches. +If the allocation is made from a memory type that is not `HOST_COHERENT`, +you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + void* VMA_NULLABLE* VMA_NOT_NULL ppData); + +/** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory(). + +For details, see description of vmaMapMemory(). + +This function doesn't automatically flush or invalidate caches. +If the allocation is made from a memory type that is not `HOST_COHERENT`, +you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by Vulkan specification. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation); + +/** \brief Flushes memory of given allocation. + +Calls `vkFlushMappedMemoryRanges()` for memory associated with given range of given allocation. +It needs to be called after writing to a mapped memory for memory types that are not `HOST_COHERENT`. +Unmap operation doesn't do that automatically. + +- `offset` must be relative to the beginning of allocation. +- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation. +- `offset` and `size` don't have to be aligned. + They are internally rounded down/up to a multiple of `nonCoherentAtomSize`. +- If `size` is 0, this call is ignored. +- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`, + this call is ignored. + +Warning! 
`offset` and `size` are relative to the contents of given `allocation`. +If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively. +Do not pass allocation's offset as `offset`!!! + +This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize offset, + VkDeviceSize size); + +/** \brief Invalidates memory of given allocation. + +Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given range of given allocation. +It needs to be called before reading from a mapped memory for memory types that are not `HOST_COHERENT`. +Map operation doesn't do that automatically. + +- `offset` must be relative to the beginning of allocation. +- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of given allocation. +- `offset` and `size` don't have to be aligned. + They are internally rounded down/up to a multiple of `nonCoherentAtomSize`. +- If `size` is 0, this call is ignored. +- If memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`, + this call is ignored. + +Warning! `offset` and `size` are relative to the contents of given `allocation`. +If you mean whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively. +Do not pass allocation's offset as `offset`!!! + +This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if +it is called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize offset, + VkDeviceSize size); + +/** \brief Flushes memory of given set of allocations. + +Calls `vkFlushMappedMemoryRanges()` for memory associated with given ranges of given allocations. 
+For more information, see documentation of vmaFlushAllocation(). + +\param allocator +\param allocationCount +\param allocations +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero. +\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. + +This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t allocationCount, + const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); + +/** \brief Invalidates memory of given set of allocations. + +Calls `vkInvalidateMappedMemoryRanges()` for memory associated with given ranges of given allocations. +For more information, see documentation of vmaInvalidateAllocation(). + +\param allocator +\param allocationCount +\param allocations +\param offsets If not null, it must point to an array of offsets of regions to invalidate, relative to the beginning of respective allocations. Null means all offsets are zero. +\param sizes If not null, it must point to an array of sizes of regions to invalidate in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. + +This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is +called, otherwise `VK_SUCCESS`. 
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t allocationCount, + const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); + +/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed. + +\param allocator +\param pSrcHostPointer Pointer to the host data that becomes source of the copy. +\param dstAllocation Handle to the allocation that becomes destination of the copy. +\param dstAllocationLocalOffset Offset within `dstAllocation` where to write copied data, in bytes. +\param size Number of bytes to copy. + +This is a convenience function that allows to copy data from a host pointer to an allocation easily. +Same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation(). + +This function can be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function will fail and generate a Validation Layers error. + +`dstAllocationLocalOffset` is relative to the contents of given `dstAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block as this parameter! 
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( + VmaAllocator VMA_NOT_NULL allocator, + const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer, + VmaAllocation VMA_NOT_NULL dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + +/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer. + +\param allocator +\param srcAllocation Handle to the allocation that becomes source of the copy. +\param srcAllocationLocalOffset Offset within `srcAllocation` where to read copied data, in bytes. +\param pDstHostPointer Pointer to the host memory that become destination of the copy. +\param size Number of bytes to copy. + +This is a convenience function that allows to copy data from an allocation to a host pointer easily. +Same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory(). + +This function should be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function may fail and generate a Validation Layers error. +It may also work very slowly when reading from an uncached memory. + +`srcAllocationLocalOffset` is relative to the contents of given `srcAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block as this parameter! 
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer, + VkDeviceSize size); + +/** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. + +\param allocator +\param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for any of specified memory types. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. + `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeBits); + +/** \brief Begins defragmentation process. + +\param allocator Allocator object. +\param pInfo Structure filled with parameters of defragmentation. +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. + +For more information about defragmentation, see documentation chapter: +[Defragmentation](@ref defragmentation). 
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, + VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); + +/** \brief Ends defragmentation process. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. + +Use this function to finish defragmentation started by vmaBeginDefragmentation(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed information for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), + and then preferably try another pass with vmaBeginDefragmentationPass(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Ends single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed information for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. + +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. 
+ +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY + (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY + will be freed. + +If no more moves are possible you can end whole defragmentation. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Binds buffer to allocation. + +Binds specified buffer to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create a buffer, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindBufferMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateBuffer() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer); + +/** \brief Binds buffer to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param buffer +\param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. 
Normally it should be null. + +This function is similar to vmaBindBufferMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext); + +/** \brief Binds image to allocation. + +Binds specified image to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create an image, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindImageMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateImage() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image); + +/** \brief Binds image to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param image +\param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. 
+ +This function is similar to vmaBindImageMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext); + +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + +\param allocator +\param pBufferCreateInfo +\param pAllocationCreateInfo +\param[out] pBuffer Buffer that was created. +\param[out] pAllocation Allocation that was created. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +This function automatically: + +-# Creates buffer. +-# Allocates appropriate memory for it. +-# Binds the buffer with the memory. + +If any of these operations fail, buffer and allocation are not created, +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. + +If the function succeeded, you must destroy both buffer and allocation when you +no longer need them using either convenience function vmaDestroyBuffer() or +separately, using `vkDestroyBuffer()` and vmaFreeMemory(). + +If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, +VK_KHR_dedicated_allocation extension is used internally to query driver whether +it requires or prefers the new buffer to have dedicated allocation. If yes, +and if dedicated allocation is possible +(#VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated +allocation for this buffer, just like when using +#VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. 
+ +\note This function creates a new `VkBuffer`. Sub-allocation of parts of one large buffer, +although recommended as a good practice, is out of scope of this library and could be implemented +by the user as a higher-level logic on top of VMA. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a buffer with additional minimum alignment. + +Similar to vmaCreateBuffer() but provides additional parameter `minAlignment` which allows to specify custom, +minimum alignment to be used when placing the buffer inside a larger memory block, which may be needed e.g. +for interop with OpenGL. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. 
If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note There is a new version of this function augmented with parameter `allocationLocalOffset` - see vmaCreateAliasingBuffer2(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the allocation. Normally it should be 0. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note This is a new version of the function augmented with parameter `allocationLocalOffset`. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Destroys Vulkan buffer and frees allocated memory. 
+ +This is just a convenience function equivalent to: + +\code +vkDestroyBuffer(device, buffer, allocationCallbacks); +vmaFreeMemory(allocator, allocation); +\endcode + +It is safe to pass null as buffer and/or allocation. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE buffer, + VmaAllocation VMA_NULLABLE allocation); + +/// Function similar to vmaCreateBuffer(). +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( + VmaAllocator VMA_NOT_NULL allocator, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/// Function similar to vmaCreateAliasingBuffer() but for images. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + +/// Function similar to vmaCreateAliasingBuffer2() but for images. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage); + +/** \brief Destroys Vulkan image and frees allocated memory. + +This is just a convenience function equivalent to: + +\code +vkDestroyImage(device, image, allocationCallbacks); +vmaFreeMemory(allocator, allocation); +\endcode + +It is safe to pass null as image and/or allocation. 
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation); + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \brief Creates new #VmaVirtualBlock object. + +\param pCreateInfo Parameters for creation. +\param[out] pVirtualBlock Returned virtual block object or `VMA_NULL` if creation failed. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( + const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualBlock VMA_NULLABLE* VMA_NOT_NULL pVirtualBlock); + +/** \brief Destroys #VmaVirtualBlock object. + +Please note that you should consciously handle virtual allocations that could remain unfreed in the block. +You should either free them individually using vmaVirtualFree() or call vmaClearVirtualBlock() +if you are sure this is what you want. If you do neither, an assert is called. + +If you keep pointers to some additional metadata associated with your virtual allocations in their `pUserData`, +don't forget to free them. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock( + VmaVirtualBlock VMA_NULLABLE virtualBlock); + +/** \brief Returns true if the #VmaVirtualBlock is empty - contains 0 virtual allocations and has all its space available for new allocations. +*/ +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty( + VmaVirtualBlock VMA_NOT_NULL virtualBlock); + +/** \brief Returns information about a specific virtual allocation within a virtual block, like its size and `pUserData` pointer. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo); + +/** \brief Allocates new virtual allocation inside given #VmaVirtualBlock.
+ +If the allocation fails due to not enough free space available, `VK_ERROR_OUT_OF_DEVICE_MEMORY` is returned +(even though the function never allocates actual GPU memory). +`pAllocation` is then set to `VK_NULL_HANDLE` and `pOffset`, if not null, is set to `UINT64_MAX`. + +\param virtualBlock Virtual block +\param pCreateInfo Parameters for the allocation +\param[out] pAllocation Returned handle of the new allocation +\param[out] pOffset Returned offset of the new allocation. Optional, can be null. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, + VkDeviceSize* VMA_NULLABLE pOffset); + +/** \brief Frees virtual allocation inside given #VmaVirtualBlock. + +It is correct to call this function with `allocation == VK_NULL_HANDLE` - it does nothing. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation); + +/** \brief Frees all virtual allocations inside given #VmaVirtualBlock. + +You must either call this function or free each virtual allocation individually with vmaVirtualFree() +before destroying a virtual block. Otherwise, an assert is called. + +If you keep a pointer to some additional metadata associated with your virtual allocation in its `pUserData`, +don't forget to free it as well. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock( + VmaVirtualBlock VMA_NOT_NULL virtualBlock); + +/** \brief Changes custom pointer associated with given virtual allocation.
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, + void* VMA_NULLABLE pUserData); + +/** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats); + +/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is slow to call. Use for debugging purposes. +For less detailed statistics, see vmaGetVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats); + +/** @} */ + +#if VMA_STATS_STRING_ENABLED +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock. +\param virtualBlock Virtual block. +\param[out] ppStatsString Returned string. +\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces. + +Returned string must be freed using vmaFreeVirtualBlockStatsString(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, + VkBool32 detailedMap); + +/// Frees a string returned by vmaBuildVirtualBlockStatsString(). 
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE pStatsString); + +/** \brief Builds and returns statistics as a null-terminated string in JSON format. +\param allocator +\param[out] ppStatsString Must be freed using vmaFreeStatsString() function. +\param detailedMap +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( + VmaAllocator VMA_NOT_NULL allocator, + char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString, + VkBool32 detailedMap); + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( + VmaAllocator VMA_NOT_NULL allocator, + char* VMA_NULLABLE pStatsString); + +/** @} */ + +#endif // VMA_STATS_STRING_ENABLED + +#endif // _VMA_FUNCTION_HEADERS + +#ifdef __cplusplus +} +#endif + +#endif // AMD_VULKAN_MEMORY_ALLOCATOR_H + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// IMPLEMENTATION +// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// For Visual Studio IntelliSense. +#if defined(__cplusplus) && defined(__INTELLISENSE__) +#define VMA_IMPLEMENTATION +#endif + +#ifdef VMA_IMPLEMENTATION +#undef VMA_IMPLEMENTATION + +#include +#include +#include +#include +#include +#include + +#if !defined(VMA_CPP20) + #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 + #define VMA_CPP20 1 + #else + #define VMA_CPP20 0 + #endif +#endif + +#ifdef _MSC_VER + #include // For functions like __popcnt, _BitScanForward etc. 
+#endif +#if VMA_CPP20 + #include +#endif + +#if VMA_STATS_STRING_ENABLED + #include // For snprintf +#endif + +/******************************************************************************* +CONFIGURATION SECTION + +Define some of these macros before each #include of this header or change them +here if you need other then default behavior depending on your environment. +*/ +#ifndef _VMA_CONFIGURATION + +/* +Define this macro to 1 to make the library fetch pointers to Vulkan functions +internally, like: + + vulkanFunctions.vkAllocateMemory = &vkAllocateMemory; +*/ +#if !defined(VMA_STATIC_VULKAN_FUNCTIONS) && !defined(VK_NO_PROTOTYPES) + #define VMA_STATIC_VULKAN_FUNCTIONS 1 +#endif + +/* +Define this macro to 1 to make the library fetch pointers to Vulkan functions +internally, like: + + vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory"); + +To use this feature in new versions of VMA you now have to pass +VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as +VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null. +*/ +#if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS) + #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#endif + +#ifndef VMA_USE_STL_SHARED_MUTEX + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 + #define VMA_USE_STL_SHARED_MUTEX 1 + // Visual studio defines __cplusplus properly only when passed additional parameter: /Zc:__cplusplus + // Otherwise it is always 199711L, despite shared_mutex works since Visual Studio 2015 Update 2. 
+ #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918 && __cplusplus == 199711L && _MSVC_LANG >= 201703L + #define VMA_USE_STL_SHARED_MUTEX 1 + #else + #define VMA_USE_STL_SHARED_MUTEX 0 + #endif +#endif + +/* +Define this macro to include custom header files without having to edit this file directly, e.g.: + + // Inside of "my_vma_configuration_user_includes.h": + + #include "my_custom_assert.h" // for MY_CUSTOM_ASSERT + #include "my_custom_min.h" // for my_custom_min + #include + #include + + // Inside a different file, which includes "vk_mem_alloc.h": + + #define VMA_CONFIGURATION_USER_INCLUDES_H "my_vma_configuration_user_includes.h" + #define VMA_ASSERT(expr) MY_CUSTOM_ASSERT(expr) + #define VMA_MIN(v1, v2) (my_custom_min(v1, v2)) + #include "vk_mem_alloc.h" + ... + +The following headers are used in this CONFIGURATION section only, so feel free to +remove them if not needed. +*/ +#if !defined(VMA_CONFIGURATION_USER_INCLUDES_H) + #include // for assert + #include // for min, max, swap + #include +#else + #include VMA_CONFIGURATION_USER_INCLUDES_H +#endif + +#ifndef VMA_NULL + // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. + #define VMA_NULL nullptr +#endif + +#ifndef VMA_FALLTHROUGH + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 + #define VMA_FALLTHROUGH [[fallthrough]] + #else + #define VMA_FALLTHROUGH + #endif +#endif + +// Normal assert to check for programmer's errors, especially in Debug configuration. +#ifndef VMA_ASSERT + #ifdef NDEBUG + #define VMA_ASSERT(expr) + #else + #define VMA_ASSERT(expr) assert(expr) + #endif +#endif + +// Assert that will be called very often, like inside data structures e.g. operator[]. +// Making it non-empty can make program slow. +#ifndef VMA_HEAVY_ASSERT + #ifdef NDEBUG + #define VMA_HEAVY_ASSERT(expr) + #else + #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr) + #endif +#endif + +// Assert used for reporting memory leaks - unfreed allocations. 
+#ifndef VMA_ASSERT_LEAK + #define VMA_ASSERT_LEAK(expr) VMA_ASSERT(expr) +#endif + +// If your compiler is not compatible with C++17 and definition of +// aligned_alloc() function is missing, uncommenting following line may help: + +//#include + +#if defined(__ANDROID_API__) && (__ANDROID_API__ < 16) +#include +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + // alignment must be >= sizeof(void*) + if(alignment < sizeof(void*)) + { + alignment = sizeof(void*); + } + + return memalign(alignment, size); +} +#elif defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)) +#include + +#if defined(__APPLE__) +#include +#endif + +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + // Unfortunately, aligned_alloc causes VMA to crash due to it returning null pointers. (At least under 11.4) + // Therefore, for now disable this specific exception until a proper solution is found. + //#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) + //#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 + // // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only + // // with the MacOSX11.0 SDK in Xcode 12 (which is what adds + // // MAC_OS_X_VERSION_10_16), even though the function is marked + // // available for 10.15. That is why the preprocessor checks for 10.16 but + // // the __builtin_available checks for 10.15. + // // People who use C++17 could call aligned_alloc with the 10.15 SDK already. 
+ // if (__builtin_available(macOS 10.15, iOS 13, *)) + // return aligned_alloc(alignment, size); + //#endif + //#endif + + // alignment must be >= sizeof(void*) + if(alignment < sizeof(void*)) + { + alignment = sizeof(void*); + } + + void *pointer; + if(posix_memalign(&pointer, alignment, size) == 0) + return pointer; + return VMA_NULL; +} +#elif defined(_WIN32) +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return _aligned_malloc(size, alignment); +} +#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return aligned_alloc(alignment, size); +} +#else +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + VMA_ASSERT(0 && "Could not implement aligned_alloc automatically. Please enable C++17 or later in your compiler or provide custom implementation of macro VMA_SYSTEM_ALIGNED_MALLOC (and VMA_SYSTEM_ALIGNED_FREE if needed) using the API of your system."); + return VMA_NULL; +} +#endif + +#if defined(_WIN32) +static void vma_aligned_free(void* ptr) +{ + _aligned_free(ptr); +} +#else +static void vma_aligned_free(void* VMA_NULLABLE ptr) +{ + free(ptr); +} +#endif + +#ifndef VMA_ALIGN_OF + #define VMA_ALIGN_OF(type) (alignof(type)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_MALLOC + #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) vma_aligned_alloc((alignment), (size)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_FREE + // VMA_SYSTEM_FREE is the old name, but might have been defined by the user + #if defined(VMA_SYSTEM_FREE) + #define VMA_SYSTEM_ALIGNED_FREE(ptr) VMA_SYSTEM_FREE(ptr) + #else + #define VMA_SYSTEM_ALIGNED_FREE(ptr) vma_aligned_free(ptr) + #endif +#endif + +#ifndef VMA_COUNT_BITS_SET + // Returns number of bits set to 1 in (v) + #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + +#ifndef VMA_BITSCAN_LSB + // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). 
If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) +#endif + +#ifndef VMA_BITSCAN_MSB + // Scans integer for index of first nonzero value from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_MSB(mask) VmaBitScanMSB(mask) +#endif + +#ifndef VMA_MIN + #define VMA_MIN(v1, v2) ((std::min)((v1), (v2))) +#endif + +#ifndef VMA_MAX + #define VMA_MAX(v1, v2) ((std::max)((v1), (v2))) +#endif + +#ifndef VMA_SORT + #define VMA_SORT(beg, end, cmp) std::sort(beg, end, cmp) +#endif + +#ifndef VMA_DEBUG_LOG_FORMAT + #define VMA_DEBUG_LOG_FORMAT(format, ...) + /* + #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ + printf("\n"); \ + } while(false) + */ +#endif + +#ifndef VMA_DEBUG_LOG + #define VMA_DEBUG_LOG(str) VMA_DEBUG_LOG_FORMAT("%s", (str)) +#endif + +#ifndef VMA_LEAK_LOG_FORMAT + #define VMA_LEAK_LOG_FORMAT(format, ...) VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__) +#endif + +#ifndef VMA_CLASS_NO_COPY + #define VMA_CLASS_NO_COPY(className) \ + private: \ + className(const className&) = delete; \ + className& operator=(const className&) = delete; +#endif +#ifndef VMA_CLASS_NO_COPY_NO_MOVE + #define VMA_CLASS_NO_COPY_NO_MOVE(className) \ + private: \ + className(const className&) = delete; \ + className(className&&) = delete; \ + className& operator=(const className&) = delete; \ + className& operator=(className&&) = delete; +#endif + +// Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString. 
+#if VMA_STATS_STRING_ENABLED + static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num) + { + snprintf(outStr, strLen, "%" PRIu32, num); + } + static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num) + { + snprintf(outStr, strLen, "%" PRIu64, num); + } + static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr) + { + snprintf(outStr, strLen, "%p", ptr); + } +#endif + +#ifndef VMA_MUTEX + class VmaMutex + { + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex) + public: + VmaMutex() { } + void Lock() { m_Mutex.lock(); } + void Unlock() { m_Mutex.unlock(); } + bool TryLock() { return m_Mutex.try_lock(); } + private: + std::mutex m_Mutex; + }; + #define VMA_MUTEX VmaMutex +#endif + +// Read-write mutex, where "read" is shared access, "write" is exclusive access. +#ifndef VMA_RW_MUTEX + #if VMA_USE_STL_SHARED_MUTEX + // Use std::shared_mutex from C++17. + #include + class VmaRWMutex + { + public: + void LockRead() { m_Mutex.lock_shared(); } + void UnlockRead() { m_Mutex.unlock_shared(); } + bool TryLockRead() { return m_Mutex.try_lock_shared(); } + void LockWrite() { m_Mutex.lock(); } + void UnlockWrite() { m_Mutex.unlock(); } + bool TryLockWrite() { return m_Mutex.try_lock(); } + private: + std::shared_mutex m_Mutex; + }; + #define VMA_RW_MUTEX VmaRWMutex + #elif defined(_WIN32) && defined(WINVER) && defined(SRWLOCK_INIT) && WINVER >= 0x0600 + // Use SRWLOCK from WinAPI. + // Minimum supported client = Windows Vista, server = Windows Server 2008. 
+ class VmaRWMutex + { + public: + VmaRWMutex() { InitializeSRWLock(&m_Lock); } + void LockRead() { AcquireSRWLockShared(&m_Lock); } + void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } + bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock) != FALSE; } + void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } + void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } + bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock) != FALSE; } + private: + SRWLOCK m_Lock; + }; + #define VMA_RW_MUTEX VmaRWMutex + #else + // Less efficient fallback: Use normal mutex. + class VmaRWMutex + { + public: + void LockRead() { m_Mutex.Lock(); } + void UnlockRead() { m_Mutex.Unlock(); } + bool TryLockRead() { return m_Mutex.TryLock(); } + void LockWrite() { m_Mutex.Lock(); } + void UnlockWrite() { m_Mutex.Unlock(); } + bool TryLockWrite() { return m_Mutex.TryLock(); } + private: + VMA_MUTEX m_Mutex; + }; + #define VMA_RW_MUTEX VmaRWMutex + #endif // #if VMA_USE_STL_SHARED_MUTEX +#endif // #ifndef VMA_RW_MUTEX + +/* +If providing your own implementation, you need to implement a subset of std::atomic. +*/ +#ifndef VMA_ATOMIC_UINT32 + #include + #define VMA_ATOMIC_UINT32 std::atomic +#endif + +#ifndef VMA_ATOMIC_UINT64 + #include + #define VMA_ATOMIC_UINT64 std::atomic +#endif + +#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY + /** + Every allocation will have its own memory block. + Define to 1 for debugging purposes only. + */ + #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0) +#endif + +#ifndef VMA_MIN_ALIGNMENT + /** + Minimum alignment of all allocations, in bytes. + Set to more than 1 for debugging purposes. Must be power of two. + */ + #ifdef VMA_DEBUG_ALIGNMENT // Old name + #define VMA_MIN_ALIGNMENT VMA_DEBUG_ALIGNMENT + #else + #define VMA_MIN_ALIGNMENT (1) + #endif +#endif + +#ifndef VMA_DEBUG_MARGIN + /** + Minimum margin after every allocation, in bytes. + Set nonzero for debugging purposes only. 
+ */ + #define VMA_DEBUG_MARGIN (0) +#endif + +#ifndef VMA_DEBUG_INITIALIZE_ALLOCATIONS + /** + Define this macro to 1 to automatically fill new allocations and destroyed + allocations with some bit pattern. + */ + #define VMA_DEBUG_INITIALIZE_ALLOCATIONS (0) +#endif + +#ifndef VMA_DEBUG_DETECT_CORRUPTION + /** + Define this macro to 1 together with non-zero value of VMA_DEBUG_MARGIN to + enable writing magic value to the margin after every allocation and + validating it, so that memory corruptions (out-of-bounds writes) are detected. + */ + #define VMA_DEBUG_DETECT_CORRUPTION (0) +#endif + +#ifndef VMA_DEBUG_GLOBAL_MUTEX + /** + Set this to 1 for debugging purposes only, to enable single mutex protecting all + entry calls to the library. Can be useful for debugging multithreading issues. + */ + #define VMA_DEBUG_GLOBAL_MUTEX (0) +#endif + +#ifndef VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY + /** + Minimum value for VkPhysicalDeviceLimits::bufferImageGranularity. + Set to more than 1 for debugging purposes only. Must be power of two. + */ + #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1) +#endif + +#ifndef VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT + /* + Set this to 1 to make VMA never exceed VkPhysicalDeviceLimits::maxMemoryAllocationCount + and return error instead of leaving up to Vulkan implementation what to do in such cases. + */ + #define VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT (0) +#endif + +#ifndef VMA_SMALL_HEAP_MAX_SIZE + /// Maximum size of a memory heap in Vulkan to consider it "small". + #define VMA_SMALL_HEAP_MAX_SIZE (1024ull * 1024 * 1024) +#endif + +#ifndef VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE + /// Default size of a block allocated as single VkDeviceMemory from a "large" heap. + #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) +#endif + +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. 
+It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDoc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED + #define VMA_MAPPING_HYSTERESIS_ENABLED 1 +#endif + +#define VMA_VALIDATE(cond) do { if(!(cond)) { \ + VMA_ASSERT(0 && "Validation failed: " #cond); \ + return false; \ + } } while(false) + +/******************************************************************************* +END OF CONFIGURATION +*/ +#endif // _VMA_CONFIGURATION + + +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC; +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; +// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F. +static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; + +// Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040; +static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080; +static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000; +static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200; +static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000; +static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; +static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; +static const uint32_t VMA_VENDOR_ID_AMD = 4098; + +// This one is tricky. Vulkan specification defines this code as available since +// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. +// See pull request #207. +#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13) + + +#if VMA_STATS_STRING_ENABLED +// Correspond to values of enum VmaSuballocationType. 
// The array is indexed by the numeric enum value, so the order of the strings
// must stay in sync with VmaSuballocationType (FREE = 0 ... IMAGE_OPTIMAL = 5).
static const char* VMA_SUBALLOCATION_TYPE_NAMES[] =
{
    "FREE",
    "UNKNOWN",
    "BUFFER",
    "IMAGE_UNKNOWN",
    "IMAGE_LINEAR",
    "IMAGE_OPTIMAL",
};
#endif

// Allocation callbacks structure with all six members set to null.
static VkAllocationCallbacks VmaEmptyAllocationCallbacks =
    { VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL };


#ifndef _VMA_ENUM_DECLARATIONS

// Kind of resource occupying a suballocation. The numeric order matters:
// VmaIsBufferImageGranularityConflict() compares and swaps values by magnitude.
enum VmaSuballocationType
{
    VMA_SUBALLOCATION_TYPE_FREE = 0,
    VMA_SUBALLOCATION_TYPE_UNKNOWN = 1,
    VMA_SUBALLOCATION_TYPE_BUFFER = 2,
    VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3,
    VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4,
    VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5,
    VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF
};

// Selects between flushing and invalidating a memory range.
enum VMA_CACHE_OPERATION
{
    VMA_CACHE_FLUSH,
    VMA_CACHE_INVALIDATE
};

// Identifies which algorithm / placement produced an allocation request.
enum class VmaAllocationRequestType
{
    Normal,
    TLSF,
    // Used by "Linear" algorithm.
    UpperAddress,
    EndOf1st,
    EndOf2nd,
};

#endif // _VMA_ENUM_DECLARATIONS

#ifndef _VMA_FORWARD_DECLARATIONS
// Opaque handle used by allocation algorithms to identify single allocation in any conforming way.
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaAllocHandle); + +struct VmaMutexLock; +struct VmaMutexLockRead; +struct VmaMutexLockWrite; + +template +struct AtomicTransactionalIncrement; + +template +struct VmaStlAllocator; + +template +class VmaVector; + +template +class VmaSmallVector; + +template +class VmaPoolAllocator; + +template +struct VmaListItem; + +template +class VmaRawList; + +template +class VmaList; + +template +class VmaIntrusiveLinkedList; + +#if VMA_STATS_STRING_ENABLED +class VmaStringBuilder; +class VmaJsonWriter; +#endif + +class VmaDeviceMemoryBlock; + +struct VmaDedicatedAllocationListItemTraits; +class VmaDedicatedAllocationList; + +struct VmaSuballocation; +struct VmaSuballocationOffsetLess; +struct VmaSuballocationOffsetGreater; +struct VmaSuballocationItemSizeLess; + +typedef VmaList> VmaSuballocationList; + +struct VmaAllocationRequest; + +class VmaBlockMetadata; +class VmaBlockMetadata_Linear; +class VmaBlockMetadata_TLSF; + +class VmaBlockVector; + +struct VmaPoolListItemTraits; + +struct VmaCurrentBudgetData; + +class VmaAllocationObjectAllocator; + +#endif // _VMA_FORWARD_DECLARATIONS + + +#ifndef _VMA_FUNCTIONS + +/* +Returns number of bits set to 1 in (v). + +On specific platforms and compilers you can use intrinsics like: + +Visual Studio: + return __popcnt(v); +GCC, Clang: + return static_cast(__builtin_popcount(v)); + +Define macro VMA_COUNT_BITS_SET to provide your optimized implementation. +But you need to check in runtime whether user's CPU supports these, as some old processors don't. 
+*/ +static inline uint32_t VmaCountBitsSet(uint32_t v) +{ +#if VMA_CPP20 + return std::popcount(v); +#else + uint32_t c = v - ((v >> 1) & 0x55555555); + c = ((c >> 2) & 0x33333333) + (c & 0x33333333); + c = ((c >> 4) + c) & 0x0F0F0F0F; + c = ((c >> 8) + c) & 0x00FF00FF; + c = ((c >> 16) + c) & 0x0000FFFF; + return c; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanForward64(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif VMA_CPP20 + if(mask) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffsll(mask)) - 1U; +#else + uint8_t pos = 0; + uint64_t bit = 1; + do + { + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 63); + return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanLSB(uint32_t mask) +{ +#ifdef _MSC_VER + unsigned long pos; + if (_BitScanForward(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif VMA_CPP20 + if(mask) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffs(mask)) - 1U; +#else + uint8_t pos = 0; + uint32_t bit = 1; + do + { + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 31); + return UINT8_MAX; +#endif +} + +static inline uint8_t VmaBitScanMSB(uint64_t mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanReverse64(&pos, mask)) + return static_cast(pos); +#elif VMA_CPP20 + if(mask) + return 63 - static_cast(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 63 - static_cast(__builtin_clzll(mask)); +#else + uint8_t pos = 63; + uint64_t bit = 1ULL << 63; + do + { + if (mask & bit) + return pos; + bit >>= 1; + } while (pos-- > 0); +#endif + return UINT8_MAX; +} + +static inline uint8_t VmaBitScanMSB(uint32_t mask) +{ +#ifdef 
_MSC_VER + unsigned long pos; + if (_BitScanReverse(&pos, mask)) + return static_cast(pos); +#elif VMA_CPP20 + if(mask) + return 31 - static_cast(std::countl_zero(mask)); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 31 - static_cast(__builtin_clz(mask)); +#else + uint8_t pos = 31; + uint32_t bit = 1UL << 31; + do + { + if (mask & bit) + return pos; + bit >>= 1; + } while (pos-- > 0); +#endif + return UINT8_MAX; +} + +/* +Returns true if given number is a power of two. +T must be unsigned integer number or signed integer but always nonnegative. +For 0 returns true. +*/ +template +inline bool VmaIsPow2(T x) +{ + return (x & (x - 1)) == 0; +} + +// Aligns given value up to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 16. +// Use types like uint32_t, uint64_t as T. +template +static inline T VmaAlignUp(T val, T alignment) +{ + VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); + return (val + alignment - 1) & ~(alignment - 1); +} + +// Aligns given value down to nearest multiply of align value. For example: VmaAlignDown(11, 8) = 8. +// Use types like uint32_t, uint64_t as T. +template +static inline T VmaAlignDown(T val, T alignment) +{ + VMA_HEAVY_ASSERT(VmaIsPow2(alignment)); + return val & ~(alignment - 1); +} + +// Division with mathematical rounding to nearest number. +template +static inline T VmaRoundDiv(T x, T y) +{ + return (x + (y / (T)2)) / y; +} + +// Divide by 'y' and round up to nearest integer. +template +static inline T VmaDivideRoundingUp(T x, T y) +{ + return (x + y - (T)1) / y; +} + +// Returns smallest power of 2 greater or equal to v. +static inline uint32_t VmaNextPow2(uint32_t v) +{ + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +static inline uint64_t VmaNextPow2(uint64_t v) +{ + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + v++; + return v; +} + +// Returns largest power of 2 less or equal to v. 
+static inline uint32_t VmaPrevPow2(uint32_t v) +{ + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v = v ^ (v >> 1); + return v; +} + +static inline uint64_t VmaPrevPow2(uint64_t v) +{ + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + v = v ^ (v >> 1); + return v; +} + +static inline bool VmaStrIsEmpty(const char* pStr) +{ + return pStr == VMA_NULL || *pStr == '\0'; +} + +/* +Returns true if two memory blocks occupy overlapping pages. +ResourceA must be in less memory offset than ResourceB. + +Algorithm is based on "Vulkan 1.0.39 - A Specification (with all registered Vulkan extensions)" +chapter 11.6 "Resource Memory Association", paragraph "Buffer-Image Granularity". +*/ +static inline bool VmaBlocksOnSamePage( + VkDeviceSize resourceAOffset, + VkDeviceSize resourceASize, + VkDeviceSize resourceBOffset, + VkDeviceSize pageSize) +{ + VMA_ASSERT(resourceAOffset + resourceASize <= resourceBOffset && resourceASize > 0 && pageSize > 0); + VkDeviceSize resourceAEnd = resourceAOffset + resourceASize - 1; + VkDeviceSize resourceAEndPage = resourceAEnd & ~(pageSize - 1); + VkDeviceSize resourceBStart = resourceBOffset; + VkDeviceSize resourceBStartPage = resourceBStart & ~(pageSize - 1); + return resourceAEndPage == resourceBStartPage; +} + +/* +Returns true if given suballocation types could conflict and must respect +VkPhysicalDeviceLimits::bufferImageGranularity. They conflict if one is buffer +or linear image and another one is optimal image. If type is unknown, behave +conservatively. 
+*/ +static inline bool VmaIsBufferImageGranularityConflict( + VmaSuballocationType suballocType1, + VmaSuballocationType suballocType2) +{ + if (suballocType1 > suballocType2) + { + std::swap(suballocType1, suballocType2); + } + + switch (suballocType1) + { + case VMA_SUBALLOCATION_TYPE_FREE: + return false; + case VMA_SUBALLOCATION_TYPE_UNKNOWN: + return true; + case VMA_SUBALLOCATION_TYPE_BUFFER: + return + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; + case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN: + return + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR || + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; + case VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR: + return + suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; + case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL: + return false; + default: + VMA_ASSERT(0); + return true; + } +} + +static void VmaWriteMagicValue(void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION + uint32_t* pDst = (uint32_t*)((char*)pData + offset); + const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); + for (size_t i = 0; i < numberCount; ++i, ++pDst) + { + *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE; + } +#else + // no-op +#endif +} + +static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset) +{ +#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION + const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset); + const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t); + for (size_t i = 0; i < numberCount; ++i, ++pSrc) + { + if (*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE) + { + return false; + } + } +#endif + return true; +} + +/* +Fills structure with parameters of an example buffer to be used for transfers +during GPU memory defragmentation. 
+*/ +static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo) +{ + memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo)); + outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size. +} + + +/* +Performs binary search and returns iterator to first element that is greater or +equal to (key), according to comparison (cmp). + +Cmp should return true if first argument is less than second argument. + +Returned value is the found element, if present in the collection or place where +new element with value (key) should be inserted. +*/ +template +static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) +{ + size_t down = 0, up = size_t(end - beg); + while (down < up) + { + const size_t mid = down + (up - down) / 2; // Overflow-safe midpoint calculation + if (cmp(*(beg + mid), key)) + { + down = mid + 1; + } + else + { + up = mid; + } + } + return beg + down; +} + +template +IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) +{ + IterT it = VmaBinaryFindFirstNotLess( + beg, end, value, cmp); + if (it == end || + (!cmp(*it, value) && !cmp(value, *it))) + { + return it; + } + return end; +} + +/* +Returns true if all pointers in the array are not-null and unique. +Warning! O(n^2) complexity. Use only inside VMA_HEAVY_ASSERT. +T must be pointer type, e.g. VmaAllocation, VmaPool. 
+*/ +template +static bool VmaValidatePointerArray(uint32_t count, const T* arr) +{ + for (uint32_t i = 0; i < count; ++i) + { + const T iPtr = arr[i]; + if (iPtr == VMA_NULL) + { + return false; + } + for (uint32_t j = i + 1; j < count; ++j) + { + if (iPtr == arr[j]) + { + return false; + } + } + } + return true; +} + +template +static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) +{ + newStruct->pNext = mainStruct->pNext; + mainStruct->pNext = newStruct; +} +// Finds structure with s->sType == sType in mainStruct->pNext chain. +// Returns pointer to it. If not found, returns null. +template +static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType) +{ + for(const VkBaseInStructure* s = (const VkBaseInStructure*)mainStruct->pNext; + s != VMA_NULL; s = s->pNext) + { + if(s->sType == sType) + { + return (const FindT*)s; + } + } + return VMA_NULL; +} + +// An abstraction over buffer or image `usage` flags, depending on available extensions. +struct VmaBufferImageUsage +{ +#if VMA_KHR_MAINTENANCE5 + typedef uint64_t BaseType; // VkFlags64 +#else + typedef uint32_t BaseType; // VkFlags32 +#endif + + static const VmaBufferImageUsage UNKNOWN; + + BaseType Value; + + VmaBufferImageUsage() { *this = UNKNOWN; } + explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { } + VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5); + explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo); + + bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; } + bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; } + + bool Contains(BaseType flag) const { return (Value & flag) != 0; } + bool ContainsDeviceAccess() const + { + // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_IMAGE_TRANSFER*. 
+ return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; + } +}; + +const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); + +VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, + bool useKhrMaintenance5) +{ +#if VMA_KHR_MAINTENANCE5 + if(useKhrMaintenance5) + { + // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR, + // take usage from it and ignore VkBufferCreateInfo::usage, per specification + // of the VK_KHR_maintenance5 extension. + const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 = + VmaPnextChainFind(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR); + if(usageFlags2) + { + this->Value = usageFlags2->usage; + return; + } + } +#endif + + this->Value = (BaseType)createInfo.usage; +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo) +{ + // Maybe in the future there will be VK_KHR_maintenanceN extension with structure + // VkImageUsageFlags2CreateInfoKHR, like the one for buffers... + + this->Value = (BaseType)createInfo.usage; +} + +// This is the main algorithm that guides the selection of a memory type best for an allocation - +// converts usage to required/preferred/not preferred flags. 
+static bool FindMemoryPreferences( + bool isIntegratedGPU, + const VmaAllocationCreateInfo& allocCreateInfo, + VmaBufferImageUsage bufImgUsage, + VkMemoryPropertyFlags& outRequiredFlags, + VkMemoryPropertyFlags& outPreferredFlags, + VkMemoryPropertyFlags& outNotPreferredFlags) +{ + outRequiredFlags = allocCreateInfo.requiredFlags; + outPreferredFlags = allocCreateInfo.preferredFlags; + outNotPreferredFlags = 0; + + switch(allocCreateInfo.usage) + { + case VMA_MEMORY_USAGE_UNKNOWN: + break; + case VMA_MEMORY_USAGE_GPU_ONLY: + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_CPU_ONLY: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + case VMA_MEMORY_USAGE_CPU_TO_GPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_GPU_TO_CPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + break; + case VMA_MEMORY_USAGE_CPU_COPY: + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: + outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; + break; + case VMA_MEMORY_USAGE_AUTO: + case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: + case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: + { + if(bufImgUsage == VmaBufferImageUsage::UNKNOWN) + { + VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known." + " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" 
); + return false; + } + + const bool deviceAccess = bufImgUsage.ContainsDeviceAccess(); + const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; + const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; + const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; + const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + + // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. + if(hostAccessRandom) + { + // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)! + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. + // Omitting HOST_VISIBLE here is intentional. + // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. + // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + else + { + // Always CPU memory. + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + } + // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined. + else if(hostAccessSequentialWrite) + { + // Want uncached and write-combined. 
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + else + { + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame) + if(deviceAccess) + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU) + else + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory. + if(preferDevice) + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + } + } + // No CPU access + else + { + // if(deviceAccess) + // + // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory, + // unless there is a clear preference from the user not to do so. + // + // else: + // + // No direct GPU access, no CPU access, just transfers. + // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or + // a "swap file" copy to free some GPU memory (then better CPU memory). + // Up to the user to decide. If no preferece, assume the former and choose GPU memory. + + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + } + default: + VMA_ASSERT(0); + } + + // Avoid DEVICE_COHERENT unless explicitly requested. 
+ if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) & + (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) + { + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY; + } + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +// Memory allocation + +static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment) +{ + void* result = VMA_NULL; + if ((pAllocationCallbacks != VMA_NULL) && + (pAllocationCallbacks->pfnAllocation != VMA_NULL)) + { + result = (*pAllocationCallbacks->pfnAllocation)( + pAllocationCallbacks->pUserData, + size, + alignment, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + } + else + { + result = VMA_SYSTEM_ALIGNED_MALLOC(size, alignment); + } + VMA_ASSERT(result != VMA_NULL && "CPU memory allocation failed."); + return result; +} + +static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr) +{ + if ((pAllocationCallbacks != VMA_NULL) && + (pAllocationCallbacks->pfnFree != VMA_NULL)) + { + (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr); + } + else + { + VMA_SYSTEM_ALIGNED_FREE(ptr); + } +} + +template +static T* VmaAllocate(const VkAllocationCallbacks* pAllocationCallbacks) +{ + return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T), VMA_ALIGN_OF(T)); +} + +template +static T* VmaAllocateArray(const VkAllocationCallbacks* pAllocationCallbacks, size_t count) +{ + return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T) * count, VMA_ALIGN_OF(T)); +} + +#define vma_new(allocator, type) new(VmaAllocate(allocator))(type) + +#define vma_new_array(allocator, type, count) new(VmaAllocateArray((allocator), (count)))(type) + +template +static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr) +{ + ptr->~T(); + VmaFree(pAllocationCallbacks, ptr); +} + +template +static void vma_delete_array(const VkAllocationCallbacks* 
pAllocationCallbacks, T* ptr, size_t count) +{ + if (ptr != VMA_NULL) + { + for (size_t i = count; i--; ) + { + ptr[i].~T(); + } + VmaFree(pAllocationCallbacks, ptr); + } +} + +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr) +{ + if (srcStr != VMA_NULL) + { + const size_t len = strlen(srcStr); + char* const result = vma_new_array(allocs, char, len + 1); + memcpy(result, srcStr, len + 1); + return result; + } + return VMA_NULL; +} + +#if VMA_STATS_STRING_ENABLED +static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr, size_t strLen) +{ + if (srcStr != VMA_NULL) + { + char* const result = vma_new_array(allocs, char, strLen + 1); + memcpy(result, srcStr, strLen); + result[strLen] = '\0'; + return result; + } + return VMA_NULL; +} +#endif // VMA_STATS_STRING_ENABLED + +static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str) +{ + if (str != VMA_NULL) + { + const size_t len = strlen(str); + vma_delete_array(allocs, str, len + 1); + } +} + +template +size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value) +{ + const size_t indexToInsert = VmaBinaryFindFirstNotLess( + vector.data(), + vector.data() + vector.size(), + value, + CmpLess()) - vector.data(); + VmaVectorInsert(vector, indexToInsert, value); + return indexToInsert; +} + +template +bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& value) +{ + CmpLess comparator; + typename VectorT::iterator it = VmaBinaryFindFirstNotLess( + vector.begin(), + vector.end(), + value, + comparator); + if ((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) + { + size_t indexToRemove = it - vector.begin(); + VmaVectorRemove(vector, indexToRemove); + return true; + } + return false; +} +#endif // _VMA_FUNCTIONS + +#ifndef _VMA_STATISTICS_FUNCTIONS + +static void VmaClearStatistics(VmaStatistics& outStats) +{ + outStats.blockCount = 0; + 
outStats.allocationCount = 0; + outStats.blockBytes = 0; + outStats.allocationBytes = 0; +} + +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) +{ + inoutStats.blockCount += src.blockCount; + inoutStats.allocationCount += src.allocationCount; + inoutStats.blockBytes += src.blockBytes; + inoutStats.allocationBytes += src.allocationBytes; +} + +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) +{ + VmaClearStatistics(outStats.statistics); + outStats.unusedRangeCount = 0; + outStats.allocationSizeMin = VK_WHOLE_SIZE; + outStats.allocationSizeMax = 0; + outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; + outStats.unusedRangeSizeMax = 0; +} + +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.statistics.allocationCount++; + inoutStats.statistics.allocationBytes += size; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); +} + +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.unusedRangeCount++; + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); +} + +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) +{ + VmaAddStatistics(inoutStats.statistics, src.statistics); + inoutStats.unusedRangeCount += src.unusedRangeCount; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); +} + +#endif // 
_VMA_STATISTICS_FUNCTIONS + +#ifndef _VMA_MUTEX_LOCK +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct VmaMutexLock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock) +public: + VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : + m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->Lock(); } + } + ~VmaMutexLock() { if (m_pMutex) { m_pMutex->Unlock(); } } + +private: + VMA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct VmaMutexLockRead +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead) +public: + VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : + m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->LockRead(); } + } + ~VmaMutexLockRead() { if (m_pMutex) { m_pMutex->UnlockRead(); } } + +private: + VMA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. +struct VmaMutexLockWrite +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite) +public: + VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex) + : m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->LockWrite(); } + } + ~VmaMutexLockWrite() { if (m_pMutex) { m_pMutex->UnlockWrite(); } } + +private: + VMA_RW_MUTEX* m_pMutex; +}; + +#if VMA_DEBUG_GLOBAL_MUTEX + static VMA_MUTEX gDebugGlobalMutex; + #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true); +#else + #define VMA_DEBUG_GLOBAL_MUTEX_LOCK +#endif +#endif // _VMA_MUTEX_LOCK + +#ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT +// An object that increments given atomic but decrements it back in the destructor unless Commit() is called. 
+template +struct AtomicTransactionalIncrement +{ +public: + using T = decltype(AtomicT().load()); + + ~AtomicTransactionalIncrement() + { + if(m_Atomic) + --(*m_Atomic); + } + + void Commit() { m_Atomic = VMA_NULL; } + T Increment(AtomicT* atomic) + { + m_Atomic = atomic; + return m_Atomic->fetch_add(1); + } + +private: + AtomicT* m_Atomic = VMA_NULL; +}; +#endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT + +#ifndef _VMA_STL_ALLOCATOR +// STL-compatible allocator. +template +struct VmaStlAllocator +{ + const VkAllocationCallbacks* const m_pCallbacks; + typedef T value_type; + + VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) {} + template + VmaStlAllocator(const VmaStlAllocator& src) : m_pCallbacks(src.m_pCallbacks) {} + VmaStlAllocator(const VmaStlAllocator&) = default; + VmaStlAllocator& operator=(const VmaStlAllocator&) = delete; + + T* allocate(size_t n) { return VmaAllocateArray(m_pCallbacks, n); } + void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); } + + template + bool operator==(const VmaStlAllocator& rhs) const + { + return m_pCallbacks == rhs.m_pCallbacks; + } + template + bool operator!=(const VmaStlAllocator& rhs) const + { + return m_pCallbacks != rhs.m_pCallbacks; + } +}; +#endif // _VMA_STL_ALLOCATOR + +#ifndef _VMA_VECTOR +/* Class with interface compatible with subset of std::vector. +T must be POD because constructors and destructors are not called and memcpy is +used for these objects. */ +template +class VmaVector +{ +public: + typedef T value_type; + typedef T* iterator; + typedef const T* const_iterator; + + VmaVector(const AllocatorT& allocator); + VmaVector(size_t count, const AllocatorT& allocator); + // This version of the constructor is here for compatibility with pre-C++14 std::vector. + // value is unused. 
+ VmaVector(size_t count, const T& value, const AllocatorT& allocator) : VmaVector(count, allocator) {} + VmaVector(const VmaVector& src); + VmaVector& operator=(const VmaVector& rhs); + ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); } + + bool empty() const { return m_Count == 0; } + size_t size() const { return m_Count; } + T* data() { return m_pArray; } + T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } + T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } + const T* data() const { return m_pArray; } + const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; } + const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; } + + iterator begin() { return m_pArray; } + iterator end() { return m_pArray + m_Count; } + const_iterator cbegin() const { return m_pArray; } + const_iterator cend() const { return m_pArray + m_Count; } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } + void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } + void push_front(const T& src) { insert(0, src); } + + void push_back(const T& src); + void reserve(size_t newCapacity, bool freeMemory = false); + void resize(size_t newCount); + void clear() { resize(0); } + void shrink_to_fit(); + void insert(size_t index, const T& src); + void remove(size_t index); + + T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } + const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; } + +private: + AllocatorT m_Allocator; + T* m_pArray; + size_t m_Count; + size_t m_Capacity; +}; + +#ifndef _VMA_VECTOR_FUNCTIONS +template +VmaVector::VmaVector(const AllocatorT& allocator) + : m_Allocator(allocator), + m_pArray(VMA_NULL), + m_Count(0), + m_Capacity(0) {} + +template +VmaVector::VmaVector(size_t 
count, const AllocatorT& allocator) + : m_Allocator(allocator), + m_pArray(count ? (T*)VmaAllocateArray(allocator.m_pCallbacks, count) : VMA_NULL), + m_Count(count), + m_Capacity(count) {} + +template +VmaVector::VmaVector(const VmaVector& src) + : m_Allocator(src.m_Allocator), + m_pArray(src.m_Count ? (T*)VmaAllocateArray(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL), + m_Count(src.m_Count), + m_Capacity(src.m_Count) +{ + if (m_Count != 0) + { + memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); + } +} + +template +VmaVector& VmaVector::operator=(const VmaVector& rhs) +{ + if (&rhs != this) + { + resize(rhs.m_Count); + if (m_Count != 0) + { + memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); + } + } + return *this; +} + +template +void VmaVector::push_back(const T& src) +{ + const size_t newIndex = size(); + resize(newIndex + 1); + m_pArray[newIndex] = src; +} + +// Changes capacity to max(newCapacity, m_Count); an existing larger buffer is kept unless freeMemory is true. +template +void VmaVector::reserve(size_t newCapacity, bool freeMemory) +{ + newCapacity = VMA_MAX(newCapacity, m_Count); + + if ((newCapacity < m_Capacity) && !freeMemory) + { + newCapacity = m_Capacity; + } + + if (newCapacity != m_Capacity) + { + // Pass the allocation callbacks pointer, as the constructors, resize() and shrink_to_fit() do - not the allocator object. + T* const newArray = newCapacity ? VmaAllocateArray(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL; + if (m_Count != 0) + { + memcpy(newArray, m_pArray, m_Count * sizeof(T)); + } + VmaFree(m_Allocator.m_pCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } +} + +template +void VmaVector::resize(size_t newCount) +{ + size_t newCapacity = m_Capacity; + if (newCount > m_Capacity) + { + newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8)); + } + + if (newCapacity != m_Capacity) + { + T* const newArray = newCapacity ? 
VmaAllocateArray(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL; + const size_t elementsToCopy = VMA_MIN(m_Count, newCount); + if (elementsToCopy != 0) + { + memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); + } + VmaFree(m_Allocator.m_pCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } + + m_Count = newCount; +} + +template +void VmaVector::shrink_to_fit() +{ + if (m_Capacity > m_Count) + { + T* newArray = VMA_NULL; + if (m_Count > 0) + { + newArray = VmaAllocateArray(m_Allocator.m_pCallbacks, m_Count); + memcpy(newArray, m_pArray, m_Count * sizeof(T)); + } + VmaFree(m_Allocator.m_pCallbacks, m_pArray); + m_Capacity = m_Count; + m_pArray = newArray; + } +} + +template +void VmaVector::insert(size_t index, const T& src) +{ + VMA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + if (index < oldCount) + { + memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); + } + m_pArray[index] = src; +} + +template +void VmaVector::remove(size_t index) +{ + VMA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if (index < oldCount - 1) + { + memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); + } + resize(oldCount - 1); +} +#endif // _VMA_VECTOR_FUNCTIONS + +template +static void VmaVectorInsert(VmaVector& vec, size_t index, const T& item) +{ + vec.insert(index, item); +} + +template +static void VmaVectorRemove(VmaVector& vec, size_t index) +{ + vec.remove(index); +} +#endif // _VMA_VECTOR + +#ifndef _VMA_SMALL_VECTOR +/* +This is a vector (a variable-sized array), optimized for the case when the array is small. + +It contains some number of elements in-place, which allows it to avoid heap allocation +when the actual number of elements is below that threshold. This allows normal "small" +cases to be fast without losing generality for large inputs. 
+*/ +template +class VmaSmallVector +{ +public: + typedef T value_type; + typedef T* iterator; + + VmaSmallVector(const AllocatorT& allocator); + VmaSmallVector(size_t count, const AllocatorT& allocator); + template + VmaSmallVector(const VmaSmallVector&) = delete; + template + VmaSmallVector& operator=(const VmaSmallVector&) = delete; + ~VmaSmallVector() = default; + + bool empty() const { return m_Count == 0; } + size_t size() const { return m_Count; } + T* data() { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; } + T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } + T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } + const T* data() const { return m_Count > N ? m_DynamicArray.data() : m_StaticArray; } + const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } + const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } + + iterator begin() { return data(); } + iterator end() { return data() + m_Count; } + + void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } + void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } + void push_front(const T& src) { insert(0, src); } + + void push_back(const T& src); + void resize(size_t newCount, bool freeMemory = false); + void clear(bool freeMemory = false); + void insert(size_t index, const T& src); + void remove(size_t index); + + T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + +private: + size_t m_Count; + T m_StaticArray[N]; // Used when m_Count <= N + VmaVector m_DynamicArray; // Used when m_Count > N +}; + +#ifndef _VMA_SMALL_VECTOR_FUNCTIONS +template +VmaSmallVector::VmaSmallVector(const AllocatorT& allocator) + : m_Count(0), + m_DynamicArray(allocator) {} + +template +VmaSmallVector::VmaSmallVector(size_t count, const AllocatorT& allocator) + : 
m_Count(count), + m_DynamicArray(count > N ? count : 0, allocator) {} + +template +void VmaSmallVector::push_back(const T& src) +{ + const size_t newIndex = size(); + resize(newIndex + 1); + data()[newIndex] = src; +} + +template +void VmaSmallVector::resize(size_t newCount, bool freeMemory) +{ + if (newCount > N && m_Count > N) + { + // Any direction, staying in m_DynamicArray + m_DynamicArray.resize(newCount); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + } + else if (newCount > N && m_Count <= N) + { + // Growing, moving from m_StaticArray to m_DynamicArray + m_DynamicArray.resize(newCount); + if (m_Count > 0) + { + memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T)); + } + } + else if (newCount <= N && m_Count > N) + { + // Shrinking, moving from m_DynamicArray to m_StaticArray + if (newCount > 0) + { + memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T)); + } + m_DynamicArray.resize(0); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + } + else + { + // Any direction, staying in m_StaticArray - nothing to do here + } + m_Count = newCount; +} + +template +void VmaSmallVector::clear(bool freeMemory) +{ + m_DynamicArray.clear(); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + m_Count = 0; +} + +template +void VmaSmallVector::insert(size_t index, const T& src) +{ + VMA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + T* const dataPtr = data(); + if (index < oldCount) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_StaticArray to m_DynamicArray. 
+ memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T)); + } + dataPtr[index] = src; +} + +template +void VmaSmallVector::remove(size_t index) +{ + VMA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if (index < oldCount - 1) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray. + T* const dataPtr = data(); + memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T)); + } + resize(oldCount - 1); +} +#endif // _VMA_SMALL_VECTOR_FUNCTIONS +#endif // _VMA_SMALL_VECTOR + +#ifndef _VMA_POOL_ALLOCATOR +/* +Allocator for objects of type T using a list of arrays (pools) to speed up +allocation. Number of elements that can be allocated is not bounded because +allocator can create multiple blocks. +*/ +template +class VmaPoolAllocator +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator) +public: + VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity); + ~VmaPoolAllocator(); + template T* Alloc(Types&&... 
args); + void Free(T* ptr); + +private: + union Item + { + uint32_t NextFreeIndex; + alignas(T) char Value[sizeof(T)]; + }; + struct ItemBlock + { + Item* pItems; + uint32_t Capacity; + uint32_t FirstFreeIndex; + }; + + const VkAllocationCallbacks* m_pAllocationCallbacks; + const uint32_t m_FirstBlockCapacity; + VmaVector> m_ItemBlocks; + + ItemBlock& CreateNewBlock(); +}; + +#ifndef _VMA_POOL_ALLOCATOR_FUNCTIONS +template +VmaPoolAllocator::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity) + : m_pAllocationCallbacks(pAllocationCallbacks), + m_FirstBlockCapacity(firstBlockCapacity), + m_ItemBlocks(VmaStlAllocator(pAllocationCallbacks)) +{ + VMA_ASSERT(m_FirstBlockCapacity > 1); +} + +template +VmaPoolAllocator::~VmaPoolAllocator() +{ + for (size_t i = m_ItemBlocks.size(); i--;) + vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity); + m_ItemBlocks.clear(); +} + +template +template T* VmaPoolAllocator::Alloc(Types&&... args) +{ + for (size_t i = m_ItemBlocks.size(); i--; ) + { + ItemBlock& block = m_ItemBlocks[i]; + // This block has some free items: Use first one. + if (block.FirstFreeIndex != UINT32_MAX) + { + Item* const pItem = &block.pItems[block.FirstFreeIndex]; + block.FirstFreeIndex = pItem->NextFreeIndex; + T* result = (T*)&pItem->Value; + new(result)T(std::forward(args)...); // Explicit constructor call. + return result; + } + } + + // No block has free item: Create new one and use it. + ItemBlock& newBlock = CreateNewBlock(); + Item* const pItem = &newBlock.pItems[0]; + newBlock.FirstFreeIndex = pItem->NextFreeIndex; + T* result = (T*)&pItem->Value; + new(result) T(std::forward(args)...); // Explicit constructor call. + return result; +} + +template +void VmaPoolAllocator::Free(T* ptr) +{ + // Search all memory blocks to find ptr. + for (size_t i = m_ItemBlocks.size(); i--; ) + { + ItemBlock& block = m_ItemBlocks[i]; + + // Casting to union. 
+ Item* pItemPtr; + memcpy(&pItemPtr, &ptr, sizeof(pItemPtr)); + + // Check if pItemPtr is in address range of this block. + if ((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity)) + { + ptr->~T(); // Explicit destructor call. + const uint32_t index = static_cast(pItemPtr - block.pItems); + pItemPtr->NextFreeIndex = block.FirstFreeIndex; + block.FirstFreeIndex = index; + return; + } + } + VMA_ASSERT(0 && "Pointer doesn't belong to this memory pool."); +} + +template +typename VmaPoolAllocator::ItemBlock& VmaPoolAllocator::CreateNewBlock() +{ + const uint32_t newBlockCapacity = m_ItemBlocks.empty() ? + m_FirstBlockCapacity : m_ItemBlocks.back().Capacity * 3 / 2; + + const ItemBlock newBlock = + { + vma_new_array(m_pAllocationCallbacks, Item, newBlockCapacity), + newBlockCapacity, + 0 + }; + + m_ItemBlocks.push_back(newBlock); + + // Setup singly-linked list of all free items in this block. + for (uint32_t i = 0; i < newBlockCapacity - 1; ++i) + newBlock.pItems[i].NextFreeIndex = i + 1; + newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX; + return m_ItemBlocks.back(); +} +#endif // _VMA_POOL_ALLOCATOR_FUNCTIONS +#endif // _VMA_POOL_ALLOCATOR + +#ifndef _VMA_RAW_LIST +template +struct VmaListItem +{ + VmaListItem* pPrev; + VmaListItem* pNext; + T Value; +}; + +// Doubly linked list. +template +class VmaRawList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList) +public: + typedef VmaListItem ItemType; + + VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks); + // Intentionally not calling Clear, because that would be unnecessary + // computations to return all items to m_ItemAllocator as free. 
+ ~VmaRawList() = default; + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + + ItemType* Front() { return m_pFront; } + ItemType* Back() { return m_pBack; } + const ItemType* Front() const { return m_pFront; } + const ItemType* Back() const { return m_pBack; } + + ItemType* PushFront(); + ItemType* PushBack(); + ItemType* PushFront(const T& value); + ItemType* PushBack(const T& value); + void PopFront(); + void PopBack(); + + // Item can be null - it means PushBack. + ItemType* InsertBefore(ItemType* pItem); + // Item can be null - it means PushFront. + ItemType* InsertAfter(ItemType* pItem); + ItemType* InsertBefore(ItemType* pItem, const T& value); + ItemType* InsertAfter(ItemType* pItem, const T& value); + + void Clear(); + void Remove(ItemType* pItem); + +private: + const VkAllocationCallbacks* const m_pAllocationCallbacks; + VmaPoolAllocator m_ItemAllocator; + ItemType* m_pFront; + ItemType* m_pBack; + size_t m_Count; +}; + +#ifndef _VMA_RAW_LIST_FUNCTIONS +template +VmaRawList::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks) + : m_pAllocationCallbacks(pAllocationCallbacks), + m_ItemAllocator(pAllocationCallbacks, 128), + m_pFront(VMA_NULL), + m_pBack(VMA_NULL), + m_Count(0) {} + +template +VmaListItem* VmaRawList::PushFront() +{ + ItemType* const pNewItem = m_ItemAllocator.Alloc(); + pNewItem->pPrev = VMA_NULL; + if (IsEmpty()) + { + pNewItem->pNext = VMA_NULL; + m_pFront = pNewItem; + m_pBack = pNewItem; + m_Count = 1; + } + else + { + pNewItem->pNext = m_pFront; + m_pFront->pPrev = pNewItem; + m_pFront = pNewItem; + ++m_Count; + } + return pNewItem; +} + +template +VmaListItem* VmaRawList::PushBack() +{ + ItemType* const pNewItem = m_ItemAllocator.Alloc(); + pNewItem->pNext = VMA_NULL; + if(IsEmpty()) + { + pNewItem->pPrev = VMA_NULL; + m_pFront = pNewItem; + m_pBack = pNewItem; + m_Count = 1; + } + else + { + pNewItem->pPrev = m_pBack; + m_pBack->pNext = pNewItem; + m_pBack = pNewItem; + 
++m_Count; + } + return pNewItem; +} + +template +VmaListItem* VmaRawList::PushFront(const T& value) +{ + ItemType* const pNewItem = PushFront(); + pNewItem->Value = value; + return pNewItem; +} + +template +VmaListItem* VmaRawList::PushBack(const T& value) +{ + ItemType* const pNewItem = PushBack(); + pNewItem->Value = value; + return pNewItem; +} + +template +void VmaRawList::PopFront() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const pFrontItem = m_pFront; + ItemType* const pNextItem = pFrontItem->pNext; + if (pNextItem != VMA_NULL) + { + pNextItem->pPrev = VMA_NULL; + } + m_pFront = pNextItem; + m_ItemAllocator.Free(pFrontItem); + --m_Count; +} + +template +void VmaRawList::PopBack() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const pBackItem = m_pBack; + ItemType* const pPrevItem = pBackItem->pPrev; + if(pPrevItem != VMA_NULL) + { + pPrevItem->pNext = VMA_NULL; + } + m_pBack = pPrevItem; + m_ItemAllocator.Free(pBackItem); + --m_Count; +} + +template +void VmaRawList::Clear() +{ + if (IsEmpty() == false) + { + ItemType* pItem = m_pBack; + while (pItem != VMA_NULL) + { + ItemType* const pPrevItem = pItem->pPrev; + m_ItemAllocator.Free(pItem); + pItem = pPrevItem; + } + m_pFront = VMA_NULL; + m_pBack = VMA_NULL; + m_Count = 0; + } +} + +template +void VmaRawList::Remove(ItemType* pItem) +{ + VMA_HEAVY_ASSERT(pItem != VMA_NULL); + VMA_HEAVY_ASSERT(m_Count > 0); + + if(pItem->pPrev != VMA_NULL) + { + pItem->pPrev->pNext = pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = pItem->pNext; + } + + if(pItem->pNext != VMA_NULL) + { + pItem->pNext->pPrev = pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = pItem->pPrev; + } + + m_ItemAllocator.Free(pItem); + --m_Count; +} + +template +VmaListItem* VmaRawList::InsertBefore(ItemType* pItem) +{ + if(pItem != VMA_NULL) + { + ItemType* const prevItem = pItem->pPrev; + ItemType* const newItem = m_ItemAllocator.Alloc(); + newItem->pPrev = prevItem; + 
newItem->pNext = pItem; + pItem->pPrev = newItem; + if(prevItem != VMA_NULL) + { + prevItem->pNext = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = newItem; + } + ++m_Count; + return newItem; + } + else + return PushBack(); +} + +template +VmaListItem* VmaRawList::InsertAfter(ItemType* pItem) +{ + if(pItem != VMA_NULL) + { + ItemType* const nextItem = pItem->pNext; + ItemType* const newItem = m_ItemAllocator.Alloc(); + newItem->pNext = nextItem; + newItem->pPrev = pItem; + pItem->pNext = newItem; + if(nextItem != VMA_NULL) + { + nextItem->pPrev = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = newItem; + } + ++m_Count; + return newItem; + } + else + return PushFront(); +} + +template +VmaListItem* VmaRawList::InsertBefore(ItemType* pItem, const T& value) +{ + ItemType* const newItem = InsertBefore(pItem); + newItem->Value = value; + return newItem; +} + +template +VmaListItem* VmaRawList::InsertAfter(ItemType* pItem, const T& value) +{ + ItemType* const newItem = InsertAfter(pItem); + newItem->Value = value; + return newItem; +} +#endif // _VMA_RAW_LIST_FUNCTIONS +#endif // _VMA_RAW_LIST + +#ifndef _VMA_LIST +template +class VmaList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaList) +public: + class reverse_iterator; + class const_iterator; + class const_reverse_iterator; + + class iterator + { + friend class const_iterator; + friend class VmaList; + public: + iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + 
iterator operator++(int) { iterator result = *this; ++*this; return result; } + iterator operator--(int) { iterator result = *this; --*this; return result; } + + iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + iterator& operator--(); + + private: + VmaRawList* m_pList; + VmaListItem* m_pItem; + + iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class reverse_iterator + { + friend class const_reverse_iterator; + friend class VmaList; + public: + reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + reverse_iterator operator++(int) { reverse_iterator result = *this; ++* this; return result; } + reverse_iterator operator--(int) { reverse_iterator result = *this; --* this; return result; } + + reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } + reverse_iterator& operator--(); + + private: + VmaRawList* m_pList; + VmaListItem* m_pItem; + + reverse_iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_iterator + { + friend class VmaList; + public: + const_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + iterator drop_const() { return { 
const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + const_iterator operator++(int) { const_iterator result = *this; ++* this; return result; } + const_iterator operator--(int) { const_iterator result = *this; --* this; return result; } + + const_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + const_iterator& operator--(); + + private: + const VmaRawList* m_pList; + const VmaListItem* m_pItem; + + const_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_reverse_iterator + { + friend class VmaList; + public: + const_reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + reverse_iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + const_reverse_iterator operator++(int) { const_reverse_iterator result = 
*this; ++* this; return result; } + const_reverse_iterator operator--(int) { const_reverse_iterator result = *this; --* this; return result; } + + const_reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } + const_reverse_iterator& operator--(); + + private: + const VmaRawList* m_pList; + const VmaListItem* m_pItem; + + const_reverse_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + + VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) {} + + bool empty() const { return m_RawList.IsEmpty(); } + size_t size() const { return m_RawList.GetCount(); } + + iterator begin() { return iterator(&m_RawList, m_RawList.Front()); } + iterator end() { return iterator(&m_RawList, VMA_NULL); } + + const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); } + const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); } + + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); } + reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); } + + const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } + const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); } + + const_reverse_iterator rbegin() const { return crbegin(); } + const_reverse_iterator rend() const { return crend(); } + + void push_back(const T& value) { m_RawList.PushBack(value); } + iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); } + + void clear() { m_RawList.Clear(); } + void erase(iterator it) { m_RawList.Remove(it.m_pItem); } + +private: + VmaRawList m_RawList; +}; + +#ifndef _VMA_LIST_FUNCTIONS +template +typename VmaList::iterator& 
VmaList::iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename VmaList::reverse_iterator& VmaList::reverse_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} + +template +typename VmaList::const_iterator& VmaList::const_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +// Moves one step against the reverse direction, i.e. towards the front of the list. +template +typename VmaList::const_reverse_iterator& VmaList::const_reverse_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + // Stepping back from the end sentinel of a reverse traversal lands on the front item, same as reverse_iterator::operator--. + m_pItem = m_pList->Front(); + } + return *this; +} +#endif // _VMA_LIST_FUNCTIONS +#endif // _VMA_LIST + +#ifndef _VMA_INTRUSIVE_LINKED_LIST +/* +Expected interface of ItemTypeTraits: +struct MyItemTypeTraits +{ + typedef MyItem ItemType; + static ItemType* GetPrev(const ItemType* item) { return item->myPrevPtr; } + static ItemType* GetNext(const ItemType* item) { return item->myNextPtr; } + static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; } + static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; } +}; +*/ +template +class VmaIntrusiveLinkedList +{ +public: + typedef typename ItemTypeTraits::ItemType ItemType; + static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } + static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + + // Movable, not copyable. 
+ VmaIntrusiveLinkedList() = default; + VmaIntrusiveLinkedList(VmaIntrusiveLinkedList && src); + VmaIntrusiveLinkedList(const VmaIntrusiveLinkedList&) = delete; + VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src); + VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete; + ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); } + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + ItemType* Front() { return m_Front; } + ItemType* Back() { return m_Back; } + const ItemType* Front() const { return m_Front; } + const ItemType* Back() const { return m_Back; } + + void PushBack(ItemType* item); + void PushFront(ItemType* item); + ItemType* PopBack(); + ItemType* PopFront(); + + // existingItem can be null - it means PushBack. + void InsertBefore(ItemType* existingItem, ItemType* newItem); + // existingItem can be null - it means PushFront. + void InsertAfter(ItemType* existingItem, ItemType* newItem); + void Remove(ItemType* item); + void RemoveAll(); + +private: + ItemType* m_Front = VMA_NULL; + ItemType* m_Back = VMA_NULL; + size_t m_Count = 0; +}; + +#ifndef _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template +VmaIntrusiveLinkedList::VmaIntrusiveLinkedList(VmaIntrusiveLinkedList&& src) + : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; +} + +template +VmaIntrusiveLinkedList& VmaIntrusiveLinkedList::operator=(VmaIntrusiveLinkedList&& src) +{ + if (&src != this) + { + VMA_HEAVY_ASSERT(IsEmpty()); + m_Front = src.m_Front; + m_Back = src.m_Back; + m_Count = src.m_Count; + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; + } + return *this; +} + +template +void VmaIntrusiveLinkedList::PushBack(ItemType* item) +{ + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + 
ItemTypeTraits::AccessPrev(item) = m_Back; + ItemTypeTraits::AccessNext(m_Back) = item; + m_Back = item; + ++m_Count; + } +} + +template +void VmaIntrusiveLinkedList::PushFront(ItemType* item) +{ + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessNext(item) = m_Front; + ItemTypeTraits::AccessPrev(m_Front) = item; + m_Front = item; + ++m_Count; + } +} + +template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopBack() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const backItem = m_Back; + ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); + if (prevItem != VMA_NULL) + { + ItemTypeTraits::AccessNext(prevItem) = VMA_NULL; + } + m_Back = prevItem; + --m_Count; + ItemTypeTraits::AccessPrev(backItem) = VMA_NULL; + ItemTypeTraits::AccessNext(backItem) = VMA_NULL; + return backItem; +} + +template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopFront() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const frontItem = m_Front; + ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = VMA_NULL; + } + m_Front = nextItem; + --m_Count; + ItemTypeTraits::AccessPrev(frontItem) = VMA_NULL; + ItemTypeTraits::AccessNext(frontItem) = VMA_NULL; + return frontItem; +} + +template +void VmaIntrusiveLinkedList::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); + ItemTypeTraits::AccessPrev(newItem) = prevItem; + ItemTypeTraits::AccessNext(newItem) = existingItem; + ItemTypeTraits::AccessPrev(existingItem) = newItem; + if (prevItem != VMA_NULL) 
+ { + ItemTypeTraits::AccessNext(prevItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Front == existingItem); + m_Front = newItem; + } + ++m_Count; + } + else + PushBack(newItem); +} + +template +void VmaIntrusiveLinkedList::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); + ItemTypeTraits::AccessNext(newItem) = nextItem; + ItemTypeTraits::AccessPrev(newItem) = existingItem; + ItemTypeTraits::AccessNext(existingItem) = newItem; + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Back == existingItem); + m_Back = newItem; + } + ++m_Count; + } + else + return PushFront(newItem); +} + +template +void VmaIntrusiveLinkedList::Remove(ItemType* item) +{ + VMA_HEAVY_ASSERT(item != VMA_NULL && m_Count > 0); + if (ItemTypeTraits::GetPrev(item) != VMA_NULL) + { + ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); + } + else + { + VMA_HEAVY_ASSERT(m_Front == item); + m_Front = ItemTypeTraits::GetNext(item); + } + + if (ItemTypeTraits::GetNext(item) != VMA_NULL) + { + ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); + } + else + { + VMA_HEAVY_ASSERT(m_Back == item); + m_Back = ItemTypeTraits::GetPrev(item); + } + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + --m_Count; +} + +template +void VmaIntrusiveLinkedList::RemoveAll() +{ + if (!IsEmpty()) + { + ItemType* item = m_Back; + while (item != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + item = prevItem; + } + m_Front = VMA_NULL; + m_Back = VMA_NULL; + 
m_Count = 0; + } +} +#endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _VMA_INTRUSIVE_LINKED_LIST + +#if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED +class VmaStringBuilder +{ +public: + VmaStringBuilder(const VkAllocationCallbacks* allocationCallbacks) : m_Data(VmaStlAllocator(allocationCallbacks)) {} + ~VmaStringBuilder() = default; + + size_t GetLength() const { return m_Data.size(); } + const char* GetData() const { return m_Data.data(); } + void AddNewLine() { Add('\n'); } + void Add(char ch) { m_Data.push_back(ch); } + + void Add(const char* pStr); + void AddNumber(uint32_t num); + void AddNumber(uint64_t num); + void AddPointer(const void* ptr); + +private: + VmaVector> m_Data; +}; + +#ifndef _VMA_STRING_BUILDER_FUNCTIONS +void VmaStringBuilder::Add(const char* pStr) +{ + const size_t strLen = strlen(pStr); + if (strLen > 0) + { + const size_t oldCount = m_Data.size(); + m_Data.resize(oldCount + strLen); + memcpy(m_Data.data() + oldCount, pStr, strLen); + } +} + +void VmaStringBuilder::AddNumber(uint32_t num) +{ + char buf[11]; + buf[10] = '\0'; + char* p = &buf[10]; + do + { + *--p = '0' + (char)(num % 10); + num /= 10; + } while (num); + Add(p); +} + +void VmaStringBuilder::AddNumber(uint64_t num) +{ + char buf[21]; + buf[20] = '\0'; + char* p = &buf[20]; + do + { + *--p = '0' + (char)(num % 10); + num /= 10; + } while (num); + Add(p); +} + +void VmaStringBuilder::AddPointer(const void* ptr) +{ + char buf[21]; + VmaPtrToStr(buf, sizeof(buf), ptr); + Add(buf); +} +#endif //_VMA_STRING_BUILDER_FUNCTIONS +#endif // _VMA_STRING_BUILDER + +#if !defined(_VMA_JSON_WRITER) && VMA_STATS_STRING_ENABLED +/* +Allows to conveniently build a correct JSON document to be written to the +VmaStringBuilder passed to the constructor. +*/ +class VmaJsonWriter +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter) +public: + // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object. 
    VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb);
    ~VmaJsonWriter();

    // Begins object by writing "{".
    // Inside an object, you must call pairs of WriteString and a value, e.g.:
    // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject();
    // Will write: { "A": 1, "B": 2 }
    void BeginObject(bool singleLine = false);
    // Ends object by writing "}".
    void EndObject();

    // Begins array by writing "[".
    // Inside an array, you can write a sequence of any values.
    void BeginArray(bool singleLine = false);
    // Ends array by writing "]".
    void EndArray();

    // Writes a string value inside "".
    // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped.
    void WriteString(const char* pStr);

    // Begins writing a string value.
    // Call BeginString, ContinueString, ContinueString, ..., EndString instead of
    // WriteString to conveniently build the string content incrementally, made of
    // parts including numbers.
    void BeginString(const char* pStr = VMA_NULL);
    // Posts next part of an open string.
    void ContinueString(const char* pStr);
    // Posts next part of an open string. The number is converted to decimal characters.
    void ContinueString(uint32_t n);
    void ContinueString(uint64_t n);
    // Posts next part of an open string. Pointer value is converted to characters
    // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00
    void ContinueString_Pointer(const void* ptr);
    // Ends writing a string value by writing '"'.
    // A non-empty pStr is appended to the string before the closing quote.
    void EndString(const char* pStr = VMA_NULL);

    // Writes a number value.
    void WriteNumber(uint32_t n);
    void WriteNumber(uint64_t n);
    // Writes a boolean value - false or true.
    void WriteBool(bool b);
    // Writes a null value.
+ void WriteNull(); + +private: + enum COLLECTION_TYPE + { + COLLECTION_TYPE_OBJECT, + COLLECTION_TYPE_ARRAY, + }; + struct StackItem + { + COLLECTION_TYPE type; + uint32_t valueCount; + bool singleLineMode; + }; + + static const char* const INDENT; + + VmaStringBuilder& m_SB; + VmaVector< StackItem, VmaStlAllocator > m_Stack; + bool m_InsideString; + + void BeginValue(bool isString); + void WriteIndent(bool oneLess = false); +}; +const char* const VmaJsonWriter::INDENT = " "; + +#ifndef _VMA_JSON_WRITER_FUNCTIONS +VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) + : m_SB(sb), + m_Stack(VmaStlAllocator(pAllocationCallbacks)), + m_InsideString(false) {} + +VmaJsonWriter::~VmaJsonWriter() +{ + VMA_ASSERT(!m_InsideString); + VMA_ASSERT(m_Stack.empty()); +} + +void VmaJsonWriter::BeginObject(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('{'); + + StackItem item; + item.type = COLLECTION_TYPE_OBJECT; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndObject() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add('}'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); + m_Stack.pop_back(); +} + +void VmaJsonWriter::BeginArray(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('['); + + StackItem item; + item.type = COLLECTION_TYPE_ARRAY; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndArray() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add(']'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); + m_Stack.pop_back(); +} + +void VmaJsonWriter::WriteString(const char* pStr) +{ + BeginString(pStr); + EndString(); +} + +void VmaJsonWriter::BeginString(const char* pStr) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(true); + 
m_SB.Add('"'); + m_InsideString = true; + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } +} + +void VmaJsonWriter::ContinueString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + + const size_t strLen = strlen(pStr); + for (size_t i = 0; i < strLen; ++i) + { + char ch = pStr[i]; + if (ch == '\\') + { + m_SB.Add("\\\\"); + } + else if (ch == '"') + { + m_SB.Add("\\\""); + } + else if ((uint8_t)ch >= 32) + { + m_SB.Add(ch); + } + else switch (ch) + { + case '\b': + m_SB.Add("\\b"); + break; + case '\f': + m_SB.Add("\\f"); + break; + case '\n': + m_SB.Add("\\n"); + break; + case '\r': + m_SB.Add("\\r"); + break; + case '\t': + m_SB.Add("\\t"); + break; + default: + VMA_ASSERT(0 && "Character not currently supported."); + } + } +} + +void VmaJsonWriter::ContinueString(uint32_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString(uint64_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString_Pointer(const void* ptr) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddPointer(ptr); +} + +void VmaJsonWriter::EndString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } + m_SB.Add('"'); + m_InsideString = false; +} + +void VmaJsonWriter::WriteNumber(uint32_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteNumber(uint64_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteBool(bool b) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add(b ? 
"true" : "false"); +} + +void VmaJsonWriter::WriteNull() +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add("null"); +} + +void VmaJsonWriter::BeginValue(bool isString) +{ + if (!m_Stack.empty()) + { + StackItem& currItem = m_Stack.back(); + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 == 0) + { + VMA_ASSERT(isString); + } + + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 != 0) + { + m_SB.Add(": "); + } + else if (currItem.valueCount > 0) + { + m_SB.Add(", "); + WriteIndent(); + } + else + { + WriteIndent(); + } + ++currItem.valueCount; + } +} + +void VmaJsonWriter::WriteIndent(bool oneLess) +{ + if (!m_Stack.empty() && !m_Stack.back().singleLineMode) + { + m_SB.AddNewLine(); + + size_t count = m_Stack.size(); + if (count > 0 && oneLess) + { + --count; + } + for (size_t i = 0; i < count; ++i) + { + m_SB.Add(INDENT); + } + } +} +#endif // _VMA_JSON_WRITER_FUNCTIONS + +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) +{ + json.BeginObject(); + + json.WriteString("BlockCount"); + json.WriteNumber(stat.statistics.blockCount); + json.WriteString("BlockBytes"); + json.WriteNumber(stat.statistics.blockBytes); + json.WriteString("AllocationCount"); + json.WriteNumber(stat.statistics.allocationCount); + json.WriteString("AllocationBytes"); + json.WriteNumber(stat.statistics.allocationBytes); + json.WriteString("UnusedRangeCount"); + json.WriteNumber(stat.unusedRangeCount); + + if (stat.statistics.allocationCount > 1) + { + json.WriteString("AllocationSizeMin"); + json.WriteNumber(stat.allocationSizeMin); + json.WriteString("AllocationSizeMax"); + json.WriteNumber(stat.allocationSizeMax); + } + if (stat.unusedRangeCount > 1) + { + json.WriteString("UnusedRangeSizeMin"); + json.WriteNumber(stat.unusedRangeSizeMin); + json.WriteString("UnusedRangeSizeMax"); + json.WriteNumber(stat.unusedRangeSizeMax); + } + json.EndObject(); +} +#endif // _VMA_JSON_WRITER + 
+#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis) +public: + VmaMappingHysteresis() = default; + + uint32_t GetExtraMapping() const { return m_ExtraMapping; } + + // Call when Map was called. + // Returns true if switched to extra +1 mapping reference count. + bool PostMap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) + { + m_ExtraMapping = 1; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + + // Call when Unmap was called. + void PostUnmap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + ++m_MajorCounter; + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was made from the memory block. + void PostAlloc() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + ++m_MajorCounter; + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was freed from the memory block. + // Returns true if switched to extra -1 mapping reference count. + bool PostFree() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && + m_MajorCounter > m_MinorCounter + 1) + { + m_ExtraMapping = 0; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + +private: + static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + + uint32_t m_MinorCounter = 0; + uint32_t m_MajorCounter = 0; + uint32_t m_ExtraMapping = 0; // 0 or 1. 
+ + void PostMinorCounter() + { + if(m_MinorCounter < m_MajorCounter) + { + ++m_MinorCounter; + } + else if(m_MajorCounter > 0) + { + --m_MajorCounter; + --m_MinorCounter; + } + } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + +#if VMA_EXTERNAL_MEMORY_WIN32 +class VmaWin32Handle +{ +public: + VmaWin32Handle() noexcept : m_hHandle(VMA_NULL) { } + explicit VmaWin32Handle(HANDLE hHandle) noexcept : m_hHandle(hHandle) { } + ~VmaWin32Handle() noexcept { if (m_hHandle != VMA_NULL) { ::CloseHandle(m_hHandle); } } + VMA_CLASS_NO_COPY_NO_MOVE(VmaWin32Handle) + +public: + // Strengthened + VkResult GetHandle(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, bool useMutex, HANDLE* pHandle) noexcept + { + *pHandle = VMA_NULL; + // Try to get handle first. + if (m_hHandle != VMA_NULL) + { + *pHandle = Duplicate(hTargetProcess); + return VK_SUCCESS; + } + + VkResult res = VK_SUCCESS; + // If failed, try to create it. + { + VmaMutexLockWrite lock(m_Mutex, useMutex); + if (m_hHandle == VMA_NULL) + { + res = Create(device, memory, pvkGetMemoryWin32HandleKHR, &m_hHandle); + } + } + + *pHandle = Duplicate(hTargetProcess); + return res; + } + + operator bool() const noexcept { return m_hHandle != VMA_NULL; } +private: + // Not atomic + static VkResult Create(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE* pHandle) noexcept + { + VkResult res = VK_ERROR_FEATURE_NOT_PRESENT; + if (pvkGetMemoryWin32HandleKHR != VMA_NULL) + { + VkMemoryGetWin32HandleInfoKHR handleInfo{ }; + handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handleInfo.memory = memory; + handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; + res = pvkGetMemoryWin32HandleKHR(device, &handleInfo, pHandle); + } + return res; + } + HANDLE Duplicate(HANDLE hTargetProcess = VMA_NULL) const noexcept + { + if (!m_hHandle) + return m_hHandle; + + HANDLE 
hCurrentProcess = ::GetCurrentProcess(); + HANDLE hDupHandle = VMA_NULL; + if (!::DuplicateHandle(hCurrentProcess, m_hHandle, hTargetProcess ? hTargetProcess : hCurrentProcess, &hDupHandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) + { + VMA_ASSERT(0 && "Failed to duplicate handle."); + } + return hDupHandle; + } +private: + HANDLE m_hHandle; + VMA_RW_MUTEX m_Mutex; // Protects access m_Handle +}; +#else +class VmaWin32Handle +{ + // ABI compatibility + void* placeholder = VMA_NULL; + VMA_RW_MUTEX placeholder2; +}; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + + +#ifndef _VMA_DEVICE_MEMORY_BLOCK +/* +Represents a single block of device memory (`VkDeviceMemory`) with all the +data about its regions (aka suballocations, #VmaAllocation), assigned and free. + +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. +*/ +class VmaDeviceMemoryBlock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock) +public: + VmaBlockMetadata* m_pMetadata; + + VmaDeviceMemoryBlock(VmaAllocator hAllocator); + ~VmaDeviceMemoryBlock(); + + // Always call after construction. + void Init( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t newMemoryTypeIndex, + VkDeviceMemory newMemory, + VkDeviceSize newSize, + uint32_t id, + uint32_t algorithm, + VkDeviceSize bufferImageGranularity); + // Always call before destruction. + void Destroy(VmaAllocator allocator); + + VmaPool GetParentPool() const { return m_hParentPool; } + VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } + uint32_t GetId() const { return m_Id; } + void* GetMappedData() const { return m_pMappedData; } + uint32_t GetMapRefCount() const { return m_MapCount; } + + // Call when allocation/free was made from m_pMetadata. + // Used for m_MappingHysteresis. 
    void PostAlloc(VmaAllocator hAllocator);
    void PostFree(VmaAllocator hAllocator);

    // Validates all data structures inside this object. If not valid, returns false.
    bool Validate() const;
    VkResult CheckCorruption(VmaAllocator hAllocator);

    // ppData can be null.
    VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData);
    void Unmap(VmaAllocator hAllocator, uint32_t count);

    VkResult WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize);
    VkResult ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize);

    VkResult BindBufferMemory(
        const VmaAllocator hAllocator,
        const VmaAllocation hAllocation,
        VkDeviceSize allocationLocalOffset,
        VkBuffer hBuffer,
        const void* pNext);
    VkResult BindImageMemory(
        const VmaAllocator hAllocator,
        const VmaAllocation hAllocation,
        VkDeviceSize allocationLocalOffset,
        VkImage hImage,
        const void* pNext);
#if VMA_EXTERNAL_MEMORY_WIN32
    // hTargetProcess and pHandle have the same roles as in VmaWin32Handle::GetHandle.
    VkResult CreateWin32Handle(
        const VmaAllocator hAllocator,
        PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR,
        HANDLE hTargetProcess,
        HANDLE* pHandle)noexcept;
#endif // VMA_EXTERNAL_MEMORY_WIN32
private:
    VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool.
    uint32_t m_MemoryTypeIndex;
    uint32_t m_Id;
    VkDeviceMemory m_hMemory;

    /*
    Protects access to m_hMemory so it is not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory.
    Also protects m_MapCount, m_pMappedData.
    Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex.
    */
    VMA_MUTEX m_MapAndBindMutex;
    VmaMappingHysteresis m_MappingHysteresis;
    uint32_t m_MapCount;
    void* m_pMappedData;

    VmaWin32Handle m_Handle;
};
#endif // _VMA_DEVICE_MEMORY_BLOCK

#ifndef _VMA_ALLOCATION_T
// Out-of-line data referenced from VmaAllocation_T::DedicatedAllocation::m_ExtraData.
struct VmaAllocationExtraData
{
    void* m_pMappedData = VMA_NULL; // Not null means memory is mapped.
    VmaWin32Handle m_Handle;
};

// Internal representation of a single allocation: either a region of a
// VmaDeviceMemoryBlock (ALLOCATION_TYPE_BLOCK) or its own VkDeviceMemory
// (ALLOCATION_TYPE_DEDICATED).
struct VmaAllocation_T
{
    // Gives the dedicated-allocation list access to the private m_Prev/m_Next links.
    friend struct VmaDedicatedAllocationListItemTraits;

    // Bit flags stored in m_Flags.
    enum FLAGS
    {
        FLAG_PERSISTENT_MAP = 0x01,
        FLAG_MAPPING_ALLOWED = 0x02,
    };

public:
    // Stored in m_Type.
    enum ALLOCATION_TYPE
    {
        ALLOCATION_TYPE_NONE,
        ALLOCATION_TYPE_BLOCK,
        ALLOCATION_TYPE_DEDICATED,
    };

    // This struct is allocated using VmaPoolAllocator.
    VmaAllocation_T(bool mappingAllowed);
    ~VmaAllocation_T();

    void InitBlockAllocation(
        VmaDeviceMemoryBlock* block,
        VmaAllocHandle allocHandle,
        VkDeviceSize alignment,
        VkDeviceSize size,
        uint32_t memoryTypeIndex,
        VmaSuballocationType suballocationType,
        bool mapped);
    // pMappedData not null means allocation is created with MAPPED flag.
    void InitDedicatedAllocation(
        VmaAllocator allocator,
        VmaPool hParentPool,
        uint32_t memoryTypeIndex,
        VkDeviceMemory hMemory,
        VmaSuballocationType suballocationType,
        void* pMappedData,
        VkDeviceSize size);
    void Destroy(VmaAllocator allocator);

    ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; }
    VkDeviceSize GetAlignment() const { return m_Alignment; }
    VkDeviceSize GetSize() const { return m_Size; }
    void* GetUserData() const { return m_pUserData; }
    const char* GetName() const { return m_pName; }
    VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; }

    // Valid only for ALLOCATION_TYPE_BLOCK (asserted).
    VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; }
    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
    bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; }
    bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; }

    // hAllocator is not used here; only the pointer is stored.
    void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; }
    void SetName(VmaAllocator hAllocator, const char* pName);
    void FreeName(VmaAllocator hAllocator);
    uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation);
    VmaAllocHandle GetAllocHandle() const;
    VkDeviceSize GetOffset() const;
    VmaPool GetParentPool() const;
    VkDeviceMemory GetMemory() const;
    void* GetMappedData() const;

    void BlockAllocMap();
    void BlockAllocUnmap();
    VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData);
    void DedicatedAllocUnmap(VmaAllocator hAllocator);

#if VMA_STATS_STRING_ENABLED
    VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; }
    // Records the usage of the buffer this allocation was made for.
    // May be called only while the usage is still UNKNOWN (asserted).
    void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5)
    {
        VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN);
        m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5);
    }
    // Records the usage of the image this allocation was made for.
    // May be called only while the usage is still UNKNOWN (asserted).
    void InitImageUsage(const VkImageCreateInfo &createInfo)
    {
        VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN);
        m_BufferImageUsage = VmaBufferImageUsage(createInfo);
    }
    void PrintParameters(class VmaJsonWriter& json) const;
#endif

#if VMA_EXTERNAL_MEMORY_WIN32
    VkResult GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* hHandle) noexcept;
#endif // VMA_EXTERNAL_MEMORY_WIN32

private:
    // Allocation out of VmaDeviceMemoryBlock.
    struct BlockAllocation
    {
        VmaDeviceMemoryBlock* m_Block;
        VmaAllocHandle m_AllocHandle;
    };
    // Allocation for an object that has its own private VkDeviceMemory.
    struct DedicatedAllocation
    {
        VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool.
        VkDeviceMemory m_hMemory;
        VmaAllocationExtraData* m_ExtraData;
        // Intrusive list links, accessed through VmaDedicatedAllocationListItemTraits.
        VmaAllocation_T* m_Prev;
        VmaAllocation_T* m_Next;
    };
    // m_Type tells which member is active.
    union
    {
        // Allocation out of VmaDeviceMemoryBlock.
        BlockAllocation m_BlockAllocation;
        // Allocation for an object that has its own private VkDeviceMemory.
        DedicatedAllocation m_DedicatedAllocation;
    };

    VkDeviceSize m_Alignment;
    VkDeviceSize m_Size;
    void* m_pUserData;
    char* m_pName;
    uint32_t m_MemoryTypeIndex;
    uint8_t m_Type; // ALLOCATION_TYPE
    uint8_t m_SuballocationType; // VmaSuballocationType
    // Reference counter for vmaMapMemory()/vmaUnmapMemory().
    uint8_t m_MapCount;
    uint8_t m_Flags; // enum FLAGS
#if VMA_STATS_STRING_ENABLED
    VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown.
#endif

    void EnsureExtraData(VmaAllocator hAllocator);
};
#endif // _VMA_ALLOCATION_T

#ifndef _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS
// Adapter giving an intrusive list access to the m_Prev/m_Next links stored in
// VmaAllocation_T::m_DedicatedAllocation. Every accessor heavy-asserts that the
// item is a dedicated allocation, because the links live in a union.
struct VmaDedicatedAllocationListItemTraits
{
    typedef VmaAllocation_T ItemType;

    static ItemType* GetPrev(const ItemType* item)
    {
        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
        return item->m_DedicatedAllocation.m_Prev;
    }
    static ItemType* GetNext(const ItemType* item)
    {
        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
        return item->m_DedicatedAllocation.m_Next;
    }
    // The Access* variants return a modifiable reference to the link.
    static ItemType*& AccessPrev(ItemType* item)
    {
        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
        return item->m_DedicatedAllocation.m_Prev;
    }
    static ItemType*& AccessNext(ItemType* item)
    {
        VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED);
        return item->m_DedicatedAllocation.m_Next;
    }
};
#endif // _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS

#ifndef _VMA_DEDICATED_ALLOCATION_LIST
/*
Stores linked list of VmaAllocation_T objects.
Thread-safe, synchronized internally.
*/
class VmaDedicatedAllocationList
{
    VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList)
public:
    VmaDedicatedAllocationList() {}
    ~VmaDedicatedAllocationList();

    // useMutex == false disables the internal locking.
    void Init(bool useMutex) { m_UseMutex = useMutex; }
    bool Validate();

    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats);
    void AddStatistics(VmaStatistics& inoutStats);
#if VMA_STATS_STRING_ENABLED
    // Writes JSON array with the list of allocations.
+ void BuildStatsString(VmaJsonWriter& json); +#endif + + bool IsEmpty(); + void Register(VmaAllocation alloc); + void Unregister(VmaAllocation alloc); + +private: + typedef VmaIntrusiveLinkedList DedicatedAllocationLinkedList; + + bool m_UseMutex = true; + VMA_RW_MUTEX m_Mutex; + DedicatedAllocationLinkedList m_AllocationList; +}; + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS + +VmaDedicatedAllocationList::~VmaDedicatedAllocationList() +{ + VMA_HEAVY_ASSERT(Validate()); + + if (!m_AllocationList.IsEmpty()) + { + VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!"); + } +} + +bool VmaDedicatedAllocationList::Validate() +{ + const size_t declaredCount = m_AllocationList.GetCount(); + size_t actualCount = 0; + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + for (VmaAllocation alloc = m_AllocationList.Front(); + alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + { + ++actualCount; + } + VMA_VALIDATE(actualCount == declaredCount); + + return true; +} + +void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) + { + const VkDeviceSize size = item->GetSize(); + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize()); + } +} + +void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + + const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount(); + inoutStats.blockCount += allocCount; + inoutStats.allocationCount += allocCount; + + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) + { + const VkDeviceSize size = item->GetSize(); + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size; + } +} + +#if VMA_STATS_STRING_ENABLED +void 
VmaDedicatedAllocationList::BuildStatsString(VmaJsonWriter& json) +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + json.BeginArray(); + for (VmaAllocation alloc = m_AllocationList.Front(); + alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + { + json.BeginObject(true); + alloc->PrintParameters(json); + json.EndObject(); + } + json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaDedicatedAllocationList::IsEmpty() +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + return m_AllocationList.IsEmpty(); +} + +void VmaDedicatedAllocationList::Register(VmaAllocation alloc) +{ + VmaMutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.PushBack(alloc); +} + +void VmaDedicatedAllocationList::Unregister(VmaAllocation alloc) +{ + VmaMutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.Remove(alloc); +} +#endif // _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS +#endif // _VMA_DEDICATED_ALLOCATION_LIST + +#ifndef _VMA_SUBALLOCATION +/* +Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as +allocated memory block or free. +*/ +struct VmaSuballocation +{ + VkDeviceSize offset; + VkDeviceSize size; + void* userData; + VmaSuballocationType type; +}; + +// Comparator for offsets. 
// Orders suballocations by ascending offset.
struct VmaSuballocationOffsetLess
{
    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const
    {
        return lhs.offset < rhs.offset;
    }
};

// Orders suballocations by descending offset.
struct VmaSuballocationOffsetGreater
{
    bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const
    {
        return lhs.offset > rhs.offset;
    }
};

// Orders suballocation list iterators by ascending size. The second overload
// compares an iterator against a plain size.
struct VmaSuballocationItemSizeLess
{
    bool operator()(const VmaSuballocationList::iterator lhs,
        const VmaSuballocationList::iterator rhs) const
    {
        return lhs->size < rhs->size;
    }

    bool operator()(const VmaSuballocationList::iterator lhs,
        VkDeviceSize rhsSize) const
    {
        return lhs->size < rhsSize;
    }
};
#endif // _VMA_SUBALLOCATION

#ifndef _VMA_ALLOCATION_REQUEST
/*
Parameters of planned allocation inside a VmaDeviceMemoryBlock.
item points to a FREE suballocation.
*/
struct VmaAllocationRequest
{
    VmaAllocHandle allocHandle;
    VkDeviceSize size;
    VmaSuballocationList::iterator item;
    void* customData;
    uint64_t algorithmData;
    VmaAllocationRequestType type;
};
#endif // _VMA_ALLOCATION_REQUEST

#ifndef _VMA_BLOCK_METADATA
/*
Data structure used for bookkeeping of allocations and unused ranges of memory
in a single VkDeviceMemory block.
*/
class VmaBlockMetadata
{
    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata)
public:
    // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object.
    VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks,
        VkDeviceSize bufferImageGranularity, bool isVirtual);
    virtual ~VmaBlockMetadata() = default;

    // The base implementation only stores the size.
    virtual void Init(VkDeviceSize size) { m_Size = size; }
    bool IsVirtual() const { return m_IsVirtual; }
    VkDeviceSize GetSize() const { return m_Size; }

    // Validates all data structures inside this object. If not valid, returns false.
    virtual bool Validate() const = 0;
    virtual size_t GetAllocationCount() const = 0;
    virtual size_t GetFreeRegionsCount() const = 0;
    virtual VkDeviceSize GetSumFreeSize() const = 0;
    // Returns true if this block is empty - contains only single free suballocation.
    virtual bool IsEmpty() const = 0;
    virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0;
    virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0;
    virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0;

    // Iteration over allocations.
    virtual VmaAllocHandle GetAllocationListBegin() const = 0;
    virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0;
    virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0;

    // Shouldn't modify blockCount.
    virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0;
    virtual void AddStatistics(VmaStatistics& inoutStats) const = 0;

#if VMA_STATS_STRING_ENABLED
    virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0;
#endif

    // Tries to find a place for suballocation with given parameters inside this block.
    // If succeeded, fills pAllocationRequest and returns true.
    // If failed, returns false.
    virtual bool CreateAllocationRequest(
        VkDeviceSize allocSize,
        VkDeviceSize allocAlignment,
        bool upperAddress,
        VmaSuballocationType allocType,
        // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* or VMA_ALLOCATION_INTERNAL_STRATEGY_* flags.
        uint32_t strategy,
        VmaAllocationRequest* pAllocationRequest) = 0;

    virtual VkResult CheckCorruption(const void* pBlockData) = 0;

    // Makes actual allocation based on request. Request must already be checked and valid.
    virtual void Alloc(
        const VmaAllocationRequest& request,
        VmaSuballocationType type,
        void* userData) = 0;

    // Frees suballocation assigned to given memory region.
    virtual void Free(VmaAllocHandle allocHandle) = 0;

    // Frees all allocations.
+ // Careful! Don't call it if there are VmaAllocation objects owned by userData of cleared allocations! + virtual void Clear() = 0; + + virtual void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) = 0; + virtual void DebugLogAllAllocations() const = 0; + +protected: + const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; } + VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } + VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); } + + void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; +#if VMA_STATS_STRING_ENABLED + // mapRefCount == UINT32_MAX means unspecified. + void PrintDetailedMap_Begin(class VmaJsonWriter& json, + VkDeviceSize unusedBytes, + size_t allocationCount, + size_t unusedRangeCount) const; + void PrintDetailedMap_Allocation(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size, void* userData) const; + void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, + VkDeviceSize offset, + VkDeviceSize size) const; + void PrintDetailedMap_End(class VmaJsonWriter& json) const; +#endif + +private: + VkDeviceSize m_Size; + const VkAllocationCallbacks* m_pAllocationCallbacks; + const VkDeviceSize m_BufferImageGranularity; + const bool m_IsVirtual; +}; + +#ifndef _VMA_BLOCK_METADATA_FUNCTIONS +VmaBlockMetadata::VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : m_Size(0), + m_pAllocationCallbacks(pAllocationCallbacks), + m_BufferImageGranularity(bufferImageGranularity), + m_IsVirtual(isVirtual) {} + +void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ + if (IsVirtual()) + { + VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData); + } + else + { + VMA_ASSERT(userData != 
VMA_NULL); + VmaAllocation allocation = reinterpret_cast(userData); + + userData = allocation->GetUserData(); + const char* name = allocation->GetName(); + +#if VMA_STATS_STRING_ENABLED + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64, + offset, size, userData, name ? name : "vma_empty", + VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], + (uint64_t)allocation->GetBufferImageUsage().Value); +#else + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u", + offset, size, userData, name ? name : "vma_empty", + (unsigned)allocation->GetSuballocationType()); +#endif // VMA_STATS_STRING_ENABLED + } + +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ + json.WriteString("TotalBytes"); + json.WriteNumber(GetSize()); + + json.WriteString("UnusedBytes"); + json.WriteNumber(unusedBytes); + + json.WriteString("Allocations"); + json.WriteNumber((uint64_t)allocationCount); + + json.WriteString("UnusedRanges"); + json.WriteNumber((uint64_t)unusedRangeCount); + + json.WriteString("Suballocations"); + json.BeginArray(); +} + +void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + if (IsVirtual()) + { + json.WriteString("Size"); + json.WriteNumber(size); + if (userData) + { + json.WriteString("CustomData"); + json.BeginString(); + json.ContinueString_Pointer(userData); + json.EndString(); + } + } + else + { + ((VmaAllocation)userData)->PrintParameters(json); + } + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize 
size) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + json.WriteString("Type"); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE]); + + json.WriteString("Size"); + json.WriteNumber(size); + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const +{ + json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_BLOCK_METADATA_FUNCTIONS +#endif // _VMA_BLOCK_METADATA + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +// Before deleting object of this class remember to call 'Destroy()' +class VmaBlockBufferImageGranularity final +{ +public: + struct ValidationContext + { + const VkAllocationCallbacks* allocCallbacks; + uint16_t* pageAllocs; + }; + + VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity); + ~VmaBlockBufferImageGranularity(); + + bool IsEnabled() const { return m_BufferImageGranularity > MAX_LOW_BUFFER_IMAGE_GRANULARITY; } + + void Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size); + // Before destroying object you must call free it's memory + void Destroy(const VkAllocationCallbacks* pAllocationCallbacks); + + void RoundupAllocRequest(VmaSuballocationType allocType, + VkDeviceSize& inOutAllocSize, + VkDeviceSize& inOutAllocAlignment) const; + + bool CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, + VkDeviceSize allocSize, + VkDeviceSize blockOffset, + VkDeviceSize blockSize, + VmaSuballocationType allocType) const; + + void AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size); + void FreePages(VkDeviceSize offset, VkDeviceSize size); + void Clear(); + + ValidationContext StartValidation(const VkAllocationCallbacks* pAllocationCallbacks, + bool isVirutal) const; + bool Validate(ValidationContext& ctx, VkDeviceSize offset, VkDeviceSize size) const; + bool FinishValidation(ValidationContext& ctx) const; + +private: + static const uint16_t 
MAX_LOW_BUFFER_IMAGE_GRANULARITY = 256; + + struct RegionInfo + { + uint8_t allocType; + uint16_t allocCount; + }; + + VkDeviceSize m_BufferImageGranularity; + uint32_t m_RegionCount; + RegionInfo* m_RegionInfo; + + uint32_t GetStartPage(VkDeviceSize offset) const { return OffsetToPageIndex(offset & ~(m_BufferImageGranularity - 1)); } + uint32_t GetEndPage(VkDeviceSize offset, VkDeviceSize size) const { return OffsetToPageIndex((offset + size - 1) & ~(m_BufferImageGranularity - 1)); } + + uint32_t OffsetToPageIndex(VkDeviceSize offset) const; + void AllocPage(RegionInfo& page, uint8_t allocType); +}; + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +VmaBlockBufferImageGranularity::VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity) + : m_BufferImageGranularity(bufferImageGranularity), + m_RegionCount(0), + m_RegionInfo(VMA_NULL) {} + +VmaBlockBufferImageGranularity::~VmaBlockBufferImageGranularity() +{ + VMA_ASSERT(m_RegionInfo == VMA_NULL && "Free not called before destroying object!"); +} + +void VmaBlockBufferImageGranularity::Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size) +{ + if (IsEnabled()) + { + m_RegionCount = static_cast(VmaDivideRoundingUp(size, m_BufferImageGranularity)); + m_RegionInfo = vma_new_array(pAllocationCallbacks, RegionInfo, m_RegionCount); + memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); + } +} + +void VmaBlockBufferImageGranularity::Destroy(const VkAllocationCallbacks* pAllocationCallbacks) +{ + if (m_RegionInfo) + { + vma_delete_array(pAllocationCallbacks, m_RegionInfo, m_RegionCount); + m_RegionInfo = VMA_NULL; + } +} + +void VmaBlockBufferImageGranularity::RoundupAllocRequest(VmaSuballocationType allocType, + VkDeviceSize& inOutAllocSize, + VkDeviceSize& inOutAllocAlignment) const +{ + if (m_BufferImageGranularity > 1 && + m_BufferImageGranularity <= MAX_LOW_BUFFER_IMAGE_GRANULARITY) + { + if (allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN || + allocType == 
VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || + allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL) + { + inOutAllocAlignment = VMA_MAX(inOutAllocAlignment, m_BufferImageGranularity); + inOutAllocSize = VmaAlignUp(inOutAllocSize, m_BufferImageGranularity); + } + } +} + +bool VmaBlockBufferImageGranularity::CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, + VkDeviceSize allocSize, + VkDeviceSize blockOffset, + VkDeviceSize blockSize, + VmaSuballocationType allocType) const +{ + if (IsEnabled()) + { + uint32_t startPage = GetStartPage(inOutAllocOffset); + if (m_RegionInfo[startPage].allocCount > 0 && + VmaIsBufferImageGranularityConflict(static_cast(m_RegionInfo[startPage].allocType), allocType)) + { + inOutAllocOffset = VmaAlignUp(inOutAllocOffset, m_BufferImageGranularity); + if (blockSize < allocSize + inOutAllocOffset - blockOffset) + return true; + ++startPage; + } + uint32_t endPage = GetEndPage(inOutAllocOffset, allocSize); + if (endPage != startPage && + m_RegionInfo[endPage].allocCount > 0 && + VmaIsBufferImageGranularityConflict(static_cast(m_RegionInfo[endPage].allocType), allocType)) + { + return true; + } + } + return false; +} + +void VmaBlockBufferImageGranularity::AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size) +{ + if (IsEnabled()) + { + uint32_t startPage = GetStartPage(offset); + AllocPage(m_RegionInfo[startPage], allocType); + + uint32_t endPage = GetEndPage(offset, size); + if (startPage != endPage) + AllocPage(m_RegionInfo[endPage], allocType); + } +} + +void VmaBlockBufferImageGranularity::FreePages(VkDeviceSize offset, VkDeviceSize size) +{ + if (IsEnabled()) + { + uint32_t startPage = GetStartPage(offset); + --m_RegionInfo[startPage].allocCount; + if (m_RegionInfo[startPage].allocCount == 0) + m_RegionInfo[startPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; + uint32_t endPage = GetEndPage(offset, size); + if (startPage != endPage) + { + --m_RegionInfo[endPage].allocCount; + if (m_RegionInfo[endPage].allocCount == 0) + 
m_RegionInfo[endPage].allocType = VMA_SUBALLOCATION_TYPE_FREE; + } + } +} + +void VmaBlockBufferImageGranularity::Clear() +{ + if (m_RegionInfo) + memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo)); +} + +VmaBlockBufferImageGranularity::ValidationContext VmaBlockBufferImageGranularity::StartValidation( + const VkAllocationCallbacks* pAllocationCallbacks, bool isVirutal) const +{ + ValidationContext ctx{ pAllocationCallbacks, VMA_NULL }; + if (!isVirutal && IsEnabled()) + { + ctx.pageAllocs = vma_new_array(pAllocationCallbacks, uint16_t, m_RegionCount); + memset(ctx.pageAllocs, 0, m_RegionCount * sizeof(uint16_t)); + } + return ctx; +} + +bool VmaBlockBufferImageGranularity::Validate(ValidationContext& ctx, + VkDeviceSize offset, VkDeviceSize size) const +{ + if (IsEnabled()) + { + uint32_t start = GetStartPage(offset); + ++ctx.pageAllocs[start]; + VMA_VALIDATE(m_RegionInfo[start].allocCount > 0); + + uint32_t end = GetEndPage(offset, size); + if (start != end) + { + ++ctx.pageAllocs[end]; + VMA_VALIDATE(m_RegionInfo[end].allocCount > 0); + } + } + return true; +} + +bool VmaBlockBufferImageGranularity::FinishValidation(ValidationContext& ctx) const +{ + // Check proper page structure + if (IsEnabled()) + { + VMA_ASSERT(ctx.pageAllocs != VMA_NULL && "Validation context not initialized!"); + + for (uint32_t page = 0; page < m_RegionCount; ++page) + { + VMA_VALIDATE(ctx.pageAllocs[page] == m_RegionInfo[page].allocCount); + } + vma_delete_array(ctx.allocCallbacks, ctx.pageAllocs, m_RegionCount); + ctx.pageAllocs = VMA_NULL; + } + return true; +} + +uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) const +{ + return static_cast(offset >> VMA_BITSCAN_MSB(m_BufferImageGranularity)); +} + +void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType) +{ + // When current alloc type is free then it can be overridden by new type + if (page.allocCount == 0 || (page.allocCount > 0 && page.allocType == 
VMA_SUBALLOCATION_TYPE_FREE)) + page.allocType = allocType; + + ++page.allocCount; +} +#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS +#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY + +#ifndef _VMA_BLOCK_METADATA_LINEAR +/* +Allocations and their references in internal data structure look like this: + +if(m_2ndVectorMode == SECOND_VECTOR_EMPTY): + + 0 +-------+ + | | + | | + | | + +-------+ + | Alloc | 1st[m_1stNullItemsBeginCount] + +-------+ + | Alloc | 1st[m_1stNullItemsBeginCount + 1] + +-------+ + | ... | + +-------+ + | Alloc | 1st[1st.size() - 1] + +-------+ + | | + | | + | | +GetSize() +-------+ + +if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER): + + 0 +-------+ + | Alloc | 2nd[0] + +-------+ + | Alloc | 2nd[1] + +-------+ + | ... | + +-------+ + | Alloc | 2nd[2nd.size() - 1] + +-------+ + | | + | | + | | + +-------+ + | Alloc | 1st[m_1stNullItemsBeginCount] + +-------+ + | Alloc | 1st[m_1stNullItemsBeginCount + 1] + +-------+ + | ... | + +-------+ + | Alloc | 1st[1st.size() - 1] + +-------+ + | | +GetSize() +-------+ + +if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK): + + 0 +-------+ + | | + | | + | | + +-------+ + | Alloc | 1st[m_1stNullItemsBeginCount] + +-------+ + | Alloc | 1st[m_1stNullItemsBeginCount + 1] + +-------+ + | ... | + +-------+ + | Alloc | 1st[1st.size() - 1] + +-------+ + | | + | | + | | + +-------+ + | Alloc | 2nd[2nd.size() - 1] + +-------+ + | ... 
| + +-------+ + | Alloc | 2nd[1] + +-------+ + | Alloc | 2nd[0] +GetSize() +-------+ + +*/ +class VmaBlockMetadata_Linear : public VmaBlockMetadata +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear) +public: + VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata_Linear() = default; + + VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; } + bool IsEmpty() const override { return GetAllocationCount() == 0; } + VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; } + + void Init(VkDeviceSize size) override; + bool Validate() const override; + size_t GetAllocationCount() const override; + size_t GetFreeRegionsCount() const override; + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json) const override; +#endif + + bool CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) override; + + VkResult CheckCorruption(const void* pBlockData) override; + + void Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) override; + + void Free(VmaAllocHandle allocHandle) override; + void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; + void Clear() override; + void SetAllocationUserData(VmaAllocHandle allocHandle, void* 
userData) override; + void DebugLogAllAllocations() const override; + +private: + /* + There are two suballocation vectors, used in ping-pong way. + The one with index m_1stVectorIndex is called 1st. + The one with index (m_1stVectorIndex ^ 1) is called 2nd. + 2nd can be non-empty only when 1st is not empty. + When 2nd is not empty, m_2ndVectorMode indicates its mode of operation. + */ + typedef VmaVector> SuballocationVectorType; + + enum SECOND_VECTOR_MODE + { + SECOND_VECTOR_EMPTY, + /* + Suballocations in 2nd vector are created later than the ones in 1st, but they + all have smaller offset. + */ + SECOND_VECTOR_RING_BUFFER, + /* + Suballocations in 2nd vector are upper side of double stack. + They all have offsets higher than those in 1st vector. + Top of this stack means smaller offsets, but higher indices in this vector. + */ + SECOND_VECTOR_DOUBLE_STACK, + }; + + VkDeviceSize m_SumFreeSize; + SuballocationVectorType m_Suballocations0, m_Suballocations1; + uint32_t m_1stVectorIndex; + SECOND_VECTOR_MODE m_2ndVectorMode; + // Number of items in 1st vector with hAllocation = null at the beginning. + size_t m_1stNullItemsBeginCount; + // Number of other items in 1st vector with hAllocation = null somewhere in the middle. + size_t m_1stNullItemsMiddleCount; + // Number of items in 2nd vector with hAllocation = null. + size_t m_2ndNullItemsCount; + + SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } + SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; } + const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; } + const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? 
m_Suballocations0 : m_Suballocations1; } + + VmaSuballocation& FindSuballocation(VkDeviceSize offset) const; + bool ShouldCompact1st() const; + void CleanupAfterFree(); + + bool CreateAllocationRequest_LowerAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest); + bool CreateAllocationRequest_UpperAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), + m_SumFreeSize(0), + m_Suballocations0(VmaStlAllocator(pAllocationCallbacks)), + m_Suballocations1(VmaStlAllocator(pAllocationCallbacks)), + m_1stVectorIndex(0), + m_2ndVectorMode(SECOND_VECTOR_EMPTY), + m_1stNullItemsBeginCount(0), + m_1stNullItemsMiddleCount(0), + m_2ndNullItemsCount(0) {} + +void VmaBlockMetadata_Linear::Init(VkDeviceSize size) +{ + VmaBlockMetadata::Init(size); + m_SumFreeSize = size; +} + +bool VmaBlockMetadata_Linear::Validate() const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY)); + VMA_VALIDATE(!suballocations1st.empty() || + suballocations2nd.empty() || + m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER); + + if (!suballocations1st.empty()) + { + // Null item at the beginning should be accounted into m_1stNullItemsBeginCount. + VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != VMA_SUBALLOCATION_TYPE_FREE); + // Null item at the end should be just pop_back(). 
+ VMA_VALIDATE(suballocations1st.back().type != VMA_SUBALLOCATION_TYPE_FREE); + } + if (!suballocations2nd.empty()) + { + // Null item at the end should be just pop_back(). + VMA_VALIDATE(suballocations2nd.back().type != VMA_SUBALLOCATION_TYPE_FREE); + } + + VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); + VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + + VkDeviceSize sumUsedSize = 0; + const size_t suballoc1stCount = suballocations1st.size(); + const VkDeviceSize debugMargin = GetDebugMargin(); + VkDeviceSize offset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = 0; i < suballoc2ndCount; ++i) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE && + suballoc.userData == VMA_NULL); + } + + size_t nullItem1stCount = m_1stNullItemsBeginCount; + + for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + 
VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem1stCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = suballoc2ndCount; i--; ) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + VMA_VALIDATE(offset <= GetSize()); + VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + + return true; +} + +size_t VmaBlockMetadata_Linear::GetAllocationCount() const +{ + return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + + AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} + +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for 
defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return SIZE_MAX; +} + +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. 
+ if (lastOffset < freeSpace2ndTo1stEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + if (lastOffset < freeSpace1stTo2ndEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. 
+ lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to size. + if (lastOffset < size) + { + const VkDeviceSize unusedRangeSize = size - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. 
+ lastOffset = size; + } + } + } +} + +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const VkDeviceSize size = GetSize(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.blockCount++; + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size - m_SumFreeSize; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. 
+ while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. 
+ lastOffset = size; + } + } + } +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + // FIRST PASS + + size_t unusedRangeCount = 0; + VkDeviceSize usedBytes = 0; + + VkDeviceSize lastOffset = 0; + + size_t alloc2ndCount = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + ++unusedRangeCount; + } + + // End of loop. 
+ lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + size_t alloc1stCount = 0; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc1stCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. 
+ if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = size; + } + } + } + + const VkDeviceSize unusedBytes = size - usedBytes; + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + + // SECOND PASS + lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. 
+ else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + nextAlloc1stIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. 
+ while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + const VkDeviceSize unusedRangeSize = size - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } + + PrintDetailedMap_End(json); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaBlockMetadata_Linear::CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + VMA_ASSERT(allocSize > 0); + VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); + VMA_ASSERT(pAllocationRequest != VMA_NULL); + VMA_HEAVY_ASSERT(Validate()); + + if(allocSize > GetSize()) + return false; + + pAllocationRequest->size = allocSize; + return upperAddress ? 
+        CreateAllocationRequest_UpperAddress(
+            allocSize, allocAlignment, allocType, strategy, pAllocationRequest) :
+        CreateAllocationRequest_LowerAddress(
+            allocSize, allocAlignment, allocType, strategy, pAllocationRequest);
+}
+
+// Checks the magic value located at (offset + size) of every non-free suballocation,
+// first in the 1st vector (starting at index m_1stNullItemsBeginCount), then in the
+// whole 2nd vector.
+// pBlockData is forwarded unchanged to VmaValidateMagicValue().
+// Returns VK_SUCCESS when every check passes, or VK_ERROR_UNKNOWN_COPY on the first
+// failed check (after asserting). Asserts that the block is not virtual.
+VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData)
+{
+    VMA_ASSERT(!IsVirtual());
+    SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    // Entries before m_1stNullItemsBeginCount are not visited.
+    for (size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i)
+    {
+        const VmaSuballocation& suballoc = suballocations1st[i];
+        if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size))
+            {
+                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!");
+                return VK_ERROR_UNKNOWN_COPY;
+            }
+        }
+    }
+
+    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+    for (size_t i = 0, count = suballocations2nd.size(); i < count; ++i)
+    {
+        const VmaSuballocation& suballoc = suballocations2nd[i];
+        if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size))
+            {
+                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!");
+                return VK_ERROR_UNKNOWN_COPY;
+            }
+        }
+    }
+
+    return VK_SUCCESS;
+}
+
+// Records the allocation described by `request` as a new suballocation.
+// The offset is request.allocHandle minus one; which vector receives the new entry
+// depends on request.type.
+void VmaBlockMetadata_Linear::Alloc(
+    const VmaAllocationRequest& request,
+    VmaSuballocationType type,
+    void* userData)
+{
+    const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1;
+    const VmaSuballocation newSuballoc = { offset, request.size, userData, type };
+
+    switch (request.type)
+    {
+    case VmaAllocationRequestType::UpperAddress:
+    {
+        VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER &&
+            "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer.");
+        SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+        suballocations2nd.push_back(newSuballoc);
+        m_2ndVectorMode =
SECOND_VECTOR_DOUBLE_STACK; + } + break; + case VmaAllocationRequestType::EndOf1st: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + + VMA_ASSERT(suballocations1st.empty() || + offset >= suballocations1st.back().offset + suballocations1st.back().size); + // Check if it fits before the end of the block. + VMA_ASSERT(offset + request.size <= GetSize()); + + suballocations1st.push_back(newSuballoc); + } + break; + case VmaAllocationRequestType::EndOf2nd: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. + VMA_ASSERT(!suballocations1st.empty() && + offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + switch (m_2ndVectorMode) + { + case SECOND_VECTOR_EMPTY: + // First allocation from second part ring buffer. + VMA_ASSERT(suballocations2nd.empty()); + m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; + break; + case SECOND_VECTOR_RING_BUFFER: + // 2-part ring buffer is already started. + VMA_ASSERT(!suballocations2nd.empty()); + break; + case SECOND_VECTOR_DOUBLE_STACK: + VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); + break; + default: + VMA_ASSERT(0); + } + + suballocations2nd.push_back(newSuballoc); + } + break; + default: + VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); + } + + m_SumFreeSize -= newSuballoc.size; +} + +void VmaBlockMetadata_Linear::Free(VmaAllocHandle allocHandle) +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + VkDeviceSize offset = (VkDeviceSize)allocHandle - 1; + + if (!suballocations1st.empty()) + { + // First allocation: Mark it as next empty at the beginning. 
+ VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; + if (firstSuballoc.offset == offset) + { + firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; + firstSuballoc.userData = VMA_NULL; + m_SumFreeSize += firstSuballoc.size; + ++m_1stNullItemsBeginCount; + CleanupAfterFree(); + return; + } + } + + // Last allocation in 2-part ring buffer or top of upper stack (same logic). + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + VmaSuballocation& lastSuballoc = suballocations2nd.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations2nd.pop_back(); + CleanupAfterFree(); + return; + } + } + // Last allocation in 1st vector. + else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) + { + VmaSuballocation& lastSuballoc = suballocations1st.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations1st.pop_back(); + CleanupAfterFree(); + return; + } + } + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the middle of 1st vector. + { + const SuballocationVectorType::iterator it = VmaBinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + VmaSuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_1stNullItemsMiddleCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Item from the middle of 2nd vector. + const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? 
+ VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_2ndNullItemsCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + VMA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ + outInfo.offset = (VkDeviceSize)allocHandle - 1; + VmaSuballocation& suballoc = FindSuballocation(outInfo.offset); + outInfo.size = suballoc.size; + outInfo.pUserData = suballoc.userData; +} + +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return 0; +} + +void VmaBlockMetadata_Linear::Clear() +{ + m_SumFreeSize = GetSize(); + m_Suballocations0.clear(); + m_Suballocations1.clear(); + // Leaving m_1stVectorIndex unchanged - it doesn't matter. 
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; +} + +void VmaBlockMetadata_Linear::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ + VmaSuballocation& suballoc = FindSuballocation((VkDeviceSize)allocHandle - 1); + suballoc.userData = userData; +} + +void VmaBlockMetadata_Linear::DebugLogAllAllocations() const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + for (auto it = suballocations1st.begin() + m_1stNullItemsBeginCount; it != suballocations1st.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); + + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + for (auto it = suballocations2nd.begin(); it != suballocations2nd.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); +} + +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the 1st vector. + { + SuballocationVectorType::const_iterator it = VmaBinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + VmaSuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + return const_cast(*it); + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Rest of members stays uninitialized intentionally for better performance. + SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? 
+ VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + return const_cast(*it); + } + } + + VMA_ASSERT(0 && "Allocation not found in linear allocator!"); + return const_cast(suballocations1st.back()); // Should never occur. +} + +bool VmaBlockMetadata_Linear::ShouldCompact1st() const +{ + const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + const size_t suballocCount = AccessSuballocations1st().size(); + return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3; +} + +void VmaBlockMetadata_Linear::CleanupAfterFree() +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (IsEmpty()) + { + suballocations1st.clear(); + suballocations2nd.clear(); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + else + { + const size_t suballoc1stCount = suballocations1st.size(); + const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + VMA_ASSERT(nullItem1stCount <= suballoc1stCount); + + // Find more null items at the beginning of 1st vector. + while (m_1stNullItemsBeginCount < suballoc1stCount && + suballocations1st[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + + // Find more null items at the end of 1st vector. + while (m_1stNullItemsMiddleCount > 0 && + suballocations1st.back().type == VMA_SUBALLOCATION_TYPE_FREE) + { + --m_1stNullItemsMiddleCount; + suballocations1st.pop_back(); + } + + // Find more null items at the end of 2nd vector. 
+ while (m_2ndNullItemsCount > 0 && + suballocations2nd.back().type == VMA_SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + suballocations2nd.pop_back(); + } + + // Find more null items at the beginning of 2nd vector. + while (m_2ndNullItemsCount > 0 && + suballocations2nd[0].type == VMA_SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + VmaVectorRemove(suballocations2nd, 0); + } + + if (ShouldCompact1st()) + { + const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount; + size_t srcIndex = m_1stNullItemsBeginCount; + for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex) + { + while (suballocations1st[srcIndex].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++srcIndex; + } + if (dstIndex != srcIndex) + { + suballocations1st[dstIndex] = suballocations1st[srcIndex]; + } + ++srcIndex; + } + suballocations1st.resize(nonNullItemCount); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + } + + // 2nd vector became empty. + if (suballocations2nd.empty()) + { + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + + // 1st vector became empty. + if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) + { + suballocations1st.clear(); + m_1stNullItemsBeginCount = 0; + + if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + // Swap 1st with 2nd. Now 2nd is empty. 
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsMiddleCount = m_2ndNullItemsCount; + while (m_1stNullItemsBeginCount < suballocations2nd.size() && + suballocations2nd[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + m_2ndNullItemsCount = 0; + m_1stVectorIndex ^= 1; + } + } + } + + VMA_HEAVY_ASSERT(Validate()); +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize debugMargin = GetDebugMargin(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + // Try to allocate at the end of 1st vector. + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations1st.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations1st.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. 
+ if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? + suballocations2nd.back().offset : blockSize; + + // There is enough free space at the end after alignment. + if (resultOffset + allocSize + debugMargin <= freeSpaceEnd) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if ((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on previous page. + break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + // pAllocationRequest->item, customData unused. + pAllocationRequest->type = VmaAllocationRequestType::EndOf1st; + return true; + } + } + + // Wrap-around to end of 2nd vector. 
Try to allocate there, watching for the + // beginning of 1st vector as the end of free space. + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(!suballocations1st.empty()); + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + size_t index1st = m_1stNullItemsBeginCount; + + // There is enough free space at the end after alignment. + if ((index1st == suballocations1st.size() && resultOffset + allocSize + debugMargin <= blockSize) || + (index1st < suballocations1st.size() && resultOffset + allocSize + debugMargin <= suballocations1st[index1st].offset)) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. 
+ if (allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) + { + for (size_t nextSuballocIndex = index1st; + nextSuballocIndex < suballocations1st.size(); + nextSuballocIndex++) + { + const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. + break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd; + // pAllocationRequest->item, customData unused. + return true; + } + } + + return false; +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); + return false; + } + + // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). + if (allocSize > blockSize) + { + return false; + } + VkDeviceSize resultBaseOffset = blockSize - allocSize; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset - allocSize; + if (allocSize > lastSuballoc.offset) + { + return false; + } + } + + // Start from offset equal to end of free space. 
+ VkDeviceSize resultOffset = resultBaseOffset; + + const VkDeviceSize debugMargin = GetDebugMargin(); + + // Apply debugMargin at the end. + if (debugMargin > 0) + { + if (resultOffset < debugMargin) + { + return false; + } + resultOffset -= debugMargin; + } + + // Apply alignment. + resultOffset = VmaAlignDown(resultOffset, allocAlignment); + + // Check next suballocations from 2nd for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity); + } + } + + // There is enough free space. + const VkDeviceSize endOf1st = !suballocations1st.empty() ? + suballocations1st.back().offset + suballocations1st.back().size : + 0; + if (endOf1st + debugMargin <= resultOffset) + { + // Check previous suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if (bufferImageGranularity > 1) + { + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. 
+ break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + // pAllocationRequest->item unused. + pAllocationRequest->type = VmaAllocationRequestType::UpperAddress; + return true; + } + + return false; +} +#endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS +#endif // _VMA_BLOCK_METADATA_LINEAR + +#ifndef _VMA_BLOCK_METADATA_TLSF +// To not search current larger region if first allocation won't succeed and skip to smaller range +// use with VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as strategy in CreateAllocationRequest(). +// When fragmentation and reusal of previous blocks doesn't matter then use with +// VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for fastest alloc time possible. +class VmaBlockMetadata_TLSF : public VmaBlockMetadata +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF) +public: + VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata_TLSF(); + + size_t GetAllocationCount() const override { return m_AllocCount; } + size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } + VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } + bool IsEmpty() const override { return m_NullBlock->offset == 0; } + VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; } + + void Init(VkDeviceSize size) override; + bool Validate() const override; + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; + void AddStatistics(VmaStatistics& inoutStats) const override; + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json) const override; +#endif + + bool CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + 
VmaAllocationRequest* pAllocationRequest) override; + + VkResult CheckCorruption(const void* pBlockData) override; + void Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) override; + + void Free(VmaAllocHandle allocHandle) override; + void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; + void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; + VmaAllocHandle GetAllocationListBegin() const override; + VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; + VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override; + void Clear() override; + void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; + void DebugLogAllAllocations() const override; + +private: + // According to original paper it should be preferable 4 or 5: + // M. Masmano, I. Ripoll, A. Crespo, and J. Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems" + // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf + static const uint8_t SECOND_LEVEL_INDEX = 5; + static const uint16_t SMALL_BUFFER_SIZE = 256; + static const uint32_t INITIAL_BLOCK_ALLOC_COUNT = 16; + static const uint8_t MEMORY_CLASS_SHIFT = 7; + static const uint8_t MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT; + + class Block + { + public: + VkDeviceSize offset; + VkDeviceSize size; + Block* prevPhysical; + Block* nextPhysical; + + void MarkFree() { prevFree = VMA_NULL; } + void MarkTaken() { prevFree = this; } + bool IsFree() const { return prevFree != this; } + void*& UserData() { VMA_HEAVY_ASSERT(!IsFree()); return userData; } + Block*& PrevFree() { return prevFree; } + Block*& NextFree() { VMA_HEAVY_ASSERT(IsFree()); return nextFree; } + + private: + Block* prevFree; // Address of the same block here indicates that block is taken + union + { + Block* nextFree; + void* userData; + }; + }; + + size_t m_AllocCount; + // Total number of free blocks besides null block + 
size_t m_BlocksFreeCount; + // Total size of free blocks excluding null block + VkDeviceSize m_BlocksFreeSize; + uint32_t m_IsFreeBitmap; + uint8_t m_MemoryClasses; + uint32_t m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES]; + uint32_t m_ListsCount; + /* + * 0: 0-3 lists for small buffers + * 1+: 0-(2^SLI-1) lists for normal buffers + */ + Block** m_FreeList; + VmaPoolAllocator m_BlockAllocator; + Block* m_NullBlock; + VmaBlockBufferImageGranularity m_GranularityHandler; + + uint8_t SizeToMemoryClass(VkDeviceSize size) const; + uint16_t SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const; + uint32_t GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const; + uint32_t GetListIndex(VkDeviceSize size) const; + + void RemoveFreeBlock(Block* block); + void InsertFreeBlock(Block* block); + void MergeBlock(Block* block, Block* prev); + + Block* FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const; + bool CheckBlock( + Block& block, + uint32_t listIndex, + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + VmaAllocationRequest* pAllocationRequest); +}; + +#ifndef _VMA_BLOCK_METADATA_TLSF_FUNCTIONS +VmaBlockMetadata_TLSF::VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), + m_AllocCount(0), + m_BlocksFreeCount(0), + m_BlocksFreeSize(0), + m_IsFreeBitmap(0), + m_MemoryClasses(0), + m_ListsCount(0), + m_FreeList(VMA_NULL), + m_BlockAllocator(pAllocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT), + m_NullBlock(VMA_NULL), + m_GranularityHandler(bufferImageGranularity) {} + +VmaBlockMetadata_TLSF::~VmaBlockMetadata_TLSF() +{ + if (m_FreeList) + vma_delete_array(GetAllocationCallbacks(), m_FreeList, m_ListsCount); + m_GranularityHandler.Destroy(GetAllocationCallbacks()); +} + +void VmaBlockMetadata_TLSF::Init(VkDeviceSize size) +{ + VmaBlockMetadata::Init(size); + + if 
(!IsVirtual()) + m_GranularityHandler.Init(GetAllocationCallbacks(), size); + + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = size; + m_NullBlock->offset = 0; + m_NullBlock->prevPhysical = VMA_NULL; + m_NullBlock->nextPhysical = VMA_NULL; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = VMA_NULL; + m_NullBlock->PrevFree() = VMA_NULL; + uint8_t memoryClass = SizeToMemoryClass(size); + uint16_t sli = SizeToSecondIndex(size, memoryClass); + m_ListsCount = (memoryClass == 0 ? 0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; + if (IsVirtual()) + m_ListsCount += 1UL << SECOND_LEVEL_INDEX; + else + m_ListsCount += 4; + + m_MemoryClasses = memoryClass + uint8_t(2); + memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t)); + + m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount); + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool VmaBlockMetadata_TLSF::Validate() const +{ + VMA_VALIDATE(GetSumFreeSize() <= GetSize()); + + VkDeviceSize calculatedSize = m_NullBlock->size; + VkDeviceSize calculatedFreeSize = m_NullBlock->size; + size_t allocCount = 0; + size_t freeCount = 0; + + // Check integrity of free lists + for (uint32_t list = 0; list < m_ListsCount; ++list) + { + Block* block = m_FreeList[list]; + if (block != VMA_NULL) + { + VMA_VALIDATE(block->IsFree()); + VMA_VALIDATE(block->PrevFree() == VMA_NULL); + while (block->NextFree()) + { + VMA_VALIDATE(block->NextFree()->IsFree()); + VMA_VALIDATE(block->NextFree()->PrevFree() == block); + block = block->NextFree(); + } + } + } + + VkDeviceSize nextOffset = m_NullBlock->offset; + auto validateCtx = m_GranularityHandler.StartValidation(GetAllocationCallbacks(), IsVirtual()); + + VMA_VALIDATE(m_NullBlock->nextPhysical == VMA_NULL); + if (m_NullBlock->prevPhysical) + { + VMA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); + } + // Check all blocks + for (Block* prev = m_NullBlock->prevPhysical; prev != VMA_NULL; prev = 
prev->prevPhysical) + { + VMA_VALIDATE(prev->offset + prev->size == nextOffset); + nextOffset = prev->offset; + calculatedSize += prev->size; + + uint32_t listIndex = GetListIndex(prev->size); + if (prev->IsFree()) + { + ++freeCount; + // Check if free block belongs to free list + Block* freeBlock = m_FreeList[listIndex]; + VMA_VALIDATE(freeBlock != VMA_NULL); + + bool found = false; + do + { + if (freeBlock == prev) + found = true; + + freeBlock = freeBlock->NextFree(); + } while (!found && freeBlock != VMA_NULL); + + VMA_VALIDATE(found); + calculatedFreeSize += prev->size; + } + else + { + ++allocCount; + // Check if taken block is not on a free list + Block* freeBlock = m_FreeList[listIndex]; + while (freeBlock) + { + VMA_VALIDATE(freeBlock != prev); + freeBlock = freeBlock->NextFree(); + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.Validate(validateCtx, prev->offset, prev->size)); + } + } + + if (prev->prevPhysical) + { + VMA_VALIDATE(prev->prevPhysical->nextPhysical == prev); + } + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.FinishValidation(validateCtx)); + } + + VMA_VALIDATE(nextOffset == 0); + VMA_VALIDATE(calculatedSize == GetSize()); + VMA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); + VMA_VALIDATE(allocCount == m_AllocCount); + VMA_VALIDATE(freeCount == m_BlocksFreeCount); + + return true; +} + +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); + if (m_NullBlock->size > 0) + VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); + + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree()) + VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); + else + VmaAddDetailedStatisticsAllocation(inoutStats, block->size); + } +} + +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const +{ 
+ inoutStats.blockCount++; + inoutStats.allocationCount += (uint32_t)m_AllocCount; + inoutStats.blockBytes += GetSize(); + inoutStats.allocationBytes += GetSize() - GetSumFreeSize(); +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const +{ + size_t blockCount = m_AllocCount + m_BlocksFreeCount; + VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); + VmaVector<Block*, VmaStlAllocator<Block*>> blockList(blockCount, allocator); + + size_t i = blockCount; + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + blockList[--i] = block; + } + VMA_ASSERT(i == 0); + + VmaDetailedStatistics stats; + VmaClearDetailedStatistics(stats); + AddDetailedStatistics(stats); + + PrintDetailedMap_Begin(json, + stats.statistics.blockBytes - stats.statistics.allocationBytes, + stats.statistics.allocationCount, + stats.unusedRangeCount); + + for (; i < blockCount; ++i) + { + Block* block = blockList[i]; + if (block->IsFree()) + PrintDetailedMap_UnusedRange(json, block->offset, block->size); + else + PrintDetailedMap_Allocation(json, block->offset, block->size, block->UserData()); + } + if (m_NullBlock->size > 0) + PrintDetailedMap_UnusedRange(json, m_NullBlock->offset, m_NullBlock->size); + + PrintDetailedMap_End(json); +} +#endif + +bool VmaBlockMetadata_TLSF::CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + VMA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); + VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm."); + + // For small granularity round up + if (!IsVirtual()) + m_GranularityHandler.RoundupAllocRequest(allocType, allocSize, allocAlignment); + + allocSize += GetDebugMargin(); + // Quick check for too small pool + if (allocSize > GetSumFreeSize()) + return false; + + // If no free
blocks in pool then check only null block + if (m_BlocksFreeCount == 0) + return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest); + + // Round up to the next block + VkDeviceSize sizeForNextList = allocSize; + VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4)); + if (allocSize > SMALL_BUFFER_SIZE) + { + sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX)); + } + else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep) + sizeForNextList = SMALL_BUFFER_SIZE + 1; + else + sizeForNextList += smallSizeStep; + + uint32_t nextListIndex = m_ListsCount; + uint32_t prevListIndex = m_ListsCount; + Block* nextListBlock = VMA_NULL; + Block* prevListBlock = VMA_NULL; + + // Check blocks according to strategies + if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) + { + // Quick check for larger block first + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + if (nextListBlock != VMA_NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // If not fitted then null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Null block failed, search larger bucket + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + + // Failed again, check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + } + else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT) + { + // Check best fit bucket + prevListBlock = 
FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Check larger bucket + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + } + else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT ) + { + // Perform search from the start + VmaStlAllocator<Block*> allocator(GetAllocationCallbacks()); + VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator); + + size_t i = m_BlocksFreeCount; + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree() && block->size >= allocSize) + blockList[--i] = block; + } + + for (; i < m_BlocksFreeCount; ++i) + { + Block& block = *blockList[i]; + if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + + // Whole range searched, no more memory + return false; + } + else + { + // Check larger bucket + nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex); + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + + // If failed check null block + if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + +
// Check best fit bucket + prevListBlock = FindFreeBlock(allocSize, prevListIndex); + while (prevListBlock) + { + if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + prevListBlock = prevListBlock->NextFree(); + } + } + + // Worst case, full search has to be done + while (++nextListIndex < m_ListsCount) + { + nextListBlock = m_FreeList[nextListIndex]; + while (nextListBlock) + { + if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest)) + return true; + nextListBlock = nextListBlock->NextFree(); + } + } + + // No more memory sadly + return false; +} + +VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData) +{ + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (!block->IsFree()) + { + if (!VmaValidateMagicValue(pBlockData, block->offset + block->size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + return VK_SUCCESS; +} + +void VmaBlockMetadata_TLSF::Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) +{ + VMA_ASSERT(request.type == VmaAllocationRequestType::TLSF); + + // Get block and pop it from the free list + Block* currentBlock = (Block*)request.allocHandle; + VkDeviceSize offset = request.algorithmData; + VMA_ASSERT(currentBlock != VMA_NULL); + VMA_ASSERT(currentBlock->offset <= offset); + + if (currentBlock != m_NullBlock) + RemoveFreeBlock(currentBlock); + + VkDeviceSize debugMargin = GetDebugMargin(); + VkDeviceSize missingAlignment = offset - currentBlock->offset; + + // Append missing alignment to prev block or create new one + if (missingAlignment) + { + Block* prevBlock = currentBlock->prevPhysical; + VMA_ASSERT(prevBlock != VMA_NULL && "There should be no missing alignment at offset 0!"); + + if (prevBlock->IsFree() && prevBlock->size != 
debugMargin) + { + uint32_t oldList = GetListIndex(prevBlock->size); + prevBlock->size += missingAlignment; + // Check if new size crosses list bucket + if (oldList != GetListIndex(prevBlock->size)) + { + prevBlock->size -= missingAlignment; + RemoveFreeBlock(prevBlock); + prevBlock->size += missingAlignment; + InsertFreeBlock(prevBlock); + } + else + m_BlocksFreeSize += missingAlignment; + } + else + { + Block* newBlock = m_BlockAllocator.Alloc(); + currentBlock->prevPhysical = newBlock; + prevBlock->nextPhysical = newBlock; + newBlock->prevPhysical = prevBlock; + newBlock->nextPhysical = currentBlock; + newBlock->size = missingAlignment; + newBlock->offset = currentBlock->offset; + newBlock->MarkTaken(); + + InsertFreeBlock(newBlock); + } + + currentBlock->size -= missingAlignment; + currentBlock->offset += missingAlignment; + } + + VkDeviceSize size = request.size + debugMargin; + if (currentBlock->size == size) + { + if (currentBlock == m_NullBlock) + { + // Setup new null block + m_NullBlock = m_BlockAllocator.Alloc(); + m_NullBlock->size = 0; + m_NullBlock->offset = currentBlock->offset + size; + m_NullBlock->prevPhysical = currentBlock; + m_NullBlock->nextPhysical = VMA_NULL; + m_NullBlock->MarkFree(); + m_NullBlock->PrevFree() = VMA_NULL; + m_NullBlock->NextFree() = VMA_NULL; + currentBlock->nextPhysical = m_NullBlock; + currentBlock->MarkTaken(); + } + } + else + { + VMA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!"); + + // Create new free block + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = currentBlock->size - size; + newBlock->offset = currentBlock->offset + size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + currentBlock->nextPhysical = newBlock; + currentBlock->size = size; + + if (currentBlock == m_NullBlock) + { + m_NullBlock = newBlock; + m_NullBlock->MarkFree(); + m_NullBlock->NextFree() = VMA_NULL; + m_NullBlock->PrevFree() = 
VMA_NULL; + currentBlock->MarkTaken(); + } + else + { + newBlock->nextPhysical->prevPhysical = newBlock; + newBlock->MarkTaken(); + InsertFreeBlock(newBlock); + } + } + currentBlock->UserData() = userData; + + if (debugMargin > 0) + { + currentBlock->size -= debugMargin; + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = debugMargin; + newBlock->offset = currentBlock->offset + currentBlock->size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + newBlock->MarkTaken(); + currentBlock->nextPhysical->prevPhysical = newBlock; + currentBlock->nextPhysical = newBlock; + InsertFreeBlock(newBlock); + } + + if (!IsVirtual()) + m_GranularityHandler.AllocPages((uint8_t)(uintptr_t)request.customData, + currentBlock->offset, currentBlock->size); + ++m_AllocCount; +} + +void VmaBlockMetadata_TLSF::Free(VmaAllocHandle allocHandle) +{ + Block* block = (Block*)allocHandle; + Block* next = block->nextPhysical; + VMA_ASSERT(!block->IsFree() && "Block is already free!"); + + if (!IsVirtual()) + m_GranularityHandler.FreePages(block->offset, block->size); + --m_AllocCount; + + VkDeviceSize debugMargin = GetDebugMargin(); + if (debugMargin > 0) + { + RemoveFreeBlock(next); + MergeBlock(next, block); + block = next; + next = next->nextPhysical; + } + + // Try merging + Block* prev = block->prevPhysical; + if (prev != VMA_NULL && prev->IsFree() && prev->size != debugMargin) + { + RemoveFreeBlock(prev); + MergeBlock(block, prev); + } + + if (!next->IsFree()) + InsertFreeBlock(block); + else if (next == m_NullBlock) + MergeBlock(m_NullBlock, block); + else + { + RemoveFreeBlock(next); + MergeBlock(next, block); + InsertFreeBlock(next); + } +} + +void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!"); + outInfo.offset = block->offset; + outInfo.size = 
block->size; + outInfo.pUserData = block->UserData(); +} + +void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!"); + return block->UserData(); +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const +{ + if (m_AllocCount == 0) + return VK_NULL_HANDLE; + + for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + Block* startBlock = (Block*)prevAlloc; + VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + + for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (VmaAllocHandle)block; + } + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + Block* block = (Block*)alloc; + VMA_ASSERT(!block->IsFree() && "Incorrect block!"); + + if (block->prevPhysical) + return block->prevPhysical->IsFree() ? 
block->prevPhysical->size : 0; + return 0; +} + +void VmaBlockMetadata_TLSF::Clear() +{ + m_AllocCount = 0; + m_BlocksFreeCount = 0; + m_BlocksFreeSize = 0; + m_IsFreeBitmap = 0; + m_NullBlock->offset = 0; + m_NullBlock->size = GetSize(); + Block* block = m_NullBlock->prevPhysical; + m_NullBlock->prevPhysical = VMA_NULL; + while (block) + { + Block* prev = block->prevPhysical; + m_BlockAllocator.Free(block); + block = prev; + } + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); + memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(uint32_t)); + m_GranularityHandler.Clear(); +} + +void VmaBlockMetadata_TLSF::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ + Block* block = (Block*)allocHandle; + VMA_ASSERT(!block->IsFree() && "Trying to set user data for not allocated block!"); + block->UserData() = userData; +} + +void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const +{ + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + if (!block->IsFree()) + DebugLogAllocation(block->offset, block->size, block->UserData()); +} + +uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const +{ + if (size > SMALL_BUFFER_SIZE) + return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT); + return 0; +} + +uint16_t VmaBlockMetadata_TLSF::SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const +{ + if (memoryClass == 0) + { + if (IsVirtual()) + return static_cast<uint16_t>((size - 1) / 8); + else + return static_cast<uint16_t>((size - 1) / 64); + } + return static_cast<uint16_t>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX)); +} + +uint32_t VmaBlockMetadata_TLSF::GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const +{ + if (memoryClass == 0) + return secondIndex; + + const uint32_t index = static_cast<uint32_t>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex; + if (IsVirtual()) + return index + (1 << SECOND_LEVEL_INDEX); + else + return index + 4; +} +
+uint32_t VmaBlockMetadata_TLSF::GetListIndex(VkDeviceSize size) const +{ + uint8_t memoryClass = SizeToMemoryClass(size); + return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass)); +} + +void VmaBlockMetadata_TLSF::RemoveFreeBlock(Block* block) +{ + VMA_ASSERT(block != m_NullBlock); + VMA_ASSERT(block->IsFree()); + + if (block->NextFree() != VMA_NULL) + block->NextFree()->PrevFree() = block->PrevFree(); + if (block->PrevFree() != VMA_NULL) + block->PrevFree()->NextFree() = block->NextFree(); + else + { + uint8_t memClass = SizeToMemoryClass(block->size); + uint16_t secondIndex = SizeToSecondIndex(block->size, memClass); + uint32_t index = GetListIndex(memClass, secondIndex); + VMA_ASSERT(m_FreeList[index] == block); + m_FreeList[index] = block->NextFree(); + if (block->NextFree() == VMA_NULL) + { + m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex); + if (m_InnerIsFreeBitmap[memClass] == 0) + m_IsFreeBitmap &= ~(1UL << memClass); + } + } + block->MarkTaken(); + block->UserData() = VMA_NULL; + --m_BlocksFreeCount; + m_BlocksFreeSize -= block->size; +} + +void VmaBlockMetadata_TLSF::InsertFreeBlock(Block* block) +{ + VMA_ASSERT(block != m_NullBlock); + VMA_ASSERT(!block->IsFree() && "Cannot insert block twice!"); + + uint8_t memClass = SizeToMemoryClass(block->size); + uint16_t secondIndex = SizeToSecondIndex(block->size, memClass); + uint32_t index = GetListIndex(memClass, secondIndex); + VMA_ASSERT(index < m_ListsCount); + block->PrevFree() = VMA_NULL; + block->NextFree() = m_FreeList[index]; + m_FreeList[index] = block; + if (block->NextFree() != VMA_NULL) + block->NextFree()->PrevFree() = block; + else + { + m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex; + m_IsFreeBitmap |= 1UL << memClass; + } + ++m_BlocksFreeCount; + m_BlocksFreeSize += block->size; +} + +void VmaBlockMetadata_TLSF::MergeBlock(Block* block, Block* prev) +{ + VMA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!"); + VMA_ASSERT(!prev->IsFree() 
&& "Cannot merge block that belongs to free list!"); + + block->offset = prev->offset; + block->size += prev->size; + block->prevPhysical = prev->prevPhysical; + if (block->prevPhysical) + block->prevPhysical->nextPhysical = block; + m_BlockAllocator.Free(prev); +} + +VmaBlockMetadata_TLSF::Block* VmaBlockMetadata_TLSF::FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const +{ + uint8_t memoryClass = SizeToMemoryClass(size); + uint32_t innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass)); + if (!innerFreeMap) + { + // Check higher levels for available blocks + uint32_t freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1)); + if (!freeMap) + return VMA_NULL; // No more memory available + + // Find lowest free region + memoryClass = VMA_BITSCAN_LSB(freeMap); + innerFreeMap = m_InnerIsFreeBitmap[memoryClass]; + VMA_ASSERT(innerFreeMap != 0); + } + // Find lowest free subregion + listIndex = GetListIndex(memoryClass, VMA_BITSCAN_LSB(innerFreeMap)); + VMA_ASSERT(m_FreeList[listIndex]); + return m_FreeList[listIndex]; +} + +bool VmaBlockMetadata_TLSF::CheckBlock( + Block& block, + uint32_t listIndex, + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + VmaAllocationRequest* pAllocationRequest) +{ + VMA_ASSERT(block.IsFree() && "Block is already taken!"); + + VkDeviceSize alignedOffset = VmaAlignUp(block.offset, allocAlignment); + if (block.size < allocSize + alignedOffset - block.offset) + return false; + + // Check for granularity conflicts + if (!IsVirtual() && + m_GranularityHandler.CheckConflictAndAlignUp(alignedOffset, allocSize, block.offset, block.size, allocType)) + return false; + + // Alloc successful + pAllocationRequest->type = VmaAllocationRequestType::TLSF; + pAllocationRequest->allocHandle = (VmaAllocHandle)&block; + pAllocationRequest->size = allocSize - GetDebugMargin(); + pAllocationRequest->customData = (void*)allocType; + pAllocationRequest->algorithmData =
alignedOffset; + + // Place block at the start of list if it's normal block + if (listIndex != m_ListsCount && block.PrevFree()) + { + block.PrevFree()->NextFree() = block.NextFree(); + if (block.NextFree()) + block.NextFree()->PrevFree() = block.PrevFree(); + block.PrevFree() = VMA_NULL; + block.NextFree() = m_FreeList[listIndex]; + m_FreeList[listIndex] = &block; + if (block.NextFree()) + block.NextFree()->PrevFree() = &block; + } + + return true; +} +#endif // _VMA_BLOCK_METADATA_TLSF_FUNCTIONS +#endif // _VMA_BLOCK_METADATA_TLSF + +#ifndef _VMA_BLOCK_VECTOR +/* +Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific +Vulkan memory type. + +Synchronized internally with a mutex. +*/ +class VmaBlockVector +{ + friend struct VmaDefragmentationContext_T; + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector) +public: + VmaBlockVector( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceSize preferredBlockSize, + size_t minBlockCount, + size_t maxBlockCount, + VkDeviceSize bufferImageGranularity, + bool explicitBlockSize, + uint32_t algorithm, + float priority, + VkDeviceSize minAllocationAlignment, + void* pMemoryAllocateNext); + ~VmaBlockVector(); + + VmaAllocator GetAllocator() const { return m_hAllocator; } + VmaPool GetParentPool() const { return m_hParentPool; } + bool IsCustomPool() const { return m_hParentPool != VMA_NULL; } + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } + VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; } + VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } + uint32_t GetAlgorithm() const { return m_Algorithm; } + bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; } + float GetPriority() const { return m_Priority; } + const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; } + // To be used only while the m_Mutex is locked. Used during defragmentation.
+ size_t GetBlockCount() const { return m_Blocks.size(); } + // To be used only while the m_Mutex is locked. Used during defragmentation. + VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; } + VMA_RW_MUTEX &GetMutex() { return m_Mutex; } + + VkResult CreateMinBlocks(); + void AddStatistics(VmaStatistics& inoutStats); + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); + bool IsEmpty(); + bool IsCorruptionDetectionEnabled() const; + + VkResult Allocate( + VkDeviceSize size, + VkDeviceSize alignment, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + size_t allocationCount, + VmaAllocation* pAllocations); + + void Free(const VmaAllocation hAllocation); + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json); +#endif + + VkResult CheckCorruption(); + +private: + const VmaAllocator m_hAllocator; + const VmaPool m_hParentPool; + const uint32_t m_MemoryTypeIndex; + const VkDeviceSize m_PreferredBlockSize; + const size_t m_MinBlockCount; + const size_t m_MaxBlockCount; + const VkDeviceSize m_BufferImageGranularity; + const bool m_ExplicitBlockSize; + const uint32_t m_Algorithm; + const float m_Priority; + const VkDeviceSize m_MinAllocationAlignment; + + void* const m_pMemoryAllocateNext; + VMA_RW_MUTEX m_Mutex; + // Incrementally sorted by sumFreeSize, ascending. + VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks; + uint32_t m_NextBlockId; + bool m_IncrementalSort = true; + + void SetIncrementalSort(bool val) { m_IncrementalSort = val; } + + VkDeviceSize CalcMaxBlockSize() const; + // Finds and removes given block from vector. + void Remove(VmaDeviceMemoryBlock* pBlock); + // Performs single step in sorting m_Blocks. They may not be fully sorted + // after this call.
+ void IncrementallySortBlocks(); + void SortByFreeSize(); + + VkResult AllocatePage( + VkDeviceSize size, + VkDeviceSize alignment, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation); + + VkResult AllocateFromBlock( + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize size, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + uint32_t strategy, + VmaAllocation* pAllocation); + + VkResult CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation); + + VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); + bool HasEmptyBlock(); +}; +#endif // _VMA_BLOCK_VECTOR + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT +struct VmaDefragmentationContext_T +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T) +public: + VmaDefragmentationContext_T( + VmaAllocator hAllocator, + const VmaDefragmentationInfo& info); + ~VmaDefragmentationContext_T(); + + void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; } + + VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo); + VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo); + +private: + // Max number of allocations to ignore due to size constraints before ending single pass + static const uint8_t MAX_ALLOCS_TO_IGNORE = 16; + enum class CounterStatus { Pass, Ignore, End }; + + struct FragmentedBlock + { + uint32_t data; + VmaDeviceMemoryBlock* block; + }; + struct StateBalanced + { + VkDeviceSize avgFreeSize = 0; + VkDeviceSize avgAllocSize = UINT64_MAX; + }; + struct StateExtensive + { + enum class Operation : uint8_t + { + FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll, + MoveBuffers, MoveTextures, MoveAll, + Cleanup, Done + }; + + Operation 
operation = Operation::FindFreeBlockTexture; + size_t firstFreeBlock = SIZE_MAX; + }; + struct MoveAllocationData + { + VkDeviceSize size; + VkDeviceSize alignment; + VmaSuballocationType type; + VmaAllocationCreateFlags flags; + VmaDefragmentationMove move = {}; + }; + + const VkDeviceSize m_MaxPassBytes; + const uint32_t m_MaxPassAllocations; + const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback; + void* m_BreakCallbackUserData; + + VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator; + VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves; + + uint8_t m_IgnoredAllocs = 0; + uint32_t m_Algorithm; + uint32_t m_BlockVectorCount; + VmaBlockVector* m_PoolBlockVector; + VmaBlockVector** m_pBlockVectors; + size_t m_ImmovableBlockCount = 0; + VmaDefragmentationStats m_GlobalStats = { 0 }; + VmaDefragmentationStats m_PassStats = { 0 }; + void* m_AlgorithmState = VMA_NULL; + + static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata); + CounterStatus CheckCounters(VkDeviceSize bytes); + bool IncrementCounters(VkDeviceSize bytes); + bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block); + bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector); + + bool ComputeDefragmentation(VmaBlockVector& vector, size_t index); + bool ComputeDefragmentation_Fast(VmaBlockVector& vector); + bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update); + bool ComputeDefragmentation_Full(VmaBlockVector& vector); + bool ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index); + + void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); + bool MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent); +}; +#endif // _VMA_DEFRAGMENTATION_CONTEXT + +#ifndef _VMA_POOL_T +struct VmaPool_T +{ + friend struct VmaPoolListItemTraits; + VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T) +public:
+ VmaBlockVector m_BlockVector; + VmaDedicatedAllocationList m_DedicatedAllocations; + + VmaPool_T( + VmaAllocator hAllocator, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize); + ~VmaPool_T(); + + uint32_t GetId() const { return m_Id; } + void SetId(uint32_t id) { VMA_ASSERT(m_Id == 0); m_Id = id; } + + const char* GetName() const { return m_Name; } + void SetName(const char* pName); + +#if VMA_STATS_STRING_ENABLED + //void PrintDetailedMap(class VmaStringBuilder& sb); +#endif + +private: + uint32_t m_Id; + char* m_Name; + VmaPool_T* m_PrevPool = VMA_NULL; + VmaPool_T* m_NextPool = VMA_NULL; +}; + +struct VmaPoolListItemTraits +{ + typedef VmaPool_T ItemType; + + static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } + static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } + static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } + static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } +}; +#endif // _VMA_POOL_T + +#ifndef _VMA_CURRENT_BUDGET_DATA +struct VmaCurrentBudgetData +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData) +public: + + VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; + +#if VMA_MEMORY_BUDGET + VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch; + VMA_RW_MUTEX m_BudgetMutex; + uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS]; + uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS]; + uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS]; +#endif // VMA_MEMORY_BUDGET + + VmaCurrentBudgetData(); + + void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); + void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); +}; + +#ifndef _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +VmaCurrentBudgetData::VmaCurrentBudgetData() +{ + for (uint32_t heapIndex = 0; heapIndex < 
VK_MAX_MEMORY_HEAPS; ++heapIndex) + { + m_BlockCount[heapIndex] = 0; + m_AllocationCount[heapIndex] = 0; + m_BlockBytes[heapIndex] = 0; + m_AllocationBytes[heapIndex] = 0; +#if VMA_MEMORY_BUDGET + m_VulkanUsage[heapIndex] = 0; + m_VulkanBudget[heapIndex] = 0; + m_BlockBytesAtBudgetFetch[heapIndex] = 0; +#endif + } + +#if VMA_MEMORY_BUDGET + m_OperationsSinceBudgetFetch = 0; +#endif +} + +void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + m_AllocationBytes[heapIndex] += allocationSize; + ++m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} + +void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); + m_AllocationBytes[heapIndex] -= allocationSize; + VMA_ASSERT(m_AllocationCount[heapIndex] > 0); + --m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} +#endif // _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +#endif // _VMA_CURRENT_BUDGET_DATA + +#ifndef _VMA_ALLOCATION_OBJECT_ALLOCATOR +/* +Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects. +*/ +class VmaAllocationObjectAllocator +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator) +public: + VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks) + : m_Allocator(pAllocationCallbacks, 1024) {} + + template VmaAllocation Allocate(Types&&... args); + void Free(VmaAllocation hAlloc); + +private: + VMA_MUTEX m_Mutex; + VmaPoolAllocator m_Allocator; +}; + +template +VmaAllocation VmaAllocationObjectAllocator::Allocate(Types&&... 
args) +{ + VmaMutexLock mutexLock(m_Mutex); + return m_Allocator.Alloc(std::forward(args)...); +} + +void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) +{ + VmaMutexLock mutexLock(m_Mutex); + m_Allocator.Free(hAlloc); +} +#endif // _VMA_ALLOCATION_OBJECT_ALLOCATOR + +#ifndef _VMA_VIRTUAL_BLOCK_T +struct VmaVirtualBlock_T +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T) +public: + const bool m_AllocationCallbacksSpecified; + const VkAllocationCallbacks m_AllocationCallbacks; + + VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo); + ~VmaVirtualBlock_T(); + + VkResult Init() { return VK_SUCCESS; } + bool IsEmpty() const { return m_Metadata->IsEmpty(); } + void Free(VmaVirtualAllocation allocation) { m_Metadata->Free((VmaAllocHandle)allocation); } + void SetAllocationUserData(VmaVirtualAllocation allocation, void* userData) { m_Metadata->SetAllocationUserData((VmaAllocHandle)allocation, userData); } + void Clear() { m_Metadata->Clear(); } + + const VkAllocationCallbacks* GetAllocationCallbacks() const; + void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo); + VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, + VkDeviceSize* outOffset); + void GetStatistics(VmaStatistics& outStats) const; + void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const; +#if VMA_STATS_STRING_ENABLED + void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const; +#endif + +private: + VmaBlockMetadata* m_Metadata; +}; + +#ifndef _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo) + : m_AllocationCallbacksSpecified(createInfo.pAllocationCallbacks != VMA_NULL), + m_AllocationCallbacks(createInfo.pAllocationCallbacks != VMA_NULL ? 
*createInfo.pAllocationCallbacks : VmaEmptyAllocationCallbacks) +{ + const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK; + switch (algorithm) + { + case 0: + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); + break; + case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT: + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true); + break; + default: + VMA_ASSERT(0); + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); + } + + m_Metadata->Init(createInfo.size); +} + +VmaVirtualBlock_T::~VmaVirtualBlock_T() +{ + // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT + // to receive the list of the unfreed allocations. + if (!m_Metadata->IsEmpty()) + m_Metadata->DebugLogAllAllocations(); + // This is the most important assert in the entire library. + // Hitting it means you have some memory leak - unreleased virtual allocations. + VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); + + vma_delete(GetAllocationCallbacks(), m_Metadata); +} + +const VkAllocationCallbacks* VmaVirtualBlock_T::GetAllocationCallbacks() const +{ + return m_AllocationCallbacksSpecified ? 
&m_AllocationCallbacks : VMA_NULL; +} + +void VmaVirtualBlock_T::GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo) +{ + m_Metadata->GetAllocationInfo((VmaAllocHandle)allocation, outInfo); +} + +VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, + VkDeviceSize* outOffset) +{ + VmaAllocationRequest request = {}; + if (m_Metadata->CreateAllocationRequest( + createInfo.size, // allocSize + VMA_MAX(createInfo.alignment, (VkDeviceSize)1), // allocAlignment + (createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, // upperAddress + VMA_SUBALLOCATION_TYPE_UNKNOWN, // allocType - unimportant + createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK, // strategy + &request)) + { + m_Metadata->Alloc(request, + VMA_SUBALLOCATION_TYPE_UNKNOWN, // type - unimportant + createInfo.pUserData); + outAllocation = (VmaVirtualAllocation)request.allocHandle; + if(outOffset) + *outOffset = m_Metadata->GetAllocationOffset(request.allocHandle); + return VK_SUCCESS; + } + outAllocation = (VmaVirtualAllocation)VK_NULL_HANDLE; + if (outOffset) + *outOffset = UINT64_MAX; + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const +{ + VmaClearStatistics(outStats); + m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const +{ + VmaClearDetailedStatistics(outStats); + m_Metadata->AddDetailedStatistics(outStats); +} + +#if VMA_STATS_STRING_ENABLED +void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const +{ + VmaJsonWriter json(GetAllocationCallbacks(), sb); + json.BeginObject(); + + VmaDetailedStatistics stats; + CalculateDetailedStatistics(stats); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats); + + if (detailedMap) + { + json.WriteString("Details"); + json.BeginObject(); + 
m_Metadata->PrintDetailedMap(json); + json.EndObject(); + } + + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +#endif // _VMA_VIRTUAL_BLOCK_T + + +// Main allocator object. +struct VmaAllocator_T +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T) +public: + const bool m_UseMutex; + const uint32_t m_VulkanApiVersion; + bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseExtMemoryBudget; + bool m_UseAmdDeviceCoherentMemory; + bool m_UseKhrBufferDeviceAddress; + bool m_UseExtMemoryPriority; + bool m_UseKhrMaintenance4; + bool m_UseKhrMaintenance5; + bool m_UseKhrExternalMemoryWin32; + const VkDevice m_hDevice; + const VkInstance m_hInstance; + const bool m_AllocationCallbacksSpecified; + const VkAllocationCallbacks m_AllocationCallbacks; + VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; + VmaAllocationObjectAllocator m_AllocationObjectAllocator; + + // Each bit (1 << i) is set if HeapSizeLimit is enabled for that heap, so cannot allocate more than the heap size. + uint32_t m_HeapSizeLimitMask; + + VkPhysicalDeviceProperties m_PhysicalDeviceProperties; + VkPhysicalDeviceMemoryProperties m_MemProps; + + // Default pools. + VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES]; + VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES]; + + VmaCurrentBudgetData m_Budget; + VMA_ATOMIC_UINT32 m_DeviceMemoryCount; // Total number of VkDeviceMemory objects. + + VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo); + VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo); + ~VmaAllocator_T(); + + const VkAllocationCallbacks* GetAllocationCallbacks() const + { + return m_AllocationCallbacksSpecified ? 
&m_AllocationCallbacks : VMA_NULL; + } + const VmaVulkanFunctions& GetVulkanFunctions() const + { + return m_VulkanFunctions; + } + + VkPhysicalDevice GetPhysicalDevice() const { return m_PhysicalDevice; } + + VkDeviceSize GetBufferImageGranularity() const + { + return VMA_MAX( + static_cast(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY), + m_PhysicalDeviceProperties.limits.bufferImageGranularity); + } + + uint32_t GetMemoryHeapCount() const { return m_MemProps.memoryHeapCount; } + uint32_t GetMemoryTypeCount() const { return m_MemProps.memoryTypeCount; } + + uint32_t MemoryTypeIndexToHeapIndex(uint32_t memTypeIndex) const + { + VMA_ASSERT(memTypeIndex < m_MemProps.memoryTypeCount); + return m_MemProps.memoryTypes[memTypeIndex].heapIndex; + } + // True when specific memory type is HOST_VISIBLE but not HOST_COHERENT. + bool IsMemoryTypeNonCoherent(uint32_t memTypeIndex) const + { + return (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) == + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + // Minimum alignment for all allocations in specific memory type. + VkDeviceSize GetMemoryTypeMinAlignment(uint32_t memTypeIndex) const + { + return IsMemoryTypeNonCoherent(memTypeIndex) ? 
+ VMA_MAX((VkDeviceSize)VMA_MIN_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) : + (VkDeviceSize)VMA_MIN_ALIGNMENT; + } + + bool IsIntegratedGpu() const + { + return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; + } + + uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; } + + void GetBufferMemoryRequirements( + VkBuffer hBuffer, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const; + void GetImageMemoryRequirements( + VkImage hImage, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const; + VkResult FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VmaBufferImageUsage bufImgUsage, + uint32_t* pMemoryTypeIndex) const; + + // Main allocation function. + VkResult AllocateMemory( + const VkMemoryRequirements& vkMemReq, + bool requiresDedicatedAllocation, + bool prefersDedicatedAllocation, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + size_t allocationCount, + VmaAllocation* pAllocations); + + // Main deallocation function. 
+ void FreeMemory( + size_t allocationCount, + const VmaAllocation* pAllocations); + + void CalculateStatistics(VmaTotalStatistics* pStats); + + void GetHeapBudgets( + VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json); +#endif + + void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); + void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo); + + VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); + void DestroyPool(VmaPool pool); + void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); + void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); + + void SetCurrentFrameIndex(uint32_t frameIndex); + uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } + + VkResult CheckPoolCorruption(VmaPool hPool); + VkResult CheckCorruption(uint32_t memoryTypeBits); + + // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping. + VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); + // Call to Vulkan function vkFreeMemory with accompanying bookkeeping. + void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory); + // Call to Vulkan function vkBindBufferMemory or vkBindBufferMemory2KHR. + VkResult BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext); + // Call to Vulkan function vkBindImageMemory or vkBindImageMemory2KHR. 
+ VkResult BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext); + + VkResult Map(VmaAllocation hAllocation, void** ppData); + void Unmap(VmaAllocation hAllocation); + + VkResult BindBufferMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); + VkResult BindImageMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); + + VkResult FlushOrInvalidateAllocation( + VmaAllocation hAllocation, + VkDeviceSize offset, VkDeviceSize size, + VMA_CACHE_OPERATION op); + VkResult FlushOrInvalidateAllocations( + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + VMA_CACHE_OPERATION op); + + VkResult CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + VkResult CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size); + + void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); + + /* + Returns bit mask of memory types that can support defragmentation on GPU as + they support creation of required buffer for copy operations. + */ + uint32_t GetGpuDefragmentationMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY + VkExternalMemoryHandleTypeFlagsKHR GetExternalMemoryHandleTypeFlags(uint32_t memTypeIndex) const + { + return m_TypeExternalMemoryHandleTypes[memTypeIndex]; + } +#endif // #if VMA_EXTERNAL_MEMORY + +private: + VkDeviceSize m_PreferredLargeHeapBlockSize; + + VkPhysicalDevice m_PhysicalDevice; + VMA_ATOMIC_UINT32 m_CurrentFrameIndex; + VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. 
+#if VMA_EXTERNAL_MEMORY + VkExternalMemoryHandleTypeFlagsKHR m_TypeExternalMemoryHandleTypes[VK_MAX_MEMORY_TYPES]; +#endif // #if VMA_EXTERNAL_MEMORY + + VMA_RW_MUTEX m_PoolsMutex; + typedef VmaIntrusiveLinkedList PoolList; + // Protected by m_PoolsMutex. + PoolList m_Pools; + uint32_t m_NextPoolId; + + VmaVulkanFunctions m_VulkanFunctions; + + // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types. + uint32_t m_GlobalMemoryTypeBits; + + void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions); + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + void ImportVulkanFunctions_Static(); +#endif + + void ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions); + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + void ImportVulkanFunctions_Dynamic(); +#endif + + void ValidateVulkanFunctions(); + + VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex); + + VkResult AllocateMemoryOfType( + VmaPool pool, + VkDeviceSize size, + VkDeviceSize alignment, + bool dedicatedPreferred, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + const VmaAllocationCreateInfo& createInfo, + uint32_t memTypeIndex, + VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + VmaBlockVector& blockVector, + size_t allocationCount, + VmaAllocation* pAllocations); + + // Helper function only to be used inside AllocateDedicatedMemory. + VkResult AllocateDedicatedMemoryPage( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + uint32_t memTypeIndex, + const VkMemoryAllocateInfo& allocInfo, + bool map, + bool isUserDataString, + bool isMappingAllowed, + void* pUserData, + VmaAllocation* pAllocation); + + // Allocates and registers new VkDeviceMemory specifically for dedicated allocations. 
+ VkResult AllocateDedicatedMemory( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + uint32_t memTypeIndex, + bool map, + bool isUserDataString, + bool isMappingAllowed, + bool canAliasMemory, + void* pUserData, + float priority, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + size_t allocationCount, + VmaAllocation* pAllocations, + const void* pNextChain = VMA_NULL); + + void FreeDedicatedMemory(const VmaAllocation allocation); + + VkResult CalcMemTypeParams( + VmaAllocationCreateInfo& outCreateInfo, + uint32_t memTypeIndex, + VkDeviceSize size, + size_t allocationCount); + VkResult CalcAllocationParams( + VmaAllocationCreateInfo& outCreateInfo, + bool dedicatedRequired, + bool dedicatedPreferred); + + /* + Calculates and returns bit mask of memory types that can support defragmentation + on GPU as they support creation of required buffer for copy operations. 
+ */ + uint32_t CalculateGpuDefragmentationMemoryTypeBits() const; + uint32_t CalculateGlobalMemoryTypeBits() const; + + bool GetFlushOrInvalidateRange( + VmaAllocation allocation, + VkDeviceSize offset, VkDeviceSize size, + VkMappedMemoryRange& outRange) const; + +#if VMA_MEMORY_BUDGET + void UpdateVulkanBudget(); +#endif // #if VMA_MEMORY_BUDGET +}; + + +#ifndef _VMA_MEMORY_FUNCTIONS +static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment) +{ + return VmaMalloc(&hAllocator->m_AllocationCallbacks, size, alignment); +} + +static void VmaFree(VmaAllocator hAllocator, void* ptr) +{ + VmaFree(&hAllocator->m_AllocationCallbacks, ptr); +} + +template +static T* VmaAllocate(VmaAllocator hAllocator) +{ + return (T*)VmaMalloc(hAllocator, sizeof(T), VMA_ALIGN_OF(T)); +} + +template +static T* VmaAllocateArray(VmaAllocator hAllocator, size_t count) +{ + return (T*)VmaMalloc(hAllocator, sizeof(T) * count, VMA_ALIGN_OF(T)); +} + +template +static void vma_delete(VmaAllocator hAllocator, T* ptr) +{ + if(ptr != VMA_NULL) + { + ptr->~T(); + VmaFree(hAllocator, ptr); + } +} + +template +static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count) +{ + if(ptr != VMA_NULL) + { + for(size_t i = count; i--; ) + ptr[i].~T(); + VmaFree(hAllocator, ptr); + } +} +#endif // _VMA_MEMORY_FUNCTIONS + +#ifndef _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS +VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) + : m_pMetadata(VMA_NULL), + m_MemoryTypeIndex(UINT32_MAX), + m_Id(0), + m_hMemory(VK_NULL_HANDLE), + m_MapCount(0), + m_pMappedData(VMA_NULL){} + +VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock() +{ + VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); + VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE); +} + +void VmaDeviceMemoryBlock::Init( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t newMemoryTypeIndex, + VkDeviceMemory newMemory, + VkDeviceSize newSize, + uint32_t id, + uint32_t 
algorithm, + VkDeviceSize bufferImageGranularity) +{ + VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); + + m_hParentPool = hParentPool; + m_MemoryTypeIndex = newMemoryTypeIndex; + m_Id = id; + m_hMemory = newMemory; + + switch (algorithm) + { + case 0: + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), + bufferImageGranularity, false); // isVirtual + break; + case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(), + bufferImageGranularity, false); // isVirtual + break; + default: + VMA_ASSERT(0); + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), + bufferImageGranularity, false); // isVirtual + } + m_pMetadata->Init(newSize); +} + +void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) +{ + // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT + // to receive the list of the unfreed allocations. + if (!m_pMetadata->IsEmpty()) + m_pMetadata->DebugLogAllAllocations(); + // This is the most important assert in the entire library. + // Hitting it means you have some memory leak - unreleased VmaAllocation objects. 
+ VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + + VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE); + allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); + m_hMemory = VK_NULL_HANDLE; + + vma_delete(allocator, m_pMetadata); + m_pMetadata = VMA_NULL; +} + +void VmaDeviceMemoryBlock::PostAlloc(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + m_MappingHysteresis.PostAlloc(); +} + +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if(m_MappingHysteresis.PostFree()) + { + VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); + if (m_MapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + } +} + +bool VmaDeviceMemoryBlock::Validate() const +{ + VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && + (m_pMetadata->GetSize() != 0)); + + return m_pMetadata->Validate(); +} + +VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) +{ + void* pData = VMA_NULL; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + res = m_pMetadata->CheckCorruption(pData); + + Unmap(hAllocator, 1); + + return res; +} + +VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData) +{ + if (count == 0) + { + return VK_SUCCESS; + } + + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (oldTotalMapCount != 0) + { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount += count; + if (ppData != VMA_NULL) + { + *ppData = m_pMappedData; + } + return VK_SUCCESS; + } + else + { + VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( + hAllocator->m_hDevice, + m_hMemory, 
+ 0, // offset + VK_WHOLE_SIZE, + 0, // flags + &m_pMappedData); + if (result == VK_SUCCESS) + { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount = count; + if (ppData != VMA_NULL) + { + *ppData = m_pMappedData; + } + } + return result; + } +} + +void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) +{ + if (count == 0) + { + return; + } + + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if (m_MapCount >= count) + { + m_MapCount -= count; + const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (totalMapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + m_MappingHysteresis.PostUnmap(); + } + else + { + VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); + } +} + +VkResult VmaDeviceMemoryBlock::WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + VmaWriteMagicValue(pData, allocOffset + allocSize); + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + if (!VmaValidateMagicValue(pData, allocOffset + allocSize)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!"); + } + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::BindBufferMemory( + const VmaAllocator hAllocator, + const 
VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); +} + +VkResult VmaDeviceMemoryBlock::BindImageMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. 
+ VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); +} + +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaDeviceMemoryBlock::CreateWin32Handle(const VmaAllocator hAllocator, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ + VMA_ASSERT(pHandle); + return m_Handle.GetHandle(hAllocator->m_hDevice, m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS + +#ifndef _VMA_ALLOCATION_T_FUNCTIONS +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) + : m_Alignment{ 1 }, + m_Size{ 0 }, + m_pUserData{ VMA_NULL }, + m_pName{ VMA_NULL }, + m_MemoryTypeIndex{ 0 }, + m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, + m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, + m_MapCount{ 0 }, + m_Flags{ 0 } +{ + if(mappingAllowed) + m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; +} + +VmaAllocation_T::~VmaAllocation_T() +{ + VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction."); + + // Check if owned string was freed. + VMA_ASSERT(m_pName == VMA_NULL); +} + +void VmaAllocation_T::InitBlockAllocation( + VmaDeviceMemoryBlock* block, + VmaAllocHandle allocHandle, + VkDeviceSize alignment, + VkDeviceSize size, + uint32_t memoryTypeIndex, + VmaSuballocationType suballocationType, + bool mapped) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(block != VMA_NULL); + m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; + m_Alignment = alignment; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + if(mapped) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } + m_SuballocationType = (uint8_t)suballocationType; + m_BlockAllocation.m_Block = block; + m_BlockAllocation.m_AllocHandle = allocHandle; +} + +void VmaAllocation_T::InitDedicatedAllocation( + VmaAllocator allocator, + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceMemory hMemory, + VmaSuballocationType suballocationType, + void* pMappedData, + VkDeviceSize size) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(hMemory != VK_NULL_HANDLE); + m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; + m_Alignment = 0; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + m_SuballocationType = (uint8_t)suballocationType; + m_DedicatedAllocation.m_ExtraData = VMA_NULL; + m_DedicatedAllocation.m_hParentPool = hParentPool; + m_DedicatedAllocation.m_hMemory = hMemory; + m_DedicatedAllocation.m_Prev = VMA_NULL; + m_DedicatedAllocation.m_Next = VMA_NULL; + + if (pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + EnsureExtraData(allocator); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = pMappedData; + } +} + +void VmaAllocation_T::Destroy(VmaAllocator allocator) +{ + FreeName(allocator); + + if (GetType() == ALLOCATION_TYPE_DEDICATED) + { + vma_delete(allocator, m_DedicatedAllocation.m_ExtraData); + } +} + +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) +{ + VMA_ASSERT(pName == VMA_NULL || pName != m_pName); + + FreeName(hAllocator); + + if (pName != VMA_NULL) + m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); +} + +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) +{ + VMA_ASSERT(allocation != VMA_NULL); + VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); + + if (m_MapCount != 0) + m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); + + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); + std::swap(m_BlockAllocation, allocation->m_BlockAllocation); + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); + +#if VMA_STATS_STRING_ENABLED + std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif + return m_MapCount; +} + +VmaAllocHandle VmaAllocation_T::GetAllocHandle() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_AllocHandle; + case ALLOCATION_TYPE_DEDICATED: + return VK_NULL_HANDLE; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +VkDeviceSize VmaAllocation_T::GetOffset() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->m_pMetadata->GetAllocationOffset(m_BlockAllocation.m_AllocHandle); + case ALLOCATION_TYPE_DEDICATED: + return 0; + default: + VMA_ASSERT(0); 
+ return 0; + } +} + +VmaPool VmaAllocation_T::GetParentPool() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->GetParentPool(); + case ALLOCATION_TYPE_DEDICATED: + return m_DedicatedAllocation.m_hParentPool; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +VkDeviceMemory VmaAllocation_T::GetMemory() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->GetDeviceMemory(); + case ALLOCATION_TYPE_DEDICATED: + return m_DedicatedAllocation.m_hMemory; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +void* VmaAllocation_T::GetMappedData() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + if (m_MapCount != 0 || IsPersistentMap()) + { + void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); + VMA_ASSERT(pBlockData != VMA_NULL); + return (char*)pBlockData + GetOffset(); + } + else + { + return VMA_NULL; + } + break; + case ALLOCATION_TYPE_DEDICATED: + VMA_ASSERT((m_DedicatedAllocation.m_ExtraData != VMA_NULL && m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL) == + (m_MapCount != 0 || IsPersistentMap())); + return m_DedicatedAllocation.m_ExtraData != VMA_NULL ? m_DedicatedAllocation.m_ExtraData->m_pMappedData : VMA_NULL; + default: + VMA_ASSERT(0); + return VMA_NULL; + } +} + +void VmaAllocation_T::BlockAllocMap() +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + + if (m_MapCount < 0xFF) + { + ++m_MapCount; + } + else + { + VMA_ASSERT(0 && "Allocation mapped too many times simultaneously."); + } +} + +void VmaAllocation_T::BlockAllocUnmap() +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + + if (m_MapCount > 0) + { + --m_MapCount; + } + else + { + VMA_ASSERT(0 && "Unmapping allocation not previously mapped."); + } +} + +VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + + EnsureExtraData(hAllocator); + + if (m_MapCount != 0 || IsPersistentMap()) + { + if (m_MapCount < 0xFF) + { + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL); + *ppData = m_DedicatedAllocation.m_ExtraData->m_pMappedData; + ++m_MapCount; + return VK_SUCCESS; + } + else + { + VMA_ASSERT(0 && "Dedicated allocation mapped too many times simultaneously."); + return VK_ERROR_MEMORY_MAP_FAILED; + } + } + else + { + VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( + hAllocator->m_hDevice, + m_DedicatedAllocation.m_hMemory, + 0, // offset + VK_WHOLE_SIZE, + 0, // flags + ppData); + if (result == VK_SUCCESS) + { + m_DedicatedAllocation.m_ExtraData->m_pMappedData = *ppData; + m_MapCount = 1; + } + return result; + } +} + +void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + + if (m_MapCount > 0) + { + --m_MapCount; + if (m_MapCount == 0 && !IsPersistentMap()) + { + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData != VMA_NULL); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( + hAllocator->m_hDevice, + m_DedicatedAllocation.m_hMemory); + } + } 
+ else + { + VMA_ASSERT(0 && "Unmapping dedicated allocation not previously mapped."); + } +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const +{ + json.WriteString("Type"); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]); + + json.WriteString("Size"); + json.WriteNumber(m_Size); + json.WriteString("Usage"); + json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t. + + if (m_pUserData != VMA_NULL) + { + json.WriteString("CustomData"); + json.BeginString(); + json.ContinueString_Pointer(m_pUserData); + json.EndString(); + } + if (m_pName != VMA_NULL) + { + json.WriteString("Name"); + json.WriteString(m_pName); + } +} +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaAllocation_T::GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ + auto pvkGetMemoryWin32HandleKHR = hAllocator->GetVulkanFunctions().vkGetMemoryWin32HandleKHR; + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->CreateWin32Handle(hAllocator, pvkGetMemoryWin32HandleKHR, hTargetProcess, pHandle); + case ALLOCATION_TYPE_DEDICATED: + EnsureExtraData(hAllocator); + return m_DedicatedAllocation.m_ExtraData->m_Handle.GetHandle(hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); + default: + VMA_ASSERT(0); + return VK_ERROR_FEATURE_NOT_PRESENT; + } +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // VMA_STATS_STRING_ENABLED + +void VmaAllocation_T::EnsureExtraData(VmaAllocator hAllocator) +{ + if (m_DedicatedAllocation.m_ExtraData == VMA_NULL) + { + m_DedicatedAllocation.m_ExtraData = vma_new(hAllocator, VmaAllocationExtraData)(); + } +} + +void VmaAllocation_T::FreeName(VmaAllocator hAllocator) +{ + if(m_pName) + { + VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName); + m_pName = VMA_NULL; + } +} +#endif // _VMA_ALLOCATION_T_FUNCTIONS + +#ifndef 
_VMA_BLOCK_VECTOR_FUNCTIONS +VmaBlockVector::VmaBlockVector( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceSize preferredBlockSize, + size_t minBlockCount, + size_t maxBlockCount, + VkDeviceSize bufferImageGranularity, + bool explicitBlockSize, + uint32_t algorithm, + float priority, + VkDeviceSize minAllocationAlignment, + void* pMemoryAllocateNext) + : m_hAllocator(hAllocator), + m_hParentPool(hParentPool), + m_MemoryTypeIndex(memoryTypeIndex), + m_PreferredBlockSize(preferredBlockSize), + m_MinBlockCount(minBlockCount), + m_MaxBlockCount(maxBlockCount), + m_BufferImageGranularity(bufferImageGranularity), + m_ExplicitBlockSize(explicitBlockSize), + m_Algorithm(algorithm), + m_Priority(priority), + m_MinAllocationAlignment(minAllocationAlignment), + m_pMemoryAllocateNext(pMemoryAllocateNext), + m_Blocks(VmaStlAllocator<VmaDeviceMemoryBlock*>(hAllocator->GetAllocationCallbacks())), + m_NextBlockId(0) {} + +VmaBlockVector::~VmaBlockVector() +{ + for (size_t i = m_Blocks.size(); i--; ) + { + m_Blocks[i]->Destroy(m_hAllocator); + vma_delete(m_hAllocator, m_Blocks[i]); + } +} + +VkResult VmaBlockVector::CreateMinBlocks() +{ + for (size_t i = 0; i < m_MinBlockCount; ++i) + { + VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL); + if (res != VK_SUCCESS) + { + return res; + } + } + return VK_SUCCESS; +} + +void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + const size_t blockCount = m_Blocks.size(); + for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) + { + const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VMA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddStatistics(inoutStats); + } +} + +void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + const size_t blockCount = m_Blocks.size(); + for (uint32_t blockIndex
= 0; blockIndex < blockCount; ++blockIndex) + { + const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VMA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); + } +} + +bool VmaBlockVector::IsEmpty() +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + return m_Blocks.empty(); +} + +bool VmaBlockVector::IsCorruptionDetectionEnabled() const +{ + const uint32_t requiredMemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + return (VMA_DEBUG_DETECT_CORRUPTION != 0) && + (VMA_DEBUG_MARGIN > 0) && + (m_Algorithm == 0 || m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) && + (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & requiredMemFlags) == requiredMemFlags; +} + +VkResult VmaBlockVector::Allocate( + VkDeviceSize size, + VkDeviceSize alignment, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + size_t allocationCount, + VmaAllocation* pAllocations) +{ + size_t allocIndex; + VkResult res = VK_SUCCESS; + + alignment = VMA_MAX(alignment, m_MinAllocationAlignment); + + if (IsCorruptionDetectionEnabled()) + { + size = VmaAlignUp(size, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); + alignment = VmaAlignUp(alignment, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE)); + } + + { + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + res = AllocatePage( + size, + alignment, + createInfo, + suballocType, + pAllocations + allocIndex); + if (res != VK_SUCCESS) + { + break; + } + } + } + + if (res != VK_SUCCESS) + { + // Free all already created allocations. 
+ while (allocIndex--) + Free(pAllocations[allocIndex]); + memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + } + + return res; +} + +VkResult VmaBlockVector::AllocatePage( + VkDeviceSize size, + VkDeviceSize alignment, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation) +{ + const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + + VkDeviceSize freeMemory; + { + const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); + VmaBudget heapBudget = {}; + m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); + freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0; + } + + const bool canFallbackToDedicated = !HasExplicitBlockSize() && + (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0; + const bool canCreateNewBlock = + ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) && + (m_Blocks.size() < m_MaxBlockCount) && + (freeMemory >= size || !canFallbackToDedicated); + uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK; + + // Upper address can only be used with linear allocator and within single memory block. + if (isUpperAddress && + (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT || m_MaxBlockCount > 1)) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + // Early reject: requested allocation size is larger than maximum block size for this block vector. + if (size + VMA_DEBUG_MARGIN > m_PreferredBlockSize) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + + // 1. Search existing allocations. Try to allocate. + if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + { + // Use only last block.
+ if (!m_Blocks.empty()) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from last block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + else + { + if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default + { + const bool isHostVisible = + (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + if(isHostVisible) + { + const bool isMappingAllowed = (createInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + /* + For non-mappable allocations, check blocks that are not mapped first. + For mappable allocations, check blocks that are already mapped first. + This way, having many blocks, we will separate mappable and non-mappable allocations, + hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. + */ + for(size_t mappingI = 0; mappingI < 2; ++mappingI) + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. 
+ for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; + if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) + { + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + } + else + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + else // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT + { + // Backward order in m_Blocks - prefer blocks with largest amount of free space. + for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + + // 2. Try to create new block. + if (canCreateNewBlock) + { + // Calculate optimal size for new block. 
+ VkDeviceSize newBlockSize = m_PreferredBlockSize; + uint32_t newBlockSizeShift = 0; + const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3; + + if (!m_ExplicitBlockSize) + { + // Allocate 1/8, 1/4, 1/2 as first blocks. + const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize(); + for (uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) + { + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + } + else + { + break; + } + } + } + + size_t newBlockIndex = 0; + VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. + if (!m_ExplicitBlockSize) + { + while (res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) + { + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize >= size) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + else + { + break; + } + } + } + + if (res == VK_SUCCESS) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; + VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + + res = AllocateFromBlock( + pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + else + { + // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment. 
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + } + + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaBlockVector::Free(const VmaAllocation hAllocation) +{ + VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; + + bool budgetExceeded = false; + { + const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); + VmaBudget heapBudget = {}; + m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); + budgetExceeded = heapBudget.usage >= heapBudget.budget; + } + + // Scope for lock. + { + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + + VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); + + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->ValidateMagicValueAfterAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize()); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to validate magic value."); + } + + if (hAllocation->IsPersistentMap()) + { + pBlock->Unmap(m_hAllocator, 1); + } + + const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); + pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + pBlock->PostFree(m_hAllocator); + VMA_HEAVY_ASSERT(pBlock->Validate()); + + VMA_DEBUG_LOG_FORMAT(" Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex); + + const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; + // pBlock became empty after this deallocation. + if (pBlock->m_pMetadata->IsEmpty()) + { + // Already had empty block. We don't want to have two, so delete this one. + if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) + { + pBlockToDelete = pBlock; + Remove(pBlock); + } + // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. + } + // pBlock didn't become empty, but we have another empty block - find and free that one. + // (This is optional, heuristics.) 
+ else if (hadEmptyBlockBeforeFree && canDeleteBlock) + { + VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); + if (pLastBlock->m_pMetadata->IsEmpty()) + { + pBlockToDelete = pLastBlock; + m_Blocks.pop_back(); + } + } + + IncrementallySortBlocks(); + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + hAllocation->Destroy(m_hAllocator); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); + } + + // Destruction of a free block. Deferred until this point, outside of mutex + // lock, for performance reason. + if (pBlockToDelete != VMA_NULL) + { + VMA_DEBUG_LOG_FORMAT(" Deleted empty block #%" PRIu32, pBlockToDelete->GetId()); + pBlockToDelete->Destroy(m_hAllocator); + vma_delete(m_hAllocator, pBlockToDelete); + } +} + +VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const +{ + VkDeviceSize result = 0; + for (size_t i = m_Blocks.size(); i--; ) + { + result = VMA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); + if (result >= m_PreferredBlockSize) + { + break; + } + } + return result; +} + +void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) +{ + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + if (m_Blocks[blockIndex] == pBlock) + { + VmaVectorRemove(m_Blocks, blockIndex); + return; + } + } + VMA_ASSERT(0); +} + +void VmaBlockVector::IncrementallySortBlocks() +{ + if (!m_IncrementalSort) + return; + if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + { + // Bubble sort only until first swap. 
+ for (size_t i = 1; i < m_Blocks.size(); ++i) + { + if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) + { + std::swap(m_Blocks[i - 1], m_Blocks[i]); + return; + } + } + } +} + +void VmaBlockVector::SortByFreeSize() +{ + VMA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + +VkResult VmaBlockVector::AllocateFromBlock( + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize size, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + uint32_t strategy, + VmaAllocation* pAllocation) +{ + const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + + VmaAllocationRequest currRequest = {}; + if (pBlock->m_pMetadata->CreateAllocationRequest( + size, + alignment, + isUpperAddress, + suballocType, + strategy, + &currRequest)) + { + return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); + } + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +VkResult VmaBlockVector::CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation) +{ + const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; + const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + const bool isMappingAllowed = (allocFlags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + + pBlock->PostAlloc(m_hAllocator); + // Allocate from pCurrBlock. 
+ if (mapped) + { + VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); + if (res != VK_SUCCESS) + { + return res; + } + } + + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed); + pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); + (*pAllocation)->InitBlockAllocation( + pBlock, + allocRequest.allocHandle, + alignment, + allocRequest.size, // Not size, as actual allocation size may be larger than requested! + m_MemoryTypeIndex, + suballocType, + mapped); + VMA_HEAVY_ASSERT(pBlock->Validate()); + if (isUserDataString) + (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData); + else + (*pAllocation)->SetUserData(m_hAllocator, pUserData); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); + if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); + } + return VK_SUCCESS; +} + +VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) +{ + VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + allocInfo.pNext = m_pMemoryAllocateNext; + allocInfo.memoryTypeIndex = m_MemoryTypeIndex; + allocInfo.allocationSize = blockSize; + +#if VMA_BUFFER_DEVICE_ADDRESS + // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature. 
+ VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; + if (m_hAllocator->m_UseKhrBufferDeviceAddress) + { + allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); + } +#endif // VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + if (m_hAllocator->m_UseExtMemoryPriority) + { + VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f); + priorityInfo.priority = m_Priority; + VmaPnextChainPushFront(&allocInfo, &priorityInfo); + } +#endif // VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY + // Attach VkExportMemoryAllocateInfoKHR if necessary. + VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; + exportMemoryAllocInfo.handleTypes = m_hAllocator->GetExternalMemoryHandleTypeFlags(m_MemoryTypeIndex); + if (exportMemoryAllocInfo.handleTypes != 0) + { + VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); + } +#endif // VMA_EXTERNAL_MEMORY + + VkDeviceMemory mem = VK_NULL_HANDLE; + VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem); + if (res < 0) + { + return res; + } + + // New VkDeviceMemory successfully created. + + // Create new Allocation for it. 
+ VmaDeviceMemoryBlock* const pBlock = vma_new(m_hAllocator, VmaDeviceMemoryBlock)(m_hAllocator); + pBlock->Init( + m_hAllocator, + m_hParentPool, + m_MemoryTypeIndex, + mem, + allocInfo.allocationSize, + m_NextBlockId++, + m_Algorithm, + m_BufferImageGranularity); + + m_Blocks.push_back(pBlock); + if (pNewBlockIndex != VMA_NULL) + { + *pNewBlockIndex = m_Blocks.size() - 1; + } + + return VK_SUCCESS; +} + +bool VmaBlockVector::HasEmptyBlock() +{ + for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; + if (pBlock->m_pMetadata->IsEmpty()) + { + return true; + } + } + return false; +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + + json.BeginObject(); + for (size_t i = 0; i < m_Blocks.size(); ++i) + { + json.BeginString(); + json.ContinueString(m_Blocks[i]->GetId()); + json.EndString(); + + json.BeginObject(); + json.WriteString("MapRefCount"); + json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + json.EndObject(); + } + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED + +VkResult VmaBlockVector::CheckCorruption() +{ + if (!IsCorruptionDetectionEnabled()) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VkResult res = pBlock->CheckCorruption(m_hAllocator); + if (res != VK_SUCCESS) + { + return res; + } + } + return VK_SUCCESS; +} + +#endif // _VMA_BLOCK_VECTOR_FUNCTIONS + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +VmaDefragmentationContext_T::VmaDefragmentationContext_T( + VmaAllocator hAllocator, + const VmaDefragmentationInfo& info) + : m_MaxPassBytes(info.maxBytesPerPass == 0 ? 
VK_WHOLE_SIZE : info.maxBytesPerPass), + m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? UINT32_MAX : info.maxAllocationsPerPass), + m_BreakCallback(info.pfnBreakCallback), + m_BreakCallbackUserData(info.pBreakCallbackUserData), + m_MoveAllocator(hAllocator->GetAllocationCallbacks()), + m_Moves(m_MoveAllocator) +{ + m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK; + + if (info.pool != VMA_NULL) + { + m_BlockVectorCount = 1; + m_PoolBlockVector = &info.pool->m_BlockVector; + m_pBlockVectors = &m_PoolBlockVector; + m_PoolBlockVector->SetIncrementalSort(false); + m_PoolBlockVector->SortByFreeSize(); + } + else + { + m_BlockVectorCount = hAllocator->GetMemoryTypeCount(); + m_PoolBlockVector = VMA_NULL; + m_pBlockVectors = hAllocator->m_pBlockVectors; + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + { + vector->SetIncrementalSort(false); + vector->SortByFreeSize(); + } + } + } + + switch (m_Algorithm) + { + case 0: // Default algorithm + m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + if (hAllocator->GetBufferImageGranularity() > 1) + { + m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); + } + break; + } +} + +VmaDefragmentationContext_T::~VmaDefragmentationContext_T() +{ + if (m_PoolBlockVector != VMA_NULL) + { + m_PoolBlockVector->SetIncrementalSort(true); + } + else + { + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + VmaBlockVector* vector = m_pBlockVectors[i]; + if (vector != VMA_NULL) + vector->SetIncrementalSort(true); + } + } + + if (m_AlgorithmState) + { + switch (m_Algorithm) + { + case 
VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateBalanced*>(m_AlgorithmState), m_BlockVectorCount); + break; + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateExtensive*>(m_AlgorithmState), m_BlockVectorCount); + break; + default: + VMA_ASSERT(0); + } + } +} + +VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo) +{ + if (m_PoolBlockVector != VMA_NULL) + { + VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex); + + if (m_PoolBlockVector->GetBlockCount() > 1) + ComputeDefragmentation(*m_PoolBlockVector, 0); + else if (m_PoolBlockVector->GetBlockCount() == 1) + ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0)); + } + else + { + for (uint32_t i = 0; i < m_BlockVectorCount; ++i) + { + if (m_pBlockVectors[i] != VMA_NULL) + { + VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->GetAllocator()->m_UseMutex); + + if (m_pBlockVectors[i]->GetBlockCount() > 1) + { + if (ComputeDefragmentation(*m_pBlockVectors[i], i)) + break; + } + else if (m_pBlockVectors[i]->GetBlockCount() == 1) + { + if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0))) + break; + } + } + } + } + + moveInfo.moveCount = static_cast<uint32_t>(m_Moves.size()); + if (moveInfo.moveCount > 0) + { + moveInfo.pMoves = m_Moves.data(); + return VK_INCOMPLETE; + } + + moveInfo.pMoves = VMA_NULL; + return VK_SUCCESS; +} + +VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo) +{ + VMA_ASSERT(moveInfo.moveCount > 0 ?
moveInfo.pMoves != VMA_NULL : true); + + VkResult result = VK_SUCCESS; + VmaStlAllocator<FragmentedBlock> blockAllocator(m_MoveAllocator.m_pCallbacks); + VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> immovableBlocks(blockAllocator); + VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> mappedBlocks(blockAllocator); + + VmaAllocator allocator = VMA_NULL; + for (uint32_t i = 0; i < moveInfo.moveCount; ++i) + { + VmaDefragmentationMove& move = moveInfo.pMoves[i]; + size_t prevCount = 0, currentCount = 0; + VkDeviceSize freedBlockSize = 0; + + uint32_t vectorIndex; + VmaBlockVector* vector; + if (m_PoolBlockVector != VMA_NULL) + { + vectorIndex = 0; + vector = m_PoolBlockVector; + } + else + { + vectorIndex = move.srcAllocation->GetMemoryTypeIndex(); + vector = m_pBlockVectors[vectorIndex]; + VMA_ASSERT(vector != VMA_NULL); + } + + switch (move.operation) + { + case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY: + { + uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation); + if (mapCount > 0) + { + allocator = vector->m_hAllocator; + VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock(); + bool notPresent = true; + for (FragmentedBlock& block : mappedBlocks) + { + if (block.block == newMapBlock) + { + notPresent = false; + block.data += mapCount; + break; + } + } + if (notPresent) + mappedBlocks.push_back({ mapCount, newMapBlock }); + } + + // Scope for locks, Free has its own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } + + result = VK_INCOMPLETE; + break; + } + case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE: + { + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + vector->Free(move.dstTmpAllocation); + + VmaDeviceMemoryBlock*
newBlock = move.srcAllocation->GetBlock(); + bool notPresent = true; + for (const FragmentedBlock& block : immovableBlocks) + { + if (block.block == newBlock) + { + notPresent = false; + break; + } + } + if (notPresent) + immovableBlocks.push_back({ vectorIndex, newBlock }); + break; + } + case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY: + { + m_PassStats.bytesMoved -= move.srcAllocation->GetSize(); + --m_PassStats.allocationsMoved; + // Scope for locks, Free has its own lock + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + prevCount = vector->GetBlockCount(); + freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.srcAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + currentCount = vector->GetBlockCount(); + } + freedBlockSize *= prevCount - currentCount; + + VkDeviceSize dstBlockSize; + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize(); + } + vector->Free(move.dstTmpAllocation); + { + VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount()); + currentCount = vector->GetBlockCount(); + } + + result = VK_INCOMPLETE; + break; + } + default: + VMA_ASSERT(0); + } + + if (prevCount > currentCount) + { + size_t freedBlocks = prevCount - currentCount; + m_PassStats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks); + m_PassStats.bytesFreed += freedBlockSize; + } + + if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT && + m_AlgorithmState != VMA_NULL) + { + // Avoid unnecessary tries to allocate when new free block is available + StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex]; + if (state.firstFreeBlock != SIZE_MAX) + { + const size_t diff = prevCount - currentCount; + if (state.firstFreeBlock >=
diff) + { + state.firstFreeBlock -= diff; + if (state.firstFreeBlock != 0) + state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); + } + else + state.firstFreeBlock = 0; + } + } + } + moveInfo.moveCount = 0; + moveInfo.pMoves = VMA_NULL; + m_Moves.clear(); + + // Update stats + m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved; + m_GlobalStats.bytesFreed += m_PassStats.bytesFreed; + m_GlobalStats.bytesMoved += m_PassStats.bytesMoved; + m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed; + m_PassStats = { 0 }; + + // Move blocks with immovable allocations according to algorithm + if (immovableBlocks.size() > 0) + { + do + { + if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT) + { + if (m_AlgorithmState != VMA_NULL) + { + bool swapped = false; + // Move to the start of free blocks range + for (const FragmentedBlock& block : immovableBlocks) + { + StateExtensive& state = reinterpret_cast(m_AlgorithmState)[block.data]; + if (state.operation != StateExtensive::Operation::Cleanup) + { + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + + for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) + { + if (vector->GetBlock(i) == block.block) + { + std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); + if (state.firstFreeBlock != SIZE_MAX) + { + if (i + 1 < state.firstFreeBlock) + { + if (state.firstFreeBlock > 1) + std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); + else + --state.firstFreeBlock; + } + } + swapped = true; + break; + } + } + } + } + if (swapped) + result = VK_INCOMPLETE; + break; + } + } + + // Move to the beginning + for (const FragmentedBlock& block : immovableBlocks) + { + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), 
vector->GetAllocator()->m_UseMutex); + + for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) + { + if (vector->GetBlock(i) == block.block) + { + std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); + break; + } + } + } + } while (false); + } + + // Bulk-map destination blocks + for (const FragmentedBlock& block : mappedBlocks) + { + VkResult res = block.block->Map(allocator, block.data, VMA_NULL); + VMA_ASSERT(res == VK_SUCCESS); + } + return result; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index) +{ + switch (m_Algorithm) + { + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: + return ComputeDefragmentation_Fast(vector); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + return ComputeDefragmentation_Balanced(vector, index, true); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: + return ComputeDefragmentation_Full(vector); + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: + return ComputeDefragmentation_Extensive(vector, index); + default: + VMA_ASSERT(0); + return ComputeDefragmentation_Balanced(vector, index, true); + } +} + +VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData( + VmaAllocHandle handle, VmaBlockMetadata* metadata) +{ + MoveAllocationData moveData; + moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle); + moveData.size = moveData.move.srcAllocation->GetSize(); + moveData.alignment = moveData.move.srcAllocation->GetAlignment(); + moveData.type = moveData.move.srcAllocation->GetSuballocationType(); + moveData.flags = 0; + + if (moveData.move.srcAllocation->IsPersistentMap()) + moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + if (moveData.move.srcAllocation->IsMappingAllowed()) + moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + + return moveData; +} + 
+// Decides whether an allocation of `bytes` may be moved in the current pass.
+// Returns End when the break callback asks to stop, Pass when the allocation
+// fits under m_MaxPassBytes (which also resets the ignored-allocation counter),
+// and otherwise Ignore until the counter reaches MAX_ALLOCS_TO_IGNORE, at which
+// point it returns End.
+VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes)
+{
+    // The user-supplied break callback, if any, is consulted first.
+    if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData))
+        return CounterStatus::End;
+
+    const bool fitsInPass = m_PassStats.bytesMoved + bytes <= m_MaxPassBytes;
+    if (fitsInPass)
+    {
+        m_IgnoredAllocs = 0;
+        return CounterStatus::Pass;
+    }
+
+    // Too large for the remaining byte budget: skip it, but give up after too many skips.
+    return (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) ? CounterStatus::Ignore : CounterStatus::End;
+}
+
+// Accounts for one more planned move of `bytes` bytes in the pass statistics.
+// Returns true once either per-pass limit (allocation count or bytes) has been
+// reached, false otherwise.
+bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes)
+{
+    m_PassStats.bytesMoved += bytes;
+    ++m_PassStats.allocationsMoved;
+
+    const bool limitReached =
+        m_PassStats.allocationsMoved >= m_MaxPassAllocations ||
+        m_PassStats.bytesMoved >= m_MaxPassBytes;
+    if (!limitReached)
+        return false;
+
+    VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations ||
+        m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!");
+    return true;
+}
+
+// Tries to move every allocation of `block` to a lower offset inside the same
+// block. Each successful placement is appended to m_Moves. Returns true when
+// the pass must stop (CheckCounters reported End or IncrementCounters hit a
+// limit), false after all allocations of the block have been visited.
+bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block)
+{
+    VmaBlockMetadata* metadata = block->m_pMetadata;
+
+    for (VmaAllocHandle handle = metadata->GetAllocationListBegin();
+        handle != VK_NULL_HANDLE;
+        handle = metadata->GetNextAllocation(handle))
+    {
+        MoveAllocationData moveData = GetMoveData(handle, metadata);
+
+        // Allocations created by this defragmentation context are temporary destinations; skip them.
+        if (moveData.move.srcAllocation->GetUserData() == this)
+            continue;
+
+        switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+        {
+        case CounterStatus::Ignore:
+            continue;
+        case CounterStatus::End:
+            return true;
+        case CounterStatus::Pass:
+            break;
+        default:
+            VMA_ASSERT(0);
+        }
+
+        // Nothing to gain at offset 0, and nothing possible without enough free space.
+        const VkDeviceSize offset = moveData.move.srcAllocation->GetOffset();
+        if (offset == 0 || metadata->GetSumFreeSize() < moveData.size)
+            continue;
+
+        VmaAllocationRequest request = {};
+        const bool requestCreated = metadata->CreateAllocationRequest(
+            moveData.size,
+            moveData.alignment,
+            false,
+            moveData.type,
+            VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+            &request);
+        if (!requestCreated)
+            continue;
+
+        // Only accept a placement that actually lowers the offset.
+        if (metadata->GetAllocationOffset(request.allocHandle) >= offset)
+            continue;
+
+        const VkResult commitResult = vector.CommitAllocationRequest(
+            request,
+            block,
+            moveData.alignment,
+            moveData.flags,
+            this,
+            moveData.type,
+            &moveData.move.dstTmpAllocation);
+        if (commitResult != VK_SUCCESS)
+            continue;
+
+        m_Moves.push_back(moveData.move);
+        if (IncrementCounters(moveData.size))
+            return true;
+    }
+    return false;
+}
+
+// Tries to place the allocation described by `data` into one of the blocks
+// with index in [start, end) of `vector`, taking the first block where the
+// allocation succeeds. On success the move is appended to m_Moves. Returns
+// true only when that move made IncrementCounters report a reached limit.
+bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector)
+{
+    for (size_t blockIndex = start; blockIndex < end; ++blockIndex)
+    {
+        VmaDeviceMemoryBlock* dstBlock = vector.GetBlock(blockIndex);
+        if (dstBlock->m_pMetadata->GetSumFreeSize() < data.size)
+            continue;
+
+        const VkResult allocResult = vector.AllocateFromBlock(dstBlock,
+            data.size,
+            data.alignment,
+            data.flags,
+            this,
+            data.type,
+            0,
+            &data.move.dstTmpAllocation);
+        if (allocResult != VK_SUCCESS)
+            continue;
+
+        // First successful placement ends the search.
+        m_Moves.push_back(data.move);
+        return IncrementCounters(data.size);
+    }
+    return false;
+}
+
+bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector)
+{
+    // Move only between blocks
+
+    // Go through allocations in last blocks and try to fit them inside first ones
+    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i)
+    {
+        VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata;
+
+        for (VmaAllocHandle handle = metadata->GetAllocationListBegin();
+            handle != VK_NULL_HANDLE;
+            handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.srcAllocation->GetUserData() == this)
+                continue;
+            switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            case CounterStatus::Pass:
+                break;
+            default:
+                VMA_ASSERT(0);
+            }
+
+            // Check all previous blocks for free
space + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0), + // but only if there are noticeable gaps between them (some heuristic, ex. average size of allocation in block) + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateBalanced& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + if (update && vectorState.avgAllocSize == UINT64_MAX) + UpdateVectorStatistics(vector, vectorState); + + const size_t startMoveCount = m_Moves.size(); + VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + VkDeviceSize prevFreeRegionSize = 0; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); + // If no room found then realloc within block for lower offset + VkDeviceSize offset = 
moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + // Check if realloc will make sense + if (prevFreeRegionSize >= minimalFreeRegion || + nextFreeRegionSize >= minimalFreeRegion || + moveData.size <= vectorState.avgFreeSize || + moveData.size <= vectorState.avgAllocSize) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + prevFreeRegionSize = nextFreeRegionSize; + } + } + + // No moves performed, update statistics to current vector state + if (startMoveCount == m_Moves.size() && !update) + { + vectorState.avgAllocSize = UINT64_MAX; + return ComputeDefragmentation_Balanced(vector, index, false); + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0) + + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if 
(moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index) +{ + // First free single block, then populate it to the brim, then free another block, and so on + + // Fallback to previous algorithm since without granularity conflicts it can achieve max packing + if (vector.m_BufferImageGranularity == 1) + return ComputeDefragmentation_Full(vector); + + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateExtensive& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + + bool texturePresent = false, bufferPresent = false, otherPresent = false; + switch (vectorState.operation) + { + case StateExtensive::Operation::Done: // Vector defragmented + return false; + case 
StateExtensive::Operation::FindFreeBlockBuffer: + case StateExtensive::Operation::FindFreeBlockTexture: + case StateExtensive::Operation::FindFreeBlockAll: + { + // No more blocks to free, just perform fast realloc and move to cleanup + if (vectorState.firstFreeBlock == 0) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + return ComputeDefragmentation_Fast(vector); + } + + // No free blocks, have to clear last one + size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; + VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; + + const size_t prevMoveCount = m_Moves.size(); + for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = freeMetadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, freeMetadata); + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, last, moveData, vector)) + { + // Full clear performed already + if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) + vectorState.firstFreeBlock = last; + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // Cannot perform full clear, have to move data in other blocks around + if (last != 0) + { + for (size_t i = last - 1; i; --i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // No possible reallocs within blocks, try to move them around fast + return ComputeDefragmentation_Fast(vector); + } + } + else + { + switch (vectorState.operation) + { + case StateExtensive::Operation::FindFreeBlockBuffer: + vectorState.operation = 
StateExtensive::Operation::MoveBuffers; + break; + case StateExtensive::Operation::FindFreeBlockTexture: + vectorState.operation = StateExtensive::Operation::MoveTextures; + break; + case StateExtensive::Operation::FindFreeBlockAll: + vectorState.operation = StateExtensive::Operation::MoveAll; + break; + default: + VMA_ASSERT(0); + vectorState.operation = StateExtensive::Operation::MoveTextures; + } + vectorState.firstFreeBlock = last; + // Nothing done, block found without reallocations, can perform another reallocs in same pass + return ComputeDefragmentation_Extensive(vector, index); + } + break; + } + case StateExtensive::Operation::MoveTextures: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (texturePresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!bufferPresent && !otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more textures to move, check buffers + vectorState.operation = StateExtensive::Operation::MoveBuffers; + bufferPresent = false; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case StateExtensive::Operation::MoveBuffers: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (bufferPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more buffers to move, check all others + vectorState.operation = StateExtensive::Operation::MoveAll; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case 
StateExtensive::Operation::MoveAll: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (otherPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + // Everything moved + vectorState.operation = StateExtensive::Operation::Cleanup; + } + break; + } + case StateExtensive::Operation::Cleanup: + // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062). + break; + } + + if (vectorState.operation == StateExtensive::Operation::Cleanup) + { + // All other work done, pack data in blocks even tighter if possible + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + + if (prevMoveCount == m_Moves.size()) + vectorState.operation = StateExtensive::Operation::Done; + } + return false; +} + +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} + +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ + const size_t prevMoveCount = 
m_Moves.size(); + for (size_t i = firstFreeBlock ; i;) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(--i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Move only single type of resources at once + if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) + { + // Try to fit allocation into free blocks + if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) + return false; + } + + if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) + texturePresent = true; + else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) + bufferPresent = true; + else + otherPresent = true; + } + } + return prevMoveCount == m_Moves.size(); +} +#endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS + +#ifndef _VMA_POOL_T_FUNCTIONS +VmaPool_T::VmaPool_T( + VmaAllocator hAllocator, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize) + : m_BlockVector( + hAllocator, + this, // hParentPool + createInfo.memoryTypeIndex, + createInfo.blockSize != 0 ? createInfo.blockSize : preferredBlockSize, + createInfo.minBlockCount, + createInfo.maxBlockCount, + (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 
1 : hAllocator->GetBufferImageGranularity(), + createInfo.blockSize != 0, // explicitBlockSize + createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm + createInfo.priority, + VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment), + createInfo.pMemoryAllocateNext), + m_Id(0), + m_Name(VMA_NULL) {} + +VmaPool_T::~VmaPool_T() +{ + VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL); + + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); +} + +void VmaPool_T::SetName(const char* pName) +{ + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); + + if (pName != VMA_NULL) + { + m_Name = VmaCreateStringCopy(allocs, pName); + } + else + { + m_Name = VMA_NULL; + } +} +#endif // _VMA_POOL_T_FUNCTIONS + +#ifndef _VMA_ALLOCATOR_T_FUNCTIONS +VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : + m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0), + m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? 
pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0), + m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0), + m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0), + m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0), + m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), + m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0), + m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), + m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), + m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), + m_UseKhrExternalMemoryWin32((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT) != 0), + m_hDevice(pCreateInfo->device), + m_hInstance(pCreateInfo->instance), + m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), + m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ? + *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks), + m_AllocationObjectAllocator(&m_AllocationCallbacks), + m_HeapSizeLimitMask(0), + m_DeviceMemoryCount(0), + m_PreferredLargeHeapBlockSize(0), + m_PhysicalDevice(pCreateInfo->physicalDevice), + m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), + m_NextPoolId(0), + m_GlobalMemoryTypeBits(UINT32_MAX) +{ + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_UseKhrDedicatedAllocation = false; + m_UseKhrBindMemory2 = false; + } + + if(VMA_DEBUG_DETECT_CORRUPTION) + { + // Needs to be multiply of uint32_t size because we are going to write VMA_CORRUPTION_DETECTION_MAGIC_VALUE to it. 
+ VMA_ASSERT(VMA_DEBUG_MARGIN % sizeof(uint32_t) == 0); + } + + VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device && pCreateInfo->instance); + + if(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0)) + { +#if !(VMA_DEDICATED_ALLOCATION) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros."); + } +#endif +#if !(VMA_BIND_MEMORY2) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros."); + } +#endif + } +#if !(VMA_MEMORY_BUDGET) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros."); + } +#endif +#if !(VMA_BUFFER_DEVICE_ADDRESS) + if(m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if VMA_VULKAN_VERSION < 1004000 + VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 4, 0) && "vulkanApiVersion >= VK_API_VERSION_1_4 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1003000 + VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 3, 0) && "vulkanApiVersion >= VK_API_VERSION_1_3 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1002000 + VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 2, 0) && "vulkanApiVersion >= VK_API_VERSION_1_2 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if VMA_VULKAN_VERSION < 1001000 + VMA_ASSERT(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 
0) && "vulkanApiVersion >= VK_API_VERSION_1_1 but required Vulkan version is disabled by preprocessor macros."); +#endif +#if !(VMA_MEMORY_PRIORITY) + if(m_UseExtMemoryPriority) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if !(VMA_KHR_MAINTENANCE4) + if(m_UseKhrMaintenance4) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if !(VMA_KHR_MAINTENANCE5) + if(m_UseKhrMaintenance5) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if !(VMA_KHR_MAINTENANCE5) + if(m_UseKhrMaintenance5) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif + +#if !(VMA_EXTERNAL_MEMORY_WIN32) + if(m_UseKhrExternalMemoryWin32) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif + + memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); + memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); + memset(&m_MemProps, 0, sizeof(m_MemProps)); + + memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors)); + memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions)); + +#if VMA_EXTERNAL_MEMORY + memset(&m_TypeExternalMemoryHandleTypes, 0, sizeof(m_TypeExternalMemoryHandleTypes)); +#endif // #if VMA_EXTERNAL_MEMORY + + 
if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL) + { + m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData; + m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate; + m_DeviceMemoryCallbacks.pfnFree = pCreateInfo->pDeviceMemoryCallbacks->pfnFree; + } + + ImportVulkanFunctions(pCreateInfo->pVulkanFunctions); + + (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties); + (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps); + + VMA_ASSERT(VmaIsPow2(VMA_MIN_ALIGNMENT)); + VMA_ASSERT(VmaIsPow2(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY)); + VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.bufferImageGranularity)); + VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.nonCoherentAtomSize)); + + m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ? + pCreateInfo->preferredLargeHeapBlockSize : static_cast(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); + + m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY + if(pCreateInfo->pTypeExternalMemoryHandleTypes != VMA_NULL) + { + memcpy(m_TypeExternalMemoryHandleTypes, pCreateInfo->pTypeExternalMemoryHandleTypes, + sizeof(VkExternalMemoryHandleTypeFlagsKHR) * GetMemoryTypeCount()); + } +#endif // #if VMA_EXTERNAL_MEMORY + + if(pCreateInfo->pHeapSizeLimit != VMA_NULL) + { + for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) + { + const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex]; + if(limit != VK_WHOLE_SIZE) + { + m_HeapSizeLimitMask |= 1u << heapIndex; + if(limit < m_MemProps.memoryHeaps[heapIndex].size) + { + m_MemProps.memoryHeaps[heapIndex].size = limit; + } + } + } + } + + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + // Create only supported types + if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) + { + const VkDeviceSize 
preferredBlockSize = CalcPreferredBlockSize(memTypeIndex); + m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)( + this, + VK_NULL_HANDLE, // hParentPool + memTypeIndex, + preferredBlockSize, + 0, + SIZE_MAX, + GetBufferImageGranularity(), + false, // explicitBlockSize + 0, // algorithm + 0.5f, // priority (0.5 is the default per Vulkan spec) + GetMemoryTypeMinAlignment(memTypeIndex), // minAllocationAlignment + VMA_NULL); // // pMemoryAllocateNext + // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, + // because minBlockCount is 0. + } + } +} + +VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) +{ + VkResult res = VK_SUCCESS; + +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + UpdateVulkanBudget(); + } +#endif // #if VMA_MEMORY_BUDGET + + return res; +} + +VmaAllocator_T::~VmaAllocator_T() +{ + VMA_ASSERT(m_Pools.IsEmpty()); + + for(size_t memTypeIndex = GetMemoryTypeCount(); memTypeIndex--; ) + { + vma_delete(this, m_pBlockVectors[memTypeIndex]); + } +} + +void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) +{ +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + ImportVulkanFunctions_Static(); +#endif + + if(pVulkanFunctions != VMA_NULL) + { + ImportVulkanFunctions_Custom(pVulkanFunctions); + } + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + ImportVulkanFunctions_Dynamic(); +#endif + + ValidateVulkanFunctions(); +} + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Static() +{ + // Vulkan 1.0 + m_VulkanFunctions.vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr; + m_VulkanFunctions.vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetDeviceProcAddr; + m_VulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties; + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties; + 
m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; + m_VulkanFunctions.vkFreeMemory = (PFN_vkFreeMemory)vkFreeMemory; + m_VulkanFunctions.vkMapMemory = (PFN_vkMapMemory)vkMapMemory; + m_VulkanFunctions.vkUnmapMemory = (PFN_vkUnmapMemory)vkUnmapMemory; + m_VulkanFunctions.vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkFlushMappedMemoryRanges; + m_VulkanFunctions.vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkInvalidateMappedMemoryRanges; + m_VulkanFunctions.vkBindBufferMemory = (PFN_vkBindBufferMemory)vkBindBufferMemory; + m_VulkanFunctions.vkBindImageMemory = (PFN_vkBindImageMemory)vkBindImageMemory; + m_VulkanFunctions.vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetBufferMemoryRequirements; + m_VulkanFunctions.vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetImageMemoryRequirements; + m_VulkanFunctions.vkCreateBuffer = (PFN_vkCreateBuffer)vkCreateBuffer; + m_VulkanFunctions.vkDestroyBuffer = (PFN_vkDestroyBuffer)vkDestroyBuffer; + m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage; + m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage; + m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer; + + // Vulkan 1.1 +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2)vkGetBufferMemoryRequirements2; + m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; + m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; + m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = 
(PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements; + m_VulkanFunctions.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements; + } +#endif +} + +#endif // VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) +{ + VMA_ASSERT(pVulkanFunctions != VMA_NULL); + +#define VMA_COPY_IF_NOT_NULL(funcName) \ + if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; + + VMA_COPY_IF_NOT_NULL(vkGetInstanceProcAddr); + VMA_COPY_IF_NOT_NULL(vkGetDeviceProcAddr); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); + VMA_COPY_IF_NOT_NULL(vkAllocateMemory); + VMA_COPY_IF_NOT_NULL(vkFreeMemory); + VMA_COPY_IF_NOT_NULL(vkMapMemory); + VMA_COPY_IF_NOT_NULL(vkUnmapMemory); + VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); + VMA_COPY_IF_NOT_NULL(vkBindImageMemory); + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkCreateBuffer); + VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); + VMA_COPY_IF_NOT_NULL(vkCreateImage); + VMA_COPY_IF_NOT_NULL(vkDestroyImage); + VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR); + 
VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); +#endif + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 + VMA_COPY_IF_NOT_NULL(vkGetMemoryWin32HandleKHR); +#endif +#undef VMA_COPY_IF_NOT_NULL +} + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Dynamic() +{ + VMA_ASSERT(m_VulkanFunctions.vkGetInstanceProcAddr && m_VulkanFunctions.vkGetDeviceProcAddr && + "To use VMA_DYNAMIC_VULKAN_FUNCTIONS in new versions of VMA you now have to pass " + "VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as VmaAllocatorCreateInfo::pVulkanFunctions. " + "Other members can be null."); + +#define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)m_VulkanFunctions.vkGetInstanceProcAddr(m_hInstance, functionNameString); +#define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)m_VulkanFunctions.vkGetDeviceProcAddr(m_hDevice, functionNameString); + + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties"); + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); + VMA_FETCH_DEVICE_FUNC(vkAllocateMemory, PFN_vkAllocateMemory, "vkAllocateMemory"); + VMA_FETCH_DEVICE_FUNC(vkFreeMemory, PFN_vkFreeMemory, "vkFreeMemory"); + VMA_FETCH_DEVICE_FUNC(vkMapMemory, PFN_vkMapMemory, "vkMapMemory"); + VMA_FETCH_DEVICE_FUNC(vkUnmapMemory, 
PFN_vkUnmapMemory, "vkUnmapMemory"); + VMA_FETCH_DEVICE_FUNC(vkFlushMappedMemoryRanges, PFN_vkFlushMappedMemoryRanges, "vkFlushMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkInvalidateMappedMemoryRanges, PFN_vkInvalidateMappedMemoryRanges, "vkInvalidateMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory, PFN_vkBindBufferMemory, "vkBindBufferMemory"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory, PFN_vkBindImageMemory, "vkBindImageMemory"); + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements, PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements, PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkCreateBuffer, PFN_vkCreateBuffer, "vkCreateBuffer"); + VMA_FETCH_DEVICE_FUNC(vkDestroyBuffer, PFN_vkDestroyBuffer, "vkDestroyBuffer"); + VMA_FETCH_DEVICE_FUNC(vkCreateImage, PFN_vkCreateImage, "vkCreateImage"); + VMA_FETCH_DEVICE_FUNC(vkDestroyImage, PFN_vkDestroyImage, "vkDestroyImage"); + VMA_FETCH_DEVICE_FUNC(vkCmdCopyBuffer, PFN_vkCmdCopyBuffer, "vkCmdCopyBuffer"); + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + // Try to fetch the pointer from the other name, based on suspected driver 
bug - see issue #410. + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + } + else if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + // Try to fetch the pointer from the other name, based on suspected driver bug - see issue #410. + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } +#endif + +#if VMA_DEDICATED_ALLOCATION + if(m_UseKhrDedicatedAllocation) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2KHR, "vkGetBufferMemoryRequirements2KHR"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2KHR, "vkGetImageMemoryRequirements2KHR"); + } +#endif + +#if VMA_BIND_MEMORY2 + if(m_UseKhrBindMemory2) + { + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2KHR, "vkBindBufferMemory2KHR"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2KHR, "vkBindImageMemory2KHR"); + } +#endif // #if VMA_BIND_MEMORY2 + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + } +#endif // #if VMA_MEMORY_BUDGET + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, 
"vkGetDeviceBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); + } +#endif +#if VMA_KHR_MAINTENANCE4 + if(m_UseKhrMaintenance4) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); + } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 + if (m_UseKhrExternalMemoryWin32) + { + VMA_FETCH_DEVICE_FUNC(vkGetMemoryWin32HandleKHR, PFN_vkGetMemoryWin32HandleKHR, "vkGetMemoryWin32HandleKHR"); + } +#endif +#undef VMA_FETCH_DEVICE_FUNC +#undef VMA_FETCH_INSTANCE_FUNC +} + +#endif // VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ValidateVulkanFunctions() +{ + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkFreeMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkMapMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkUnmapMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkFlushMappedMemoryRanges != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkInvalidateMappedMemoryRanges != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCreateBuffer != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkDestroyBuffer != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); + 
VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) + { + VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL); + } +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrBindMemory2) + { + VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_UseExtMemoryBudget || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); + } +#endif +#if VMA_EXTERNAL_MEMORY_WIN32 + if (m_UseKhrExternalMemoryWin32) + { + VMA_ASSERT(m_VulkanFunctions.vkGetMemoryWin32HandleKHR != VMA_NULL); + } +#endif + + // Not validating these due to suspected driver bugs with these function + // pointers being null despite correct extension or Vulkan version is enabled. + // See issue #397. Their usage in VMA is optional anyway. + // + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); +} + +VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) +{ + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; + const bool isSmallHeap = heapSize <= VMA_SMALL_HEAP_MAX_SIZE; + return VmaAlignUp(isSmallHeap ? 
(heapSize / 8) : m_PreferredLargeHeapBlockSize, (VkDeviceSize)32); +} + +VkResult VmaAllocator_T::AllocateMemoryOfType( + VmaPool pool, + VkDeviceSize size, + VkDeviceSize alignment, + bool dedicatedPreferred, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + const VmaAllocationCreateInfo& createInfo, + uint32_t memTypeIndex, + VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + VmaBlockVector& blockVector, + size_t allocationCount, + VmaAllocation* pAllocations) +{ + VMA_ASSERT(pAllocations != VMA_NULL); + VMA_DEBUG_LOG_FORMAT(" AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size); + + VmaAllocationCreateInfo finalCreateInfo = createInfo; + VkResult res = CalcMemTypeParams( + finalCreateInfo, + memTypeIndex, + size, + allocationCount); + if(res != VK_SUCCESS) + return res; + + if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) + { + return AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + } + else + { + const bool canAllocateDedicated = + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 && + (pool == VK_NULL_HANDLE || !blockVector.HasExplicitBlockSize()); + + if(canAllocateDedicated) + { + // Heuristics: Allocate dedicated memory if requested size if greater than 
half of preferred block size. + if(size > blockVector.GetPreferredBlockSize() / 2) + { + dedicatedPreferred = true; + } + // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget, + // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above + // 3/4 of the maximum allocation count. + if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 && + m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4) + { + dedicatedPreferred = false; + } + + if(dedicatedPreferred) + { + res = AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + if(res == VK_SUCCESS) + { + // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. + VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); + return VK_SUCCESS; + } + } + } + + res = blockVector.Allocate( + size, + alignment, + finalCreateInfo, + suballocType, + allocationCount, + pAllocations); + if(res == VK_SUCCESS) + return VK_SUCCESS; + + // Try dedicated memory. 
+ if(canAllocateDedicated && !dedicatedPreferred) + { + res = AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + if(res == VK_SUCCESS) + { + // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. + VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); + return VK_SUCCESS; + } + } + // Everything failed: Return error code. + VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); + return res; + } +} + +VkResult VmaAllocator_T::AllocateDedicatedMemory( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + uint32_t memTypeIndex, + bool map, + bool isUserDataString, + bool isMappingAllowed, + bool canAliasMemory, + void* pUserData, + float priority, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + size_t allocationCount, + VmaAllocation* pAllocations, + const void* pNextChain) +{ + VMA_ASSERT(allocationCount > 0 && pAllocations); + + VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + allocInfo.memoryTypeIndex = memTypeIndex; + allocInfo.allocationSize = size; + allocInfo.pNext = pNextChain; + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; + if(!canAliasMemory) + { + 
if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + if(dedicatedBuffer != VK_NULL_HANDLE) + { + VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); + dedicatedAllocInfo.buffer = dedicatedBuffer; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + dedicatedAllocInfo.image = dedicatedImage; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } + } + } +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + +#if VMA_BUFFER_DEVICE_ADDRESS + VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; + if(m_UseKhrBufferDeviceAddress) + { + bool canContainBufferWithDeviceAddress = true; + if(dedicatedBuffer != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || + dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT); + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = false; + } + if(canContainBufferWithDeviceAddress) + { + allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); + } + } +#endif // #if VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + if(m_UseExtMemoryPriority) + { + VMA_ASSERT(priority >= 0.f && priority <= 1.f); + priorityInfo.priority = priority; + VmaPnextChainPushFront(&allocInfo, &priorityInfo); + } +#endif // #if VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY + // Attach VkExportMemoryAllocateInfoKHR if necessary. 
+ VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; + exportMemoryAllocInfo.handleTypes = GetExternalMemoryHandleTypeFlags(memTypeIndex); + if(exportMemoryAllocInfo.handleTypes != 0) + { + VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); + } +#endif // #if VMA_EXTERNAL_MEMORY + + size_t allocIndex; + VkResult res = VK_SUCCESS; + for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + res = AllocateDedicatedMemoryPage( + pool, + size, + suballocType, + memTypeIndex, + allocInfo, + map, + isUserDataString, + isMappingAllowed, + pUserData, + pAllocations + allocIndex); + if(res != VK_SUCCESS) + { + break; + } + } + + if(res == VK_SUCCESS) + { + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + dedicatedAllocations.Register(pAllocations[allocIndex]); + } + VMA_DEBUG_LOG_FORMAT(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex); + } + else + { + // Free all already created allocations. + while(allocIndex--) + { + VmaAllocation currAlloc = pAllocations[allocIndex]; + VkDeviceMemory hMemory = currAlloc->GetMemory(); + + /* + There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory + before vkFreeMemory. 
+ + if(currAlloc->GetMappedData() != VMA_NULL) + { + (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); + } + */ + + FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); + m_AllocationObjectAllocator.Free(currAlloc); + } + + memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + } + + return res; +} + +VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + uint32_t memTypeIndex, + const VkMemoryAllocateInfo& allocInfo, + bool map, + bool isUserDataString, + bool isMappingAllowed, + void* pUserData, + VmaAllocation* pAllocation) +{ + VkDeviceMemory hMemory = VK_NULL_HANDLE; + VkResult res = AllocateVulkanMemory(&allocInfo, &hMemory); + if(res < 0) + { + VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); + return res; + } + + void* pMappedData = VMA_NULL; + if(map) + { + res = (*m_VulkanFunctions.vkMapMemory)( + m_hDevice, + hMemory, + 0, + VK_WHOLE_SIZE, + 0, + &pMappedData); + if(res < 0) + { + VMA_DEBUG_LOG(" vkMapMemory FAILED"); + FreeVulkanMemory(memTypeIndex, size, hMemory); + return res; + } + } + + *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); + (*pAllocation)->InitDedicatedAllocation(this, pool, memTypeIndex, hMemory, suballocType, pMappedData, size); + if (isUserDataString) + (*pAllocation)->SetName(this, (const char*)pUserData); + else + (*pAllocation)->SetUserData(this, pUserData); + m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + + return VK_SUCCESS; +} + +void VmaAllocator_T::GetBufferMemoryRequirements( + VkBuffer hBuffer, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + 
if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VkBufferMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR }; + memReqInfo.buffer = hBuffer; + + VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + + VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + + (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + + memReq = memReq2.memoryRequirements; + requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); + prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); + } + else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + { + (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq); + requiresDedicatedAllocation = false; + prefersDedicatedAllocation = false; + } +} + +void VmaAllocator_T::GetImageMemoryRequirements( + VkImage hImage, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VkImageMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR }; + memReqInfo.image = hImage; + + VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + + VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + + (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + + memReq = memReq2.memoryRequirements; + requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != 
VK_FALSE); + prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); + } + else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + { + (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq); + requiresDedicatedAllocation = false; + prefersDedicatedAllocation = false; + } +} + +VkResult VmaAllocator_T::FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VmaBufferImageUsage bufImgUsage, + uint32_t* pMemoryTypeIndex) const +{ + memoryTypeBits &= GetGlobalMemoryTypeBits(); + + if(pAllocationCreateInfo->memoryTypeBits != 0) + { + memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; + } + + VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; + if(!FindMemoryPreferences( + IsIntegratedGpu(), + *pAllocationCreateInfo, + bufImgUsage, + requiredFlags, preferredFlags, notPreferredFlags)) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + *pMemoryTypeIndex = UINT32_MAX; + uint32_t minCost = UINT32_MAX; + for(uint32_t memTypeIndex = 0, memTypeBit = 1; + memTypeIndex < GetMemoryTypeCount(); + ++memTypeIndex, memTypeBit <<= 1) + { + // This memory type is acceptable according to memoryTypeBits bitmask. + if((memTypeBit & memoryTypeBits) != 0) + { + const VkMemoryPropertyFlags currFlags = + m_MemProps.memoryTypes[memTypeIndex].propertyFlags; + // This memory type contains requiredFlags. + if((requiredFlags & ~currFlags) == 0) + { + // Calculate cost as number of bits from preferredFlags not present in this memory type. + uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + + VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); + // Remember memory type with lowest cost. + if(currCost < minCost) + { + *pMemoryTypeIndex = memTypeIndex; + if(currCost == 0) + { + return VK_SUCCESS; + } + minCost = currCost; + } + } + } + } + return (*pMemoryTypeIndex != UINT32_MAX) ? 
VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + +VkResult VmaAllocator_T::CalcMemTypeParams( + VmaAllocationCreateInfo& inoutCreateInfo, + uint32_t memTypeIndex, + VkDeviceSize size, + size_t allocationCount) +{ + // If memory type is not HOST_VISIBLE, disable MAPPED. + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && + (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + inoutCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0) + { + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + VmaBudget heapBudget = {}; + GetHeapBudgets(&heapBudget, heapIndex, 1); + if(heapBudget.usage + size * allocationCount > heapBudget.budget) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + return VK_SUCCESS; +} + +VkResult VmaAllocator_T::CalcAllocationParams( + VmaAllocationCreateInfo& inoutCreateInfo, + bool dedicatedRequired, + bool dedicatedPreferred) +{ + VMA_ASSERT((inoutCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && + "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); + VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || + (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && + "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + if(inoutCreateInfo.usage == 
VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) + { + VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && + "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + } + } + + // If memory is lazily allocated, it should be always dedicated. + if(dedicatedRequired || + inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + if(inoutCreateInfo.pool != VK_NULL_HANDLE) + { + if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority(); + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + if(VMA_DEBUG_ALWAYS_DEDICATED_MEMORY && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + // Non-auto USAGE values imply HOST_ACCESS flags. + // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. 
+ // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. + // Otherwise they just protect from assert on mapping. + if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + } + } + + return VK_SUCCESS; +} + +VkResult VmaAllocator_T::AllocateMemory( + const VkMemoryRequirements& vkMemReq, + bool requiresDedicatedAllocation, + bool prefersDedicatedAllocation, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + size_t allocationCount, + VmaAllocation* pAllocations) +{ + memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + + VMA_ASSERT(VmaIsPow2(vkMemReq.alignment)); + + if(vkMemReq.size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VmaAllocationCreateInfo createInfoFinal = createInfo; + VkResult res = CalcAllocationParams(createInfoFinal, requiresDedicatedAllocation, prefersDedicatedAllocation); + if(res != VK_SUCCESS) + return res; + + if(createInfoFinal.pool != VK_NULL_HANDLE) + { + VmaBlockVector& blockVector = createInfoFinal.pool->m_BlockVector; + return AllocateMemoryOfType( + createInfoFinal.pool, + vkMemReq.size, + vkMemReq.alignment, + prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + blockVector.GetMemoryTypeIndex(), + suballocType, + createInfoFinal.pool->m_DedicatedAllocations, + blockVector, + allocationCount, + pAllocations); + } + else + { + // Bit mask of memory Vulkan types acceptable for this allocation. 
+ uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; + uint32_t memTypeIndex = UINT32_MAX; + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + // Can't find any single memory type matching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. + if(res != VK_SUCCESS) + return res; + do + { + VmaBlockVector* blockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); + res = AllocateMemoryOfType( + VK_NULL_HANDLE, + vkMemReq.size, + vkMemReq.alignment, + requiresDedicatedAllocation || prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + memTypeIndex, + suballocType, + m_DedicatedAllocations[memTypeIndex], + *blockVector, + allocationCount, + pAllocations); + // Allocation succeeded + if(res == VK_SUCCESS) + return VK_SUCCESS; + + // Remove old memTypeIndex from list of possibilities. + memoryTypeBits &= ~(1u << memTypeIndex); + // Find alternative memTypeIndex. + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + } while(res == VK_SUCCESS); + + // No other matching memory type index could be found. + // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. 
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } +} + +void VmaAllocator_T::FreeMemory( + size_t allocationCount, + const VmaAllocation* pAllocations) +{ + VMA_ASSERT(pAllocations); + + for(size_t allocIndex = allocationCount; allocIndex--; ) + { + VmaAllocation allocation = pAllocations[allocIndex]; + + if(allocation != VK_NULL_HANDLE) + { + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); + } + + switch(allocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaBlockVector* pBlockVector = VMA_NULL; + VmaPool hPool = allocation->GetParentPool(); + if(hPool != VK_NULL_HANDLE) + { + pBlockVector = &hPool->m_BlockVector; + } + else + { + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + pBlockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); + } + pBlockVector->Free(allocation); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + FreeDedicatedMemory(allocation); + break; + default: + VMA_ASSERT(0); + } + } + } +} + +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) +{ + // Initialize. + VmaClearDetailedStatistics(pStats->total); + for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) + VmaClearDetailedStatistics(pStats->memoryType[i]); + for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) + VmaClearDetailedStatistics(pStats->memoryHeap[i]); + + // Process default pools. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; + if (pBlockVector != VMA_NULL) + pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + + // Process custom pools. 
+ { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); + blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + } + + // Process dedicated allocations. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + + // Sum from memory types to memory heaps. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; + VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); + } + + // Sum from memory heaps to total. 
// Fills outBudgets[0..heapCount) with statistics, estimated usage and budget
// for heaps firstHeap .. firstHeap + heapCount - 1.
//
// With VMA_MEMORY_BUDGET compiled in and m_UseExtMemoryBudget set, values
// cached by UpdateVulkanBudget() are used while fewer than 30 operations have
// been counted in m_Budget.m_OperationsSinceBudgetFetch; otherwise the cache is
// refreshed first. Without the extension, usage is the number of block bytes
// and budget is 80% of the heap size.
void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount)
{
#if VMA_MEMORY_BUDGET
    if(m_UseExtMemoryBudget)
    {
        // Cached budget is considered fresh enough below this operation count.
        if(m_Budget.m_OperationsSinceBudgetFetch < 30)
        {
            // Read lock covers the values written by UpdateVulkanBudget().
            VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex);
            for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets)
            {
                const uint32_t heapIndex = firstHeap + i;

                outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex];
                outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex];
                outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex];
                outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex];

                // Estimated usage = usage reported at the last fetch, adjusted by how much
                // our own block bytes changed since then. The comparison guards the
                // unsigned subtraction against going below zero.
                if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex])
                {
                    outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] +
                        outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex];
                }
                else
                {
                    outBudgets->usage = 0;
                }

                // Have to take MIN with heap size because explicit HeapSizeLimit is included in it.
                outBudgets->budget = VMA_MIN(
                    m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size);
            }
        }
        else
        {
            // Cache is stale: refresh it, then re-enter. UpdateVulkanBudget() resets the
            // operation counter to 0, so the recursive call is expected to take the
            // cached branch above. outBudgets has not been advanced on this path.
            UpdateVulkanBudget(); // Outside of mutex lock
            GetHeapBudgets(outBudgets, firstHeap, heapCount); // Recursion
        }
    }
    else
#endif
    {
        // Fallback when the budget extension is unavailable or not compiled in.
        for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets)
        {
            const uint32_t heapIndex = firstHeap + i;

            outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex];
            outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex];
            outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex];
            outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex];

            outBudgets->usage = outBudgets->statistics.blockBytes;
            outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics.
        }
    }
}
VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) +{ + VMA_DEBUG_LOG_FORMAT(" CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags); + + VmaPoolCreateInfo newCreateInfo = *pCreateInfo; + + // Protection against uninitialized new structure member. If garbage data are left there, this pointer dereference would crash. + if(pCreateInfo->pMemoryAllocateNext) + { + VMA_ASSERT(((const VkBaseInStructure*)pCreateInfo->pMemoryAllocateNext)->sType != 0); + } + + if(newCreateInfo.maxBlockCount == 0) + { + newCreateInfo.maxBlockCount = SIZE_MAX; + } + if(newCreateInfo.minBlockCount > newCreateInfo.maxBlockCount) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + // Memory type index out of range or forbidden. + if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || + ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + if(newCreateInfo.minAllocationAlignment > 0) + { + VMA_ASSERT(VmaIsPow2(newCreateInfo.minAllocationAlignment)); + } + + const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); + + *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo, preferredBlockSize); + + VkResult res = (*pPool)->m_BlockVector.CreateMinBlocks(); + if(res != VK_SUCCESS) + { + vma_delete(this, *pPool); + *pPool = VMA_NULL; + return res; + } + + // Add to m_Pools. + { + VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); + (*pPool)->SetId(m_NextPoolId++); + m_Pools.PushBack(*pPool); + } + + return VK_SUCCESS; +} + +void VmaAllocator_T::DestroyPool(VmaPool pool) +{ + // Remove from m_Pools. 
+ { + VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); + m_Pools.Remove(pool); + } + + vma_delete(this, pool); +} + +void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats) +{ + VmaClearStatistics(*pPoolStats); + pool->m_BlockVector.AddStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddStatistics(*pPoolStats); +} + +void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats) +{ + VmaClearDetailedStatistics(*pPoolStats); + pool->m_BlockVector.AddDetailedStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats); +} + +void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) +{ + m_CurrentFrameIndex.store(frameIndex); + +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + UpdateVulkanBudget(); + } +#endif // #if VMA_MEMORY_BUDGET +} + +VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool) +{ + return hPool->m_BlockVector.CheckCorruption(); +} + +VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) +{ + VkResult finalRes = VK_ERROR_FEATURE_NOT_PRESENT; + + // Process default pools. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; + if(pBlockVector != VMA_NULL) + { + VkResult localRes = pBlockVector->CheckCorruption(); + switch(localRes) + { + case VK_ERROR_FEATURE_NOT_PRESENT: + break; + case VK_SUCCESS: + finalRes = VK_SUCCESS; + break; + default: + return localRes; + } + } + } + + // Process custom pools. 
+ { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0) + { + VkResult localRes = pool->m_BlockVector.CheckCorruption(); + switch(localRes) + { + case VK_ERROR_FEATURE_NOT_PRESENT: + break; + case VK_SUCCESS: + finalRes = VK_SUCCESS; + break; + default: + return localRes; + } + } + } + } + + return finalRes; +} + +VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory) +{ + AtomicTransactionalIncrement deviceMemoryCountIncrement; + const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount); +#if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT + if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount) + { + return VK_ERROR_TOO_MANY_OBJECTS; + } +#endif + + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); + + // HeapSizeLimit is in effect for this heap. + if((m_HeapSizeLimitMask & (1u << heapIndex)) != 0) + { + const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; + VkDeviceSize blockBytes = m_Budget.m_BlockBytes[heapIndex]; + for(;;) + { + const VkDeviceSize blockBytesAfterAllocation = blockBytes + pAllocateInfo->allocationSize; + if(blockBytesAfterAllocation > heapSize) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + if(m_Budget.m_BlockBytes[heapIndex].compare_exchange_strong(blockBytes, blockBytesAfterAllocation)) + { + break; + } + } + } + else + { + m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; + } + ++m_Budget.m_BlockCount[heapIndex]; + + // VULKAN CALL vkAllocateMemory. 
+ VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); + + if(res == VK_SUCCESS) + { +#if VMA_MEMORY_BUDGET + ++m_Budget.m_OperationsSinceBudgetFetch; +#endif + + // Informative callback. + if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) + { + (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData); + } + + deviceMemoryCountIncrement.Commit(); + } + else + { + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; + } + + return res; +} + +void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory) +{ + // Informative callback. + if(m_DeviceMemoryCallbacks.pfnFree != VMA_NULL) + { + (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size, m_DeviceMemoryCallbacks.pUserData); + } + + // VULKAN CALL vkFreeMemory. + (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); + + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= size; + + --m_DeviceMemoryCount; +} + +VkResult VmaAllocator_T::BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext) +{ + if(pNext != VMA_NULL) + { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL) + { + VkBindBufferMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.buffer = buffer; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindBufferMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif 
// #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindBufferMemory)(m_hDevice, buffer, memory, memoryOffset); + } +} + +VkResult VmaAllocator_T::BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext) +{ + if(pNext != VMA_NULL) + { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL) + { + VkBindImageMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.image = image; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindImageMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif // #if VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindImageMemory)(m_hDevice, image, memory, memoryOffset); + } +} + +VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) +{ + switch(hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); + char *pBytes = VMA_NULL; + VkResult res = pBlock->Map(this, 1, (void**)&pBytes); + if(res == VK_SUCCESS) + { + *ppData = pBytes + (ptrdiff_t)hAllocation->GetOffset(); + hAllocation->BlockAllocMap(); + } + return res; + } + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + return hAllocation->DedicatedAllocMap(this, ppData); + default: + VMA_ASSERT(0); + return VK_ERROR_MEMORY_MAP_FAILED; + } +} + +void VmaAllocator_T::Unmap(VmaAllocation hAllocation) +{ + switch(hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); + 
hAllocation->BlockAllocUnmap(); + pBlock->Unmap(this, 1); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + hAllocation->DedicatedAllocUnmap(this); + break; + default: + VMA_ASSERT(0); + } +} + +VkResult VmaAllocator_T::BindBufferMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext) +{ + VkResult res = VK_ERROR_UNKNOWN_COPY; + switch(hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + res = BindVulkanBuffer(hAllocation->GetMemory(), allocationLocalOffset, hBuffer, pNext); + break; + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); + VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block."); + res = pBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext); + break; + } + default: + VMA_ASSERT(0); + } + return res; +} + +VkResult VmaAllocator_T::BindImageMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext) +{ + VkResult res = VK_ERROR_UNKNOWN_COPY; + switch(hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + res = BindVulkanImage(hAllocation->GetMemory(), allocationLocalOffset, hImage, pNext); + break; + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); + VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block."); + res = pBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext); + break; + } + default: + VMA_ASSERT(0); + } + return res; +} + +VkResult VmaAllocator_T::FlushOrInvalidateAllocation( + VmaAllocation hAllocation, + VkDeviceSize offset, VkDeviceSize size, + VMA_CACHE_OPERATION op) +{ + VkResult res = VK_SUCCESS; + + VkMappedMemoryRange memRange = {}; + if(GetFlushOrInvalidateRange(hAllocation, offset, size, memRange)) + { + switch(op) + { + 
case VMA_CACHE_FLUSH: + res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &memRange); + break; + case VMA_CACHE_INVALIDATE: + res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &memRange); + break; + default: + VMA_ASSERT(0); + } + } + // else: Just ignore this call. + return res; +} + +VkResult VmaAllocator_T::FlushOrInvalidateAllocations( + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + VMA_CACHE_OPERATION op) +{ + typedef VmaStlAllocator RangeAllocator; + typedef VmaSmallVector RangeVector; + RangeVector ranges = RangeVector(RangeAllocator(GetAllocationCallbacks())); + + for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + const VmaAllocation alloc = allocations[allocIndex]; + const VkDeviceSize offset = offsets != VMA_NULL ? offsets[allocIndex] : 0; + const VkDeviceSize size = sizes != VMA_NULL ? sizes[allocIndex] : VK_WHOLE_SIZE; + VkMappedMemoryRange newRange; + if(GetFlushOrInvalidateRange(alloc, offset, size, newRange)) + { + ranges.push_back(newRange); + } + } + + VkResult res = VK_SUCCESS; + if(!ranges.empty()) + { + switch(op) + { + case VMA_CACHE_FLUSH: + res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + case VMA_CACHE_INVALIDATE: + res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + default: + VMA_ASSERT(0); + } + } + // else: Just ignore this call. 
+ return res; +} + +VkResult VmaAllocator_T::CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + void* dstMappedData = VMA_NULL; + VkResult res = Map(dstAllocation, &dstMappedData); + if(res == VK_SUCCESS) + { + memcpy((char*)dstMappedData + dstAllocationLocalOffset, pSrcHostPointer, (size_t)size); + Unmap(dstAllocation); + res = FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH); + } + return res; +} + +VkResult VmaAllocator_T::CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + void* srcMappedData = VMA_NULL; + VkResult res = Map(srcAllocation, &srcMappedData); + if(res == VK_SUCCESS) + { + res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE); + if(res == VK_SUCCESS) + { + memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size); + Unmap(srcAllocation); + } + } + return res; +} + +void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) +{ + VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + VmaPool parentPool = allocation->GetParentPool(); + if(parentPool == VK_NULL_HANDLE) + { + // Default pool + m_DedicatedAllocations[memTypeIndex].Unregister(allocation); + } + else + { + // Custom pool + parentPool->m_DedicatedAllocations.Unregister(allocation); + } + + VkDeviceMemory hMemory = allocation->GetMemory(); + + /* + There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory + before vkFreeMemory. 
+ + if(allocation->GetMappedData() != VMA_NULL) + { + (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); + } + */ + + FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); + + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + allocation->Destroy(this); + m_AllocationObjectAllocator.Free(allocation); + + VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); +} + +uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const +{ + VkBufferCreateInfo dummyBufCreateInfo; + VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo); + + uint32_t memoryTypeBits = 0; + + // Create buffer. + VkBuffer buf = VK_NULL_HANDLE; + VkResult res = (*GetVulkanFunctions().vkCreateBuffer)( + m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf); + if(res == VK_SUCCESS) + { + // Query for supported memory types. + VkMemoryRequirements memReq; + (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq); + memoryTypeBits = memReq.memoryTypeBits; + + // Destroy buffer. + (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks()); + } + + return memoryTypeBits; +} + +uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const +{ + // Make sure memory information is already fetched. + VMA_ASSERT(GetMemoryTypeCount() > 0); + + uint32_t memoryTypeBits = UINT32_MAX; + + if(!m_UseAmdDeviceCoherentMemory) + { + // Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD. 
// Computes the VkMappedMemoryRange to pass to vkFlushMappedMemoryRanges /
// vkInvalidateMappedMemoryRanges for the region [offset, offset + size) of
// `allocation`, where `offset` is relative to the start of the allocation and
// `size` may be VK_WHOLE_SIZE.
//
// Returns true and fills outRange when size > 0 and the allocation's memory
// type is reported non-coherent by IsMemoryTypeNonCoherent(). Returns false
// otherwise, leaving outRange untouched, meaning no flush/invalidate is needed.
bool VmaAllocator_T::GetFlushOrInvalidateRange(
    VmaAllocation allocation,
    VkDeviceSize offset, VkDeviceSize size,
    VkMappedMemoryRange& outRange) const
{
    const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex();
    if(size > 0 && IsMemoryTypeNonCoherent(memTypeIndex))
    {
        const VkDeviceSize nonCoherentAtomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize;
        const VkDeviceSize allocationSize = allocation->GetSize();
        VMA_ASSERT(offset <= allocationSize);

        outRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
        outRange.pNext = VMA_NULL;
        outRange.memory = allocation->GetMemory();

        switch(allocation->GetType())
        {
        case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
            // The allocation owns the whole VkDeviceMemory, so offsets are used as-is.
            // Start is rounded down to a multiple of nonCoherentAtomSize.
            outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize);
            if(size == VK_WHOLE_SIZE)
            {
                // Everything from the aligned start to the end of the allocation.
                outRange.size = allocationSize - outRange.offset;
            }
            else
            {
                VMA_ASSERT(offset + size <= allocationSize);
                // Grow the size by the amount the start moved down, round up to the atom
                // size, and clamp so the range does not run past the allocation.
                outRange.size = VMA_MIN(
                    VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize),
                    allocationSize - outRange.offset);
            }
            break;
        case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
        {
            // 1. Still within this allocation.
            outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize);
            if(size == VK_WHOLE_SIZE)
            {
                size = allocationSize - offset;
            }
            else
            {
                VMA_ASSERT(offset + size <= allocationSize);
            }
            outRange.size = VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize);

            // 2. Adjust to whole block.
            // Translate the allocation-relative offset into a block-relative one, then
            // clamp the rounded-up size to the end of the block.
            const VkDeviceSize allocationOffset = allocation->GetOffset();
            VMA_ASSERT(allocationOffset % nonCoherentAtomSize == 0);
            const VkDeviceSize blockSize = allocation->GetBlock()->m_pMetadata->GetSize();
            outRange.offset += allocationOffset;
            outRange.size = VMA_MIN(outRange.size, blockSize - outRange.offset);

            break;
        }
        default:
            VMA_ASSERT(0);
        }
        return true;
    }
    return false;
}
+ } + else if(m_Budget.m_VulkanBudget[heapIndex] > m_MemProps.memoryHeaps[heapIndex].size) + { + m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size; + } + if(m_Budget.m_VulkanUsage[heapIndex] == 0 && m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] > 0) + { + m_Budget.m_VulkanUsage[heapIndex] = m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + } + } + m_Budget.m_OperationsSinceBudgetFetch = 0; + } +} +#endif // VMA_MEMORY_BUDGET + +void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern) +{ + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && + hAllocation->IsMappingAllowed() && + (m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) + { + void* pData = VMA_NULL; + VkResult res = Map(hAllocation, &pData); + if(res == VK_SUCCESS) + { + memset(pData, (int)pattern, (size_t)hAllocation->GetSize()); + FlushOrInvalidateAllocation(hAllocation, 0, VK_WHOLE_SIZE, VMA_CACHE_FLUSH); + Unmap(hAllocation); + } + else + { + VMA_ASSERT(0 && "VMA_DEBUG_INITIALIZE_ALLOCATIONS is enabled, but couldn't map memory to fill allocation."); + } + } +} + +uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() +{ + uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load(); + if(memoryTypeBits == UINT32_MAX) + { + memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits(); + m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits); + } + return memoryTypeBits; +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) +{ + json.WriteString("DefaultPools"); + json.BeginObject(); + { + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; + VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; + if (pBlockVector != VMA_NULL) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + 
json.EndString(); + json.BeginObject(); + { + json.WriteString("PreferredBlockSize"); + json.WriteNumber(pBlockVector->GetPreferredBlockSize()); + + json.WriteString("Blocks"); + pBlockVector->PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + dedicatedAllocList.BuildStatsString(json); + } + json.EndObject(); + } + } + } + json.EndObject(); + + json.WriteString("CustomPools"); + json.BeginObject(); + { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + if (!m_Pools.IsEmpty()) + { + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + bool displayType = true; + size_t index = 0; + for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + if (blockVector.GetMemoryTypeIndex() == memTypeIndex) + { + if (displayType) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + json.BeginArray(); + displayType = false; + } + + json.BeginObject(); + { + json.WriteString("Name"); + json.BeginString(); + json.ContinueString((uint64_t)index++); + if (pool->GetName()) + { + json.ContinueString(" - "); + json.ContinueString(pool->GetName()); + } + json.EndString(); + + json.WriteString("PreferredBlockSize"); + json.WriteNumber(blockVector.GetPreferredBlockSize()); + + json.WriteString("Blocks"); + blockVector.PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + pool->m_DedicatedAllocations.BuildStatsString(json); + } + json.EndObject(); + } + } + + if (!displayType) + json.EndArray(); + } + } + } + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_ALLOCATOR_T_FUNCTIONS + + +#ifndef _VMA_PUBLIC_INTERFACE +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* pCreateInfo, + VmaAllocator* pAllocator) +{ + VMA_ASSERT(pCreateInfo && pAllocator); + VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 || + 
(VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 4)); + VMA_DEBUG_LOG("vmaCreateAllocator"); + *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo); + VkResult result = (*pAllocator)->Init(pCreateInfo); + if(result < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pAllocator); + *pAllocator = VK_NULL_HANDLE; + } + return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator allocator) +{ + if(allocator != VK_NULL_HANDLE) + { + VMA_DEBUG_LOG("vmaDestroyAllocator"); + VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; // Have to copy the callbacks when destroying. + vma_delete(&allocationCallbacks, allocator); + } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator allocator, VmaAllocatorInfo* pAllocatorInfo) +{ + VMA_ASSERT(allocator && pAllocatorInfo); + pAllocatorInfo->instance = allocator->m_hInstance; + pAllocatorInfo->physicalDevice = allocator->GetPhysicalDevice(); + pAllocatorInfo->device = allocator->m_hDevice; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( + VmaAllocator allocator, + const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties) +{ + VMA_ASSERT(allocator && ppPhysicalDeviceProperties); + *ppPhysicalDeviceProperties = &allocator->m_PhysicalDeviceProperties; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( + VmaAllocator allocator, + const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties) +{ + VMA_ASSERT(allocator && ppPhysicalDeviceMemoryProperties); + *ppPhysicalDeviceMemoryProperties = &allocator->m_MemProps; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( + VmaAllocator allocator, + uint32_t memoryTypeIndex, + VkMemoryPropertyFlags* pFlags) +{ + VMA_ASSERT(allocator && pFlags); + VMA_ASSERT(memoryTypeIndex < allocator->GetMemoryTypeCount()); + *pFlags = 
allocator->m_MemProps.memoryTypes[memoryTypeIndex].propertyFlags; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( + VmaAllocator allocator, + uint32_t frameIndex) +{ + VMA_ASSERT(allocator); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->SetCurrentFrameIndex(frameIndex); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( + VmaAllocator allocator, + VmaTotalStatistics* pStats) +{ + VMA_ASSERT(allocator && pStats); + VMA_DEBUG_GLOBAL_MUTEX_LOCK + allocator->CalculateStatistics(pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( + VmaAllocator allocator, + VmaBudget* pBudgets) +{ + VMA_ASSERT(allocator && pBudgets); + VMA_DEBUG_GLOBAL_MUTEX_LOCK + allocator->GetHeapBudgets(pBudgets, 0, allocator->GetMemoryHeapCount()); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString( + VmaAllocator allocator, + char** ppStatsString, + VkBool32 detailedMap) +{ + VMA_ASSERT(allocator && ppStatsString); + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VmaStringBuilder sb(allocator->GetAllocationCallbacks()); + { + VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; + allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount()); + + VmaTotalStatistics stats; + allocator->CalculateStatistics(&stats); + + VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); + json.BeginObject(); + { + json.WriteString("General"); + json.BeginObject(); + { + const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties; + const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps; + + json.WriteString("API"); + json.WriteString("Vulkan"); + + json.WriteString("apiVersion"); + json.BeginString(); + json.ContinueString(VK_VERSION_MAJOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_VERSION_MINOR(deviceProperties.apiVersion)); + json.ContinueString("."); + json.ContinueString(VK_VERSION_PATCH(deviceProperties.apiVersion)); + json.EndString(); + + 
json.WriteString("GPU"); + json.WriteString(deviceProperties.deviceName); + json.WriteString("deviceType"); + json.WriteNumber(static_cast<uint32_t>(deviceProperties.deviceType)); + + json.WriteString("maxMemoryAllocationCount"); + json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount); + json.WriteString("bufferImageGranularity"); + json.WriteNumber(deviceProperties.limits.bufferImageGranularity); + json.WriteString("nonCoherentAtomSize"); + json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + + json.WriteString("memoryHeapCount"); + json.WriteNumber(memoryProperties.memoryHeapCount); + json.WriteString("memoryTypeCount"); + json.WriteNumber(memoryProperties.memoryTypeCount); + } + json.EndObject(); + } + { + json.WriteString("Total"); + VmaPrintDetailedStatistics(json, stats.total); + } + { + json.WriteString("MemoryInfo"); + json.BeginObject(); + { + for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) + { + json.BeginString("Heap "); + json.ContinueString(heapIndex); + json.EndString(); + json.BeginObject(); + { + const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; + json.WriteString("Flags"); + json.BeginArray(true); + { + if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + #if VMA_VULKAN_VERSION >= 1001000 + if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) + json.WriteString("MULTI_INSTANCE"); + #endif + + VkMemoryHeapFlags flags = heapInfo.flags & + ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT + #endif + ); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Size"); + json.WriteNumber(heapInfo.size); + + json.WriteString("Budget"); + json.BeginObject(); + { + json.WriteString("BudgetBytes"); + json.WriteNumber(budgets[heapIndex].budget); + json.WriteString("UsageBytes"); + json.WriteNumber(budgets[heapIndex].usage); + } + json.EndObject(); +
+ json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + + json.WriteString("MemoryPools"); + json.BeginObject(); + { + for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) + { + if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + { + json.BeginString("Type "); + json.ContinueString(typeIndex); + json.EndString(); + json.BeginObject(); + { + json.WriteString("Flags"); + json.BeginArray(true); + { + VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + json.WriteString("HOST_VISIBLE"); + if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + json.WriteString("HOST_COHERENT"); + if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + json.WriteString("HOST_CACHED"); + if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) + json.WriteString("LAZILY_ALLOCATED"); + #if VMA_VULKAN_VERSION >= 1001000 + if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) + json.WriteString("PROTECTED"); + #endif + #if VK_AMD_device_coherent_memory + if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) + json.WriteString("DEVICE_COHERENT_AMD"); + if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) + json.WriteString("DEVICE_UNCACHED_AMD"); + #endif + + flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT + #endif + #if VK_AMD_device_coherent_memory + | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY + | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY + #endif + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); + } + 
json.EndObject(); + } + } + + } + json.EndObject(); + } + json.EndObject(); + } + } + json.EndObject(); + } + + if (detailedMap == VK_TRUE) + allocator->PrintDetailedMap(json); + + json.EndObject(); + } + + *ppStatsString = VmaCreateStringCopy(allocator->GetAllocationCallbacks(), sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( + VmaAllocator allocator, + char* pStatsString) +{ + if(pStatsString != VMA_NULL) + { + VMA_ASSERT(allocator); + VmaFreeString(allocator->GetAllocationCallbacks(), pStatsString); + } +} + +#endif // VMA_STATS_STRING_ENABLED + +/* +This function is not protected by any mutex because it just reads immutable data. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( + VmaAllocator allocator, + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, VmaBufferImageUsage::UNKNOWN, pMemoryTypeIndex); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pBufferCreateInfo != VMA_NULL); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + const VkDevice hDev = allocator->m_hDevice; + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceBufferMemoryRequirements) + { + // Can query straight from VkBufferCreateInfo :) + VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR}; + 
devBufMemReq.pCreateInfo = pBufferCreateInfo; + + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy buffer to query :( + VkBuffer hBuffer = VK_NULL_HANDLE; + res = funcs->vkCreateBuffer( + hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + + funcs->vkDestroyBuffer( + hDev, hBuffer, allocator->GetAllocationCallbacks()); + } + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( + VmaAllocator allocator, + const VkImageCreateInfo* pImageCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pImageCreateInfo != VMA_NULL); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + const VkDevice hDev = allocator->m_hDevice; + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceImageMemoryRequirements) + { + // Can query straight from VkImageCreateInfo :) + VkDeviceImageMemoryRequirementsKHR devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR}; + devImgMemReq.pCreateInfo = pImageCreateInfo; + 
VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && + "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); + + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy image to query :( + VkImage hImage = VK_NULL_HANDLE; + res = funcs->vkCreateImage( + hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + + funcs->vkDestroyImage( + hDev, hImage, allocator->GetAllocationCallbacks()); + } + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator allocator, + const VmaPoolCreateInfo* pCreateInfo, + VmaPool* pPool) +{ + VMA_ASSERT(allocator && pCreateInfo && pPool); + + VMA_DEBUG_LOG("vmaCreatePool"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CreatePool(pCreateInfo, pPool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( + VmaAllocator allocator, + VmaPool pool) +{ + VMA_ASSERT(allocator); + + if(pool == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyPool"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->DestroyPool(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaStatistics* 
pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetPoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaDetailedStatistics* pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->CalculatePoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) +{ + VMA_ASSERT(allocator && pool); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VMA_DEBUG_LOG("vmaCheckPoolCorruption"); + + return allocator->CheckPoolCorruption(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char** ppName) +{ + VMA_ASSERT(allocator && pool && ppName); + + VMA_DEBUG_LOG("vmaGetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *ppName = pool->GetName(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char* pName) +{ + VMA_ASSERT(allocator && pool); + + VMA_DEBUG_LOG("vmaSetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + pool->SetName(pName); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( + VmaAllocator allocator, + const VkMemoryRequirements* pVkMemoryRequirements, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkResult result = allocator->AllocateMemory( + *pVkMemoryRequirements, + false, // requiresDedicatedAllocation + false, // prefersDedicatedAllocation + VK_NULL_HANDLE, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_UNKNOWN, + 1, // allocationCount + pAllocation); + + 
if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( + VmaAllocator allocator, + const VkMemoryRequirements* pVkMemoryRequirements, + const VmaAllocationCreateInfo* pCreateInfo, + size_t allocationCount, + VmaAllocation* pAllocations, + VmaAllocationInfo* pAllocationInfo) +{ + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocations); + + VMA_DEBUG_LOG("vmaAllocateMemoryPages"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkResult result = allocator->AllocateMemory( + *pVkMemoryRequirements, + false, // requiresDedicatedAllocation + false, // prefersDedicatedAllocation + VK_NULL_HANDLE, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_UNKNOWN, + allocationCount, + pAllocations); + + if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) + { + for(size_t i = 0; i < allocationCount; ++i) + { + allocator->GetAllocationInfo(pAllocations[i], pAllocationInfo + i); + } + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( + VmaAllocator allocator, + VkBuffer buffer, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && buffer != VK_NULL_HANDLE && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemoryForBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(buffer, vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation); + + VkResult result = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + 
prefersDedicatedAllocation, + buffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( + VmaAllocator allocator, + VkImage image, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && image != VK_NULL_HANDLE && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemoryForImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetImageMemoryRequirements(image, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + VkResult result = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + VK_NULL_HANDLE, // dedicatedBuffer + image, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( + VmaAllocator allocator, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator); + + if(allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaFreeMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->FreeMemory( + 1, // allocationCount + &allocation); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( + VmaAllocator allocator, + size_t allocationCount, + const VmaAllocation* pAllocations) +{ + if(allocationCount == 0) 
+ { + return; + } + + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaFreeMemoryPages"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->FreeMemory(allocationCount, pAllocations); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( + VmaAllocator allocator, + VmaAllocation allocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && allocation && pAllocationInfo); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetAllocationInfo(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( + VmaAllocator allocator, + VmaAllocation allocation, + VmaAllocationInfo2* pAllocationInfo) +{ + VMA_ASSERT(allocator && allocation && pAllocationInfo); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetAllocationInfo2(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( + VmaAllocator allocator, + VmaAllocation allocation, + void* pUserData) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocation->SetUserData(allocator, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName) +{ + allocation->SetName(allocator, pName); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags) +{ + VMA_ASSERT(allocator && allocation && pFlags); + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + *pFlags = allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( + VmaAllocator allocator, + VmaAllocation allocation, + void** ppData) +{ + VMA_ASSERT(allocator && allocation && ppData); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->Map(allocation, ppData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( + VmaAllocator 
allocator, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->Unmap(allocation); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_LOG("vmaFlushAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_LOG("vmaInvalidateAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaFlushAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaInvalidateAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST 
vmaCopyMemoryToAllocation( + VmaAllocator allocator, + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyMemoryToAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator allocator, + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && srcAllocation && pDstHostPointer); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyAllocationToMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator allocator, + uint32_t memoryTypeBits) +{ + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaCheckCorruption"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CheckCorruption(memoryTypeBits); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( + VmaAllocator allocator, + const VmaDefragmentationInfo* pInfo, + VmaDefragmentationContext* pContext) +{ + VMA_ASSERT(allocator && pInfo && pContext); + + VMA_DEBUG_LOG("vmaBeginDefragmentation"); + + if (pInfo->pool != VMA_NULL) + { + // Check if run on supported algorithms + if (pInfo->pool->m_BlockVector.GetAlgorithm() & VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); + return VK_SUCCESS; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator allocator, + VmaDefragmentationContext context, 
+ VmaDefragmentationStats* pStats) +{ + VMA_ASSERT(allocator && context); + + VMA_DEBUG_LOG("vmaEndDefragmentation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if (pStats) + context->GetStats(*pStats); + vma_delete(allocator, context); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ + VMA_ASSERT(context && pPassInfo); + + VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return context->DefragmentPassBegin(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ + VMA_ASSERT(context && pPassInfo); + + VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return context->DefragmentPassEnd(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( + VmaAllocator allocator, + VmaAllocation allocation, + VkBuffer buffer) +{ + VMA_ASSERT(allocator && allocation && buffer); + + VMA_DEBUG_LOG("vmaBindBufferMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindBufferMemory(allocation, 0, buffer, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer buffer, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && buffer); + + VMA_DEBUG_LOG("vmaBindBufferMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindBufferMemory(allocation, allocationLocalOffset, buffer, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( + VmaAllocator allocator, + VmaAllocation allocation, + VkImage image) +{ + VMA_ASSERT(allocator && allocation && image); + + VMA_DEBUG_LOG("vmaBindImageMemory"); + + 
VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindImageMemory(allocation, 0, image, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkImage image, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && image); + + VMA_DEBUG_LOG("vmaBindImageMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindImageMemory(allocation, allocationLocalOffset, image, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkBuffer* pBuffer, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && pBuffer && pAllocation); + + if(pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pBuffer = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if(res >= 0) + { + // 2. vkGetBufferMemoryRequirements. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + // 3. Allocate memory using allocator. 
+ res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + *pBuffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind buffer with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. + #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer* pBuffer, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && VmaIsPow2(minAlignment) && pBuffer && pAllocation); + + if(pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + 
!allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateBufferWithAlignment"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pBuffer = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if(res >= 0) + { + // 2. vkGetBufferMemoryRequirements. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + // 2a. Include minAlignment + vkMemReq.alignment = VMA_MAX(vkMemReq.alignment, minAlignment); + + // 3. Allocate memory using allocator. + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + *pBuffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind buffer with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. 
+ #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + return vmaCreateAliasingBuffer2(allocator, allocation, 0, pBufferCreateInfo, pBuffer); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); + VMA_ASSERT(allocationLocalOffset + pBufferCreateInfo->size <= allocation->GetSize()); + + VMA_DEBUG_LOG("vmaCreateAliasingBuffer2"); + + *pBuffer = VK_NULL_HANDLE; + + if (pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was 
not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if (res >= 0) + { + // 2. Bind buffer with memory. + res = allocator->BindBufferMemory(allocation, allocationLocalOffset, *pBuffer, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( + VmaAllocator allocator, + VkBuffer buffer, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator); + + if(buffer == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(buffer != VK_NULL_HANDLE) + { + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks()); + } + + if(allocation != VK_NULL_HANDLE) + { + allocator->FreeMemory( + 1, // allocationCount + &allocation); + } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( + VmaAllocator allocator, + const VkImageCreateInfo* pImageCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkImage* pImage, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pAllocationCreateInfo && pImage && pAllocation); + + if(pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pImage = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkImage. 
+ VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if(res == VK_SUCCESS) + { + VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? + VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : + VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR; + + // 2. Allocate memory using allocator. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetImageMemoryRequirements(*pImage, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + VK_NULL_HANDLE, // dedicatedBuffer + *pImage, // dedicatedImage + VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + suballocType, + 1, // allocationCount + pAllocation); + + if(res == VK_SUCCESS) + { + // 3. Bind image with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); + } + if(res == VK_SUCCESS) + { + // All steps succeeded. 
+ #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitImageUsage(*pImageCreateInfo); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + *pImage = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + *pImage = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + return vmaCreateAliasingImage2(allocator, allocation, 0, pImageCreateInfo, pImage); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation); + + *pImage = VK_NULL_HANDLE; + + VMA_DEBUG_LOG("vmaCreateImage2"); + + if (pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkImage. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if (res >= 0) + { + // 2. Bind image with memory. 
+ res = allocator->BindImageMemory(allocation, allocationLocalOffset, *pImage, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation) +{ + VMA_ASSERT(allocator); + + if(image == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(image != VK_NULL_HANDLE) + { + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks()); + } + if(allocation != VK_NULL_HANDLE) + { + allocator->FreeMemory( + 1, // allocationCount + &allocation); + } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( + const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualBlock VMA_NULLABLE * VMA_NOT_NULL pVirtualBlock) +{ + VMA_ASSERT(pCreateInfo && pVirtualBlock); + VMA_ASSERT(pCreateInfo->size > 0); + VMA_DEBUG_LOG("vmaCreateVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + *pVirtualBlock = vma_new(pCreateInfo->pAllocationCallbacks, VmaVirtualBlock_T)(*pCreateInfo); + VkResult res = (*pVirtualBlock)->Init(); + if(res < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pVirtualBlock); + *pVirtualBlock = VK_NULL_HANDLE; + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(VmaVirtualBlock VMA_NULLABLE virtualBlock) +{ + if(virtualBlock != VK_NULL_HANDLE) + { + VMA_DEBUG_LOG("vmaDestroyVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + VkAllocationCallbacks allocationCallbacks = virtualBlock->m_AllocationCallbacks; // Have to copy the callbacks when destroying. 
+ vma_delete(&allocationCallbacks, virtualBlock); + } +} + +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaIsVirtualBlockEmpty"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->IsEmpty() ? VK_TRUE : VK_FALSE; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pVirtualAllocInfo != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualAllocationInfo"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetAllocationInfo(allocation, *pVirtualAllocInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, + VkDeviceSize* VMA_NULLABLE pOffset) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pCreateInfo != VMA_NULL && pAllocation != VMA_NULL); + VMA_DEBUG_LOG("vmaVirtualAllocate"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(VmaVirtualBlock VMA_NOT_NULL virtualBlock, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation) +{ + if(allocation != VK_NULL_HANDLE) + { + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaVirtualFree"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->Free(allocation); + } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaClearVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->Clear(); +} + +VMA_CALL_PRE void VMA_CALL_POST 
vmaSetVirtualAllocationUserData(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, void* VMA_NULLABLE pUserData) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaSetVirtualAllocationUserData"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->SetAllocationUserData(allocation, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->CalculateDetailedStatistics(*pStats); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && ppStatsString != VMA_NULL); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + const VkAllocationCallbacks* allocationCallbacks = virtualBlock->GetAllocationCallbacks(); + VmaStringBuilder sb(allocationCallbacks); + virtualBlock->BuildStatsString(detailedMap != VK_FALSE, sb); + *ppStatsString = VmaCreateStringCopy(allocationCallbacks, sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE pStatsString) +{ + if(pStatsString != VMA_NULL) + { + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + 
VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString); + } +} +#if VMA_EXTERNAL_MEMORY_WIN32 +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle) +{ + VMA_ASSERT(allocator && allocation && pHandle); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return allocation->GetWin32Handle(allocator, hTargetProcess, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_PUBLIC_INTERFACE +#endif // VMA_IMPLEMENTATION + +/** +\page quick_start Quick start + +\section quick_start_project_setup Project setup + +Vulkan Memory Allocator comes in the form of a "stb-style" single header file. +While you can pull the entire repository (e.g. as a Git submodule) and a CMake script is also provided, +you don't need to build it as a separate library project. +You can add file "vk_mem_alloc.h" directly to your project and submit it to code repository next to your other source files. + +"Single header" doesn't mean that everything is contained in C/C++ declarations, +like it tends to be in case of inline functions or C++ templates. +It means that implementation is bundled with interface in a single file and needs to be extracted using preprocessor macro. +If you don't do it properly, it will result in linker errors. + +To do it properly: + +-# Include "vk_mem_alloc.h" file in each CPP file where you want to use the library. + This includes declarations of all members of the library. +-# In exactly one CPP file define following macro before this include. + It enables also internal definitions. + +\code +#define VMA_IMPLEMENTATION +#include "vk_mem_alloc.h" +\endcode + +It may be a good idea to create dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp". + +This library includes header `<vulkan/vulkan.h>`, which in turn +includes `<windows.h>` on Windows.
If you need some specific macros defined +before including these headers (like `WIN32_LEAN_AND_MEAN` or +`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define +them before every `#include` of this library. +It may be a good idea to create a dedicated header file for this purpose, e.g. "VmaUsage.h", +that will be included in other source files instead of VMA header directly. + +This library is written in C++, but has C-compatible interface. +Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but full +implementation with `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C. +Some features of C++14 are used and required. Features of C++20 are used optionally when available. +Some headers of standard C and C++ library are used, but STL containers, RTTI, or C++ exceptions are not used. + + +\section quick_start_initialization Initialization + +VMA offers library interface in a style similar to Vulkan, with object handles like #VmaAllocation, +structures describing parameters of objects to be created like #VmaAllocationCreateInfo, +and error codes returned from functions using `VkResult` type. + +The first and the main object that needs to be created is #VmaAllocator. +It represents the initialization of the entire library. +Only one such object should be created per `VkDevice`. +You should create it at program startup, after `VkDevice` was created, and before any device memory allocation needs to be made. +It must be destroyed before `VkDevice` is destroyed. + +At program startup: + +-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, `VkDevice` objects. +-# Fill VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create #VmaAllocator object. + +Only members `physicalDevice`, `device`, `instance` are required.
+However, you should inform the library which Vulkan version you use by setting +VmaAllocatorCreateInfo::vulkanApiVersion and which extensions you enabled +by setting VmaAllocatorCreateInfo::flags. +Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions. +See below for details. + +\subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version + +VMA supports Vulkan versions down to 1.0, for backward compatibility. +If you want to use a higher version, you need to inform the library about it. +This is a two-step process. + +Step 1: Compile time. By default, VMA compiles with code supporting the highest +Vulkan version found in the included `<vulkan/vulkan.h>` that is also supported by the library. +If this is OK, you don't need to do anything. +However, if you want to compile VMA as if only some lower Vulkan version was available, +define macro `VMA_VULKAN_VERSION` before every `#include "vk_mem_alloc.h"`. +It should have decimal numeric value in form of ABBBCCC, where A = major, BBB = minor, CCC = patch Vulkan version. +For example, to compile against Vulkan 1.2: + +\code +#define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2 +#include "vk_mem_alloc.h" +\endcode + +Step 2: Runtime. Even when compiled with higher Vulkan version available, +VMA can use only features of a lower version, which is configurable during creation of the #VmaAllocator object. +By default, only Vulkan 1.0 is used. +To initialize the allocator with support for higher Vulkan version, you need to set member +VmaAllocatorCreateInfo::vulkanApiVersion to an appropriate value, e.g. using constants like `VK_API_VERSION_1_2`. +See code sample below. + +\subsection quick_start_initialization_importing_vulkan_functions Importing Vulkan functions + +You may need to configure importing Vulkan functions. There are 3 ways to do this: + +-# **If you link with Vulkan static library** (e.g. "vulkan-1.lib" on Windows): + - You don't need to do anything.
+ - VMA will use these, as macro `VMA_STATIC_VULKAN_FUNCTIONS` is defined to 1 by default. +-# **If you want VMA to fetch pointers to Vulkan functions dynamically** using `vkGetInstanceProcAddr`, + `vkGetDeviceProcAddr` (this is the option presented in the example below): + - Define `VMA_STATIC_VULKAN_FUNCTIONS` to 0, `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 1. + - Provide pointers to these two functions via VmaVulkanFunctions::vkGetInstanceProcAddr, + VmaVulkanFunctions::vkGetDeviceProcAddr. + - The library will fetch pointers to all other functions it needs internally. +-# **If you fetch pointers to all Vulkan functions in a custom way**, e.g. using some loader like + [Volk](https://github.com/zeux/volk): + - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0. + - Pass these pointers via structure #VmaVulkanFunctions. + +\subsection quick_start_initialization_enabling_extensions Enabling extensions + +VMA can automatically use following Vulkan extensions. +If you found them available on the selected physical device and you enabled them +while creating `VkInstance` / `VkDevice` object, inform VMA about their availability +by setting appropriate flags in VmaAllocatorCreateInfo::flags. 
+ +Vulkan extension | VMA flag +------------------------------|----------------------------------------------------- +VK_KHR_dedicated_allocation | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT +VK_KHR_bind_memory2 | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT +VK_KHR_maintenance4 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT +VK_KHR_maintenance5 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT +VK_EXT_memory_budget | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT +VK_KHR_buffer_device_address | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +VK_EXT_memory_priority | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +VK_KHR_external_memory_win32 | #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT + +Example with fetching pointers to Vulkan functions dynamically: + +\code +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#include "vk_mem_alloc.h" + +... + +VmaVulkanFunctions vulkanFunctions = {}; +vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr; +vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr; + +VmaAllocatorCreateInfo allocatorCreateInfo = {}; +allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; +allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2; +allocatorCreateInfo.physicalDevice = physicalDevice; +allocatorCreateInfo.device = device; +allocatorCreateInfo.instance = instance; +allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions; + +VmaAllocator allocator; +vmaCreateAllocator(&allocatorCreateInfo, &allocator); + +// Entire program... + +// At the end, don't forget to: +vmaDestroyAllocator(allocator); +\endcode + + +\subsection quick_start_initialization_other_config Other configuration options + +There are additional configuration options available through preprocessor macros that you can define +before including VMA header and through parameters passed in #VmaAllocatorCreateInfo. 
+They include a possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`), +callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`), +or your custom `VMA_ASSERT` macro, among others. +For more information, see: @ref configuration. + + +\section quick_start_resource_allocation Resource allocation + +When you want to create a buffer or image: + +-# Fill `VkBufferCreateInfo` / `VkImageCreateInfo` structure. +-# Fill VmaAllocationCreateInfo structure. +-# Call vmaCreateBuffer() / vmaCreateImage() to get `VkBuffer`/`VkImage` with memory + already allocated and bound to it, plus a #VmaAllocation object that represents its underlying memory. + +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +Don't forget to destroy your buffer and allocation objects when no longer needed: + +\code +vmaDestroyBuffer(allocator, buffer, allocation); +\endcode + +If you need to map the buffer, you must set flag +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. +There are many additional parameters that can control the choice of memory type to be used for the allocation +and other features. +For more information, see documentation chapters: @ref choosing_memory_type, @ref memory_mapping. + + +\page choosing_memory_type Choosing memory type + +Physical devices in Vulkan support various combinations of memory heaps and +types. Help with choosing correct and optimal memory type for your specific +resource is one of the key features of this library.
You can use it by filling +appropriate members of VmaAllocationCreateInfo structure, as described below. +You can also combine multiple methods. + +-# If you just want to find memory type index that meets your requirements, you + can use function: vmaFindMemoryTypeIndexForBufferInfo(), + vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex(). +-# If you want to allocate a region of device memory without association with any + specific image or buffer, you can use function vmaAllocateMemory(). Usage of + this function is not recommended and usually not needed. + vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once, + which may be useful for sparse binding. +-# If you already have a buffer or an image created, you want to allocate memory + for it and then you will bind it yourself, you can use function + vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). + For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() + or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). +-# If you want to create a buffer or an image, allocate memory for it, and bind + them together, all in one call, you can use function vmaCreateBuffer(), + vmaCreateImage(). + This is the easiest and recommended way to use this library! + +When using 3. or 4., the library internally queries Vulkan for memory types +supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) +and uses only one of these types. + +If no memory type can be found that meets all the requirements, these functions +return `VK_ERROR_FEATURE_NOT_PRESENT`. + +You can leave VmaAllocationCreateInfo structure completely filled with zeros. +It means no requirements are specified for memory type. +It is valid, although not very useful. 
+ +\section choosing_memory_type_usage Usage + +The easiest way to specify memory requirements is to fill member +VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage. +It defines high level, common usage types. +Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select best memory type for your resource automatically. + +For example, if you want to create a uniform buffer that will be filled using +transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can +do it using following code. The buffer will most likely end up in a memory type with +`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device. + +\code +VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufferInfo.size = 65536; +bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory +on systems with discrete graphics card that have the memories separate, you can use +#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST. + +When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory, +you also need to specify one of the host access flags: +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +This will help the library decide about preferred memory type to ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +so you can map it. + +For example, a staging buffer that will be filled via mapped pointer and then +used as a source of transfer to the buffer described previously can be created like this. 
+It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT` +but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM). + +\code +VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +stagingBufferInfo.size = 65536; +stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo stagingAllocInfo = {}; +stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO; +stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer stagingBuffer; +VmaAllocation stagingAllocation; +vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr); +\endcode + +For more examples of creating different kinds of resources, see chapter \ref usage_patterns. +See also: @ref memory_mapping. + +Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows +about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed, +so they work with functions like: vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc. +If you allocate raw memory using function vmaAllocateMemory(), you have to use other means of selecting +memory type, as described below. + +\note +Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`, +`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`) +are still available and work same way as in previous versions of the library +for backward compatibility, but they are deprecated. + +\section choosing_memory_type_required_preferred_flags Required and preferred flags + +You can specify more detailed requirements by filling members +VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags +with a combination of bits from enum `VkMemoryPropertyFlags`. 
For example, +if you want to create a buffer that will be persistently mapped on host (so it +must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`, +use following code: + +\code +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; +allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; +allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +A memory type is chosen that has all the required flags and as many preferred +flags set as possible. + +Value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags, +plus some extra "magic" (heuristics). + +\section choosing_memory_type_explicit_memory_types Explicit memory types + +If you inspected memory types available on the physical device and you have +a preference for memory types that you want to use, you can fill member +VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set +means that a memory type with that index is allowed to be used for the +allocation. Special value 0, just like `UINT32_MAX`, means there are no +restrictions to memory type index. + +Please note that this member is NOT just a memory type index. +Still you can use it to choose just one, specific memory type. +For example, if you already determined that your buffer should be created in +memory type 2, use following code: + +\code +uint32_t memoryTypeIndex = 2; + +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.memoryTypeBits = 1u << memoryTypeIndex; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); +\endcode + +You can also use this parameter to exclude some memory types. 
+If you inspect memory heaps and types available on the current physical device and +you determine that for some reason you don't want to use a specific memory type for the allocation, +you can enable automatic memory type selection but exclude certain memory type or types +by setting all bits of `memoryTypeBits` to 1 except the ones you choose. + +\code +// ... +uint32_t excludedMemoryTypeIndex = 2; +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex); +// ... +\endcode + + +\section choosing_memory_type_custom_memory_pools Custom memory pools + +If you allocate from custom memory pool, all the ways of specifying memory +requirements described above are not applicable and the aforementioned members +of VmaAllocationCreateInfo structure are ignored. Memory type is selected +explicitly when creating the pool and then used to make all the allocations from +that pool. For further details, see \ref custom_memory_pools. + +\section choosing_memory_type_dedicated_allocations Dedicated allocations + +Memory for allocations is reserved out of larger block of `VkDeviceMemory` +allocated from Vulkan internally. That is the main feature of this whole library. +You can still request a separate memory block to be created for an allocation, +just like you would do in a trivial solution without using any allocator. +In that case, a buffer or image is always bound to that memory at offset 0. +This is called a "dedicated allocation". +You can explicitly request it by using flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +The library can also internally decide to use dedicated allocation in some cases, e.g.: + +- When the size of the allocation is large. +- When [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled + and it reports that dedicated allocation is required or recommended for the resource. 
+- When allocation of next big memory block fails due to not enough device memory, + but allocation with the exact requested size succeeds. + + +\page memory_mapping Memory mapping + +To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`, +to be able to read from it or write to it in CPU code. +Mapping is possible only of memory allocated from a memory type that has +`VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +Functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose. +You can use them directly with memory allocated by this library, +but it is not recommended because of following issue: +Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed. +This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan. +It is also not thread-safe. +Because of this, Vulkan Memory Allocator provides following facilities: + +\note If you want to be able to map an allocation, you need to specify one of the flags +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable +when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values. +For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable, +but these flags can still be used for consistency. + +\section memory_mapping_copy_functions Copy functions + +The easiest way to copy data from a host pointer to an allocation is to use convenience function vmaCopyMemoryToAllocation(). +It automatically maps the Vulkan memory temporarily (if not already mapped), performs `memcpy`, +and calls `vkFlushMappedMemoryRanges` (if required - if memory type is not `HOST_COHERENT`). + +It is also the safest one, because using `memcpy` avoids a risk of accidentally introducing memory reads +(e.g. 
by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`. + +\code +struct ConstantBuffer +{ + ... +}; +ConstantBuffer constantBufferData = ... + +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer buf; +VmaAllocation alloc; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); + +vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer)); +\endcode + +Copy in the other direction - from an allocation to a host pointer can be performed the same way using function vmaCopyAllocationToMemory(). + +\section memory_mapping_mapping_functions Mapping functions + +The library provides following functions for mapping of a specific allocation: vmaMapMemory(), vmaUnmapMemory(). +They are safer and more convenient to use than standard Vulkan functions. +You can map an allocation multiple times simultaneously - mapping is reference-counted internally. +You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block. +The way it is implemented is that the library always maps entire memory block, not just region of the allocation. +For further details, see description of vmaMapMemory() function. +Example: + +\code +// Having these objects initialized: +struct ConstantBuffer +{ + ... +}; +ConstantBuffer constantBufferData = ... + +VmaAllocator allocator = ... +VkBuffer constantBuffer = ... +VmaAllocation constantBufferAllocation = ... 
+ +// You can map and fill your buffer using following code: + +void* mappedData; +vmaMapMemory(allocator, constantBufferAllocation, &mappedData); +memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); +vmaUnmapMemory(allocator, constantBufferAllocation); +\endcode + +When mapping, you may see a warning from Vulkan validation layer similar to this one: + +Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used. + +It happens because the library maps entire `VkDeviceMemory` block, where different +types of images and buffers may end up together, especially on GPUs with unified memory like Intel. +You can safely ignore it if you are sure you access only memory of the intended +object that you wanted to map. + + +\section memory_mapping_persistently_mapped_memory Persistently mapped memory + +Keeping your memory persistently mapped is generally OK in Vulkan. +You don't need to unmap it before using its data on the GPU. +The library provides a special feature designed for that: +Allocations made with #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in +VmaAllocationCreateInfo::flags stay mapped all the time, +so you can just access CPU pointer to it any time +without a need to call any "map" or "unmap" function. +Example: + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +// Buffer is already mapped. 
You can access its memory. +memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); +\endcode + +\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up +in a mappable memory type. +For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation. +For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading. + +\section memory_mapping_cache_control Cache flush and invalidate + +Memory in Vulkan doesn't need to be unmapped before using it on GPU, +but unless a memory type has `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set, +you need to manually **invalidate** cache before reading of mapped pointer +and **flush** cache after writing to mapped pointer. +Map/unmap operations don't do that automatically. +Vulkan provides following functions for this purpose: `vkFlushMappedMemoryRanges()`, +`vkInvalidateMappedMemoryRanges()`, but this library provides more convenient +functions that refer to given allocation object: vmaFlushAllocation(), +vmaInvalidateAllocation(), +or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations(). + +Regions of memory specified for flush/invalidate must be aligned to +`VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library. +In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocations +within blocks are aligned to this value, so their offsets are always a multiple of +`nonCoherentAtomSize` and two different allocations never share same "line" of this size. + +Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA) +currently provide `HOST_COHERENT` flag on all memory types that are +`HOST_VISIBLE`, so on PC you may not need to bother. 
+ + +\page staying_within_budget Staying within budget + +When developing a graphics-intensive game or program, it is important to avoid allocating +more GPU memory than is physically available. When the memory is over-committed, +various bad things can happen, depending on the specific GPU, graphics driver, and +operating system: + +- It may just work without any problems. +- The application may slow down because some memory blocks are moved to system RAM + and the GPU has to access them through PCI Express bus. +- A new allocation may take a very long time to complete, even a few seconds, and possibly + freeze entire system. +- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +- It may even result in GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST` + returned somewhere later. + +\section staying_within_budget_querying_for_budget Querying for budget + +To query for current memory usage and available budget, use function vmaGetHeapBudgets(). +Returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap. + +Please note that this function returns different information and works faster than +vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every +allocation, while vmaCalculateStatistics() is intended to be used rarely, +only to obtain statistical information, e.g. for debugging purposes. + +It is recommended to use VK_EXT_memory_budget device extension to obtain information +about the budget from Vulkan device. VMA is able to use this extension automatically. +When not enabled, the allocator behaves same way, but then it estimates current usage +and available budget based on its internal information and Vulkan memory heap sizes, +which may be less precise. In order to use this extension: + +1. Make sure extensions VK_EXT_memory_budget and VK_KHR_get_physical_device_properties2 + required by it are available and enable them. 
Please note that the first is a device + extension and the second is instance extension! +2. Use flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating #VmaAllocator object. +3. Make sure to call vmaSetCurrentFrameIndex() every frame. Budget is queried from + Vulkan inside of it to avoid overhead of querying it with every allocation. + +\section staying_within_budget_controlling_memory_usage Controlling memory usage + +There are many ways in which you can try to stay within the budget. + +First, when making new allocation requires allocating a new memory block, the library +tries not to exceed the budget automatically. If a block with default recommended size +(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even +dedicated memory for just this resource. + +If the size of the requested resource plus current memory usage is more than the +budget, by default the library still tries to create it, leaving it to the Vulkan +implementation whether the allocation succeeds or fails. You can change this behavior +by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is +not made if it would exceed the budget or if the budget is already exceeded. +VMA then tries to make the allocation from the next eligible Vulkan memory type. +If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag +when creating resources that are not essential for the application (e.g. the texture +of a specific object) and not to pass it when creating critically important resources +(e.g. render targets). + +On AMD graphics cards there is a custom vendor extension available: VK_AMD_memory_overallocation_behavior +that allows to control the behavior of the Vulkan implementation in out-of-memory cases - +whether it should fail with an error code or still allow the allocation. 
+Usage of this extension involves only passing extra structure on Vulkan device creation, +so it is out of scope of this library. + +Finally, you can also use #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure +a new allocation is created only when it fits inside one of the existing memory blocks. +If it would require allocating a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +This also ensures that the function call is very fast because it never goes to Vulkan +to obtain a new block. + +\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount +set to more than 0 will currently try to allocate memory blocks without checking whether they +fit within budget. + + +\page resource_aliasing Resource aliasing (overlap) + +New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory +management, give an opportunity to alias (overlap) multiple resources in the +same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL). +It can be useful to save video memory, but it must be used with caution. + +For example, if you know the flow of your whole render frame in advance, you +are going to use some intermediate textures or buffers only during a small range of render passes, +and you know these ranges don't overlap in time, you can bind these resources to +the same place in memory, even if they have completely different parameters (width, height, format etc.). + +![Resource aliasing (overlap)](../gfx/Aliasing.png) + +Such scenario is possible using VMA, but you need to create your images manually. +Then you need to calculate parameters of an allocation to be made using formula: + +- allocation size = max(size of each image) +- allocation alignment = max(alignment of each image) +- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image) + +Following example shows two different images bound to the same place in memory, +allocated to fit largest of them. 
+ +\code +// A 512x512 texture to be sampled. +VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +img1CreateInfo.imageType = VK_IMAGE_TYPE_2D; +img1CreateInfo.extent.width = 512; +img1CreateInfo.extent.height = 512; +img1CreateInfo.extent.depth = 1; +img1CreateInfo.mipLevels = 10; +img1CreateInfo.arrayLayers = 1; +img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB; +img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; +img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +// A full screen texture to be used as color attachment. +VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +img2CreateInfo.imageType = VK_IMAGE_TYPE_2D; +img2CreateInfo.extent.width = 1920; +img2CreateInfo.extent.height = 1080; +img2CreateInfo.extent.depth = 1; +img2CreateInfo.mipLevels = 1; +img2CreateInfo.arrayLayers = 1; +img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VkImage img1; +res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1); +VkImage img2; +res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2); + +VkMemoryRequirements img1MemReq; +vkGetImageMemoryRequirements(device, img1, &img1MemReq); +VkMemoryRequirements img2MemReq; +vkGetImageMemoryRequirements(device, img2, &img2MemReq); + +VkMemoryRequirements finalMemReq = {}; +finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size); +finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment); +finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits; +// Validate if(finalMemReq.memoryTypeBits != 0) + +VmaAllocationCreateInfo allocCreateInfo = 
{}; +allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + +VmaAllocation alloc; +res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr); + +res = vmaBindImageMemory(allocator, alloc, img1); +res = vmaBindImageMemory(allocator, alloc, img2); + +// You can use img1, img2 here, but not at the same time! + +vmaFreeMemory(allocator, alloc); +vkDestroyImage(device, img2, nullptr); +vkDestroyImage(device, img1, nullptr); +\endcode + +VMA also provides convenience functions that create a buffer or image and bind it to memory +represented by an existing #VmaAllocation: +vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(), +vmaCreateAliasingImage(), vmaCreateAliasingImage2(). +Versions with "2" offer additional parameter `allocationLocalOffset`. + +Remember that using resources that alias in memory requires proper synchronization. +You need to issue a memory barrier to make sure commands that use `img1` and `img2` +don't overlap on GPU timeline. +You also need to treat a resource after aliasing as uninitialized - containing garbage data. +For example, if you use `img1` and then want to use `img2`, you need to issue +an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`. + +Additional considerations: + +- Vulkan also allows to interpret contents of memory between aliasing resources consistently in some cases. +See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ALIAS_BIT` flag. +- You can create more complex layout where different images and buffers are bound +at different offsets inside one large allocation. For example, one can imagine +a big texture used in some render passes, aliasing with a set of many small buffers +used in some further passes. To bind a resource at non-zero offset in an allocation, +use vmaBindBufferMemory2() / vmaBindImageMemory2(). 
+- Before allocating memory for the resources you want to alias, check `memoryTypeBits` +returned in memory requirements of each resource to make sure the bits overlap. +Some GPUs may expose multiple memory types suitable e.g. only for buffers or +images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your +resources may be disjoint. Aliasing them is not possible in that case. + + +\page custom_memory_pools Custom memory pools + +A memory pool contains a number of `VkDeviceMemory` blocks. +The library automatically creates and manages default pool for each memory type available on the device. +Default memory pool automatically grows in size. +Size of allocated blocks is also variable and managed automatically. +You are using default pools whenever you leave VmaAllocationCreateInfo::pool = null. + +You can create custom pool and allocate memory out of it. +It can be useful if you want to: + +- Keep certain kind of allocations separate from others. +- Enforce particular, fixed size of Vulkan memory blocks. +- Limit maximum amount of Vulkan memory allocated for that pool. +- Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool. +- Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in + #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain. +- Perform defragmentation on a specific subset of your allocations. + +To use custom memory pools: + +-# Fill VmaPoolCreateInfo structure. +-# Call vmaCreatePool() to obtain #VmaPool handle. +-# When making an allocation, set VmaAllocationCreateInfo::pool to this handle. + You don't need to specify any other parameters of this structure, like `usage`. + +Example: + +\code +// Find memoryTypeIndex for the pool. +VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +sampleBufCreateInfo.size = 0x10000; // Doesn't matter. 
+sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo sampleAllocCreateInfo = {}; +sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator, + &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex); +// Check res... + +// Create a pool that can have at most 2 blocks, 128 MiB each. +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +poolCreateInfo.blockSize = 128ull * 1024 * 1024; +poolCreateInfo.maxBlockCount = 2; + +VmaPool pool; +res = vmaCreatePool(allocator, &poolCreateInfo, &pool); +// Check res... + +// Allocate a buffer out of it. +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 1024; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.pool = pool; + +VkBuffer buf; +VmaAllocation alloc; +res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); +// Check res... +\endcode + +You have to free all allocations made from this pool before destroying it. + +\code +vmaDestroyBuffer(allocator, buf, alloc); +vmaDestroyPool(allocator, pool); +\endcode + +New versions of this library support creating dedicated allocations in custom pools. +It is supported only when VmaPoolCreateInfo::blockSize = 0. +To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and +VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + + +\section custom_memory_pools_MemTypeIndex Choosing memory type index + +When creating a pool, you must explicitly specify memory type index. +To find the one suitable for your buffers or images, you can use helper functions +vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo(). 
+You need to provide structures with example parameters of buffers or images +that you are going to create in that pool. + +\code +VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +exampleBufCreateInfo.size = 1024; // Doesn't matter +exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +uint32_t memTypeIndex; +vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex); + +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +// ... +\endcode + +When creating buffers/images allocated in that pool, provide following parameters: + +- `VkBufferCreateInfo`: Prefer to pass same parameters as above. + Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior. + Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers + or the other way around. +- VmaAllocationCreateInfo: You don't need to pass same parameters. Fill only `pool` member. + Other members are ignored anyway. + + +\section custom_memory_pools_when_not_use When not to use custom pools + +Custom pools are commonly overused by VMA users. +While it may feel natural to keep some logical groups of resources separate in memory, +in most cases it does more harm than good. +Using custom pool shouldn't be your first choice. +Instead, please make all allocations from default pools first and only use custom pools +if you can prove and measure that it is beneficial in some way, +e.g. it results in lower memory usage, better performance, etc. + +Using custom pools has disadvantages: + +- Each pool has its own collection of `VkDeviceMemory` blocks. + Some of them may be partially or even completely empty. 
+ Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory. +- You must manually choose specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex). + When using default pools, best memory type for each of your allocations can be selected automatically + using a carefully designed algorithm that works across all kinds of GPUs. +- If an allocation from a custom pool at specific memory type fails, entire allocation operation returns failure. + When using default pools, VMA tries another compatible memory type. +- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size, + while default pools start from small blocks and only allocate next blocks larger and larger + up to the preferred block size. + +Many of the common concerns can be addressed in a different way than using custom pools: + +- If you want to keep your allocations of certain size (small versus large) or certain lifetime (transient versus long lived) + separate, you likely don't need to. + VMA uses a high quality allocation algorithm that manages memory well in various cases. + Please measure and check if using custom pools provides a benefit. +- If you want to keep your images and buffers separate, you don't need to. + VMA respects `bufferImageGranularity` limit automatically. +- If you want to keep your mapped and not mapped allocations separate, you don't need to. + VMA respects `nonCoherentAtomSize` limit automatically. + It also maps only those `VkDeviceMemory` blocks that need to map any allocation. + It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory. +- If you want to choose a custom size for the default memory block, you can set it globally instead + using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize. 
+- If you want to select specific memory type for your allocation, + you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead. +- If you need to create a buffer with certain minimum alignment, you can still do it + using default pools with dedicated function vmaCreateBufferWithAlignment(). + + +\section linear_algorithm Linear allocation algorithm + +Each Vulkan memory block managed by this library has accompanying metadata that +keeps track of used and unused regions. By default, the metadata structure and +algorithm tries to find best place for new allocations among free regions to +optimize memory usage. This way you can allocate and free objects in any order. + +![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png) + +Sometimes there is a need to use simpler, linear allocation algorithm. You can +create custom pool that uses such algorithm by adding flag +#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating +#VmaPool object. Then an alternative metadata management is used. It always +creates new allocations after last one and doesn't reuse free regions after +allocations freed in the middle. It results in better allocation performance and +less memory consumed by metadata. + +![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png) + +With this one flag, you can create a custom pool that can be used in many ways: +free-at-once, stack, double stack, and ring buffer. See below for details. +You don't need to specify explicitly which of these options you are going to use - it is detected automatically. + +\subsection linear_algorithm_free_at_once Free-at-once + +In a pool that uses linear algorithm, you still need to free all the allocations +individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free +them in any order. New allocations are always made after last one - free space +in the middle is not reused. 
However, when you release all the allocations and +the pool becomes empty, allocation starts from the beginning again. This way you +can use linear algorithm to speed up creation of allocations that you are going +to release all at once. + +![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png) + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_stack Stack + +When you free an allocation that was created last, its space can be reused. +Thanks to this, if you always release allocations in the order opposite to their +creation (LIFO - Last In First Out), you can achieve behavior of a stack. + +![Stack](../gfx/Linear_allocator_4_stack.png) + +This mode is also available for pools created with VmaPoolCreateInfo::maxBlockCount +value that allows multiple memory blocks. + +\subsection linear_algorithm_double_stack Double stack + +The space reserved by a custom pool with linear algorithm may be used by two +stacks: + +- First, default one, growing up from offset 0. +- Second, "upper" one, growing down from the end towards lower offsets. + +To make allocation from the upper stack, add flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT +to VmaAllocationCreateInfo::flags. + +![Double stack](../gfx/Linear_allocator_7_double_stack.png) + +Double stack is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +When the two stacks' ends meet so there is not enough space between them for a +new allocation, such allocation fails with usual +`VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + +\subsection linear_algorithm_ring_buffer Ring buffer + +When you free some allocations from the beginning and there is not enough free space +for a new one at the end of a pool, allocator's "cursor" wraps around to the +beginning and starts allocation there. 
Thanks to this, if you always release +allocations in the same order as you created them (FIFO - First In First Out), +you can achieve behavior of a ring buffer / queue. + +![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png) + +Ring buffer is available only in pools with one memory block - +VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise behavior is undefined. + +\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page defragmentation Defragmentation + +Interleaved allocations and deallocations of many objects of varying size can +cause fragmentation over time, which can lead to a situation where the library is unable +to find a continuous range of free memory for a new allocation even though there is +enough free space, just scattered across many small free ranges between existing +allocations. + +To mitigate this problem, you can use defragmentation feature. +It doesn't happen automatically though and needs your cooperation, +because VMA is a low level library that only allocates memory. +It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures. +It cannot copy their contents as it doesn't record any commands to a command buffer. + +Example: + +\code +VmaDefragmentationInfo defragInfo = {}; +defragInfo.pool = myPool; +defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT; + +VmaDefragmentationContext defragCtx; +VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx); +// Check res... + +for(;;) +{ + VmaDefragmentationPassMoveInfo pass; + res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) + // Handle error... + + for(uint32_t i = 0; i < pass.moveCount; ++i) + { + // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents. 
+ VmaAllocationInfo allocInfo; + vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo); + MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData; + + // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset. + VkImageCreateInfo imgCreateInfo = ... + VkImage newImg; + res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg); + // Check res... + res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg); + // Check res... + + // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place. + vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...); + } + + // Make sure the copy commands finished executing. + vkWaitForFences(...); + + // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation. + for(uint32_t i = 0; i < pass.moveCount; ++i) + { + // ... + vkDestroyImage(device, resData->img, nullptr); + } + + // Update appropriate descriptors to point to the new places... + + res = vmaEndDefragmentationPass(allocator, defragCtx, &pass); + if(res == VK_SUCCESS) + break; + else if(res != VK_INCOMPLETE) + // Handle error... +} + +vmaEndDefragmentation(allocator, defragCtx, nullptr); +\endcode + +Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage() +create/destroy an allocation and a buffer/image at once, these are just a shortcut for +creating the resource, allocating memory, and binding them together. +Defragmentation works on memory allocations only. You must handle the rest manually. +Defragmentation is an iterative process that should repeat "passes" as long as related functions +return `VK_INCOMPLETE` not `VK_SUCCESS`. +In each pass: + +1. vmaBeginDefragmentationPass() function call: + - Calculates and returns the list of allocations to be moved in this pass. + Note this can be a time-consuming process. 
+ - Reserves destination memory for them by creating temporary destination allocations + that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo(). +2. Inside the pass, **you should**: + - Inspect the returned list of allocations to be moved. + - Create new buffers/images and bind them at the returned destination temporary allocations. + - Copy data from source to destination resources if necessary. + - Destroy the source buffers/images, but NOT their allocations. +3. vmaEndDefragmentationPass() function call: + - Frees the source memory reserved for the allocations that are moved. + - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory. + - Frees `VkDeviceMemory` blocks that became empty. + +Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter. +Defragmentation algorithm tries to move all suitable allocations. +You can, however, refuse to move some of them inside a defragmentation pass, by setting +`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. +This is not recommended and may result in suboptimal packing of the allocations after defragmentation. +If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool. + +Inside a pass, for each allocation that should be moved: + +- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`. + - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass(). +- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared, + filled, and used temporarily in each rendering frame, you can just recreate this image + without copying its data. 
+- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU + using `memcpy()`. +- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE. + This will cancel the move. + - vmaEndDefragmentationPass() will then free the destination memory + not the source memory of the allocation, leaving it unchanged. +- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for long time), + you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY. + - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object. + +You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool +(like in the example above) or all the default pools by setting this member to null. + +Defragmentation is always performed in each pool separately. +Allocations are never moved between different Vulkan memory types. +The size of the destination memory reserved for a moved allocation is the same as the original one. +Alignment of an allocation as it was determined using `vkGetBufferMemoryRequirements()` etc. is also respected after defragmentation. +Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones. + +You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved +in each pass, e.g. to call it in sync with render frames and not to experience too big hitches. +See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass. 
+ +It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA +usage, possibly from multiple threads, with the exception that allocations +returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended. + +Mapping is preserved on allocations that are moved during defragmentation. +Whether through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations +are mapped at their new place. Of course, pointer to the mapped data changes, so it needs to be queried +using VmaAllocationInfo::pMappedData. + +\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT. + + +\page statistics Statistics + +This library contains several functions that return information about its internal state, +especially the amount of memory allocated from Vulkan. + +\section statistics_numeric_statistics Numeric statistics + +If you need to obtain basic statistics about memory usage per heap, together with current budget, +you can call function vmaGetHeapBudgets() and inspect structure #VmaBudget. +This is useful to keep track of memory usage and stay within budget +(see also \ref staying_within_budget). +Example: + +\code +uint32_t heapIndex = ... 
+ +VmaBudget budgets[VK_MAX_MEMORY_HEAPS]; +vmaGetHeapBudgets(allocator, budgets); + +printf("My heap currently has %u allocations taking %llu B,\n", + budgets[heapIndex].statistics.allocationCount, + budgets[heapIndex].statistics.allocationBytes); +printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n", + budgets[heapIndex].statistics.blockCount, + budgets[heapIndex].statistics.blockBytes); +printf("Vulkan reports total usage %llu B with budget %llu B.\n", + budgets[heapIndex].usage, + budgets[heapIndex].budget); +\endcode + +You can query for more detailed statistics per memory heap, type, and totals, +including minimum and maximum allocation size and unused range size, +by calling function vmaCalculateStatistics() and inspecting structure #VmaTotalStatistics. +This function is slower though, as it has to traverse all the internal data structures, +so it should be used only for debugging purposes. + +You can query for statistics of a custom pool using function vmaGetPoolStatistics() +or vmaCalculatePoolStatistics(). + +You can query for information about a specific allocation using function vmaGetAllocationInfo(). +It fills structure #VmaAllocationInfo. + +\section statistics_json_dump JSON dump + +You can dump internal state of the allocator to a string in JSON format using function vmaBuildStatsString(). +The result is guaranteed to be correct JSON. +It uses ANSI encoding. +Any strings provided by user (see [Allocation names](@ref allocation_names)) +are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding, +this JSON string can be treated as using this encoding. +It must be freed using function vmaFreeStatsString(). + +The format of this JSON string is not part of official documentation of the library, +but it will not change in backward-incompatible way without increasing library major version number +and appropriate mention in changelog.
+ +The JSON string contains all the data that can be obtained using vmaCalculateStatistics(). +It can also contain detailed map of allocated memory blocks and their regions - +free and occupied by allocations. +This allows e.g. to visualize the memory or assess fragmentation. + + +\page allocation_annotation Allocation names and user data + +\section allocation_user_data Allocation user data + +You can annotate allocations with your own information, e.g. for debugging purposes. +To do that, fill VmaAllocationCreateInfo::pUserData field when creating +an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer, +some handle, index, key, ordinal number or any other value that would associate +the allocation with your custom metadata. +It is useful to identify appropriate data structures in your engine given #VmaAllocation, +e.g. when doing \ref defragmentation. + +\code +VkBufferCreateInfo bufCreateInfo = ... + +MyBufferMetadata* pMetadata = CreateBufferMetadata(); + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.pUserData = pMetadata; + +VkBuffer buffer; +VmaAllocation allocation; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr); +\endcode + +The pointer may be later retrieved as VmaAllocationInfo::pUserData: + +\code +VmaAllocationInfo allocInfo; +vmaGetAllocationInfo(allocator, allocation, &allocInfo); +MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; +\endcode + +It can also be changed using function vmaSetAllocationUserData(). + +Values of (non-zero) allocations' `pUserData` are printed in JSON report created by +vmaBuildStatsString() in hexadecimal form. + +\section allocation_names Allocation names + +An allocation can also carry a null-terminated string, giving a name to the allocation. +To set it, call vmaSetAllocationName(). 
+The library creates internal copy of the string, so the pointer you pass doesn't need +to be valid for whole lifetime of the allocation. You can free it after the call. + +\code +std::string imageName = "Texture: "; +imageName += fileName; +vmaSetAllocationName(allocator, allocation, imageName.c_str()); +\endcode + +The string can be later retrieved by inspecting VmaAllocationInfo::pName. +It is also printed in JSON report created by vmaBuildStatsString(). + +\note Setting string name to VMA allocation doesn't automatically set it to the Vulkan buffer or image created with it. +You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library. + + +\page virtual_allocator Virtual allocator + +As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of "virtual allocator". +It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block". +You can use it to allocate your own memory or other objects, even completely unrelated to Vulkan. +A common use case is sub-allocation of pieces of one large GPU buffer. + +\section virtual_allocator_creating_virtual_block Creating virtual block + +To use this functionality, there is no main "allocator" object. +You don't need to have #VmaAllocator object created. +All you need to do is to create a separate #VmaVirtualBlock object for each block of memory you want to be managed by the allocator: + +-# Fill in #VmaVirtualBlockCreateInfo structure. +-# Call vmaCreateVirtualBlock(). Get new #VmaVirtualBlock object. 
+ +Example: + +\code +VmaVirtualBlockCreateInfo blockCreateInfo = {}; +blockCreateInfo.size = 1048576; // 1 MB + +VmaVirtualBlock block; +VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block); +\endcode + +\section virtual_allocator_making_virtual_allocations Making virtual allocations + +#VmaVirtualBlock object contains internal data structure that keeps track of free and occupied regions +using the same code as the main Vulkan memory allocator. +Similarly to #VmaAllocation for standard GPU allocations, there is #VmaVirtualAllocation type +that represents an opaque handle to an allocation within the virtual block. + +In order to make such allocation: + +-# Fill in #VmaVirtualAllocationCreateInfo structure. +-# Call vmaVirtualAllocate(). Get new #VmaVirtualAllocation object that represents the allocation. + You can also receive `VkDeviceSize offset` that was assigned to the allocation. + +Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB + +VmaVirtualAllocation alloc; +VkDeviceSize offset; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset); +if(res == VK_SUCCESS) +{ + // Use the 4 KB of your memory starting at offset. +} +else +{ + // Allocation failed - no space for it could be found. Handle this error! +} +\endcode + +\section virtual_allocator_deallocation Deallocation + +When no longer needed, an allocation can be freed by calling vmaVirtualFree(). +You can only pass to this function an allocation that was previously returned by vmaVirtualAllocate() +called for the same #VmaVirtualBlock. + +When whole block is no longer needed, the block object can be released by calling vmaDestroyVirtualBlock(). +All allocations must be freed before the block is destroyed, which is checked internally by an assert. 
+However, if you don't want to call vmaVirtualFree() for each allocation, you can use vmaClearVirtualBlock() to free them all at once - +a feature not available in normal Vulkan memory allocator. Example: + +\code +vmaVirtualFree(block, alloc); +vmaDestroyVirtualBlock(block); +\endcode + +\section virtual_allocator_allocation_parameters Allocation parameters + +You can attach a custom pointer to each allocation by using vmaSetVirtualAllocationUserData(). +Its default value is null. +It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some +larger data structure containing more information. Example: + +\code +struct CustomAllocData +{ + std::string m_AllocName; +}; +CustomAllocData* allocData = new CustomAllocData(); +allocData->m_AllocName = "My allocation 1"; +vmaSetVirtualAllocationUserData(block, alloc, allocData); +\endcode + +The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to function +vmaGetVirtualAllocationInfo() and inspecting returned structure #VmaVirtualAllocationInfo. +If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation! +Example: + +\code +VmaVirtualAllocationInfo allocInfo; +vmaGetVirtualAllocationInfo(block, alloc, &allocInfo); +delete (CustomAllocData*)allocInfo.pUserData; + +vmaVirtualFree(block, alloc); +\endcode + +\section virtual_allocator_alignment_and_units Alignment and units + +It feels natural to express sizes and offsets in bytes. +If an offset of an allocation needs to be aligned to a multiple of some number (e.g. 4 bytes), you can fill optional member +VmaVirtualAllocationCreateInfo::alignment to request it.
Example: + +\code +VmaVirtualAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.size = 4096; // 4 KB +allocCreateInfo.alignment = 4; // Returned offset must be a multiple of 4 B + +VmaVirtualAllocation alloc; +res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, nullptr); +\endcode + +Alignments of different allocations made from one block may vary. +However, if all alignments and sizes are always a multiple of some size e.g. 4 B or `sizeof(MyDataStruct)`, +you can express all sizes, alignments, and offsets in multiples of that size instead of individual bytes. +It might be more convenient, but you need to make sure to use this new unit consistently in all the places: + +- VmaVirtualBlockCreateInfo::size +- VmaVirtualAllocationCreateInfo::size and VmaVirtualAllocationCreateInfo::alignment +- Using offset returned by vmaVirtualAllocate() or in VmaVirtualAllocationInfo::offset + +\section virtual_allocator_statistics Statistics + +You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics() +(to get brief statistics that are fast to calculate) +or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate). +The functions fill structures #VmaStatistics, #VmaDetailedStatistics respectively - same as used by the normal Vulkan memory allocator. +Example: + +\code +VmaStatistics stats; +vmaGetVirtualBlockStatistics(block, &stats); +printf("My virtual block has %llu bytes used by %u virtual allocations\n", + stats.allocationBytes, stats.allocationCount); +\endcode + +You can also request a full list of allocations and free regions as a string in JSON format by calling +vmaBuildVirtualBlockStatsString(). +Returned string must be later freed using vmaFreeVirtualBlockStatsString(). +The format of this string differs from the one returned by the main Vulkan allocator, but it is similar.
+ +\section virtual_allocator_additional_considerations Additional considerations + +The "virtual allocator" functionality is implemented on a level of individual memory blocks. +Keeping track of a whole collection of blocks, allocating new ones when out of free space, +deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user. + +Alternative allocation algorithms are supported, just like in custom pools of the real GPU memory. +See enum #VmaVirtualBlockCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT). +You can find their description in chapter \ref custom_memory_pools. +Allocation strategies are also supported. +See enum #VmaVirtualAllocationCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT). + +Following features are supported only by the allocator of the real GPU memory and not by virtual allocations: +buffer-image granularity, `VMA_DEBUG_MARGIN`, `VMA_MIN_ALIGNMENT`. + + +\page debugging_memory_usage Debugging incorrect memory usage + +If you suspect a bug with memory usage, like usage of uninitialized memory or +memory being overwritten out of bounds of an allocation, +you can use debug features of this library to verify this. + +\section debugging_memory_usage_initialization Memory initialization + +If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used, +you can enable automatic memory initialization to verify this. +To do it, define macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1. + +\code +#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1 +#include "vk_mem_alloc.h" +\endcode + +It makes memory of new allocations initialized to bit pattern `0xDCDCDCDC`. +Before an allocation is destroyed, its memory is filled with bit pattern `0xEFEFEFEF`. +Memory is automatically mapped and unmapped if necessary. 
+ +If you find these values while debugging your program, good chances are that you incorrectly +read Vulkan memory that is allocated but not initialized, or already freed, respectively. + +Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped. +It works also with dedicated allocations. + +\section debugging_memory_usage_margins Margins + +By default, allocations are laid out in memory blocks next to each other if possible +(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`). + +![Allocations without margin](../gfx/Margins_1.png) + +Define macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce specified +number of bytes as a margin after every allocation. + +\code +#define VMA_DEBUG_MARGIN 16 +#include "vk_mem_alloc.h" +\endcode + +![Allocations with margin](../gfx/Margins_2.png) + +If your bug goes away after enabling margins, it means it may be caused by memory +being overwritten outside of allocation boundaries. It is not 100% certain though. +Change in application behavior may also be caused by different order and distribution +of allocations across memory blocks after margins are applied. + +Margins work with all types of memory. + +Margin is applied only to allocations made out of memory blocks and not to dedicated +allocations, which have their own memory block of specific size. +It is thus not applied to allocations made using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag +or those automatically decided to put into dedicated allocations, e.g. due to its +large size or recommended by VK_KHR_dedicated_allocation extension. + +Margins appear in [JSON dump](@ref statistics_json_dump) as part of free space. + +Note that enabling margins increases memory usage and fragmentation. + +Margins do not apply to \ref virtual_allocator. 
+ +\section debugging_memory_usage_corruption_detection Corruption detection + +You can additionally define macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation +of contents of the margins. + +\code +#define VMA_DEBUG_MARGIN 16 +#define VMA_DEBUG_DETECT_CORRUPTION 1 +#include "vk_mem_alloc.h" +\endcode + +When this feature is enabled, number of bytes specified as `VMA_DEBUG_MARGIN` +(it must be a multiple of 4) after every allocation is filled with a magic number. +This idea is also known as "canary". +Memory is automatically mapped and unmapped if necessary. + +This number is validated automatically when the allocation is destroyed. +If it is not equal to the expected value, `VMA_ASSERT()` is executed. +It clearly means that either the CPU or the GPU has overwritten the memory outside of boundaries of the allocation, +which indicates a serious bug. + +You can also explicitly request checking margins of all allocations in all memory blocks +that belong to specified memory types by using function vmaCheckCorruption(), +or in memory blocks that belong to specified custom pool, by using function +vmaCheckPoolCorruption(). + +Margin validation (corruption detection) works only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. + + +\section debugging_memory_usage_leak_detection Leak detection features + +At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using +`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug +builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()` +to change this behavior. + +At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()` +macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op.
+If you're having trouble with leaks - for example, the aforementioned assertion triggers, but you don't +quite know \em why -, overriding this macro to print out the leaking blocks, combined with assigning +individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them. + +\page other_api_interop Interop with other graphics APIs + +VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL. + +\section opengl_interop_exporting_memory Exporting memory + +If you want to attach `VkExportMemoryAllocateInfoKHR` or other structure to `pNext` chain of memory allocations made by the library: + +You can create \ref custom_memory_pools for such allocations. +Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext +while creating the custom pool. +Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool, +not only while creating it, as no copy of the structure is made, +but its original pointer is used for each allocation instead. + +If you want to export all memory allocated by VMA from certain memory types, +also dedicated allocations or other allocations made from default pools, +an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes. +It should point to an array with `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library +through `VkExportMemoryAllocateInfoKHR` on each allocation made from a specific memory type. +Please note that new versions of the library also support dedicated allocations created in custom pools. + +You should not mix these two methods in a way that allows to apply both to the same memory type. +Otherwise, `VkExportMemoryAllocateInfoKHR` structure would be attached twice to the `pNext` chain of `VkMemoryAllocateInfo`.
+ + +\section opengl_interop_custom_alignment Custom alignment + +Buffers or images exported to a different API like OpenGL may require a different alignment, +higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`. +To impose such alignment: + +You can create \ref custom_memory_pools for such allocations. +Set VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation +to be made out of this pool. +The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image +from a function like `vkGetBufferMemoryRequirements`, which is called by VMA automatically. + +If you want to create a buffer with a specific minimum alignment out of default pools, +use special function vmaCreateBufferWithAlignment(), which takes additional parameter `minAlignment`. + +Note the problem of alignment affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated +allocations, as these, by definition, always have alignment = 0 because the resource is bound to the beginning of its dedicated block. +You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation. + +\section opengl_interop_extended_allocation_information Extended allocation information + +If you want to rely on VMA to allocate your buffers and images inside larger memory blocks, +but you need to know the size of the entire block and whether the allocation was made +with its own dedicated memory, use function vmaGetAllocationInfo2() to retrieve +extended allocation information in structure #VmaAllocationInfo2. + + + +\page usage_patterns Recommended usage patterns + +Vulkan gives great flexibility in memory allocation. +This chapter shows the most common patterns. 
+ +See also slides from talk: +[Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) + + +\section usage_patterns_gpu_only GPU-only resource + +When: +Any resources that you frequently write and read on GPU, +e.g. images used as color attachments (aka "render targets"), depth-stencil attachments, +images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)"). + +What to do: +Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +Also consider: +Consider creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, +especially if they are large or if you plan to destroy and recreate them with different sizes +e.g. when display resolution changes. +Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later. 
+When VK_EXT_memory_priority extension is enabled, it is also worth setting high priority to such allocation +to decrease chances to be evicted to system memory by the operating system. + +\section usage_patterns_staging_copy_upload Staging copy for upload + +When: +A "staging" buffer that you want to map and fill from CPU code, then use as a source of transfer +to some GPU resource. + +What to do: +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`. + +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +memcpy(allocInfo.pMappedData, myData, myDataSize); +\endcode + +Also consider: +You can map the allocation using vmaMapMemory() or you can create it as persistently mapped +using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above. + + +\section usage_patterns_readback Readback + +When: +Buffers for data written by or transferred from the GPU that you want to read back on the CPU, +e.g. results of some computations. + +What to do: +Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`.
+ +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); + +... + +const float* downloadedData = (const float*)allocInfo.pMappedData; +\endcode + + +\section usage_patterns_advanced_data_uploading Advanced data uploading + +For resources that you frequently write on CPU via mapped pointer and +frequently read on GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible: + +-# Easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory, + even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card, + and make the device reach out to that resource directly. + - Reads performed by the device will then go through PCI Express bus. + The performance of this access may be limited, but it may be fine depending on the size + of this resource (whether it is small enough to quickly end up in GPU cache) and the sparsity + of access. +-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips), + a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL` + (fast to access from the GPU). Then, it is likely the best choice for such type of resource. +-# Systems with a discrete graphics card and separate video memory may or may not expose + a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR). 
+ If they do, it represents a piece of VRAM (or entire VRAM, if ReBAR is enabled in the motherboard BIOS) + that is available to CPU for mapping. + - Writes performed by the host to that memory go through PCI Express bus. + The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0, + as long as rules of using uncached and write-combined memory are followed - only sequential writes and no reads. +-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory, + a separate "staging" copy in `HOST_VISIBLE` memory and perform an explicit transfer command between them. + +Thankfully, VMA offers an aid to create and use such resources in the way optimal +for the current Vulkan device. To help the library make the best choice, +use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with +#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT. +It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR), +but if no such memory type is available or allocation from it fails +(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in BIOS), +it will fall back to `DEVICE_LOCAL` memory for fast GPU access. +It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`, +so you need to create another "staging" allocation and perform explicit transfers.
+ +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +// Check result... + +VkMemoryPropertyFlags memPropFlags; +vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); + +if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +{ + // Allocation ended up in a mappable memory and is already mapped - write to it directly. + + // [Executed in runtime]: + memcpy(allocInfo.pMappedData, myData, myDataSize); + result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = buf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); +} +else +{ + // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required. 
+ VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + stagingBufCreateInfo.size = 65536; + stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + VmaAllocationCreateInfo stagingAllocCreateInfo = {}; + stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + + VkBuffer stagingBuf; + VmaAllocation stagingAlloc; + VmaAllocationInfo stagingAllocInfo; + result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, + &stagingBuf, &stagingAlloc, &stagingAllocInfo); + // Check result... + + // [Executed in runtime]: + memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); + result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = stagingBuf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + + VkBufferCopy bufCopy = { + 0, // srcOffset + 0, // dstOffset, + myDataSize, // size + }; + + vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); + + VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer + bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.buffer = buf; + 
bufMemBarrier2.offset = 0; + bufMemBarrier2.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr); +} +\endcode + +\section usage_patterns_other_use_cases Other use cases + +Here are some other, less obvious use cases and their recommended settings: + +- An image that is used only as transfer source and destination, but it should stay on the device, + as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame, + for temporal antialiasing or other temporal effects. + - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO +- An image that is used only as transfer source and destination, but it should be placed + in the system RAM even though it doesn't need to be mapped, because it serves as a "swap" copy to evict + least recently used textures from VRAM. + - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + as VMA needs a hint here to differentiate from the previous case. +- A buffer that you want to map and write from the CPU, directly read from the GPU + (e.g. as a uniform or vertex buffer), but you have a clear preference to place it in device or + host memory due to its large size. 
+ - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST + - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT + + +\page configuration Configuration + +Please check "CONFIGURATION SECTION" in the code to find macros that you can define +before each include of this file or change directly in this file to provide +your own implementation of basic facilities like assert, `min()` and `max()` functions, +mutex, atomic etc. + +For example, define `VMA_ASSERT(expr)` before including the library to provide +custom implementation of the assertion, compatible with your project. +By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration +and empty otherwise. + +Similarly, you can define `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations, +including their names and other parameters. Example: + +\code +#define VMA_LEAK_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ + printf("\n"); \ + } while(false) +\endcode + +\section config_Vulkan_functions Pointers to Vulkan functions + +There are multiple ways to import pointers to Vulkan functions in the library. +In the simplest case you don't need to do anything. +If the compilation or linking of your program or the initialization of the #VmaAllocator +doesn't work for you, you can try to reconfigure it. + +First, the allocator tries to fetch pointers to Vulkan functions linked statically, +like this: + +\code +m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; +\endcode + +If you want to disable this feature, set configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`. + +Second, you can provide the pointers yourself by setting member VmaAllocatorCreateInfo::pVulkanFunctions. +You can fetch them e.g. 
using functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or +by using a helper library like [volk](https://github.com/zeux/volk). + +Third, VMA tries to fetch remaining pointers that are still null by calling +`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. +You need to only fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr. +Other pointers will be fetched automatically. +If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. + +Finally, all the function pointers required by the library (considering selected +Vulkan version and enabled extensions) are checked with `VMA_ASSERT` if they are not null. + + +\section custom_memory_allocator Custom host memory allocator + +If you use custom allocator for CPU memory rather than default operator `new` +and `delete` from C++, you can make this library use your allocator as well +by filling optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These +functions will be passed to Vulkan, as well as used by the library itself to +make any CPU-side allocations. + +\section allocation_callbacks Device memory allocation callbacks + +The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally. +You can set up callbacks to be informed about these calls, e.g. for the purpose +of gathering some statistics. To do it, fill optional member +VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. + +\section heap_memory_limit Device heap memory limit + +When device memory of certain heap runs out of free space, new allocations may +fail (returning error code) or they may succeed, silently pushing some existing +memory blocks from GPU VRAM to system RAM (which degrades performance). This +behavior is implementation-dependent - it depends on GPU vendor and graphics +driver. 
+ +On AMD cards it can be controlled while creating Vulkan device object by using +VK_AMD_memory_overallocation_behavior extension, if available. + +Alternatively, if you want to test how your program behaves with limited amount of Vulkan device +memory available without switching your graphics card to one that really has +smaller VRAM, you can use a feature of this library intended for this purpose. +To do it, fill optional member VmaAllocatorCreateInfo::pHeapSizeLimit. + + + +\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation + +VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve +performance on some GPUs. It augments Vulkan API with possibility to query +driver whether it prefers particular buffer or image to have its own, dedicated +allocation (separate `VkDeviceMemory` block) for better efficiency - to be able +to do some internal optimizations. The extension is supported by this library. +It will be used automatically when enabled. + +It has been promoted to core Vulkan 1.1, so if you use eligible Vulkan version +and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion, +you are all set. + +Otherwise, if you want to use it as an extension: + +1 . When creating Vulkan device, check if following 2 device extensions are +supported (call `vkEnumerateDeviceExtensionProperties()`). +If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`). + +- VK_KHR_get_memory_requirements2 +- VK_KHR_dedicated_allocation + +If you enabled these extensions: + +2 . Use #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating +your #VmaAllocator to inform the library that you enabled required extensions +and you want the library to use them. + +\code +allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; + +vmaCreateAllocator(&allocatorInfo, &allocator); +\endcode + +That is all. 
The extension will be automatically used whenever you create a +buffer using vmaCreateBuffer() or image using vmaCreateImage(). + +When using the extension together with Vulkan Validation Layer, you will receive +warnings like this: + +_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._ + +It is OK, you should just ignore it. It happens because you use function +`vkGetBufferMemoryRequirements2KHR()` instead of standard +`vkGetBufferMemoryRequirements()`, while the validation layer seems to be +unaware of it. + +To learn more about this extension, see: + +- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap50.html#VK_KHR_dedicated_allocation) +- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5) + + + +\page vk_ext_memory_priority VK_EXT_memory_priority + +VK_EXT_memory_priority is a device extension that allows passing an additional "priority" +value to Vulkan memory allocations that the implementation may use to prefer certain +buffers and images that are critical for performance to stay in device-local memory +in cases when the memory is over-subscribed, while some others may be moved to the system memory. + +VMA offers convenient usage of this extension. +If you enable it, you can pass "priority" parameter when creating allocations or custom pools +and the library automatically passes the value to Vulkan using this extension. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_ext_memory_priority_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority". 
+ +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to +`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +to VmaAllocatorCreateInfo::flags. + +\section vk_ext_memory_priority_usage Usage + +When using this extension, you should initialize following member: + +- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +- VmaPoolCreateInfo::priority when creating a custom pool. + +It should be a floating-point value between `0.0f` and `1.0f`, where recommended default is `0.5f`. +Memory allocated with higher value can be treated by the Vulkan implementation as higher priority +and so it can have lower chances of being pushed out to system memory, experiencing degraded performance. + +It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images +as dedicated and set high priority to them. 
For example: + +\code +VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; +imgCreateInfo.imageType = VK_IMAGE_TYPE_2D; +imgCreateInfo.extent.width = 3840; +imgCreateInfo.extent.height = 2160; +imgCreateInfo.extent.depth = 1; +imgCreateInfo.mipLevels = 1; +imgCreateInfo.arrayLayers = 1; +imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; +imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; +imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; +imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; +imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; +allocCreateInfo.priority = 1.0f; + +VkImage img; +VmaAllocation alloc; +vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr); +\endcode + +`priority` member is ignored in the following situations: + +- Allocations created in custom pools: They inherit the priority, along with all other allocation parameters + from the parameters passed in #VmaPoolCreateInfo when the pool was created. +- Allocations created in default pools: They inherit the priority from the parameters + VMA used when creating default pools, which means `priority == 0.5f`. + + +\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory + +VK_AMD_device_coherent_memory is a device extension that enables access to +additional memory types with `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and +`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flag. It is useful mostly for +allocation of buffers intended for writing "breadcrumb markers" in between passes +or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases. + +When the extension is available but has not been enabled, Vulkan physical device +still exposes those memory types, but their usage is forbidden. 
VMA automatically +takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt +to allocate memory of such type is made. + +If you want to use this extension in connection with VMA, follow these steps: + +\section vk_amd_device_coherent_memory_initialization Initialization + +1) Call `vkEnumerateDeviceExtensionProperties` for the physical device. +Check if the extension is supported - if returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true. + +3) While creating device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory" +to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +to VmaAllocatorCreateInfo::flags. + +\section vk_amd_device_coherent_memory_usage Usage + +After following steps described above, you can create VMA allocations and custom pools +out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible +devices. 
There are multiple ways to do it, for example: + +- You can request or prefer to allocate out of such memory types by adding + `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags + or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with + other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage. +- If you manually found memory type index to use for this purpose, force allocation + from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`. + +\section vk_amd_device_coherent_memory_more_information More information + +To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_AMD_device_coherent_memory.html) + +Example use of this extension can be found in the code of the sample and test suite +accompanying this library. + + +\page vk_khr_external_memory_win32 VK_KHR_external_memory_win32 + +On Windows, the VK_KHR_external_memory_win32 device extension allows exporting a Win32 `HANDLE` +of a `VkDeviceMemory` block, to be able to reference the memory on other Vulkan logical devices or instances, +in multiple processes, and/or in multiple APIs. +VMA offers support for it. + +\section vk_khr_external_memory_win32_initialization Initialization + +1) Make sure the extension is defined in the code by including following header before including VMA: + +\code +#include <vulkan/vulkan_win32.h> +\endcode + +2) Check if "VK_KHR_external_memory_win32" is available among device extensions. +Enable it when creating the `VkDevice` object. + +3) Enable the usage of this extension in VMA by setting flag #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT +when calling vmaCreateAllocator(). 
+ +4) Make sure that VMA has access to the `vkGetMemoryWin32HandleKHR` function by either enabling `VMA_DYNAMIC_VULKAN_FUNCTIONS` macro +or setting VmaVulkanFunctions::vkGetMemoryWin32HandleKHR explicitly. +For more information, see \ref quick_start_initialization_importing_vulkan_functions. + +\section vk_khr_external_memory_win32_preparations Preparations + +You can find example usage among tests, in file "Tests.cpp", function `TestWin32Handles()`. + +To use the extension, buffers need to be created with `VkExternalMemoryBufferCreateInfoKHR` attached to their `pNext` chain, +and memory allocations need to be made with `VkExportMemoryAllocateInfoKHR` attached to their `pNext` chain. +To make use of them, you need to use \ref custom_memory_pools. Example: + +\code +// Define an example buffer and allocation parameters. +VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = { + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT +}; +VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +exampleBufCreateInfo.size = 0x10000; // Doesn't matter here. +exampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; +exampleBufCreateInfo.pNext = &externalMemBufCreateInfo; + +VmaAllocationCreateInfo exampleAllocCreateInfo = {}; +exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + +// Find memory type index to use for the custom pool. +uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_Allocator, + &exampleBufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex); +// Check res... + +// Create a custom pool. 
+constexpr static VkExportMemoryAllocateInfoKHR exportMemAllocInfo = { + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT +}; +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +poolCreateInfo.pMemoryAllocateNext = (void*)&exportMemAllocInfo; + +VmaPool pool; +res = vmaCreatePool(g_Allocator, &poolCreateInfo, &pool); +// Check res... + +// YOUR OTHER CODE COMES HERE.... + +// At the end, don't forget to destroy it! +vmaDestroyPool(g_Allocator, pool); +\endcode + +Note that the structure passed as VmaPoolCreateInfo::pMemoryAllocateNext must remain alive and unchanged +for the whole lifetime of the custom pool, because it will be used when the pool allocates a new device memory block. +No copy is made internally. This is why variable `exportMemAllocInfo` is defined as `static`. + +\section vk_khr_external_memory_win32_memory_allocation Memory allocation + +Finally, you can create a buffer with an allocation out of the custom pool. +The buffer should use same flags as the sample buffer used to find the memory type. +It should also specify `VkExternalMemoryBufferCreateInfoKHR` in its `pNext` chain. + +\code +VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = { + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT +}; +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = // Your desired buffer size. +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; +bufCreateInfo.pNext = &externalMemBufCreateInfo; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.pool = pool; // It is enough to set this one member. + +VkBuffer buf; +VmaAllocation alloc; +res = vmaCreateBuffer(g_Allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); +// Check res... + +// YOUR OTHER CODE COMES HERE.... 
+ +// At the end, don't forget to destroy it! +vmaDestroyBuffer(g_Allocator, buf, alloc); +\endcode + +If you need each allocation to have its own device memory block and start at offset 0, you can still do so +by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag. It works also with custom pools. + +\section vk_khr_external_memory_win32_exporting_win32_handle Exporting Win32 handle + +After the allocation is created, you can acquire a Win32 `HANDLE` to the `VkDeviceMemory` block it belongs to. +VMA function vmaGetMemoryWin32Handle() is a replacement of the Vulkan function `vkGetMemoryWin32HandleKHR`. + +\code +HANDLE handle; +res = vmaGetMemoryWin32Handle(g_Allocator, alloc, nullptr, &handle); +// Check res... + +// YOUR OTHER CODE COMES HERE.... + +// At the end, you must close the handle. +CloseHandle(handle); +\endcode + +Documentation of the VK_KHR_external_memory_win32 extension states that: + +> If handleType is defined as an NT handle, vkGetMemoryWin32HandleKHR must be called no more than once for each valid unique combination of memory and handleType. + +This is ensured automatically inside VMA. +The library fetches the handle on first use, remembers it internally, and closes it when the memory block or dedicated allocation is destroyed. +Every time you call vmaGetMemoryWin32Handle(), VMA calls `DuplicateHandle` and returns a new handle that you need to close. + +For further information, please check documentation of the vmaGetMemoryWin32Handle() function. + + +\page enabling_buffer_device_address Enabling buffer device address + +Device extension VK_KHR_buffer_device_address +allows fetching a raw GPU pointer to a buffer and passing it for usage in shader code. +It has been promoted to core Vulkan 1.2. + +If you want to use this feature in connection with VMA, follow these steps: + +\section enabling_buffer_device_address_initialization Initialization + +1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device. 
+Check if the extension is supported - if returned array of `VkExtensionProperties` contains +"VK_KHR_buffer_device_address". + +2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of old `vkGetPhysicalDeviceFeatures`. +Attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned. +Check if the device feature is really supported - check if `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress` is true. + +3) (For Vulkan version < 1.2) While creating device with `vkCreateDevice`, enable this extension - add +"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`. + +4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`. +Fill in `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`. +Enable this device feature - attach additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to +`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`. + +5) While creating #VmaAllocator with vmaCreateAllocator() inform VMA that you +have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +to VmaAllocatorCreateInfo::flags. + +\section enabling_buffer_device_address_usage Usage + +After following steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA. +The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to +allocated memory blocks wherever it might be needed. + +Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`. +The second part of this functionality related to "capture and replay" is not supported, +as it is intended for usage in debugging tools like RenderDoc, not in everyday Vulkan usage. 
+ +\section enabling_buffer_device_address_more_information More information + +To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address) + +Example use of this extension can be found in the code of the sample and test suite +accompanying this library. + +\page general_considerations General considerations + +\section general_considerations_thread_safety Thread safety + +- The library has no global state, so separate #VmaAllocator objects can be used + independently. + There should be no need to create multiple such objects though - one per `VkDevice` is enough. +- By default, all calls to functions that take #VmaAllocator as first parameter + are safe to call from multiple threads simultaneously because they are + synchronized internally when needed. + This includes allocation and deallocation from default memory pool, as well as custom #VmaPool. +- When the allocator is created with #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT + flag, calls to functions that take such #VmaAllocator object must be + synchronized externally. +- Access to a #VmaAllocation object must be externally synchronized. For example, + you must not call vmaGetAllocationInfo() and vmaMapMemory() from different + threads at the same time if you pass the same #VmaAllocation object to these + functions. +- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. + +\section general_considerations_versioning_and_compatibility Versioning and compatibility + +The library uses [**Semantic Versioning**](https://semver.org/), +which means version numbers follow convention: Major.Minor.Patch (e.g. 2.3.0), where: + +- Incremented Patch version means a release is backward- and forward-compatible, + introducing only some internal improvements, bug fixes, optimizations etc. 
+ or changes that are out of scope of the official API described in this documentation. +- Incremented Minor version means a release is backward-compatible, + so existing code that uses the library should continue to work, while some new + symbols could have been added: new structures, functions, new values in existing + enums and bit flags, new structure members, but not new function parameters. +- Incrementing Major version means a release could break some backward compatibility. + +All changes between official releases are documented in file "CHANGELOG.md". + +\warning Backward compatibility is considered on the level of C++ source code, not binary linkage. +Adding new members to existing structures is treated as backward compatible if initializing +the new members to binary zero results in the old behavior. +You should always fully initialize all library structures to zeros and not rely on their +exact binary size. + +\section general_considerations_validation_layer_warnings Validation layer warnings + +When using this library, you can meet following types of warnings issued by +Vulkan validation layer. They don't necessarily indicate a bug, so you may need +to just ignore them. + +- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.* + - It happens when VK_KHR_dedicated_allocation extension is enabled. + `vkGetBufferMemoryRequirements2KHR` function is used instead, while validation layer seems to be unaware of it. +- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.* + - It happens when you map a buffer or image, because the library maps entire + `VkDeviceMemory` block, where different types of images and buffers may end + up together, especially on GPUs with unified memory like Intel. 
+- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.* + - It may happen when you use [defragmentation](@ref defragmentation). + +\section general_considerations_allocation_algorithm Allocation algorithm + +The library uses following algorithm for allocation, in order: + +-# Try to find free range of memory in existing blocks. +-# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. +-# If failed, try to create such block with size / 2, size / 4, size / 8. +-# If failed, try to allocate separate `VkDeviceMemory` for this allocation, + just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +-# If failed, choose other memory type that meets the requirements specified in + VmaAllocationCreateInfo and go to point 1. +-# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + +\section general_considerations_features_not_supported Features not supported + +Features deliberately excluded from the scope of this library: + +-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images + between CPU and GPU memory and related synchronization is responsibility of the user. + Defining some "texture" object that would automatically stream its data from a + staging copy in CPU memory to GPU memory would rather be a feature of another, + higher-level library implemented on top of VMA. + VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory. +-# **Recreation of buffers and images.** Although the library has functions for + buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to + recreate these objects yourself after defragmentation. That is because the big + structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in + #VmaAllocation object. 
+-# **Handling CPU memory allocation failures.** When dynamically creating small C++ + objects in CPU memory (not Vulkan memory), allocation failures are not checked + and handled gracefully, because that would complicate code significantly and + is usually not needed in desktop PC applications anyway. + Success of an allocation is just checked with an assert. +-# **Code free of any compiler warnings.** Maintaining the library to compile and + work correctly on so many different platforms is hard enough. Being free of + any warnings, on any version of any compiler, is simply not feasible. + There are many preprocessor macros that make some variables unused, function parameters unreferenced, + or conditional expressions constant in some configurations. + The code of this library should not be bigger or more complicated just to silence these warnings. + It is recommended to disable such warnings instead. +-# This is a C++ library with a C interface. **Bindings or ports to any other programming languages** are welcome as external projects but + are not going to be included in this repository. 
+*/ diff --git a/lib/imgui/CMakeLists.txt b/lib/imgui/CMakeLists.txt index 69a52850f28..10f85786a9e 100644 --- a/lib/imgui/CMakeLists.txt +++ b/lib/imgui/CMakeLists.txt @@ -64,7 +64,7 @@ if(FSO_BUILD_WITH_OPENGL_ES) endif() if (FSO_BUILD_WITH_VULKAN) - find_package(Vulkan REQUIRED) - target_compile_definitions(imgui INTERFACE VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1 VK_NO_PROTOTYPES) - target_link_libraries(imgui PRIVATE Vulkan::Vulkan) + target_compile_definitions(imgui PUBLIC VK_NO_PROTOTYPES) + target_compile_definitions(imgui INTERFACE VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) + target_link_libraries(imgui PUBLIC Vulkan::Headers) endif() diff --git a/lib/prebuilt.cmake b/lib/prebuilt.cmake index c97d3ef95be..9438461605a 100644 --- a/lib/prebuilt.cmake +++ b/lib/prebuilt.cmake @@ -1,5 +1,5 @@ -set(PREBUILT_VERSION_NAME "21d0b52") +set(PREBUILT_VERSION_NAME "4fbde4a") set(FSO_PREBUILT_OVERRIDE "" CACHE PATH "Path to the prebuilt binaries, if empty the binaries will be downloaded.") set(PREBUILT_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/prebuilt") diff --git a/lib/vulkan.cmake b/lib/vulkan.cmake index a6be2ab90d1..2e705d27ea2 100644 --- a/lib/vulkan.cmake +++ b/lib/vulkan.cmake @@ -1,4 +1,8 @@ +if(NOT FSO_BUILD_WITH_VULKAN) + return() +endif() + find_program(GLSLC_PATH glslc) # Add an option for this so that this can be disabled locally when not needed @@ -82,3 +86,93 @@ if (SHADERS_ENABLE_COMPILATION AND GLSLC_PATH) add_executable(shadertool IMPORTED GLOBAL) set_target_properties(shadertool PROPERTIES IMPORTED_LOCATION "${SHADERTOOL_PATH}") endif () + +# +# Install any required libraries +# + +option(VULKAN_USE_PRECOMPILED "Force use of precompiled versions of Vulkan-Loader and Shaderc." 
OFF) + +get_prebuilt_path(PREBUILT_PATH) + +set(USING_PREBUILT_VULKAN ${VULKAN_USE_PRECOMPILED}) + +if(PLATFORM_WINDOWS OR PLATFORM_MAC) + set(USING_PREBUILT_VULKAN TRUE) +elseif(PLATFORM_LINUX AND FSO_BUILD_APPIMAGE) + set(USING_PREBUILT_VULKAN TRUE) +endif() + +# Shaderc - runtime compilation of glsl to SPIRV +# +# This is dynamically loaded, so we don't have to link against it. That also +# means that we don't have to jump through hoops to make sure it's available at +# build time. We just have prebuilt libs for packaging purposes and system libs +# will automatically be used otherwise. + +if(USING_PREBUILT_VULKAN) + message(STATUS "Using pre-built Shaderc library.") + + if(PLATFORM_WINDOWS) + file(GLOB SHADERC_LIB "${PREBUILT_PATH}/shaderc/bin/*.dll") + else() + file(GLOB SHADERC_LIB "${PREBUILT_PATH}/shaderc/lib/lib*") + endif() + + add_target_copy_files("${SHADERC_LIB}") +endif() + +# Vulkan loader +# +# This is dynamically loaded by SDL. Its presence doesn't necessarily mean that +# Vulkan is supported, but having it here does allow things to fail more gracefully +# than if the loader is not present at all. Prebuilt lib is for packaging purposes +# and system libs are assumed to be present otherwise. + +if(USING_PREBUILT_VULKAN) + # We use MoltenVK instead of Vulkan-Loader on Mac, but if Vulkan-Loader is + # installed system wide then SDL will prefer using it. 
+ if(PLATFORM_MAC) + message(STATUS "Using pre-built MoltenVK framework.") + + unset(MOLTENVK_LIBRARY CACHE) + find_library(MOLTENVK_LIBRARY MoltenVK PATHS "${PREBUILT_PATH}" NO_DEFAULT_PATH) + + add_target_copy_files("${MOLTENVK_LIBRARY}") + else() + message(STATUS "Using pre-built Vulkan-Loader library.") + + if(PLATFORM_WINDOWS) + file(GLOB VULKAN_LOADER_LIB "${PREBUILT_PATH}/vulkan-loader/bin/*.dll") + else() + file(GLOB VULKAN_LOADER_LIB "${PREBUILT_PATH}/vulkan-loader/lib/lib*") + endif() + + add_target_copy_files("${VULKAN_LOADER_LIB}") + endif() +endif() + +# Vulkan/Shaderc headers +# +# Use prebuilt if we should, or just as a fallback if the SDK isn't installed. +# The find_package() min version should be what is used in the prebuilt repo. +# Note that we only rely on the headers and do NOT link against the Vulkan libs! + +if(NOT USING_PREBUILT_VULKAN) + find_package(Vulkan 1.4.341) +endif() + +# prebuilt/fallback +if(NOT TARGET Vulkan::Headers) + add_library(VulkanHeaders INTERFACE) + + target_include_directories(VulkanHeaders SYSTEM INTERFACE + "${PREBUILT_PATH}/vulkan-headers/include" + "${PREBUILT_PATH}/shaderc/include" + ) + + add_library(Vulkan::Headers ALIAS VulkanHeaders) +endif() + +# Just use our VMA +add_subdirectory(VulkanMemoryAllocator)