From 0f82d0a404d37ca086e392e9f57c07776d170419 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Sun, 14 Dec 2025 19:50:15 -0800 Subject: [PATCH 01/11] modify warptile tuning for xe3 --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 34ec09d4034..87170109db8 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2805,11 +2805,12 @@ static void ggml_vk_load_shaders(vk_device& device) { const uint32_t tk_s = device->coopmat_support ? device->coopmat_k : 1; l_warptile = { 128, 128, 128, 16, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 }; - m_warptile = { 128, 64, 64, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + m_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; s_warptile = { subgroup_size_16, 32, 32, 16, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 }; l_warptile_mmq = { 128, 128, 128, 32, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 }; - m_warptile_mmq = { 128, 64, 64, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + m_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + // (BLOCK SIZE, BM, BN, BK, WM, WN, WMITER, TM, TN, TK, WARP) s_warptile_mmq = { subgroup_size_32, 32, 32, 32, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 }; // Integer MMQ has a smaller shared memory profile, but heavier register use @@ -2845,7 +2846,7 @@ static void ggml_vk_load_shaders(vk_device& device) { } l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 }; - m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; + m_mmq_wg_denoms = m_wg_denoms = { 128, 128, 1 }; s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; l_align = 128; m_align = 64; From c9087117859bb0cbe1f16af50662785447dfa0bf Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Wed, 17 Dec 2025 13:50:08 -0700 Subject: [PATCH 02/11] intel vendor check w/ coopmat support --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 87170109db8..bc5b4030e15 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2805,12 +2805,11 @@ static void ggml_vk_load_shaders(vk_device& device) { const uint32_t tk_s = device->coopmat_support ? device->coopmat_k : 1; l_warptile = { 128, 128, 128, 16, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 }; - m_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + m_warptile = { 128, 64, 64, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; s_warptile = { subgroup_size_16, 32, 32, 16, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 }; l_warptile_mmq = { 128, 128, 128, 32, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 }; - m_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; - // (BLOCK SIZE, BM, BN, BK, WM, WN, WMITER, TM, TN, TK, WARP) + m_warptile_mmq = { 128, 64, 64, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; s_warptile_mmq = { subgroup_size_32, 32, 32, 32, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 }; // Integer MMQ has a smaller shared memory profile, but heavier register use @@ -2845,13 +2844,22 @@ static void ggml_vk_load_shaders(vk_device& device) { m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; } - l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 }; - m_mmq_wg_denoms = m_wg_denoms = { 128, 128, 1 }; + l_mmq_wg_denoms = l_wg_denoms = { 128, 128, 1 }; + m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; l_align = 128; m_align = 64; s_align = 32; + if ((device->vendor_id == VK_VENDOR_ID_INTEL) && (device->driver_id == vk::DriverId::eIntelProprietaryWindows)) { + if (device->coopmat_support && device->architecture == INTEL_XE2) { + // Xe2/Xe3 with coopmat enabled - warptile performance tuning + m_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + m_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + m_mmq_wg_denoms = m_wg_denoms = { 128, 128, 1 }; + } + } + for (uint32_t i = 0; i < GGML_TYPE_COUNT; ++i) { ggml_type t = (ggml_type)i; // Disable medium and large matrix multiplication if not enough shared memory is available From 3441283df8cb92f29e4b16fd37cd13843de8a516 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Wed, 17 Dec 2025 13:51:45 -0700 Subject: [PATCH 03/11] fix back formatting --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index bc5b4030e15..11647d4b802 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2805,11 +2805,11 @@ static void ggml_vk_load_shaders(vk_device& device) { const uint32_t tk_s = device->coopmat_support ? device->coopmat_k : 1; l_warptile = { 128, 128, 128, 16, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 }; - m_warptile = { 128, 64, 64, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + m_warptile = { 128, 64, 64, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; s_warptile = { subgroup_size_16, 32, 32, 16, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 }; l_warptile_mmq = { 128, 128, 128, 32, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 }; - m_warptile_mmq = { 128, 64, 64, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + m_warptile_mmq = { 128, 64, 64, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; s_warptile_mmq = { subgroup_size_32, 32, 32, 32, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 }; // Integer MMQ has a smaller shared memory profile, but heavier register use From 6b7f1e867ecb2563fbddd605ac9afccdbae4aeb2 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Wed, 17 Dec 2025 13:52:39 -0700 Subject: [PATCH 04/11] fix formatting change 2 --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 11647d4b802..0610a1dc846 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2844,7 +2844,7 @@ static void ggml_vk_load_shaders(vk_device& device) { m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; } - l_mmq_wg_denoms = l_wg_denoms = { 128, 128, 1 }; + l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 }; m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; l_align = 128; From 0cfa616d61688670a0ada52fbf516d32b8d8dbe2 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Thu, 18 Dec 2025 15:45:06 -0700 Subject: [PATCH 05/11] move intel check to chip specific tuning part --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 0610a1dc846..c70273f0c76 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2838,20 +2838,19 @@ static void ggml_vk_load_shaders(vk_device& device) { m_warptile_mmqid_int_k = { 128, 64, 64, 32, mul_mat_subgroup_size_16, 32, 1, 2, 2, 1, mul_mat_subgroup_size_16 }; s_warptile_mmqid_int_k = { mul_mat_subgroup_size_32, 32, 32, 32, 32, 32, 1, 2, 1, 1, mul_mat_subgroup_size_16 }; + l_mmq_wg_denoms = l_wg_denoms = { 128, 128, 1 }; + m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; + s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; + l_align = 128; + m_align = 64; + s_align = 32; + // chip specific tuning if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) { m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; } - - l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 }; - m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; - s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; - l_align = 128; - m_align = 64; - s_align = 32; - - if ((device->vendor_id == VK_VENDOR_ID_INTEL) && (device->driver_id == vk::DriverId::eIntelProprietaryWindows)) { + else if ((device->vendor_id == VK_VENDOR_ID_INTEL) && (device->driver_id == vk::DriverId::eIntelProprietaryWindows)) { if (device->coopmat_support && device->architecture == INTEL_XE2) { // Xe2/Xe3 with coopmat enabled - warptile performance tuning m_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; From c40396a40dd0699b0fd4acb2aa6134b37f0eb946 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Fri, 19 Dec 2025 23:40:45 -0700 Subject: [PATCH 06/11] Change to support both windows and linux --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index c70273f0c76..7188c4036b6 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2850,7 +2850,7 @@ static void ggml_vk_load_shaders(vk_device& device) { m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; } - else if ((device->vendor_id == VK_VENDOR_ID_INTEL) && (device->driver_id == vk::DriverId::eIntelProprietaryWindows)) { + else if ((device->vendor_id == VK_VENDOR_ID_INTEL)) { if (device->coopmat_support && device->architecture == INTEL_XE2) { // Xe2/Xe3 with coopmat enabled - warptile performance tuning m_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; From dc147928eafd600e796a575e930510a1600d095f Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Mon, 29 Dec 2025 09:47:31 -0700 Subject: [PATCH 07/11] modify m_warptile to l_warptile for intel --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 7188c4036b6..f7d1481048e 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2850,12 +2850,11 @@ static void ggml_vk_load_shaders(vk_device& device) { m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; } - else if ((device->vendor_id == VK_VENDOR_ID_INTEL)) { + else if (device->vendor_id == VK_VENDOR_ID_INTEL) { if (device->coopmat_support && device->architecture == INTEL_XE2) { // Xe2/Xe3 with coopmat enabled - warptile performance tuning - m_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; - m_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; - m_mmq_wg_denoms = m_wg_denoms = { 128, 128, 1 }; + l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + l_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; } } @@ -4876,10 +4875,15 @@ static vk_device ggml_vk_get_device(size_t idx) { #ifndef GGML_VULKAN_RUN_TESTS case VK_VENDOR_ID_AMD: case VK_VENDOR_ID_INTEL: - device->mul_mat_l[i] = false; + if (!device->coopmat_support || device->architecture != INTEL_XE2) { + device->mul_mat_l[i] = false; + device->mul_mat_id_l[i] = false; + } else { + device->mul_mat_l[i] = true; // if coopmat & XE2+, allow large matmul warptile config for Intel + device->mul_mat_id_l[i] = true; + } device->mul_mat_m[i] = true; device->mul_mat_s[i] = true; - device->mul_mat_id_l[i] = false; device->mul_mat_id_m[i] = true; device->mul_mat_id_s[i] = true; break; From 328116e62f92fe889aec546a76a116712c666fa5 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Tue, 30 Dec 2025 13:07:54 -0700 Subject: [PATCH 08/11] modify warptile tuning for bf16 matmuls to fix regression (m_warptile to l_warptile) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index f7d1481048e..5f085db4c30 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -3528,6 +3528,13 @@ static void ggml_vk_load_shaders(vk_device& device) { m_wg_denoms = { 64, 64, 1 }; s_wg_denoms = { 32, 32, 1 }; + if (device->vendor_id == VK_VENDOR_ID_INTEL) { + if (device->architecture == INTEL_XE2) { + // Xe2/Xe3 - bf16 warptile performance tuning + l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, 4, 4, 1, subgroup_size_8 }; + } + } + CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_bf16, matmul_bf16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3, , 0); CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_id_bf16, matmul_id_bf16, , wg_denoms, warptile, vk_mat_mat_id_push_constants, 4, _id, 0); } From a1ed874cd6c9baae425472e30d8e7b9935cbc5dc Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Tue, 6 Jan 2026 09:15:15 -0700 Subject: [PATCH 09/11] Code style changes --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 37 +++++++++++++++------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 5f085db4c30..e1757636fab 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2838,6 +2838,16 @@ static void ggml_vk_load_shaders(vk_device& device) { m_warptile_mmqid_int_k = { 128, 64, 64, 32, mul_mat_subgroup_size_16, 32, 1, 2, 2, 1, mul_mat_subgroup_size_16 }; s_warptile_mmqid_int_k = { mul_mat_subgroup_size_32, 32, 32, 32, 32, 32, 1, 2, 1, 1, mul_mat_subgroup_size_16 }; + // chip specific tuning + if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) { + m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; + m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; + } else if (device->vendor_id == VK_VENDOR_ID_INTEL && device->coopmat_support && device->architecture == INTEL_XE2) { + // Xe2/Xe3 with coopmat enabled - warptile performance tuning + l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + l_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; + } + l_mmq_wg_denoms = l_wg_denoms = { 128, 128, 1 }; m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; @@ -2845,18 +2855,6 @@ static void ggml_vk_load_shaders(vk_device& device) { m_align = 64; s_align = 32; - // chip specific tuning - if ((device->architecture == AMD_GCN) && (device->driver_id != vk::DriverId::eAmdProprietary)) { - m_warptile_mmq = m_warptile_mmq_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; - m_warptile_mmqid = m_warptile_mmqid_int = { 256, 64, 64, 32, 16, 16, 2, 2, 2, 1, 16 }; - } - else if (device->vendor_id == VK_VENDOR_ID_INTEL) { - if (device->coopmat_support && device->architecture == INTEL_XE2) { - // Xe2/Xe3 with coopmat enabled - warptile performance tuning - l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; - l_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; - } - } for (uint32_t i = 0; i < GGML_TYPE_COUNT; ++i) { ggml_type t = (ggml_type)i; @@ -3528,11 +3526,9 @@ static void ggml_vk_load_shaders(vk_device& device) { m_wg_denoms = { 64, 64, 1 }; s_wg_denoms = { 32, 32, 1 }; - if (device->vendor_id == VK_VENDOR_ID_INTEL) { - if (device->architecture == INTEL_XE2) { - // Xe2/Xe3 - bf16 warptile performance tuning - l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, 4, 4, 1, subgroup_size_8 }; - } + if (device->vendor_id == VK_VENDOR_ID_INTEL && device->architecture == INTEL_XE2) { + // Xe2/Xe3 - bf16 warptile performance tuning + l_warptile = { 512, 128, 128, 16, subgroup_size_8, 32, 2, 4, 4, 1, subgroup_size_8 }; } CREATE_MM(GGML_TYPE_BF16, pipeline_matmul_bf16, matmul_bf16, , wg_denoms, warptile, vk_mat_mat_push_constants, 3, , 0); @@ -4881,6 +4877,13 @@ static vk_device ggml_vk_get_device(size_t idx) { switch (device->vendor_id) { #ifndef GGML_VULKAN_RUN_TESTS case VK_VENDOR_ID_AMD: + device->mul_mat_l[i] = false; + device->mul_mat_m[i] = true; + device->mul_mat_s[i] = true; + device->mul_mat_id_l[i] = false; + device->mul_mat_id_m[i] = true; + device->mul_mat_id_s[i] = true; + break; case VK_VENDOR_ID_INTEL: if (!device->coopmat_support || device->architecture != INTEL_XE2) { device->mul_mat_l[i] = false; From 60891f66f533d49f7425d6b9d24e3ef44eee4a58 Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Tue, 6 Jan 2026 09:23:55 -0700 Subject: [PATCH 10/11] Code style changes (2) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index e1757636fab..64c1378ba7a 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2851,10 +2851,9 @@ static void ggml_vk_load_shaders(vk_device& device) { l_mmq_wg_denoms = l_wg_denoms = { 128, 128, 1 }; m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; - l_align = 128; - m_align = 64; - s_align = 32; - + l_align = 128; + m_align = 64; + s_align = 32; for (uint32_t i = 0; i < GGML_TYPE_COUNT; ++i) { ggml_type t = (ggml_type)i; From a6a47ae3cb40eca738becef95287b32d36ac951c Mon Sep 17 00:00:00 2001 From: Viraj Wadhwa Date: Tue, 6 Jan 2026 09:25:47 -0700 Subject: [PATCH 11/11] Code style changes (3) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 64c1378ba7a..733d9d68edd 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -2848,12 +2848,12 @@ static void ggml_vk_load_shaders(vk_device& device) { l_warptile_mmq = { 512, 128, 128, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 }; } - l_mmq_wg_denoms = l_wg_denoms = { 128, 128, 1 }; - m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; - s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; + l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 }; + m_mmq_wg_denoms = m_wg_denoms = { 64, 64, 1 }; + s_mmq_wg_denoms = s_wg_denoms = { 32, 32, 1 }; l_align = 128; - m_align = 64; - s_align = 32; + m_align = 64; + s_align = 32; for (uint32_t i = 0; i < GGML_TYPE_COUNT; ++i) { ggml_type t = (ggml_type)i;