vulkan: Implement topk_moe fused shader, ported from CUDA (#16641)

This is similar to the CUDA shader from #16130, but it does not use shared memory, and it handles different subgroup sizes.
2025-11-19 11:57:07 +00:00 · 2025-10-18 05:22:57 -05:00
parent 38355c6c8e
commit e56abd2098
4 changed files with 412 additions and 8 deletions
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -920,6 +920,8 @@ void process_shaders() {

    string_to_spv("ssm_conv_f32", "ssm_conv.comp", {{"A_TYPE", "float"}});

+    string_to_spv("topk_moe_f32", "topk_moe.comp", {});
+
    for (auto &c : compiles) {
        c.wait();
    }