vulkan: Implement topk_moe fused shader, ported from CUDA (#16641)

This is similar to the CUDA shader from #16130, but it does not use shared
memory, and it handles different subgroup sizes.
This commit is contained in:
Jeff Bolz
2025-10-18 05:22:57 -05:00
committed by GitHub
parent 38355c6c8e
commit e56abd2098
4 changed files with 412 additions and 8 deletions

View File

@@ -920,6 +920,8 @@ void process_shaders() {
string_to_spv("ssm_conv_f32", "ssm_conv.comp", {{"A_TYPE", "float"}});
string_to_spv("topk_moe_f32", "topk_moe.comp", {});
for (auto &c : compiles) {
c.wait();
}