mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-19 11:57:07 +00:00
vulkan: Implement topk_moe fused shader, ported from CUDA (#16641)
This is similar to the CUDA shader from #16130, but doesn't use shared memory and handles different subgroup sizes.
This commit is contained in:
@@ -920,6 +920,8 @@ void process_shaders() {
|
||||
|
||||
string_to_spv("ssm_conv_f32", "ssm_conv.comp", {{"A_TYPE", "float"}});
|
||||
|
||||
string_to_spv("topk_moe_f32", "topk_moe.comp", {});
|
||||
|
||||
for (auto &c : compiles) {
|
||||
c.wait();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user