vulkan: use scalar FA rather than coopmat2 when N==1 (#13554)

2025-10-30 08:42:00 +00:00 · 2025-05-17 15:35:47 +09:00
parent 3e0be1cace
commit 4f41ee11d6
1 changed file with 7 additions and 0 deletions
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -5872,10 +5872,17 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
    vk_pipeline *pipelines;
    bool small_rows = N <= get_fa_num_small_rows(path);
    // coopmat1 does not actually support "small rows" (it needs 16 rows).
    // So use scalar instead.
    if (small_rows && path == FA_COOPMAT1) {
        path = FA_SCALAR;
    }
    // scalar is faster than coopmat2 when N==1
    if (N == 1 && path == FA_COOPMAT2) {
        path = FA_SCALAR;
    }
    bool f32acc = path == FA_SCALAR || dst->op_params[3] == GGML_PREC_F32;
    switch (path) {