ggml : fix FA mask dim 2 and 3 (#14505)

* ggml : fix FA mask dim 2 and 3

ggml-ci

* backends : unsupport batched FA in CUDA and Vulkan

ggml-ci

* vulkan : disable FA for mask->ne[2] != 1
2025-11-06 09:46:50 +00:00 · 2025-07-03 10:46:57 +03:00
parent d4cdd9c1c3
commit 9067487c44
9 changed files with 26 additions and 15 deletions
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -7799,7 +7799,7 @@ static void ggml_compute_forward_flash_attn_ext_f16(
            memset(VKQ32, 0, DV*sizeof(float));
        }

-        const ggml_fp16_t * mp = mask ? (ggml_fp16_t *)((char *) mask->data + iq1*mask->nb[1] + (iq3%mask->ne[2])*mask->nb[2]) : NULL;
+        const ggml_fp16_t * mp = mask ? (ggml_fp16_t *)((char *) mask->data + iq1*mask->nb[1] + (iq2%mask->ne[2])*mask->nb[2] + (iq3%mask->ne[3])*mask->nb[3]) : NULL;

        // k indices
        const int ik3 = iq3 / rk3;