mirror of https://github.com/ggml-org/llama.cpp.git
metal : add support for non-padded FA KV (#16148)
* metal : pad K, V and Mask when needed
* cont : simplify
* cuda : add TODO about KV padding requirement
* metal : add comments
* metal : remove mask padding requirement
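For background on what "pad K, V and Mask when needed" refers to: flash-attention kernels typically walk the KV dimension in fixed-size blocks, so a KV length that is not a multiple of the block size either has to be padded or handled by a dedicated non-padded path. The sketch below only illustrates the padding idea; it is not the Metal backend code. PAD_TO, pad_kv and pad_mask are made-up names, the block size of 32 is assumed, and the zero-fill for K/V plus -INFINITY fill for the mask is an assumption about how padded positions are kept out of the softmax (note the final bullet removes the mask padding requirement altogether, so the mask part is purely conceptual).

    // Illustrative sketch, not backend code: pad the KV dimension up to a
    // multiple of an assumed block size and mask out the padded positions.
    #include <cmath>
    #include <cstring>
    #include <vector>

    constexpr int PAD_TO = 32; // assumed flash-attention block size

    static int round_up(int n, int m) { return ((n + m - 1) / m) * m; }

    // K (or V) stored row-major as [kv][head_dim]; padded rows are zero-filled.
    static std::vector<float> pad_kv(const std::vector<float> & src, int kv, int head_dim) {
        const int kv_pad = round_up(kv, PAD_TO);
        std::vector<float> dst((size_t) kv_pad * head_dim, 0.0f);
        std::memcpy(dst.data(), src.data(), src.size() * sizeof(float));
        return dst;
    }

    // Mask for one query row: padded KV positions get -INFINITY so the softmax
    // assigns them zero weight.
    static std::vector<float> pad_mask(const std::vector<float> & src, int kv) {
        const int kv_pad = round_up(kv, PAD_TO);
        std::vector<float> dst(kv_pad, -INFINITY);
        std::memcpy(dst.data(), src.data(), (size_t) kv * sizeof(float));
        return dst;
    }

The test change below extends the flash-attention cases in test-backend-ops.cpp with a KV length that is not block-aligned, so this padding path actually gets exercised.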
@@ -6773,7 +6773,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 if (hsk > 64 && nr3 > 1) continue; // skip broadcast for large head sizes
 for (int nr2 : { 1, 4, 16 }) {
     if (nr2 == 16 && hsk != 128) continue;
-    for (int kv : { 512, 1024, }) {
+    //for (int kv : { 1, 17, 31, 33, 61, 113, 65, 127, 129, 130, 255, 260, 371, 380, 407, 512, 1024, }) {
+    for (int kv : { 113, 512, 1024, }) {
         if (nr2 != 1 && kv != 512) continue;
         for (int nb : { 1, 3, 32, 35, }) {
             for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) {
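The new kv = 113 case is the substantive test change: unlike 512 and 1024 it is not a multiple of the block sizes flash-attention kernels commonly use, so it forces the non-padded KV handling added by this commit. Assuming a 32-wide block purely for illustration:

    // Hypothetical numbers: with a 32-wide block, kv = 113 rounds up to 128
    // (15 padded slots), while kv = 512 and kv = 1024 are already aligned.
    static_assert(((113 + 31) / 32) * 32 == 128, "113 pads to 128");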