Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-29 08:41:22 +00:00
			
		
		
		
	vulkan: fix noncontig check for mat_mul_id splitting (#14683)
* vulkan: fix noncontig check for mat_mul_id splitting

  Remove the supports_op check for ne[0] > 4096 (splitting fixes this)

* vulkan: fix batched matmul dequant for Q*_K
This commit is contained in:
		| @@ -4922,7 +4922,7 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) { | ||||
|     return | ||||
|         tensor->nb[0] == ggml_type_size(tensor->type) && | ||||
|         tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) && | ||||
|         tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; | ||||
|         (tensor->ne[3] == 1 || tensor->nb[3] == tensor->nb[2]*tensor->ne[2]); | ||||
| } | ||||
|  | ||||
| static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) { | ||||
| @@ -10356,10 +10356,6 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm | ||||
|                         // If there's not enough shared memory for row_ids and the result tile, fallback to CPU | ||||
|                         return false; | ||||
|                     } | ||||
|                     // Check against size of shared memory variable | ||||
|                     if (op->src[2]->ne[0] > 4096) { | ||||
|                         return false; | ||||
|                     } | ||||
|                 } | ||||
|                 switch (src0_type) { | ||||
|                     case GGML_TYPE_F32: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
Author: Jeff Bolz