From 65156105069fa86a4a81b6cb0e8cb583f6420677 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= <johannesg@5d6.de>
Date: Fri, 7 Nov 2025 20:53:14 +0100
Subject: [PATCH] CUDA: fix should_use_mmvf for ne11 == 1 (#17085)

* CUDA: fix should_use_mmvf for ne11 == 1

* Apply suggestion from @am17an

Co-authored-by: Aman Gupta <amangupta052@gmail.com>

---------

Co-authored-by: Aman Gupta <amangupta052@gmail.com>
---
 ggml/src/ggml-cuda/mmf.cu  | 8 +++++++-
 ggml/src/ggml-cuda/mmvf.cu | 9 ++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-cuda/mmf.cu b/ggml/src/ggml-cuda/mmf.cu
index 69a60aceb8..153dd5a97d 100644
--- a/ggml/src/ggml-cuda/mmf.cu
+++ b/ggml/src/ggml-cuda/mmf.cu
@@ -129,7 +129,13 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const
     if (src0_ne[0] % (warp_size * (4/ts)) != 0) {
         return false;
     }
-    for (size_t i = 0; i < GGML_MAX_DIMS; ++i) {
+
+    if (src0_nb[0] != ts) {
+        return false;
+    }
+
+    // Pointers not aligned to the size of half2/nv_bfloat162/float2 would result in a crash:
+    for (size_t i = 1; i < GGML_MAX_DIMS; ++i) {
         if (src0_nb[i] % (2*ts) != 0) {
             return false;
         }
diff --git a/ggml/src/ggml-cuda/mmvf.cu b/ggml/src/ggml-cuda/mmvf.cu
index 526d90d7ae..6238ce7ebd 100644
--- a/ggml/src/ggml-cuda/mmvf.cu
+++ b/ggml/src/ggml-cuda/mmvf.cu
@@ -720,12 +720,19 @@ bool ggml_cuda_should_use_mmvf(enum ggml_type type, int cc, const int64_t * src0
     if (src0_ne[0] % 2 != 0) {
         return false;
     }
+
     const size_t ts = ggml_type_size(type);
-    for (size_t i = 0; i < GGML_MAX_DIMS; ++i) {
+    if (src0_nb[0] != ts) {
+        return false;
+    }
+
+    // Pointers not aligned to the size of half2/nv_bfloat162/float2 would result in a crash:
+    for (size_t i = 1; i < GGML_MAX_DIMS; ++i) {
         if (src0_nb[i] % (2*ts) != 0) {
             return false;
         }
     }
+
     switch (type) {
         case GGML_TYPE_F32:
             if (GGML_CUDA_CC_IS_NVIDIA(cc)) {