From e14e842e87103ec2a004770cec95a3f94f861bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sat, 8 Nov 2025 08:26:18 +0100 Subject: [PATCH] CUDA: fix MMQ stream-k fixup ne1 indices (#17089) --- ggml/src/ggml-cuda/mmq.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index c9a07e82fe..2e133b6bda 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -3494,7 +3494,7 @@ static __global__ void mul_mat_q_stream_k_fixup( const int col_diff = col_high - col_low; for (int j = threadIdx.y*warp_size + threadIdx.x; j < mmq_x; j += nwarps*warp_size) { - ids_dst_shared[j] = ids_dst[col_low + j]; + ids_dst_shared[j] = ids_dst[col_low + jt*mmq_x + j]; } __syncthreads();