CUDA: fix MMQ stream-k fixup ne1 indices (#17089)

This commit is contained in:
Johannes Gäßler
2025-11-08 08:26:18 +01:00
committed by GitHub
parent 647b960bd8
commit e14e842e87

View File

@@ -3494,7 +3494,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
const int col_diff = col_high - col_low;
for (int j = threadIdx.y*warp_size + threadIdx.x; j < mmq_x; j += nwarps*warp_size) {
-ids_dst_shared[j] = ids_dst[col_low + j];
+ids_dst_shared[j] = ids_dst[col_low + jt*mmq_x + j];
}
__syncthreads();