rocBLAS: Avoid fp32->fp16->fp32 conversion on cdna (#11356)

2025-11-17 11:37:10 +00:00 · 2025-01-24 17:50:49 +01:00
parent 9755129c27
commit 9fbadaef4f
2 changed files with 40 additions and 25 deletions
--- a/ggml/src/ggml-cuda/mmvq.cu
+++ b/ggml/src/ggml-cuda/mmvq.cu
@@ -142,7 +142,7 @@ static void mul_mat_vec_q_cuda(
    int64_t nwarps = 1;
    int64_t rows_per_cuda_block = 1;

-    if (ggml_cuda_info().devices[id].cc < GGML_CUDA_CC_CDNA || ggml_cuda_info().devices[id].cc == GGML_CUDA_CC_RDNA1) { // NVIDIA and AMD older than RDNA2 but not CDNA
+    if (ggml_cuda_info().devices[id].cc < GGML_CUDA_CC_RDNA2) { // NVIDIA and AMD older than RDNA2
        switch(ncols_y) {
            case 1:
                nwarps = 4;
@@ -166,6 +166,7 @@ static void mul_mat_vec_q_cuda(
                break;
        }
    }
+
    const int64_t nblocks = (nrows_x + rows_per_cuda_block - 1) / rows_per_cuda_block;
    const dim3 block_nums(nblocks, 1, 1);
    const dim3 block_dims(WARP_SIZE, nwarps, 1);