ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (#4787)

2025-10-29 08:41:22 +00:00 · 2024-01-07 01:52:42 -05:00
parent 67984921a7
commit 63ee677efd
1 changed files with 1 additions and 1 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
 static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
 #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
    c = __builtin_amdgcn_sdot4(a, b, c, false);
-#elif defined(__gfx1100__)
+#elif defined(RDNA3)
    c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
 #elif defined(__gfx1010__) || defined(__gfx900__)
    int tmp1;