HIP: implement FlashAttention via rocWMMA for CDNA and RDNA3+ (#12032)

Adds GGML_HIP_ROCWMMA_FATTN and rocwmma header check
Adds rocWMMA support to fattn-wmma-f16

---

Signed-off-by: Carl Klemm <carl@uvos.xyz>
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
Co-authored-by: Ben Jackson <ben@ben.com>
This commit is contained in:
David Huang
2025-03-04 05:10:54 +08:00
committed by GitHub
parent dfd6b2c0be
commit becade5de7
6 changed files with 145 additions and 95 deletions

View File

@@ -39,6 +39,12 @@ endif()
find_package(hip REQUIRED)
find_package(hipblas REQUIRED)
find_package(rocblas REQUIRED)
if (GGML_HIP_ROCWMMA_FATTN)
CHECK_INCLUDE_FILE_CXX("rocwmma/rocwmma.hpp" FOUND_ROCWMMA)
if (NOT ${FOUND_ROCWMMA})
message(FATAL_ERROR "rocwmma has not been found")
endif()
endif()
if (${hip_VERSION} VERSION_LESS 5.5)
message(FATAL_ERROR "At least ROCM/HIP V5.5 is required")
@@ -107,6 +113,10 @@ if (GGML_HIP_NO_VMM)
add_compile_definitions(GGML_HIP_NO_VMM)
endif()
if (GGML_HIP_ROCWMMA_FATTN)
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
endif()
if (NOT GGML_CUDA_FA)
add_compile_definitions(GGML_CUDA_NO_FA)
endif()