mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
HIP: Disable ROCWMMA fattn on CDNA when compiled against ROCWMMA 2.0.0 (#16221)
* HIP: Disable ROCWMMA fatt on CDNA when compiled against ROCWMMA 2.0.0 rocwmma 2.0.0 includes a bug in the code fakeing fp16 accumulation on CDNA * CUDA: Fix volta condition in ggml_cuda_should_use_wmma_fattn
This commit is contained in:
@@ -209,7 +209,6 @@ option(GGML_HIP "ggml: use HIP"
|
||||
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
||||
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
||||
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
||||
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
|
||||
option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
|
||||
option(GGML_HIP_EXPORT_METRICS "ggml: enable kernel perf metrics output" OFF)
|
||||
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
|
||||
|
||||
Reference in New Issue
Block a user