mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-05 09:36:52 +00:00
CUDA/HIP: optimize mmv paths taken for HIP devices (#14324)
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
@@ -263,7 +263,11 @@ static bool fp16_mma_hardware_available(const int cc) {
|
||||
}
|
||||
|
||||
static bool bf16_mma_hardware_available(const int cc) {
|
||||
return GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE;
|
||||
return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) || GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3;
|
||||
}
|
||||
|
||||
static bool fp32_mma_hardware_available(const int cc) {
|
||||
return GGML_CUDA_CC_IS_CDNA(cc);
|
||||
}
|
||||
|
||||
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
|
||||
|
||||
Reference in New Issue
Block a user