OpenCL: add initial flash attention (FA) support (#14987)

* add F16/F16 FA support

* fix kernel init

* use mad instead of fma

* use inline function

* mark FA with sinks as unsupported for now

* add pragma unroll to loops
This commit is contained in:
rmatif
2025-08-16 10:05:55 +02:00
committed by GitHub
parent 5e6229a840
commit 912ff8c119
5 changed files with 1283 additions and 0 deletions

View File

@@ -112,6 +112,9 @@ set(GGML_OPENCL_KERNELS
mul_mat_f16_f32
conv2d
conv2d_f16_f32
flash_attn_f32_f16
flash_attn_f16
flash_attn_f32
)
foreach (K ${GGML_OPENCL_KERNELS})