CUDA: refactor and deduplicate vector FA kernels (#16208)

* CUDA: refactor and deduplicate vector FA kernels
2025-11-15 11:17:31 +00:00 · 2025-09-27 18:45:07 +02:00
parent 0499b29c6f
commit 75a3a6c2cd
129 changed files with 1396 additions and 1989 deletions
--- a/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu
+++ b/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu
@@ -0,0 +1,7 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../fattn-vec.cuh"
+
+DECL_FATTN_VEC_CASE( 64, GGML_TYPE_Q4_1, GGML_TYPE_F16);
+DECL_FATTN_VEC_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
+DECL_FATTN_VEC_CASE(256, GGML_TYPE_Q4_1, GGML_TYPE_F16);