CUDA: refactor and deduplicate vector FA kernels (#16208)

* CUDA: refactor and deduplicate vector FA kernels
This commit is contained in:
Johannes Gäßler
2025-09-27 18:45:07 +02:00
committed by GitHub
parent 0499b29c6f
commit 75a3a6c2cd
129 changed files with 1396 additions and 1989 deletions

View File

@@ -0,0 +1,7 @@
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec.cuh"
DECL_FATTN_VEC_CASE( 64, GGML_TYPE_Q4_1, GGML_TYPE_F16);
DECL_FATTN_VEC_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
DECL_FATTN_VEC_CASE(256, GGML_TYPE_Q4_1, GGML_TYPE_F16);