mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-31 08:51:55 +00:00
vulkan: Support FA with any multiple of 8 head sizes (#15537)
The scalar FA shader already handled multiples of 8. The coopmat1 FA shader assumed 16x16x16 and the shared memory allocations need the HSK dimensions padded to a multiple of 16. NVIDIA's coopmat2 implementation requires multiples of 16 for N and K, and needs the matrix dimensions padded and loads clamped. Store the FA pipelines in a map, indexed by the pipeline state.
This commit is contained in:
@@ -9,6 +9,10 @@ layout (constant_id = 4) const uint32_t HSV = 32;
|
||||
layout (constant_id = 5) const uint32_t Clamp = 0;
|
||||
layout (constant_id = 6) const uint32_t D_split = 16;
|
||||
|
||||
// Round up head sizes to a multiple of 16, for coopmat1/coopmat2 paths
|
||||
const uint32_t HSK_pad = (HSK + 15) & ~15;
|
||||
const uint32_t HSV_pad = (HSV + 15) & ~15;
|
||||
|
||||
layout (push_constant) uniform parameter {
|
||||
uint32_t N;
|
||||
uint32_t KV;
|
||||
|
||||
Reference in New Issue
Block a user