memory : remove KV cache size padding

2025-11-08 10:07:01 +00:00 · 2025-10-28 09:26:16 +02:00
parent 3479efd112
commit e28cec364e
4 changed files with 5 additions and 28 deletions
--- a/src/llama-kv-cache.cpp
+++ b/src/llama-kv-cache.cpp
@@ -2010,8 +2010,3 @@ void llama_kv_cache_context::set_input_kq_mask(ggml_tensor * dst, const llama_ub
 void llama_kv_cache_context::set_input_pos_bucket(ggml_tensor * dst, const llama_ubatch * ubatch) const {
    kv->set_input_pos_bucket(dst, ubatch);
 }
-
-uint32_t llama_kv_cache::get_padding(const llama_cparams & cparams) {
-    // the FA kernels require padding to avoid extra runtime boundary checks
-    return cparams.flash_attn ? 256u : 32u;
-}