memory : remove KV cache size padding

Georgi Gerganov
2025-10-28 09:26:16 +02:00
parent 3479efd112
commit e28cec364e
4 changed files with 5 additions and 28 deletions


@@ -2010,8 +2010,3 @@ void llama_kv_cache_context::set_input_kq_mask(ggml_tensor * dst, const llama_ub
 void llama_kv_cache_context::set_input_pos_bucket(ggml_tensor * dst, const llama_ubatch * ubatch) const {
     kv->set_input_pos_bucket(dst, ubatch);
 }
-
-uint32_t llama_kv_cache::get_padding(const llama_cparams & cparams) {
-    // the FA kernels require padding to avoid extra runtime boundary checks
-    return cparams.flash_attn ? 256u : 32u;
-}
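
For context, the removed llama_kv_cache::get_padding() helper rounded the KV cache size up to a multiple of 256 when flash attention was enabled (so the FA kernels could skip extra runtime boundary checks) and 32 otherwise. Below is a minimal, self-contained sketch of that round-up behaviour, assuming a hypothetical pad_size() helper in place of the padding macro actually used in llama.cpp; it is an illustration, not the project's implementation.

    #include <cstdint>
    #include <cstdio>

    // hypothetical helper: round n up to the next multiple of pad
    static uint32_t pad_size(uint32_t n, uint32_t pad) {
        return ((n + pad - 1) / pad) * pad;
    }

    int main() {
        const uint32_t n_ctx   = 4000; // example requested context size
        const uint32_t pad_fa  = 256u; // padding previously used with flash attention
        const uint32_t pad_std = 32u;  // padding previously used otherwise

        printf("flash attn: %u -> %u\n", n_ctx, pad_size(n_ctx, pad_fa));  // 4000 -> 4096
        printf("default   : %u -> %u\n", n_ctx, pad_size(n_ctx, pad_std)); // 4000 -> 4000
        return 0;
    }

After this commit, the KV cache size is no longer rounded up to these multiples.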