From de9297fd5e631272f72b6e31265f9d4c9fa19933 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Thu, 5 Jun 2025 15:54:50 -0600
Subject: [PATCH] fix: Add missing padding to n_ctx for hybrid cache
 construction

Branch: GraniteFour

Signed-off-by: Gabe Goodhart
---
 src/llama-model.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 439d542b05..6e9dd53223 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -13768,13 +13768,17 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                     std::max((uint32_t) 1, cparams.n_seq_max),
                     cparams.n_seq_max);
         } else if (llm_arch_is_hybrid_recurrent(arch)) {
+            const auto padding = llama_kv_cache_unified::get_padding(cparams);
+
+            cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding);
+
             res = new llama_kv_cache_hybrid_recurrent(
                 /* model             */ *this,
                 /* attn_type_k       */ params.type_k,
                 /* attn_type_v       */ params.type_v,
                 /* attn_v_trans      */ !cparams.flash_attn,
                 /* attn_kv_size      */ cparams.n_ctx,
-                /* attn_n_pad        */ llama_kv_cache_unified::get_padding(cparams),
+                /* attn_n_pad        */ padding,
                 /* attn_n_swa        */ hparams.n_swa,
                 /* attn_swa_type     */ hparams.swa_type,
                 /* recurrent_type_k  */ GGML_TYPE_F32,
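
Note (not part of the patch): the fix pads cparams.n_ctx up to a multiple of the
value returned by llama_kv_cache_unified::get_padding(cparams) before the hybrid
cache is built, so the attention KV size matches the padding passed as attn_n_pad,
mirroring the non-hybrid unified-cache path. A minimal stand-alone sketch of the
rounding behaviour; the helper name and the padding value 256 are illustrative
assumptions, not values taken from the patch:

    #include <cstdint>
    #include <cstdio>

    // Stand-alone equivalent of what GGML_PAD does in this context: round x up
    // to the next multiple of n (illustration only, not the ggml.h macro verbatim).
    static uint32_t pad_to_multiple(uint32_t x, uint32_t n) {
        return ((x + n - 1) / n) * n;
    }

    int main() {
        const uint32_t n_ctx   = 4100; // requested context size (example value)
        const uint32_t padding = 256;  // assumed padding; in the patch it comes from
                                       // llama_kv_cache_unified::get_padding(cparams)
        printf("n_ctx %u -> %u (multiple of %u)\n",
               n_ctx, pad_to_multiple(n_ctx, padding), padding);
        return 0;
    }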