fix: Add missing padding to n_ctx for hybrid cache construction

Branch: GraniteFour

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
Author: Gabe Goodhart
Date:   2025-06-05 15:54:50 -06:00
parent  911e694476
commit  de9297fd5e


@@ -13768,13 +13768,17 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
                     std::max((uint32_t) 1, cparams.n_seq_max),
                     cparams.n_seq_max);
         } else if (llm_arch_is_hybrid_recurrent(arch)) {
+            const auto padding = llama_kv_cache_unified::get_padding(cparams);
+
+            cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding);
+
             res = new llama_kv_cache_hybrid_recurrent(
                 /* model             */ *this,
                 /* attn_type_k       */ params.type_k,
                 /* attn_type_v       */ params.type_v,
                 /* attn_v_trans      */ !cparams.flash_attn,
                 /* attn_kv_size      */ cparams.n_ctx,
-                /* attn_n_pad        */ llama_kv_cache_unified::get_padding(cparams),
+                /* attn_n_pad        */ padding,
                 /* attn_n_swa        */ hparams.n_swa,
                 /* attn_swa_type     */ hparams.swa_type,
                 /* recurrent_type_k  */ GGML_TYPE_F32,
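
For context, the unified attention cache embedded in the hybrid cache expects its size to be a multiple of the padding reported by llama_kv_cache_unified::get_padding(), so the hybrid path now rounds cparams.n_ctx up with GGML_PAD before passing it as attn_kv_size. The standalone sketch below only illustrates that round-up; the helper pad_to_multiple and the values 256 and 4097 are assumptions for the example, not the actual llama.cpp implementation.

// Illustrative only: pad_to_multiple and the constants are example assumptions;
// the real code uses GGML_PAD and the value returned by
// llama_kv_cache_unified::get_padding(cparams).
#include <cstdint>
#include <cstdio>

// Round x up to the next multiple of n (the effect the padding has on n_ctx).
static uint32_t pad_to_multiple(uint32_t x, uint32_t n) {
    return ((x + n - 1) / n) * n;
}

int main() {
    const uint32_t padding = 256;   // hypothetical cache padding
    uint32_t       n_ctx   = 4097;  // hypothetical requested context size

    n_ctx = pad_to_multiple(n_ctx, padding);

    printf("padded n_ctx = %u\n", n_ctx);  // 4097 -> 4352
    return 0;
}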