context : fix n_ctx_per_seq computation

Georgi Gerganov
2025-10-23 14:51:26 +03:00
parent a42fb77147
commit 492f628c58
2 changed files with 7 additions and 9 deletions

@@ -6712,7 +6712,7 @@ float llama_model::get_rope_freq_scale(const llama_cparams & cparams, int il) const
 }
 
 ggml_tensor * llama_model::get_rope_factors(const llama_cparams & cparams, int il) const {
-    const uint32_t n_ctx_per_seq = cparams.n_ctx / cparams.n_seq_max;
+    const uint32_t n_ctx_per_seq = cparams.kv_unified ? cparams.n_ctx : cparams.n_ctx / cparams.n_seq_max;
 
     // choose long/short freq factors based on the context size
     if (layers[il].rope_freqs != nullptr) {
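The rationale behind the fix: with a unified KV cache (kv_unified == true) all sequences share a single buffer spanning the full context, so each sequence may use up to n_ctx tokens; only in the split case is the context divided evenly across the n_seq_max sequences. Below is a minimal standalone sketch of the fixed computation; the cparams_t struct is a hypothetical stand-in for the relevant llama_cparams fields, not the actual llama.cpp type.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the llama_cparams fields used by the fix.
struct cparams_t {
    uint32_t n_ctx;      // total context size across all sequences
    uint32_t n_seq_max;  // maximum number of parallel sequences
    bool     kv_unified; // one KV cache buffer shared by all sequences?
};

// With a unified KV cache every sequence may use the full context,
// so n_ctx_per_seq is n_ctx itself; otherwise the context is split
// evenly across the sequences.
static uint32_t n_ctx_per_seq(const cparams_t & cparams) {
    return cparams.kv_unified ? cparams.n_ctx
                              : cparams.n_ctx / cparams.n_seq_max;
}

int main() {
    const cparams_t split   = { /*n_ctx =*/ 8192, /*n_seq_max =*/ 4, /*kv_unified =*/ false };
    const cparams_t unified = { /*n_ctx =*/ 8192, /*n_seq_max =*/ 4, /*kv_unified =*/ true  };

    // prints: split: 2048, unified: 8192
    printf("split: %u, unified: %u\n", n_ctx_per_seq(split), n_ctx_per_seq(unified));
    return 0;
}

The practical effect, as suggested by the comment in the diff: before the fix, a unified cache configured with n_seq_max > 1 would compute a per-sequence context n_seq_max times smaller than what a sequence can actually use, which could make get_rope_factors select the short RoPE frequency factors in cases where the long ones apply.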