llama : remove KV cache defragmentation logic (#15473)

ggml-ci
2025-10-27 08:21:30 +00:00 · 2025-08-22 12:22:13 +03:00
parent ad5c975c2d
commit 9ebebef62f
16 changed files with 32 additions and 440 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -312,7 +312,7 @@ extern "C" {
        float    yarn_beta_fast;   // YaRN low correction dim
        float    yarn_beta_slow;   // YaRN high correction dim
        uint32_t yarn_orig_ctx;    // YaRN original context size
-        float    defrag_thold;     // defragment the KV cache if holes/size > thold, <= 0 disabled (default)
+        float    defrag_thold;     // [DEPRECATED] KV cache defragmentation logic has been removed; previously: defragment the KV cache if holes/size > thold, <= 0 disabled (default)

        ggml_backend_sched_eval_callback cb_eval;
        void * cb_eval_user_data;