Merge branch 'master' into gg/llama-kv-cache

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-02-10 14:45:54 +02:00
61 changed files with 8505 additions and 3580 deletions

@@ -531,10 +531,10 @@ struct llama_batch_manager : public llama_batch_manager_i {
     auto & kv_self = lctx.kv_self;
 
     // decide if we need to defrag the kv cache
-    if (cparams.causal_attn && cparams.defrag_thold >= 0.0f) {
+    if (cparams.causal_attn && cparams.defrag_thold > 0.0f) {
         // - do not defrag small contexts (i.e. < 2048 tokens)
         // - count the padding towards the number of used tokens
-        const float fragmentation = kv_self.n >= 2048 ? 1.0f - float(kv_self.used + lctx.get_ctx_padding(cparams))/float(kv_self.n) : 0.0f;
+        const float fragmentation = kv_self.n >= 2048 ? std::max(0.0f, 1.0f - float(kv_self.used + lctx.get_ctx_padding(cparams))/float(kv_self.n)) : 0.0f;
 
         // queue defragmentation for next llama_kv_cache_update
         if (fragmentation > cparams.defrag_thold) {
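The hunk makes two changes: the outer check now skips the defrag path entirely unless defrag_thold is strictly positive, and the fragmentation estimate is clamped at zero, since (used + padding) can exceed n for nearly full caches and would otherwise yield a negative value. A minimal standalone sketch of the clamped computation follows; the function and parameter names are hypothetical, not the llama.cpp API:

#include <algorithm>
#include <cstdio>

// Sketch of the fragmentation estimate above:
// fragmentation = 1 - (used + padding) / n, clamped to >= 0 because
// (used + padding) can exceed n, which would make the value negative.
static float kv_fragmentation(int n_cells, int n_used, int n_padding) {
    if (n_cells < 2048) {
        return 0.0f; // do not defrag small contexts
    }
    return std::max(0.0f, 1.0f - float(n_used + n_padding)/float(n_cells));
}

int main() {
    const float defrag_thold = 0.1f; // example threshold, assumed

    // without the std::max clamp this case would be negative:
    // 1 - (4000 + 256)/4096 = -0.039 -> clamped to 0.0
    printf("frag = %.3f\n", kv_fragmentation(4096, 4000, 256));

    // a sparsely used cache crosses the threshold and queues a defrag
    const float frag = kv_fragmentation(4096, 1024, 256);
    if (defrag_thold > 0.0f && frag > defrag_thold) {
        printf("queue defrag (frag = %.3f)\n", frag);
    }
    return 0;
}

With the stricter outer check, setting defrag_thold to exactly 0.0f now disables defragmentation rather than triggering it on any nonzero fragmentation.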