llama : remove implicit recurrent state rollbacks
@@ -1029,7 +1029,7 @@ struct server_context {
         SRV_DBG("%s", "clearing KV cache\n");

         // clear the entire KV cache
-        llama_past_clear(ctx);
+        llama_kv_cache_clear(ctx);
         clean_kv_cache = false;
     }

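For reference, a minimal sketch of the new call site, assuming the llama.cpp C API exactly as it appears in the hunk above (the wrapper name is hypothetical): llama_kv_cache_clear(ctx) wipes the cached state for all sequences in one call, and with the implicit rollbacks removed, nothing is restored automatically afterwards.

#include "llama.h"

// Hypothetical wrapper mirroring the hunk above: explicitly clear the
// entire KV cache. After this commit there is no implicit rollback of
// recurrent state, so the next decode starts from an empty cache.
static void clear_all_cached_state(llama_context * ctx) {
    llama_kv_cache_clear(ctx);
}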
@@ -1760,7 +1760,7 @@ struct server_context {

         // Erase token cache
         const size_t n_erased = slot->cache_tokens.size();
-        llama_past_seq_rm(ctx, slot->id + 1, -1, -1);
+        llama_kv_cache_seq_rm(ctx, slot->id + 1, -1, -1);
         slot->cache_tokens.clear();

         server_task_result result;
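And a similar sketch for per-slot erasure, under the same assumption: llama_kv_cache_seq_rm(ctx, seq_id, p0, p1) removes the cached tokens of a single sequence, where p0 = -1 and p1 = -1 select the full position range, and the slot->id + 1 offset follows the server's sequence-id convention seen in the hunk. The helper name is hypothetical.

#include "llama.h"

// Hypothetical helper mirroring the hunk above: explicitly erase one
// slot's cached tokens. seq_id is slot_id + 1 per the server's
// convention; the -1, -1 bounds cover the whole position range.
static void erase_slot_cache(llama_context * ctx, int slot_id) {
    llama_kv_cache_seq_rm(ctx, slot_id + 1, -1, -1);
}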