llama : remove implicit recurrent state rollbacks

2025-11-07 09:57:00 +00:00 · 2024-11-24 20:35:30 -05:00
parent 124c222f76
commit 8006f3b3c8
25 changed files with 411 additions and 1119 deletions
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -83,7 +83,7 @@ static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & toke

 static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
    // clear previous kv_cache values (irrelevant for embeddings)
-    llama_past_clear(ctx);
+    llama_kv_cache_clear(ctx);

    // run model
    LOG_INF("%s: n_tokens = %d, n_seq = %d\n", __func__, batch.n_tokens, n_seq);