llama : add llama_kv_cache_shift_seq + no more context swaps

2025-11-09 10:17:06 +00:00 · 2023-09-18 18:00:25 +03:00
parent 86c90e34f5
commit 0cbf3bfef8
4 changed files with 66 additions and 29 deletions
--- a/llama.h
+++ b/llama.h
@@ -324,15 +324,15 @@ extern "C" {
    // Remove all tokens data of cells in [c0, c1)
    LLAMA_API void llama_kv_cache_rm_tokens(struct llama_context * ctx, int32_t c0, int32_t c1);

-    // Removes all tokens that belong to the specified sequence
-    LLAMA_API void llama_kv_cache_rm_seq(struct llama_context * ctx, llama_seq_id seq_id);
+    // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
+    LLAMA_API void llama_kv_cache_rm_seq(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1);

    // Removes all tokens that do not belong to the specified sequence
    LLAMA_API void llama_kv_cache_keep_seq(struct llama_context * ctx, llama_seq_id seq_id);

    // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
    // If the KV cache is RoPEd, the KV data is updated accordingly
-    LLAMA_API void llama_kv_cache_shift(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta);
+    LLAMA_API void llama_kv_cache_shift_seq(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta);

    //
    // State / sessions