llama: consistent ctx <-> buf order for KV cache (#16746)

2025-11-07 09:57:00 +00:00 · 2025-10-28 11:23:54 +01:00
parent 280d97be96
commit 7a0e900e36
5 changed files with 41 additions and 33 deletions
--- a/src/llama-memory-recurrent.h
+++ b/src/llama-memory-recurrent.h
@@ -109,8 +109,8 @@ private:

    const uint32_t n_seq_max = 1;

-    std::vector<ggml_context_ptr>        ctxs;
-    std::vector<ggml_backend_buffer_ptr> bufs;
+    // ggml contexts for the KV cache along with the allocated backend buffers:
+    std::vector<std::pair<ggml_context_ptr, ggml_backend_buffer_ptr>> ctxs_bufs;

    size_t total_size() const;