llama: consistent ctx <-> buf order for KV cache (#16746)

This commit is contained in:
Johannes Gäßler
2025-10-28 11:23:54 +01:00
committed by GitHub
parent 280d97be96
commit 7a0e900e36
5 changed files with 41 additions and 33 deletions

View File

@@ -109,8 +109,8 @@ private:
const uint32_t n_seq_max = 1;
std::vector<ggml_context_ptr> ctxs;
std::vector<ggml_backend_buffer_ptr> bufs;
// ggml contexts for the KV cache along with the allocated backend buffers:
std::vector<std::pair<ggml_context_ptr, ggml_backend_buffer_ptr>> ctxs_bufs;
size_t total_size() const;