kv-cache : fix out-of-bounds view during reserve graph (#13547)

* kv-cache : fix reserve graph out-of-bounds access

ggml-ci

* cont : add comment

* cont : fix comments [no ci]

* cont : more correct comment [no ci]
2025-11-07 09:57:00 +00:00 · 2025-05-14 23:15:15 +03:00
parent 5ab5d5fb25
commit e3a9421b78
2 changed files with 12 additions and 10 deletions
--- a/src/llama-kv-cache.h
+++ b/src/llama-kv-cache.h
@@ -171,11 +171,8 @@ public:
    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const override;
    void state_read (llama_io_read_i  & io, llama_seq_id seq_id = -1) override;

-    // Note: The value of head isn't only used to optimize searching
-    // for a free KV slot. llama_decode_impl also uses it, so it
-    // cannot be freely changed after a slot has been allocated.
-    uint32_t head = 0;
-    uint32_t size = 0;
+    uint32_t head = 0; // the location where the batch will be placed in the cache (see find_slot())
+    uint32_t size = 0; // total number of cells, shared across all sequences
    uint32_t used = 0; // used cells (i.e. at least one seq_id)

    // computed before each graph build
@@ -343,11 +340,8 @@ public:
    void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const override;
    void state_read (llama_io_read_i  & io, llama_seq_id seq_id = -1) override;

-    // Note: The value of head isn't only used to optimize searching
-    // for a free KV slot. llama_decode_impl also uses it, so it
-    // cannot be freely changed after a slot has been allocated.
-    uint32_t head = 0;
-    uint32_t size = 0;
+    uint32_t head = 0; // the location where the batch will be placed in the cache (see find_slot())
+    uint32_t size = 0; // total number of cells, shared across all sequences
    uint32_t used = 0; // used cells (i.e. at least one seq_id)

    // computed before each graph build