llama : remove KV cache defragmentation logic (#15473)

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-08-22 12:22:13 +03:00
committed by GitHub
parent ad5c975c2d
commit 9ebebef62f
16 changed files with 32 additions and 440 deletions

View File

@@ -77,7 +77,7 @@ struct llama_memory_i {
// simulate full cache, used for allocating worst-case compute buffers
virtual llama_memory_context_ptr init_full() = 0;
-// prepare for any pending memory updates, such as shifts, defrags, etc.
+// prepare for any pending memory updates, such as shifts, copies, etc.
// status == LLAMA_MEMORY_STATUS_NO_UPDATE if there is nothing to update
virtual llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) = 0;