kv-cache : fix SWA checks + disable cacheless iSWA (#15811)

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-09-05 10:39:22 +03:00
committed by GitHub
parent 5d6688de08
commit c610b6c11b
9 changed files with 29 additions and 11 deletions

View File

@@ -89,6 +89,7 @@ public:
uint32_t n_seq_max,
uint32_t n_pad,
uint32_t n_swa,
llama_swa_type swa_type,
const layer_filter_cb & filter,
const layer_reuse_cb & reuse);
@@ -211,6 +212,9 @@ private:
// debug level for this cache; defaults to 0
// env: LLAMA_KV_CACHE_DEBUG
// NOTE(review): presumably overridden from the environment variable above - the code that reads it is not visible here
int debug = 0;
// SWA type of this cache instance - not to be confused with the SWA type of the model
// const member, defaults to LLAMA_SWA_TYPE_NONE
// NOTE(review): presumably initialized from the `swa_type` constructor argument - confirm in the constructor definition
const llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
// ggml contexts and backend buffers held by the cache
// NOTE(review): the *_ptr aliases are declared elsewhere; presumably owning smart pointers - confirm
std::vector<ggml_context_ptr> ctxs;
std::vector<ggml_backend_buffer_ptr> bufs;