kv-cache : fix SWA checks + disable cacheless iSWA (#15811)

ggml-ci
2025-11-01 09:01:57 +00:00 · 2025-09-05 10:39:22 +03:00
parent 5d6688de08
commit c610b6c11b
9 changed files with 29 additions and 11 deletions
--- a/src/llama-kv-cache.h
+++ b/src/llama-kv-cache.h
@@ -89,6 +89,7 @@ public:
                     uint32_t   n_seq_max,
                     uint32_t   n_pad,
                     uint32_t   n_swa,
+               llama_swa_type   swa_type,
        const layer_filter_cb & filter,
        const  layer_reuse_cb & reuse);

@@ -211,6 +212,9 @@ private:
    // env: LLAMA_KV_CACHE_DEBUG
    int debug = 0;

+    // this is the SWA type of the cache - not to be confused with the model SWA type
+    const llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE;
+
    std::vector<ggml_context_ptr>        ctxs;
    std::vector<ggml_backend_buffer_ptr> bufs;