mirror of https://github.com/ggml-org/llama.cpp.git
kv-cache : support layer reuse (#15504)
* kv-cache : support layer reuse

ggml-ci

* cont : update comments [no ci]
@@ -16,13 +16,13 @@
 //
 
 llama_memory_recurrent::llama_memory_recurrent(
-        const llama_model & model,
-          layer_filter_cb && filter,
-                ggml_type   type_r,
-                ggml_type   type_s,
-                     bool   offload,
-                 uint32_t   mem_size,
-                 uint32_t   n_seq_max) : hparams(model.hparams), n_seq_max(n_seq_max) {
+        const llama_model & model,
+                ggml_type   type_r,
+                ggml_type   type_s,
+                     bool   offload,
+                 uint32_t   mem_size,
+                 uint32_t   n_seq_max,
+    const layer_filter_cb & filter) : hparams(model.hparams), n_seq_max(n_seq_max) {
     const int32_t n_layer = hparams.n_layer;
 
     head = 0;
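For context, here is a minimal sketch of how a caller might pass a layer filter under the reordered signature. It assumes layer_filter_cb is a std::function<bool(int32_t il)>, as used elsewhere in llama.cpp's memory code, and the even-layer filter is purely illustrative; the actual constructor call is shown only as a comment, since building a real llama_model is out of scope here.

// Sketch only: illustrates the reordered constructor parameters.
// Assumption: layer_filter_cb is std::function<bool(int32_t il)>.
#include <cstdint>
#include <cstdio>
#include <functional>

// Local stand-in for the callback alias; the real alias lives in
// llama.cpp's memory headers and may differ in exact name or location.
using layer_filter_cb = std::function<bool(int32_t il)>;

int main() {
    // Hypothetical filter: keep only even-numbered layers.
    layer_filter_cb filter = [](int32_t il) { return il % 2 == 0; };

    // With the new signature the filter is the trailing parameter and is
    // taken by const reference instead of being moved in, e.g.:
    //
    //   llama_memory_recurrent mem(model, type_r, type_s, offload,
    //                              mem_size, n_seq_max, filter);
    //
    // (Call shown as a comment; constructing a llama_model is not part of
    // this sketch.)
    for (int32_t il = 0; il < 4; ++il) {
        std::printf("layer %d: %s\n", il, filter(il) ? "kept" : "filtered out");
    }
    return 0;
}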