kv-cache : support layer reuse (#15504)

* kv-cache : support layer reuse

ggml-ci

* cont : update comments [no ci]
Georgi Gerganov
2025-08-24 13:07:07 +03:00
committed by GitHub
parent c9a24fb932
commit b730706a49
12 changed files with 203 additions and 136 deletions

@@ -16,13 +16,13 @@
 //
 llama_memory_recurrent::llama_memory_recurrent(
-        const llama_model &  model,
-          layer_filter_cb && filter,
-                ggml_type    type_r,
-                ggml_type    type_s,
-                     bool    offload,
-                 uint32_t    mem_size,
-                 uint32_t    n_seq_max) : hparams(model.hparams), n_seq_max(n_seq_max) {
+        const llama_model &  model,
+                ggml_type    type_r,
+                ggml_type    type_s,
+                     bool    offload,
+                 uint32_t    mem_size,
+                 uint32_t    n_seq_max,
+    const layer_filter_cb &  filter) : hparams(model.hparams), n_seq_max(n_seq_max) {
     const int32_t n_layer = hparams.n_layer;

     head = 0;
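
For readers unfamiliar with the callback being reordered here, below is a minimal, self-contained sketch of the kind of per-layer filter the constructor now takes as its trailing argument. The layer_filter_cb alias and the even-layer predicate are illustrative assumptions for this sketch, not code from the commit; in llama.cpp the callback type is declared in the memory headers and the actual filters are supplied by the model setup code.

    // Sketch only: a per-layer predicate deciding which layers get storage in a cache.
    // The alias below is an assumption mirroring the callback shape, not the
    // definition from llama.cpp.
    #include <cstdint>
    #include <cstdio>
    #include <functional>

    using layer_filter_cb = std::function<bool(int32_t il)>;

    int main() {
        const int32_t n_layer = 4;

        // Hypothetical filter: only even-numbered layers are stored in this cache.
        layer_filter_cb filter = [](int32_t il) { return il % 2 == 0; };

        for (int32_t il = 0; il < n_layer; ++il) {
            if (filter && !filter(il)) {
                printf("layer %d: filtered out, no cache allocated\n", il);
                continue;
            }
            printf("layer %d: cache allocated\n", il);
        }

        return 0;
    }

The diff also switches the parameter from an rvalue reference to a const reference and moves it behind the required arguments, which suggests call sites that do not filter can simply rely on a defaulted or empty callback; that reading is an inference from the diff, not a statement from the commit message.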