kv-cache : drop the "unified" prefix (#15467)

* kv-cache : drop the "unified" prefix

ggml-ci

* cont : fix comment [no ci]
Author: Georgi Gerganov
Date:   2025-08-21 17:00:33 +03:00
Committed by: GitHub
Parent: ad294df03f
Commit: 715a6db02c
15 changed files with 346 additions and 360 deletions
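
In short, the commit drops "unified" from the KV cache type names:
llama_kv_cache_unified becomes llama_kv_cache, and
llama_kv_cache_unified_context becomes llama_kv_cache_context. The hunks
below are the mechanical fallout of that rename inside the hybrid memory.
As orientation, here is a minimal, self-contained sketch of the ownership
pattern being touched; the types are simplified stand-ins for
illustration, not the actual llama.cpp declarations:

// Simplified stand-ins for illustration only.
#include <memory>

struct llama_kv_cache {}; // renamed from llama_kv_cache_unified

// The hybrid memory owns one KV cache for the attention layers and
// exposes it through a raw-pointer getter, as in the hunks below.
struct llama_memory_hybrid {
    std::unique_ptr<llama_kv_cache> mem_attn;

    llama_kv_cache * get_mem_attn() const { return mem_attn.get(); }
};

int main() {
    llama_memory_hybrid mem { std::make_unique<llama_kv_cache>() };
    return mem.get_mem_attn() != nullptr ? 0 : 1;
}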

src/llama-memory-hybrid.cpp

@@ -30,7 +30,7 @@ llama_memory_hybrid::llama_memory_hybrid(
         layer_filter_cb && filter_attn,
         layer_filter_cb && filter_recr) :
     hparams(model.hparams),
-    mem_attn(new llama_kv_cache_unified(
+    mem_attn(new llama_kv_cache(
         model,
         filter_attn == nullptr ?
             [&](int32_t il) { return !hparams.is_recurrent(il); }
@@ -179,7 +179,7 @@ void llama_memory_hybrid::state_read(llama_io_read_i & io, llama_seq_id seq_id,
     mem_recr->state_read(io, seq_id);
 }
 
-llama_kv_cache_unified * llama_memory_hybrid::get_mem_attn() const {
+llama_kv_cache * llama_memory_hybrid::get_mem_attn() const {
     return mem_attn.get();
 }
 
@@ -210,7 +210,7 @@ llama_memory_hybrid_context::llama_memory_hybrid_context(
         std::vector<llama_ubatch> ubatches) :
     ubatches(std::move(ubatches)),
     // note: here we copy the ubatches. not sure if this is ideal
-    ctx_attn(new llama_kv_cache_unified_context(mem->get_mem_attn(), std::move(sinfos_attn), this->ubatches)),
+    ctx_attn(new llama_kv_cache_context(mem->get_mem_attn(), std::move(sinfos_attn), this->ubatches)),
     ctx_recr(new llama_memory_recurrent_context(mem->get_mem_recr(), this->ubatches)),
     status(llama_memory_status_combine(ctx_attn->get_status(), ctx_recr->get_status())) {
 }
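
A side note on the initializer list above: the ubatches parameter is moved
into the member first, so the later ctx_attn and ctx_recr initializers
deliberately read this->ubatches (the member copy), not the moved-from
parameter. A tiny sketch of that ordering rule, with hypothetical names:

#include <utility>
#include <vector>

struct holder {
    std::vector<int> items;        // initialized first (declaration order)
    std::vector<int> items_again;  // initialized second

    // After std::move(items_in) the parameter is in a moved-from state,
    // so the second initializer must read the member via this->items.
    holder(std::vector<int> items_in) :
        items(std::move(items_in)),
        items_again(this->items) {}
};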
@@ -248,8 +248,8 @@ const llama_ubatch & llama_memory_hybrid_context::get_ubatch() const {
     return ubatches[i_next];
 }
 
-const llama_kv_cache_unified_context * llama_memory_hybrid_context::get_attn() const {
-    return static_cast<const llama_kv_cache_unified_context *>(ctx_attn.get());
+const llama_kv_cache_context * llama_memory_hybrid_context::get_attn() const {
+    return static_cast<const llama_kv_cache_context *>(ctx_attn.get());
 }
 
 const llama_memory_recurrent_context * llama_memory_hybrid_context::get_recr() const {
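
For context, the get_attn() accessor in the last hunk stores the attention
context behind a generic memory-context interface and downcasts on access.
A rough sketch under the same simplified-type assumption (names mirror the
diff, not the full llama.cpp API):

#include <memory>

struct llama_memory_context_i {
    virtual ~llama_memory_context_i() = default;
};

// renamed from llama_kv_cache_unified_context
struct llama_kv_cache_context : llama_memory_context_i {};

struct llama_memory_hybrid_context {
    // Stored behind the generic interface...
    std::unique_ptr<llama_memory_context_i> ctx_attn;

    // ...but the concrete type is known by construction (see the
    // constructor hunk above), so a plain static_cast is sufficient,
    // with no need for dynamic_cast or RTTI.
    const llama_kv_cache_context * get_attn() const {
        return static_cast<const llama_kv_cache_context *>(ctx_attn.get());
    }
};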