kv-cache : better estimate of n_kv for multi-sequence batches (#15610)

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-08-27 13:55:12 +03:00
committed by GitHub
parent 1e7489745a
commit 1bded5a3b3
2 changed files with 15 additions and 16 deletions

View File

@@ -38,8 +38,8 @@ public:
using idx_vec_t = std::vector<uint32_t>;
// number of streams: ns = s1 - s0 + 1
-llama_seq_id s0;
-llama_seq_id s1;
+uint32_t s0;
+uint32_t s1;
std::vector<llama_seq_id> strm; // [ns]
std::vector<idx_vec_t> idxs; // [ns]
@@ -139,7 +139,7 @@ public:
// graph_build API
//
-uint32_t get_n_kv() const;
+uint32_t get_n_kv(const slot_info & sinfo) const;
// TODO: temporary
bool get_supports_set_rows() const;