context : add llama_kv_cache_recurrent prototype

ggml-ci
Georgi Gerganov
2025-02-20 20:54:18 +02:00
parent ad870c49f4
commit 08011c2ca1
3 changed files with 477 additions and 102 deletions


@@ -48,7 +48,6 @@ struct llama_kv_cache_slot_info {
 // ring-buffer of cached KV data
 // TODO: pimpl
 // TODO: add notion of max sequences
-// TODO: add llama_hparams &
 struct llama_kv_cache {
     llama_kv_cache(const llama_hparams & hparams);
     virtual ~llama_kv_cache() = default;
@@ -108,7 +107,10 @@ struct llama_kv_cache {
     bool has_shift = false;
     bool do_defrag = false;
+    // TODO: remove this and implement llama_kv_cache_recurrent instead
     bool recurrent = false; // with recurrent state models, a cell can hold the state for more than one past token
     bool v_trans   = true;  // the value tensor is transposed
     bool can_shift = false;
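
The comment on the `recurrent` flag is the motivation for the new subclass: with attention models the cache stores one key/value entry per past token, while with recurrent-state models (e.g. Mamba, RWKV) the entire history of a sequence is folded into a fixed-size state, so a single cell stands in for many past tokens. A toy illustration of the difference, assuming hypothetical cell types (kv_cell_attn and kv_cell_recurrent are not llama.cpp names; the real cache uses ggml tensors and a cell ring buffer):

#include <vector>

// Attention-style cache: one cell per past token, so memory grows
// linearly with context length.
struct kv_cell_attn {
    std::vector<float> k; // key of a single token
    std::vector<float> v; // value of a single token
};

// Recurrent-state cache: the whole past of a sequence is compressed
// into one fixed-size state, so one cell covers all past tokens.
struct kv_cell_recurrent {
    std::vector<float> state; // size independent of how many tokens were seen
};

int main() {
    const size_t n_past = 1024;

    std::vector<kv_cell_attn> attn_cache(n_past); // 1024 cells for 1024 tokens
    kv_cell_recurrent recurrent_cache;            // 1 cell, regardless of n_past

    (void) attn_cache;
    (void) recurrent_cache;
    return 0;
}
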
@@ -141,6 +143,11 @@ private:
     bool state_read_data(llama_io_read_i & io, uint32_t cell_count);
 };

+// TODO: temporary reusing llama_kv_cache -- implement recurrent cache and simplify llama_kv_cache
+struct llama_kv_cache_recurrent : public llama_kv_cache {
+    using llama_kv_cache::llama_kv_cache;
+};
+
 //
 // kv cache restore
 //
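
The prototype itself is an inherited-constructor shim: `using llama_kv_cache::llama_kv_cache;` pulls the base-class constructor into the derived type, so llama_kv_cache_recurrent can already be constructed, and dispatched on, even though it adds no behavior yet. A minimal self-contained sketch of the mechanism, with simplified stand-in types rather than the real llama.cpp declarations:

#include <cstdint>

// Simplified stand-in; the real llama_hparams carries many more fields.
struct llama_hparams {
    uint32_t n_embd = 4096;
};

struct llama_kv_cache {
    llama_kv_cache(const llama_hparams & hparams) : hparams(hparams) {}
    virtual ~llama_kv_cache() = default;

    llama_hparams hparams;
};

// The shape of the prototype from this commit: the using-declaration
// inherits the base constructor, so no boilerplate is needed while the
// recurrent-specific logic is still unwritten.
struct llama_kv_cache_recurrent : public llama_kv_cache {
    using llama_kv_cache::llama_kv_cache;
};

int main() {
    llama_hparams hp;

    // Constructed exactly like the base class ...
    llama_kv_cache_recurrent cache(hp);

    // ... but code holding a base pointer can detect the recurrent
    // variant by its type instead of checking the `recurrent` bool.
    llama_kv_cache * base = &cache;
    const bool is_recurrent = dynamic_cast<llama_kv_cache_recurrent *>(base) != nullptr;

    return is_recurrent ? 0 : 1;
}

This mirrors the TODOs in the diff: once the recurrent-specific members move into the subclass, the `recurrent` flag and the branches keyed on it can be removed from llama_kv_cache.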