kv_cache : minor

2025-11-11 10:36:54 +00:00 · 2025-01-14 11:56:53 +02:00
parent fef90cb3d7
commit 73a14eccc9
3 changed files with 47 additions and 27 deletions
--- a/src/llama-kv-cache.h
+++ b/src/llama-kv-cache.h
@@ -61,17 +61,11 @@ struct llama_kv_cache {
    // computed before each graph build
    uint32_t n = 0;

-    ggml_type type_k = GGML_TYPE_F16;
-    ggml_type type_v = GGML_TYPE_F16;
-
    std::vector<llama_kv_cell> cells;

    std::vector<struct ggml_tensor *> k_l; // per layer
    std::vector<struct ggml_tensor *> v_l;

-    std::vector<ggml_context_ptr> ctxs;
-    std::vector<ggml_backend_buffer_ptr> bufs;
-
    // TODO: become constructor
    bool init(
            const llama_model & model,
@@ -86,7 +80,7 @@ struct llama_kv_cache {
    size_t total_size() const;

    // TODO: better data structures to reduce the cost of this operation
-    llama_pos max_pos() const;
+    llama_pos pos_max() const;

    void clear();

@@ -112,6 +106,16 @@ struct llama_kv_cache {

    // find how many cells are currently in use
    uint32_t cell_max() const;
+
+    size_t size_k_bytes() const;
+    size_t size_v_bytes() const;
+
+private:
+    ggml_type type_k = GGML_TYPE_F16;
+    ggml_type type_v = GGML_TYPE_F16;
+
+    std::vector<ggml_context_ptr> ctxs;
+    std::vector<ggml_backend_buffer_ptr> bufs;
 };

 //