mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-07 09:57:00 +00:00)
improved memory management fixes
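The hunk below drops the fixed 2*MB padding from the KV cache buffer size and adds a log line reporting how much memory is allocated: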
@@ -703,7 +703,9 @@ static bool kv_cache_init(
     const int64_t n_mem      = n_layer*n_ctx;
     const int64_t n_elements = n_embd*n_mem;
 
-    size_t size = 2u*n_elements*ggml_type_size(wtype) + 2u*MB;
+    size_t size = 2u*n_elements*ggml_type_size(wtype);
+
+    fprintf(stderr, "%s: allocating %.2f MB for kv cache\n", __func__, size / 1024.0 / 1024.0);
 
     cache.buf = ggml_buffer_alloc(backend, size, 2);
     cache.n = 0;
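For context, the size computed above covers two tensors (K and V), each holding n_embd*n_layer*n_ctx elements of the cache type. A minimal standalone sketch of the arithmetic, using hypothetical 7B-class dimensions and a hard-coded 2-byte F16 element size in place of ggml_type_size(wtype):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    // Hypothetical 7B-class dimensions; the real values come from the model hparams.
    const int64_t n_embd    = 4096;  // embedding width
    const int64_t n_layer   = 32;    // transformer layers
    const int64_t n_ctx     = 512;   // context length
    const size_t  type_size = 2;     // e.g. F16: 2 bytes per element

    const int64_t n_mem      = n_layer*n_ctx;  // cached positions across all layers
    const int64_t n_elements = n_embd*n_mem;   // elements in K (and, equally, in V)

    // One K tensor and one V tensor, hence the factor of 2: 256.00 MB here.
    const size_t size = 2u*n_elements*type_size;
    printf("allocating %.2f MB for kv cache\n", size / 1024.0 / 1024.0);
    return 0;
}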