memory : remove KV cache size padding (#16812)

* memory : remove KV cache size padding

* cont : restore padding for n_kv tensor shape

* server : use slot context size instead of training context size

* server : simplify context limit logic
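
The first two bullets mean the KV cache buffer itself is no longer rounded up, while the n_kv dimension seen by the compute graph is still rounded up so the tensor shapes stay aligned. A minimal sketch of that rounding, using a hypothetical helper `pad_n_kv` and an illustrative padding of 256, not the actual llama.cpp code:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical helper: round the number of KV cells visible to the graph
// up to a multiple of `pad`, so attention tensor shapes stay aligned even
// though the KV cache buffer itself is allocated without padding.
static uint32_t pad_n_kv(uint32_t n_kv, uint32_t pad) {
    return ((n_kv + pad - 1) / pad) * pad;
}

int main() {
    // Example: with an assumed padding of 256, 1000 used cells map to a
    // graph-side shape of 1024 along the KV dimension.
    assert(pad_n_kv(1000, 256) == 1024);
    assert(pad_n_kv(   0, 256) ==    0);
    return 0;
}
```

Read together with the bullets, the split is: allocation follows the unpadded cache size, and only the graph-side shape keeps the rounding.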
Georgi Gerganov
2025-10-28 20:19:44 +02:00
committed by GitHub
parent a8ca18b4b8
commit 85a7d8677b
6 changed files with 14 additions and 54 deletions

@@ -19,8 +19,6 @@ struct llama_context;
 
 class llama_kv_cache : public llama_memory_i {
 public:
-    static uint32_t get_padding(const llama_cparams & cparams);
-
     struct stream_copy_info {
         bool empty() const {
             assert(ssrc.size() == sdst.size());
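
The two server bullets above change which limit a request is checked against: the context reserved for the slot rather than the model's training context. A minimal sketch of such a check, with illustrative names (`slot_ctx`, `prompt_fits`) that are not the server's actual types:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative slot state; the real server uses different structures.
struct slot_ctx {
    uint32_t n_ctx;           // context size available to this slot
    uint32_t n_prompt_tokens; // tokens in the incoming prompt
};

// Before the change, the prompt was compared against the model's training
// context; after it, against the context actually reserved for the slot,
// which is what the slot's KV cache can really hold.
static bool prompt_fits(const slot_ctx & slot) {
    return slot.n_prompt_tokens <= slot.n_ctx;
}

int main() {
    slot_ctx slot = { /*n_ctx=*/4096, /*n_prompt_tokens=*/5000 };
    if (!prompt_fits(slot)) {
        std::printf("prompt (%u tokens) exceeds the slot context (%u)\n",
                    (unsigned) slot.n_prompt_tokens, (unsigned) slot.n_ctx);
    }
    return 0;
}
```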