From 23323cd1c427e388202216456c58737ef2a2b4cc Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 30 Oct 2025 20:15:34 +0200
Subject: [PATCH] server : use 4 slots + unified KV by default

---
 tools/server/server.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index 38276d7f51..117451ee9a 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -4432,6 +4432,13 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (params.n_parallel == 1 && params.kv_unified == false) {
+        LOG_WRN("%s: setting n_parallel = 4 and kv_unified = true\n", __func__);
+
+        params.n_parallel = 4;
+        params.kv_unified = true;
+    }
+
     common_init();
 
     // struct that contains llama context and inference