From 2d69109dc1370577eaa9372bb35e2a096c492079 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 30 Oct 2025 20:15:34 +0200 Subject: [PATCH] server : use 4 slots + unified KV by default --- tools/server/server.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 60d92fd705..763fead257 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4432,6 +4432,13 @@ int main(int argc, char ** argv) { return 1; } + if (params.n_parallel == 1 && params.kv_unified == false) { + LOG_WRN("%s: setting n_parallel = 4 and kv_unified = true\n", __func__); + + params.n_parallel = 4; + params.kv_unified = true; + } + common_init(); // struct that contains llama context and inference