server : use 4 slots + unified KV by default
@@ -4432,6 +4432,13 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (params.n_parallel == 1 && params.kv_unified == false) {
+        LOG_WRN("%s: setting n_parallel = 4 and kv_unified = true\n", __func__);
+
+        params.n_parallel = 4;
+        params.kv_unified = true;
+    }
+
     common_init();
 
     // struct that contains llama context and inference
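With this change, starting the server without touching the parallel or KV-cache settings now yields 4 slots backed by a unified KV cache instead of a single slot. Note that the guard only fires when both n_parallel == 1 and kv_unified == false, so explicitly enabling the unified cache is enough to keep a single slot. A minimal usage sketch, assuming the -np/--parallel and -kvu/--kv-unified flags map onto params.n_parallel and params.kv_unified (model path is a placeholder):

# default launch: the guard above kicks in, giving 4 slots + unified KV
llama-server -m model.gguf

# keep a single slot: kv_unified is already true, so the override is skipped
llama-server -m model.gguf --parallel 1 --kv-unified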