Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-04 09:32:00 +00:00)
server : use 4 slots + unified KV by default
This commit is contained in:
@@ -4432,6 +4432,13 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (params.n_parallel == 1 && params.kv_unified == false) {
+        LOG_WRN("%s: setting n_parallel = 4 and kv_unified = true\n", __func__);
+
+        params.n_parallel = 4;
+        params.kv_unified = true;
+    }
+
     common_init();
 
     // struct that contains llama context and inference
Reference in New Issue
Block a user