mirror of https://github.com/ggml-org/llama.cpp.git
Add --no-op-offload to improve -ot pp perf in MoE models like llama4 400B (#13386)
```diff
@@ -332,6 +332,7 @@ struct common_params {
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool no_op_offload = false; // globally disable offload host tensor operations to device
 
     bool single_turn = false; // single turn chat conversation
 
```
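To make the effect of the new field concrete, below is a minimal C++ sketch of the kind of per-op decision this flag gates. The names (`sched_params`, `run_on_device`) and the batch-size threshold are illustrative assumptions for this note, not the actual ggml-backend scheduler code: with op offload enabled, an op whose weights sit in host memory can still be dispatched to the device for large batches, which implies copying those weights over per operation; for very large MoE expert tensors kept on the CPU via `-ot`, that copy can dominate prompt processing, which is what `--no-op-offload` avoids by disabling the behavior globally.

```cpp
// Illustrative sketch only: shows the kind of decision --no-op-offload gates.
// sched_params, run_on_device, and the 32-token threshold are assumptions made
// for this example, not the actual ggml-backend scheduler implementation.
#include <cstdio>

struct sched_params {
    bool op_offload = true; // false when the user passes --no-op-offload (assumed wiring)
};

// Decide whether a single op should run on the device.
static bool run_on_device(const sched_params & sp, bool weights_on_host, int n_batch_tokens) {
    if (!weights_on_host) {
        return true;              // weights already resident on the device
    }
    if (!sp.op_offload) {
        return false;             // --no-op-offload: host-weight ops stay on the host
    }
    // With offload enabled, a large batch is assumed to amortize the cost of
    // copying the host weights to the device for this op.
    return n_batch_tokens >= 32;  // hypothetical threshold
}

int main() {
    sched_params sp;
    sp.op_offload = false; // as if --no-op-offload were given

    // A 512-token prompt chunk hitting a CPU-resident MoE expert matmul:
    std::printf("offload to device: %s\n",
                run_on_device(sp, /*weights_on_host=*/true, 512) ? "yes" : "no");
    return 0;
}
```

In practice the flag pairs with tensor overrides that pin expert weights to the CPU, e.g. something along the lines of `llama-cli -m model.gguf -ngl 99 -ot "exps=CPU" --no-op-offload` (the `-ot` pattern here is a hypothetical example, not taken from this commit).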