mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-01 09:01:57 +00:00
Add --no-op-offload to improve -ot pp perf in MoE models like llama4 400B (#13386)
This commit is contained in:
@@ -30,6 +30,7 @@ struct llama_cparams {
     bool flash_attn;
     bool no_perf;
     bool warmup;
+    bool op_offload;

     enum llama_pooling_type pooling_type;

Reference in New Issue
Block a user