llama-bench: add --n-cpu-moe support (#15952)

* llama-bench: add --n-cpu-moe support Support --n-cpu-moe in llama-bench the same way it is supported by llama-server.
2025-10-28 08:31:25 +00:00 · 2025-09-16 16:17:08 +02:00
parent 77475530b8
commit 8ff206097c
3 changed files with 90 additions and 19 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -734,6 +734,20 @@ const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";

 }

+//
+// MoE utils
+//
+
+const char * const LLM_FFN_EXPS_REGEX = "\\.ffn_(up|down|gate)_exps";
+
+static std::string llm_ffn_exps_block_regex(int idx) {
+    return string_format("blk\\.%d%s", idx, LLM_FFN_EXPS_REGEX);
+}
+
+static llama_model_tensor_buft_override llm_ffn_exps_cpu_override() {
+    return { LLM_FFN_EXPS_REGEX, ggml_backend_cpu_buffer_type() };
+}
+
 //
 // training utils
 //