llama : merge build_moe_ffn_from_probs function into build_moe_ffn (#14968)

2025-10-30 08:42:00 +00:00 · 2025-07-31 20:12:20 +08:00
parent a9f77a8be3
commit c1dacaa99b
3 changed files with 32 additions and 114 deletions
--- a/src/llama-graph.h
+++ b/src/llama-graph.h
@@ -631,19 +631,8 @@ struct llm_graph_context {
                    bool   scale_w,
                   float   w_scale,
            llama_expert_gating_func_type gating_op,
-                     int   il) const;
-
-    ggml_tensor * build_moe_ffn_from_probs(
-             ggml_tensor * cur,
-             ggml_tensor * probs,
-             ggml_tensor * up_exps,
-             ggml_tensor * gate_exps,
-             ggml_tensor * down_exps,
-             ggml_tensor * exp_probs_b,
-                 int64_t   n_expert,
-                 int64_t   n_expert_used,
-            llama_expert_gating_func_type gating_op,
-                     int   il) const;
+                     int   il,
+             ggml_tensor * probs_in = nullptr) const;

    //
    // inputs