From f696428ce8e4d16c17acbffeaa7feac3b0fb9061 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Sun, 26 Oct 2025 17:20:32 +0100
Subject: [PATCH] graph : add clamping to ffn_moe_weights_sum to avoid
 div-by-zero (#16655)

* add missing norm topk bias

* use clamping instead, update number and add comment
---
 src/llama-graph.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index c1b946e3f7..112d195f29 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1009,10 +1009,9 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
         ggml_tensor * weights_sum = ggml_sum_rows(ctx0, weights); // [1, n_tokens]
         cb(weights_sum, "ffn_moe_weights_sum", il);
 
-        if (arch == LLM_ARCH_BAILINGMOE2) {
-            weights_sum = ggml_scale_bias(ctx0, weights_sum, 1.0, 1e-20);
-            cb(weights_sum, "ffn_moe_weights_sum_biased", il);
-        }
+        // Avoid division by zero, clamp to smallest number representable by F16
+        weights_sum = ggml_clamp(ctx0, weights_sum, 6.103515625e-5, INFINITY);
+        cb(weights_sum, "ffn_moe_weights_sum_clamped", il);
 
         weights = ggml_div(ctx0, weights, weights_sum); // [n_expert_used, n_tokens]
         cb(weights, "ffn_moe_weights_norm", il);