Mirror of https://github.com/ggml-org/llama.cpp.git
graph : add clamping to ffn_moe_weights_sum to avoid div-by-zero (#16655)
* add missing norm topk bias
* use clamping instead, update number and add comment
@@ -1009,10 +1009,11 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
 ggml_tensor * weights_sum = ggml_sum_rows(ctx0, weights); // [1, n_tokens]
 cb(weights_sum, "ffn_moe_weights_sum", il);

 if (arch == LLM_ARCH_BAILINGMOE2) {
-    weights_sum = ggml_scale_bias(ctx0, weights_sum, 1.0, 1e-20);
-    cb(weights_sum, "ffn_moe_weights_sum_biased", il);
+    // Avoid division by zero, clamp to smallest number representable by F16
+    weights_sum = ggml_clamp(ctx0, weights_sum, 6.103515625e-5, INFINITY);
+    cb(weights_sum, "ffn_moe_weights_sum_clamped", il);
 }

 weights = ggml_div(ctx0, weights, weights_sum); // [n_expert_used, n_tokens]
 cb(weights, "ffn_moe_weights_norm", il);
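For context on the numbers involved, here is a minimal standalone C++ sketch; it is an illustration, not code from the commit, and everything it prints is made up for this note. It checks the two numeric facts behind the change: the new clamp bound 6.103515625e-5 equals 2^-14, the smallest positive normal F16 value named in the diff comment, while the old 1e-20 bias sits below even the smallest F16 subnormal (2^-24, about 5.96e-8), so a zero weights_sum rounded through F16 would stay zero and the division could still produce NaN.

// why_clamp.cpp -- standalone sketch, NOT part of the commit and independent
// of ggml. It only verifies the arithmetic motivating the switch from a tiny
// additive bias to a clamp on the denominator.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
    // Smallest positive *normal* IEEE-754 half-precision value: 2^-14.
    const float f16_min_normal = std::ldexp(1.0f, -14); // = 6.103515625e-5
    std::printf("2^-14           = %.10g\n", f16_min_normal);

    // Smallest positive F16 *subnormal* is 2^-24; the old 1e-20 bias is far
    // below it, so adding it cannot rescue a zero sum stored in F16.
    const double f16_min_subnormal = std::ldexp(1.0, -24); // ~5.96e-8
    std::printf("1e-20 < 2^-24   = %s\n", 1e-20 < f16_min_subnormal ? "true" : "false");

    // New guard: clamp the denominator, mirroring
    //   ggml_clamp(ctx0, weights_sum, 6.103515625e-5, INFINITY)
    float weights_sum = 0.0f; // degenerate case: all selected expert weights are zero
    weights_sum = std::max(weights_sum, f16_min_normal);
    std::printf("0 / clamped sum = %g (finite, no NaN)\n", 0.0f / weights_sum);
    return 0;
}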
|