	graph : add clamping to ffn_moe_weights_sum to avoid div-by-zero (#16655)
* add missing norm topk bias
* use clamping instead, update number and add comment
@@ -1009,10 +1009,9 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
         ggml_tensor * weights_sum = ggml_sum_rows(ctx0, weights); // [1, n_tokens]
         cb(weights_sum, "ffn_moe_weights_sum", il);
 
-        if (arch == LLM_ARCH_BAILINGMOE2) {
-            weights_sum = ggml_scale_bias(ctx0, weights_sum, 1.0, 1e-20);
-            cb(weights_sum, "ffn_moe_weights_sum_biased", il);
-        }
+        // Avoid division by zero, clamp to smallest number representable by F16
+        weights_sum = ggml_clamp(ctx0, weights_sum, 6.103515625e-5, INFINITY);
+        cb(weights_sum, "ffn_moe_weights_sum_clamped", il);
 
         weights = ggml_div(ctx0, weights, weights_sum); // [n_expert_used, n_tokens]
         cb(weights, "ffn_moe_weights_norm", il);
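For context, a minimal standalone sketch of the failure mode the clamp addresses, using plain floats in place of ggml tensors; the variable names mirror the diff, but nothing below is from the commit itself. The bound 6.103515625e-5 is 2^-14, the smallest positive normal F16 value, so unlike the previous 1e-20 bias it does not round to zero when the sum passes through F16 storage (F16 cannot represent anything below roughly 6e-8).

// Hypothetical demo, not llama.cpp code: shows why clamping the expert
// weight sum prevents NaNs when all routed weights underflow to zero.
#include <algorithm>
#include <cstdio>

int main() {
    const float f16_min_normal = 6.103515625e-5f; // 2^-14, smallest normal F16

    // Pretend the top-k expert weights all underflowed to zero.
    float weights[4]  = {0.0f, 0.0f, 0.0f, 0.0f};
    float weights_sum = 0.0f;
    for (float w : weights) weights_sum += w;

    printf("unclamped: %f\n", weights[0] / weights_sum); // 0/0 -> nan

    // Element-wise equivalent of ggml_clamp(ctx0, weights_sum, 2^-14, INFINITY)
    float clamped = std::max(weights_sum, f16_min_normal);
    printf("clamped:   %f\n", weights[0] / clamped);     // 0 -> finite output
    return 0;
}

Note also that the clamp replaces the previous LLM_ARCH_BAILINGMOE2-only bias path, so every MoE architecture now gets the same division-by-zero protection.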
Author: Sigbjørn Skjæret