mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	llama : merge build_moe_ffn_from_probs function into build_moe_ffn (#14968)
This commit is contained in:
		| @@ -631,19 +631,8 @@ struct llm_graph_context { | ||||
|                     bool   scale_w, | ||||
|                    float   w_scale, | ||||
|             llama_expert_gating_func_type gating_op, | ||||
|-                     int   il) const; | ||||
|- | ||||
|-    ggml_tensor * build_moe_ffn_from_probs( | ||||
|-             ggml_tensor * cur, | ||||
|-             ggml_tensor * probs, | ||||
|-             ggml_tensor * up_exps, | ||||
|-             ggml_tensor * gate_exps, | ||||
|-             ggml_tensor * down_exps, | ||||
|-             ggml_tensor * exp_probs_b, | ||||
|-                 int64_t   n_expert, | ||||
|-                 int64_t   n_expert_used, | ||||
|-            llama_expert_gating_func_type gating_op, | ||||
|-                     int   il) const; | ||||
|+                     int   il, | ||||
|+             ggml_tensor * probs_in = nullptr) const; | ||||
|  | ||||
|     // | ||||
|     // inputs | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Dongliang Wei
					Dongliang Wei