Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-30 08:42:00 +00:00
			
		
		
		
	model : add BailingMoeV2 support (#16063)
* add BailingMoeV2 support
* update llm types
* undo
* undo
* update llm types
* add model collection link
* update
* almost working
* correct group selection and rename n_group_exp
* avoid large top_k and use argmax instead for now — if we had something like argmax2 that would be equivalent, but this works fine until then
* poke
* skip group selection when there are no tokens
* fix 1T conversion
* hopefully fixed expert group selection — third time's the charm?
* make expert group selection generally available: the new LLaDA2Moe model uses this method too, make it generally available regardless of architecture
* allow n_expert_groups to be 1 (Kimi K2)
* address review suggestions
This commit is contained in:
		| @@ -85,6 +85,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = { | ||||
|     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" }, | ||||
|     { LLM_ARCH_PLM,              "plm"              }, | ||||
|     { LLM_ARCH_BAILINGMOE,       "bailingmoe"       }, | ||||
|     { LLM_ARCH_BAILINGMOE2,      "bailingmoe2"      }, | ||||
|     { LLM_ARCH_DOTS1,            "dots1"            }, | ||||
|     { LLM_ARCH_ARCEE,            "arcee"            }, | ||||
|     { LLM_ARCH_ERNIE4_5,         "ernie4_5"         }, | ||||
| @@ -135,6 +136,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = { | ||||
|     { LLM_KV_EXPERT_COUNT,                      "%s.expert_count"                      }, | ||||
|     { LLM_KV_EXPERT_USED_COUNT,                 "%s.expert_used_count"                 }, | ||||
|     { LLM_KV_EXPERT_SHARED_COUNT,               "%s.expert_shared_count"               }, | ||||
|     { LLM_KV_EXPERT_GROUP_COUNT,                "%s.expert_group_count"                }, | ||||
|     { LLM_KV_EXPERT_GROUP_USED_COUNT,           "%s.expert_group_used_count"           }, | ||||
|     { LLM_KV_EXPERT_WEIGHTS_SCALE,              "%s.expert_weights_scale"              }, | ||||
|     { LLM_KV_EXPERT_WEIGHTS_NORM,               "%s.expert_weights_norm"               }, | ||||
|     { LLM_KV_EXPERT_GATING_FUNC,                "%s.expert_gating_func"                }, | ||||
| @@ -1946,6 +1949,38 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N | ||||
|             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_BAILINGMOE2, | ||||
|         { | ||||
|             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" }, | ||||
|             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" }, | ||||
|             { LLM_TENSOR_OUTPUT,             "output" }, | ||||
|             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" }, | ||||
|             { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" }, | ||||
|             { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" }, | ||||
|             { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" }, | ||||
|             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" }, | ||||
|             { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" }, | ||||
|             { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" }, | ||||
|             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" }, | ||||
|             { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" }, | ||||
|             { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" }, | ||||
|             { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" }, | ||||
|             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" }, | ||||
|             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" }, | ||||
|             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" }, | ||||
|             { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" }, | ||||
|             { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" }, | ||||
|             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" }, | ||||
|             { LLM_TENSOR_NEXTN_EH_PROJ,      "blk.%d.nextn.eh_proj" }, | ||||
|             { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.nextn.embed_tokens" }, | ||||
|             { LLM_TENSOR_NEXTN_ENORM,        "blk.%d.nextn.enorm" }, | ||||
|             { LLM_TENSOR_NEXTN_HNORM,        "blk.%d.nextn.hnorm" }, | ||||
|             { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" }, | ||||
|             { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" }, | ||||
|             { LLM_TENSOR_LAYER_OUT_NORM,     "blk.%d.layer_output_norm" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_DOTS1, | ||||
|         { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sigbjørn Skjæret