Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-31 08:51:55 +00:00
	model : add GroveMoE support (#15510)
* add GroveMoE support
* remove constexpr that fails on certain compilers
* revert crude scalar div implementation, use cast
* build_attn_inp_kv_unified -> build_attn_inp_kv
* fix build_attn
* re-apply ffn_exps regex changes
@@ -98,6 +98,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_LLADA,            "llada"            },
     { LLM_ARCH_LLADA_MOE,        "llada-moe"        },
     { LLM_ARCH_SEED_OSS,         "seed_oss"         },
+    { LLM_ARCH_GROVEMOE,         "grovemoe"         },
     { LLM_ARCH_UNKNOWN,          "(unknown)"        },
 };
 
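For context, the string registered here is what the conversion script writes to the GGUF `general.architecture` key, and the loader resolves it back to `LLM_ARCH_GROVEMOE` by reverse lookup over this table. A minimal standalone sketch of that lookup, with a simplified enum and table rather than the real llama.cpp declarations:

```cpp
// Minimal sketch (simplified types, not the llama.cpp API): resolving a
// GGUF general.architecture string back to its enum via a name table
// shaped like LLM_ARCH_NAMES above.
#include <cstdio>
#include <map>
#include <string>

enum llm_arch { LLM_ARCH_GROVEMOE, LLM_ARCH_UNKNOWN };

static const std::map<llm_arch, const char *> ARCH_NAMES = {
    { LLM_ARCH_GROVEMOE, "grovemoe"  },
    { LLM_ARCH_UNKNOWN,  "(unknown)" },
};

// Reverse lookup: scan the table for a matching name string.
static llm_arch arch_from_string(const std::string & name) {
    for (const auto & kv : ARCH_NAMES) {
        if (name == kv.second) {
            return kv.first;
        }
    }
    return LLM_ARCH_UNKNOWN;
}

int main() {
    printf("%s\n", ARCH_NAMES.at(arch_from_string("grovemoe"))); // grovemoe
}
```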
@@ -125,6 +126,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_FEED_FORWARD_LENGTH,               "%s.feed_forward_length"               },
     { LLM_KV_EXPERT_FEED_FORWARD_LENGTH,        "%s.expert_feed_forward_length"        },
     { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
+    { LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,  "%s.expert_chunk_feed_forward_length"  },
     { LLM_KV_USE_PARALLEL_RESIDUAL,             "%s.use_parallel_residual"             },
     { LLM_KV_TENSOR_DATA_LAYOUT,                "%s.tensor_data_layout"                },
     { LLM_KV_EXPERT_COUNT,                      "%s.expert_count"                      },
@@ -133,6 +135,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_WEIGHTS_SCALE,              "%s.expert_weights_scale"              },
     { LLM_KV_EXPERT_WEIGHTS_NORM,               "%s.expert_weights_norm"               },
     { LLM_KV_EXPERT_GATING_FUNC,                "%s.expert_gating_func"                },
+    { LLM_KV_EXPERT_GROUP_SCALE,                "%s.expert_group_scale"                },
+    { LLM_KV_EXPERTS_PER_GROUP,                 "%s.experts_per_group"                 },
     { LLM_KV_MOE_EVERY_N_LAYERS,                "%s.moe_every_n_layers"                },
     { LLM_KV_NEXTN_PREDICT_LAYERS,              "%s.nextn_predict_layers"              },
     { LLM_KV_POOLING_TYPE,                      "%s.pooling_type"                      },
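Each `%s` in these templates is later substituted with the architecture name, so the three new GroveMoE hyperparameters are read from GGUF metadata under `grovemoe.*` keys. A quick sketch of that expansion using plain `snprintf` (the exact key-formatting helper in llama.cpp is abstracted away here):

```cpp
// Sketch: expanding the "%s" key templates above for the "grovemoe"
// architecture, yielding the metadata keys the loader reads.
#include <cstdio>

int main() {
    const char * arch = "grovemoe";
    const char * templates[] = {
        "%s.expert_chunk_feed_forward_length",
        "%s.expert_group_scale",
        "%s.experts_per_group",
    };
    for (const char * tmpl : templates) {
        char key[128];
        snprintf(key, sizeof(key), tmpl, arch);
        printf("%s\n", key); // e.g. grovemoe.experts_per_group
    }
}
```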
@@ -2186,6 +2190,29 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_GROVEMOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_CHEXPS,    "blk.%d.ffn_gate_chexps" },
+            { LLM_TENSOR_FFN_DOWN_CHEXPS,    "blk.%d.ffn_down_chexps" },
+            { LLM_TENSOR_FFN_UP_CHEXPS,      "blk.%d.ffn_up_chexps" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
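The `blk.%d` patterns are instantiated once per layer when tensors are looked up in the model file; assuming the usual `.weight` suffix appended at load time, layer 0's chunk-expert gate, for example, resolves to `blk.0.ffn_gate_chexps.weight`. A small sketch of that per-layer expansion (`n_layer` is illustrative; the real value comes from the model's hyperparameters):

```cpp
// Sketch: per-layer expansion of the "blk.%d" chunk-expert name templates.
#include <cstdio>

int main() {
    const int n_layer = 2; // illustrative; real models read this from hparams
    const char * chexps[] = { "ffn_gate_chexps", "ffn_down_chexps", "ffn_up_chexps" };
    for (int il = 0; il < n_layer; ++il) {
        for (const char * t : chexps) {
            char name[64];
            snprintf(name, sizeof(name), "blk.%d.%s.weight", il, t);
            printf("%s\n", name); // e.g. blk.0.ffn_gate_chexps.weight
        }
    }
}
```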
@@ -2318,6 +2345,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_DOWN_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
+    {LLM_TENSOR_FFN_DOWN_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
+    {LLM_TENSOR_FFN_GATE_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
+    {LLM_TENSOR_FFN_UP_CHEXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     // altup / laurel (gemma 3n)
     {LLM_TENSOR_PER_LAYER_TOKEN_EMBD,       {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_GET_ROWS}},
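Pairing the new `*_CHEXPS` tensors with `GGML_OP_MUL_MAT_ID`, the same op as the existing `*_exps` expert weights, indicates they are stacked per expert and indexed by the routed expert id at inference time. A toy, non-ggml sketch of that expert-indexed multiply, with scalars standing in for the real weight matrices:

```cpp
// Conceptual sketch of what GGML_OP_MUL_MAT_ID does for stacked expert
// weights: the router assigns an expert id per token and the multiply
// selects that expert's slice. Scalars stand in for 2D weight slices.
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> expert_w = { 0.5f, 1.0f, 2.0f, 4.0f }; // one "slice" per expert
    std::vector<int>   ids      = { 2, 0, 3 };                // routed expert per token
    std::vector<float> x        = { 1.0f, 3.0f, 0.5f };       // one activation per token

    for (size_t tok = 0; tok < x.size(); ++tok) {
        // mul_mat_id: pick the weight slice for this token's expert, then multiply
        float y = expert_w[ids[tok]] * x[tok];
        printf("token %zu -> expert %d -> %.2f\n", tok, ids[tok], y);
    }
}
```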
Author: Sigbjørn Skjæret