Mirror of https://github.com/ggml-org/llama.cpp.git (last synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
llama : Support Qwen3 and Qwen3MoE (#12828)
* add qwen3 & qwen3moe support
* fix
Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
This commit is contained in:
		| @@ -26,6 +26,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = { | ||||
|     { LLM_ARCH_QWEN2,            "qwen2"            }, | ||||
|     { LLM_ARCH_QWEN2MOE,         "qwen2moe"         }, | ||||
|     { LLM_ARCH_QWEN2VL,          "qwen2vl"          }, | ||||
|     { LLM_ARCH_QWEN3,            "qwen3"            }, | ||||
|     { LLM_ARCH_QWEN3MOE,         "qwen3moe"         }, | ||||
|     { LLM_ARCH_PHI2,             "phi2"             }, | ||||
|     { LLM_ARCH_PHI3,             "phi3"             }, | ||||
|     { LLM_ARCH_PHIMOE,           "phimoe"           }, | ||||
| @@ -595,6 +597,45 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N | ||||
|             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_QWEN3, | ||||
|         { | ||||
|             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" }, | ||||
|             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" }, | ||||
|             { LLM_TENSOR_OUTPUT,          "output" }, | ||||
|             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" }, | ||||
|             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" }, | ||||
|             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" }, | ||||
|             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" }, | ||||
|             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" }, | ||||
|             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" }, | ||||
|             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" }, | ||||
|             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" }, | ||||
|             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" }, | ||||
|             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" }, | ||||
|             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_QWEN3MOE, | ||||
|         { | ||||
|             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" }, | ||||
|             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" }, | ||||
|             { LLM_TENSOR_OUTPUT,             "output" }, | ||||
|             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" }, | ||||
|             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" }, | ||||
|             { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" }, | ||||
|             { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" }, | ||||
|             { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" }, | ||||
|             { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" }, | ||||
|             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" }, | ||||
|             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" }, | ||||
|             { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" }, | ||||
|             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" }, | ||||
|             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" }, | ||||
|             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" }, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         LLM_ARCH_PHI2, | ||||
|         { | ||||
|   | ||||
Reference in New Issue
Block a user
Author: Bo Zheng