mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
llama : Support Qwen3 and Qwen3MoE (#12828)
* add qwen3 & qwen3moe support.
* fix

---------

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
This commit is contained in:
@@ -2459,6 +2459,16 @@ class Qwen2MoeModel(Model):
|
||||
raise ValueError(f"Unprocessed experts: {experts}")
|
||||
|
||||
|
||||
@Model.register("Qwen3ForCausalLM")
class Qwen3Model(Qwen2Model):
    """Model registered under the ``Qwen3ForCausalLM`` name.

    The class body defines nothing beyond the architecture identifier;
    everything else is inherited unchanged from ``Qwen2Model``.
    """

    # The only attribute overridden relative to the parent class.
    model_arch = gguf.MODEL_ARCH.QWEN3
|
||||
|
||||
|
||||
@Model.register("Qwen3MoeForCausalLM")
class Qwen3MoeModel(Qwen2MoeModel):
    """Model registered under the ``Qwen3MoeForCausalLM`` name.

    The class body defines nothing beyond the architecture identifier;
    everything else is inherited unchanged from ``Qwen2MoeModel``.
    """

    # The only attribute overridden relative to the parent class.
    model_arch = gguf.MODEL_ARCH.QWEN3MOE
|
||||
|
||||
|
||||
@Model.register("GPT2LMHeadModel")
|
||||
class GPT2Model(Model):
|
||||
model_arch = gguf.MODEL_ARCH.GPT2
|
||||
|
||||
Reference in New Issue
Block a user