Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-27 08:21:30 +00:00)

Commit: First draft
@@ -3741,6 +3741,28 @@ class Qwen3MoeModel(Qwen2MoeModel):
        super().set_vocab()


@ModelBase.register("Qwen3NextForCausalLM")
class Qwen3NextModel(Qwen3MoeModel):
    model_arch = gguf.MODEL_ARCH.QWEN3NEXT

    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        self.gguf_writer.add_ssm_conv_kernel(self.find_hparam(["linear_conv_kernel_dim"]))
        self.gguf_writer.add_ssm_state_size(self.find_hparam(["linear_key_head_dim"]))
        self.gguf_writer.add_ssm_group_count(self.find_hparam(["linear_num_key_heads"]))
        self.gguf_writer.add_ssm_time_step_rank(self.find_hparam(["linear_num_value_heads"]))
        self.gguf_writer.add_ssm_inner_size(self.find_hparam(["hidden_size"]) * (self.find_hparam(["linear_num_value_heads"]) // self.find_hparam(["linear_num_key_heads"])))

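These GGUF keys mirror the linear-attention hyperparameters of the HF config (the hunk presumably lands in convert_hf_to_gguf.py). A minimal sketch of the mapping and the inner-size arithmetic, using assumed Qwen3-Next-style values that are not part of the commit:

# Illustrative only: hypothetical Qwen3-Next-style hyperparameters (assumed values,
# not taken from the commit) showing how the SSM fields above are derived.
hparams = {
    "hidden_size": 2048,
    "linear_conv_kernel_dim": 4,
    "linear_key_head_dim": 128,
    "linear_num_key_heads": 16,
    "linear_num_value_heads": 32,
}

ssm_conv_kernel    = hparams["linear_conv_kernel_dim"]    # 4
ssm_state_size     = hparams["linear_key_head_dim"]       # 128
ssm_group_count    = hparams["linear_num_key_heads"]      # 16
ssm_time_step_rank = hparams["linear_num_value_heads"]    # 32
# inner size = hidden_size * (num_value_heads // num_key_heads) = 2048 * 2 = 4096
ssm_inner_size = hparams["hidden_size"] * (
    hparams["linear_num_value_heads"] // hparams["linear_num_key_heads"]
)
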
    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        if name.endswith(".A_log"):
            data_torch = -torch.exp(data_torch)
        elif name.endswith(".dt_bias"):
            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
        elif "conv1d" in name:
            data_torch = data_torch.squeeze()

        return Qwen2MoeModel.modify_tensors(self, data_torch, name, bid)

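For reference, a small standalone sketch of what the three tensor rewrites in modify_tensors do before delegating to Qwen2MoeModel; the tensor names and shapes here are assumed for illustration and are not taken from the commit:

import torch

# ".A_log" weights are stored negated out of log space: A = -exp(A_log).
a_log = torch.zeros(32)
a = -torch.exp(a_log)                      # every entry becomes -1.0

# ".dt_bias" tensors keep their data and are only renamed, e.g. (assumed name):
# "model.layers.0.linear_attn.dt_bias" -> "model.layers.0.linear_attn.dt_proj.bias"
name = "model.layers.0.linear_attn.dt_bias"
name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"

# conv1d weights lose their singleton dimension, e.g. (dim, 1, kernel) -> (dim, kernel).
w = torch.zeros(4096, 1, 4).squeeze()
assert w.shape == (4096, 4)
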
@ModelBase.register("GPT2LMHeadModel")
class GPT2Model(TextModel):