mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
split q_proj/gate
This commit is contained in:
@@ -3767,8 +3767,12 @@ class Qwen3NextModel(Qwen3MoeModel):
|
||||
name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
|
||||
elif "conv1d" in name:
|
||||
data_torch = data_torch.squeeze()
|
||||
+ elif "q_proj.weight" in name:
|
||||
+ q_proj, gate = data_torch.chunk(2, dim=0)
|
||||
+ yield (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_GATE, bid), gate)
|
||||
+ data_torch = q_proj
|
||||
|
||||
- return Qwen2MoeModel.modify_tensors(self, data_torch, name, bid)
|
||||
+ yield from Qwen2MoeModel.modify_tensors(self, data_torch, name, bid)
|
||||
|
||||
|
||||
@ModelBase.register("GPT2LMHeadModel")
|
||||
|
||||
Reference in New Issue
Block a user