convert : add Llama4ForCausalLM (#16042)

* convert : add Llama4ForCausalLM

* handle SWA (sliding-window attention)

* half working version

* fix use_kq_norm

* fix use_kq_norm
This commit is contained in:
Xuan-Son Nguyen
2025-09-18 00:18:21 +07:00
committed by GitHub
parent c959b676be
commit 8f8f2274ee
4 changed files with 50 additions and 12 deletions

View File

@@ -2393,7 +2393,10 @@ class SmolVLMModel(MmprojModel):
return [] # skip other tensors
@ModelBase.register("Llama4ForConditionalGeneration")
@ModelBase.register(
"Llama4ForConditionalGeneration",
"Llama4ForCausalLM",
)
class Llama4Model(LlamaModel):
model_arch = gguf.MODEL_ARCH.LLAMA4
undo_permute = False
@@ -2411,6 +2414,10 @@ class Llama4Model(LlamaModel):
super().set_gguf_parameters()
self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"])
self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
if "layer_types" in self.hparams:
if all(lt == "full_attention" for lt in self.hparams["layer_types"]):
# all layers are full attention (for MobileLLM), disable swa
self.gguf_writer.add_sliding_window(0)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
if name.startswith("language_model."):