convert : add Llama4ForCausalLM (#16042)

* convert : add Llama4ForCausalLM
* handle swa
* half working version
* fix use_kq_norm
* fix use_kq_norm
2025-10-27 08:21:30 +00:00 · 2025-09-18 00:18:21 +07:00
parent c959b676be
commit 8f8f2274ee
4 changed files with 50 additions and 12 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2393,7 +2393,10 @@ class SmolVLMModel(MmprojModel):
        return [] # skip other tensors


-@ModelBase.register("Llama4ForConditionalGeneration")
+@ModelBase.register(
+    "Llama4ForConditionalGeneration",
+    "Llama4ForCausalLM",
+)
 class Llama4Model(LlamaModel):
    model_arch = gguf.MODEL_ARCH.LLAMA4
    undo_permute = False
@@ -2411,6 +2414,10 @@ class Llama4Model(LlamaModel):
        super().set_gguf_parameters()
        self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"])
        self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
+        if "layer_types" in self.hparams:
+            if all(lt == "full_attention" for lt in self.hparams["layer_types"]):
+                # all layers are full attention (for MobileLLM), disable swa
+                self.gguf_writer.add_sliding_window(0)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        if name.startswith("language_model."):