Merge branch 'ggml-org:master' into qwen3_next

2025-10-27 08:21:30 +00:00 · 2025-09-18 12:59:39 +02:00
parent 344331c2b6 2b6b55a59f
commit c78f9fce68
331 changed files with 37062 additions and 20361 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2393,7 +2393,10 @@ class SmolVLMModel(MmprojModel):
        return [] # skip other tensors


-@ModelBase.register("Llama4ForConditionalGeneration")
+@ModelBase.register(
+    "Llama4ForConditionalGeneration",
+    "Llama4ForCausalLM",
+)
 class Llama4Model(LlamaModel):
    model_arch = gguf.MODEL_ARCH.LLAMA4
    undo_permute = False
@@ -2411,6 +2414,10 @@ class Llama4Model(LlamaModel):
        super().set_gguf_parameters()
        self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"])
        self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
+        if "layer_types" in self.hparams:
+            if all(lt == "full_attention" for lt in self.hparams["layer_types"]):
+                # every layer uses full attention (the case for MobileLLM), so disable sliding-window attention (SWA)
+                self.gguf_writer.add_sliding_window(0)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        if name.startswith("language_model."):