mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
Merge branch 'ggml-org:master' into qwen3_next
This commit is contained in:
@@ -2393,7 +2393,10 @@ class SmolVLMModel(MmprojModel):
|
||||
return [] # skip other tensors
|
||||
|
||||
|
||||
@ModelBase.register("Llama4ForConditionalGeneration")
|
||||
@ModelBase.register(
|
||||
"Llama4ForConditionalGeneration",
|
||||
"Llama4ForCausalLM",
|
||||
)
|
||||
class Llama4Model(LlamaModel):
|
||||
model_arch = gguf.MODEL_ARCH.LLAMA4
|
||||
undo_permute = False
|
||||
@@ -2411,6 +2414,10 @@ class Llama4Model(LlamaModel):
|
||||
super().set_gguf_parameters()
|
||||
self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"])
|
||||
self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
|
||||
if "layer_types" in self.hparams:
|
||||
if all(lt == "full_attention" for lt in self.hparams["layer_types"]):
|
||||
# all layers are full attention (for MobileLLM), disable swa
|
||||
self.gguf_writer.add_sliding_window(0)
|
||||
|
||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
|
||||
if name.startswith("language_model."):
|
||||
|
||||
Reference in New Issue
Block a user