convert : text-only support for GLM-4.1V-9B-Thinking (#14823)
* use language_model part only, ignore visual layers
* fix rope_dim calculation
@@ -6486,7 +6486,7 @@ class JaisModel(TextModel):
         self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)


-@ModelBase.register("Glm4ForCausalLM")
+@ModelBase.register("Glm4ForCausalLM", "Glm4vForConditionalGeneration")
 class Glm4Model(TextModel):
     model_arch = gguf.MODEL_ARCH.GLM4

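The only change in this hunk is the extra architecture name: the converter dispatches on the "architectures" field of a checkpoint's config.json, so registering "Glm4vForConditionalGeneration" routes GLM-4.1V checkpoints through the existing text-only Glm4Model class. A minimal sketch of such a name-to-class registry (an illustration of the pattern only, not the converter's actual implementation):

# Sketch of a name-to-class registry in the style of ModelBase.register.
# Illustrative only; the real decorator in convert_hf_to_gguf.py does more.
_registry: dict[str, type] = {}

def register(*names: str):
    def wrap(cls: type) -> type:
        for n in names:
            _registry[n] = cls  # each architecture string maps to the class
        return cls
    return wrap

@register("Glm4ForCausalLM", "Glm4vForConditionalGeneration")
class Glm4Model:
    pass

# both the text-only and the vision checkpoint now resolve to the same class
assert _registry["Glm4vForConditionalGeneration"] is Glm4Model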
@@ -6508,7 +6508,8 @@ class Glm4Model(TextModel):

     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        rope_dim = self.hparams["head_dim"]
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
         rope_scaling = self.hparams.get("rope_scaling") or {}
         if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
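The rope_dim fix handles checkpoints whose config.json omits head_dim: the per-head dimension falls back to hidden_size // num_attention_heads, and only the partial_rotary_factor fraction of it is rotated. A standalone sketch of that arithmetic, with hypothetical hparams values chosen only to make the numbers visible:

# Standalone sketch of the fallback above; the hparams values are
# hypothetical, not taken from a real GLM-4.1V config.
hparams = {
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "partial_rotary_factor": 0.5,
    # no "head_dim" key, so the fallback branch is taken
}

if (rope_dim := hparams.get("head_dim")) is None:
    rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]  # 128

# partial RoPE: only this many dimensions of each head are rotated
rope_dimension_count = int(rope_dim * hparams.get("partial_rotary_factor", 0.5))
print(rope_dimension_count)  # 64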
@@ -6516,6 +6517,13 @@ class Glm4Model(TextModel):
             self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
             self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])

+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("model.visual."):  # ignore visual part of Glm4v
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for Glm4v
+        return super().modify_tensors(data_torch, name, bid)
+


 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
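The new modify_tensors override is what makes the conversion text-only: tensors under model.visual. are dropped, and the text tensors lose their model.language_model. nesting so their names match what the existing GLM4 tensor mapping expects. A standalone sketch of the renaming logic (the tensor names below are made-up examples):

# Sketch of the name filtering/remapping above; tensor names are invented.
def remap(name: str) -> str | None:
    if name.startswith("model.visual."):  # visual tower: skip entirely
        return None
    if name.startswith("model.language_model."):
        # strip the extra nesting so the name matches the text-only layout
        return name.replace("language_model.", "")
    return name  # plain text-only names pass through unchanged

for n in [
    "model.visual.blocks.0.attn.qkv.weight",
    "model.language_model.layers.0.self_attn.q_proj.weight",
    "lm_head.weight",
]:
    print(n, "->", remap(n))
# model.visual.blocks.0.attn.qkv.weight -> None
# model.language_model.layers.0.self_attn.q_proj.weight -> model.layers.0.self_attn.q_proj.weight
# lm_head.weight -> lm_head.weight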