Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
	convert : text-only support for GLM-4.1V-9B-Thinking (#14823)
* use language_model part only, ignore visual layers
* fix rope_dim calculation
convert_hf_to_gguf.py
@@ -6486,7 +6486,7 @@ class JaisModel(TextModel):
         self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
 
 
-@ModelBase.register("Glm4ForCausalLM")
+@ModelBase.register("Glm4ForCausalLM", "Glm4vForConditionalGeneration")
 class Glm4Model(TextModel):
     model_arch = gguf.MODEL_ARCH.GLM4
 
@@ -6508,7 +6508,8 @@ class Glm4Model(TextModel):
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        rope_dim = self.hparams["head_dim"]
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
         rope_scaling = self.hparams.get("rope_scaling") or {}
         if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
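As a side note on the rope_dim change above, here is a minimal standalone sketch of the fallback. When `head_dim` is absent from the checkpoint's config, the per-head dimension is derived as `hidden_size // num_attention_heads`, and only `partial_rotary_factor` of it (0.5 by default in this converter) receives rotary embeddings. The hparams values below are illustrative, not taken from a real GLM-4 config.

```python
# Standalone sketch of the head_dim fallback from the hunk above.
# The hparams values are made up for demonstration.
hparams = {"hidden_size": 4096, "num_attention_heads": 32}  # no "head_dim" key

if (rope_dim := hparams.get("head_dim")) is None:
    rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]  # 4096 // 32 = 128

# GLM-4 applies rotary embeddings to only part of each head;
# the converter defaults partial_rotary_factor to 0.5.
rope_dimension_count = int(rope_dim * hparams.get("partial_rotary_factor", 0.5))
assert rope_dimension_count == 64
```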
@@ -6516,6 +6517,13 @@ class Glm4Model(TextModel):
             self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
             self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("model.visual."):  # ignore visual part of Glm4v
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for Glm4v
+        return super().modify_tensors(data_torch, name, bid)
+
 
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
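To illustrate what the new `modify_tensors` override does: Glm4v checkpoints nest the text stack under `model.language_model.` and the vision tower under `model.visual.`, so the override drops visual tensors entirely and strips the `language_model.` segment, letting the remaining names line up with the existing GLM-4 tensor mapping. A minimal sketch of that filtering, using hypothetical tensor names in the Glm4v layout:

```python
# Minimal sketch of the name filtering/remapping done by modify_tensors.
# The tensor names below are hypothetical examples, not from a real checkpoint.
def remap(name: str) -> str | None:
    if name.startswith("model.visual."):          # vision tower: skipped
        return None
    if name.startswith("model.language_model."):  # text stack: unwrap prefix
        name = name.replace("language_model.", "")
    return name

names = [
    "model.visual.blocks.0.attn.qkv.weight",                  # dropped
    "model.language_model.layers.0.self_attn.q_proj.weight",  # unwrapped
    "lm_head.weight",                                         # already top-level
]
print([remap(n) for n in names])
# [None, 'model.layers.0.self_attn.q_proj.weight', 'lm_head.weight']
```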