Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00
	convert : qwen2/3moe : set yarn metadata if present (#13331)
* set yarn metadata if present
* add comment about enabling YaRN

Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
```diff
@@ -2761,6 +2761,13 @@ class Qwen2MoeModel(TextModel):
         if (shared_expert_intermediate_size := self.hparams.get('shared_expert_intermediate_size')) is not None:
             self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size)
             logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
+        # YaRN is not enabled by default
+        # To enable it, please refer to this guide: https://huggingface.co/Qwen/Qwen3-30B-A3B#processing-long-texts
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
 
     _experts: list[dict[str, Tensor]] | None = None
```
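The hunk keys off the `rope_scaling` block of the model's Hugging Face config.json. Below is a minimal self-contained sketch of the same check, run against a hypothetical Qwen-style config with YaRN enabled; the `hparams` values and the `StubWriter` class are illustrative stand-ins (not from a real checkpoint or from gguf-py), so it runs without the `gguf` package installed:

```python
# Illustrative hparams mimicking the rope_scaling block of a Qwen config.json
# with YaRN enabled (values are assumptions, not from a real model).
hparams = {
    "rope_scaling": {
        "type": "yarn",
        "factor": 4.0,
        "original_max_position_embeddings": 32768,
    }
}

class StubWriter:
    """Stand-in for gguf.GGUFWriter: records what would be written.
    (Real GGUF keys are architecture-prefixed, e.g. qwen3moe.rope.scaling.type.)"""
    def add_rope_scaling_type(self, t):
        print(f"rope.scaling.type = {t}")
    def add_rope_scaling_factor(self, f):
        print(f"rope.scaling.factor = {f}")
    def add_rope_scaling_orig_ctx_len(self, n):
        print(f"rope.scaling.original_context_length = {n}")

writer = StubWriter()
rope_scaling = hparams.get("rope_scaling")
# Same guard as the patch: only write YaRN metadata when rope_scaling exists,
# carries a scaling factor, and its type is "yarn".
if rope_scaling is not None and "factor" in rope_scaling:
    if rope_scaling.get("type") == "yarn":
        writer.add_rope_scaling_type("yarn")
        writer.add_rope_scaling_factor(rope_scaling["factor"])
        writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
```

If the config has no `rope_scaling` block (the default for these checkpoints), nothing is written and the converted GGUF carries no YaRN metadata, which matches the "YaRN is not enabled by default" comment in the patch.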
Author: Sigbjørn Skjæret