Mirror of https://github.com/ggml-org/llama.cpp.git — last synced 2025-10-31 08:51:55 +00:00.
			
		
		
		
llama : add 128k YaRN context for Qwen (#10698)

* add 128k YaRN context for Qwen
* added property for model tensors
* removed a useless line
This commit is contained in:
		| @@ -1992,6 +1992,14 @@ class Qwen2Model(Model): | ||||
|         except FileNotFoundError: | ||||
|             self._set_vocab_gpt2() | ||||
|  | ||||
|     def set_gguf_parameters(self): | ||||
|         super().set_gguf_parameters() | ||||
|         if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: | ||||
|             if self.hparams["rope_scaling"].get("type") == "yarn": | ||||
|                 self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) | ||||
|                 self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) | ||||
|                 self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) | ||||
|  | ||||
|  | ||||
| @Model.register("Qwen2MoeForCausalLM") | ||||
| class Qwen2MoeModel(Model): | ||||
|   | ||||
| @@ -761,6 +761,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { | ||||
|         MODEL_TENSOR.TOKEN_EMBD, | ||||
|         MODEL_TENSOR.OUTPUT_NORM, | ||||
|         MODEL_TENSOR.OUTPUT, | ||||
|         MODEL_TENSOR.ROPE_FREQS, | ||||
|         MODEL_TENSOR.ATTN_NORM, | ||||
|         MODEL_TENSOR.ATTN_Q, | ||||
|         MODEL_TENSOR.ATTN_K, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Robert Collins
					Robert Collins