mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	llama: Add support for RWKV v7 architecture (#12412)
* ggml: Add op l2_norm
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
* ggml: Add op rwkv_wkv7
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
* llama: Add support for RWKV7 and ARWKV7 models
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
* llama: fix inference with RWKV6Qwen2
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
* llama: add more (a)rwkv7 variants in size
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
* Apply code-format changes
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
* fix MUSA build
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
* llama: fix shape error with rwkv using llama-parallel
  Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
---------
Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
		| @@ -756,10 +756,19 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: | ||||
|         // NOTE: can't use LLM_TN here because the layer number is not known | ||||
|         quantize &= name.find("ssm_conv1d.weight") == std::string::npos; | ||||
|  | ||||
|         // do not quantize RWKV's time_mix_first tensors | ||||
|         // do not quantize RWKV's small yet 2D weights | ||||
|         quantize &= name.find("time_mix_first.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_w0.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_w1.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_w2.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_v0.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_v1.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_v2.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_a0.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_a1.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_a2.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_g1.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_g2.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos; | ||||
|         quantize &= name.find("time_mix_lerp_fused.weight") == std::string::npos; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Molly Sophia
					Molly Sophia