mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	llama : correction of the attn.v.weight quantization for IQ3_XS (#6209)
IQ3_XS was not mentioned, while IQ3_S and IQ3_M were present twice. This PR corrects that in the manner which was probably intended initially.
This commit is contained in:
		| @@ -12027,13 +12027,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n | |||||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { |         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { | ||||||
|             new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS; |             new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS; | ||||||
|         } |         } | ||||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) { |         else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && qs.model.hparams.n_gqa() >= 4) { | ||||||
|             new_type = GGML_TYPE_Q4_K; |  | ||||||
|         } |  | ||||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { |  | ||||||
|             new_type = GGML_TYPE_Q4_K; |  | ||||||
|         } |  | ||||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) { |  | ||||||
|             new_type = GGML_TYPE_Q4_K; |             new_type = GGML_TYPE_Q4_K; | ||||||
|         } |         } | ||||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { |         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Nexesenex
					Nexesenex