mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	iq3_xxs: guards for the no-imatrix situation (#5334)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
		
							
								
								
									
										10
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -9456,8 +9456,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) { | ||||
|             new_type = GGML_TYPE_Q4_K; | ||||
|         } | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() >= 4) { | ||||
|             new_type = GGML_TYPE_Q4_K; | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { | ||||
|             new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS; | ||||
|         } | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) { | ||||
|             new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K; | ||||
| @@ -9496,9 +9496,9 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) { | ||||
|             if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K; | ||||
|         } | ||||
|         //else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { | ||||
|         //    if (i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K; | ||||
|         //} | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && !qs.has_imatrix) { | ||||
|             new_type = i_layer < n_layer/8 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K; | ||||
|         } | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) { | ||||
|             new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K | ||||
|                      : arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kawrakow
					Kawrakow