mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	llama : check for 256 divisibility for IQ2_XS, IQ2_XXS (#4950)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -8559,7 +8559,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
     //}
     bool convert_incompatible_tensor = false;
     if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
-        new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
+        new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K ||
+        new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS) {
         int nx = tensor->ne[0];
         int ny = tensor->ne[1];
         if (nx % QK_K != 0) {
@@ -8571,6 +8572,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
     }
     if (convert_incompatible_tensor) {
         switch (new_type) {
+            case GGML_TYPE_IQ2_XXS:
+            case GGML_TYPE_IQ2_XS:
             case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
             case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
             case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kawrakow
					Kawrakow