	llama : valign + remove unused ftype (#8502)
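This commit vertically aligns the return statements in llama_model_ftype_name and the type comparisons in llama_tensor_get_type, and drops the name mapping for LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16, the unused ftype referenced in the title.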
@@ -4510,40 +4510,36 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
     }
 
     switch (ftype) {
-        case LLAMA_FTYPE_ALL_F32:     return "all F32";
-        case LLAMA_FTYPE_MOSTLY_F16:  return "F16";
-        case LLAMA_FTYPE_MOSTLY_BF16: return "BF16";
-        case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
-        case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
-        case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
-                                      return "Q4_1, some F16";
-        case LLAMA_FTYPE_MOSTLY_Q5_0: return "Q5_0";
-        case LLAMA_FTYPE_MOSTLY_Q5_1: return "Q5_1";
-        case LLAMA_FTYPE_MOSTLY_Q8_0: return "Q8_0";
-
-        // K-quants
-        case LLAMA_FTYPE_MOSTLY_Q2_K:   return "Q2_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q2_K_S: return "Q2_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "Q3_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_M: return "Q3_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_L: return "Q3_K - Large";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_S: return "Q4_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M: return "Q4_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_S: return "Q5_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "Q5_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q6_K:   return "Q6_K";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XXS:return "IQ2_XXS - 2.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_S:  return "IQ2_S - 2.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_M:  return "IQ2_M - 2.7 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_S  :return "IQ1_S - 1.5625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_M  :return "IQ1_M - 1.75 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_S:  return "IQ3_S - 3.4375 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_M:  return "IQ3_S mix - 3.66 bpw";
+        case LLAMA_FTYPE_ALL_F32:         return "all F32";
+        case LLAMA_FTYPE_MOSTLY_F16:      return "F16";
+        case LLAMA_FTYPE_MOSTLY_BF16:     return "BF16";
+        case LLAMA_FTYPE_MOSTLY_Q4_0:     return "Q4_0";
+        case LLAMA_FTYPE_MOSTLY_Q4_1:     return "Q4_1";
+        case LLAMA_FTYPE_MOSTLY_Q5_0:     return "Q5_0";
+        case LLAMA_FTYPE_MOSTLY_Q5_1:     return "Q5_1";
+        case LLAMA_FTYPE_MOSTLY_Q8_0:     return "Q8_0";
+        case LLAMA_FTYPE_MOSTLY_Q2_K:     return "Q2_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q2_K_S:   return "Q2_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_S:   return "Q3_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_M:   return "Q3_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:   return "Q3_K - Large";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:   return "Q4_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:   return "Q4_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_S:   return "Q5_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:   return "Q5_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q6_K:     return "Q6_K";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS:  return "IQ2_XXS - 2.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:   return "IQ2_XS - 2.3125 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_S:    return "IQ2_S - 2.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_M:    return "IQ2_M - 2.7 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XS:   return "IQ3_XS - 3.3 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS:  return "IQ3_XXS - 3.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_S:    return "IQ1_S - 1.5625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_M:    return "IQ1_M - 1.75 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_NL:   return "IQ4_NL - 4.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_XS:   return "IQ4_XS - 4.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_S:    return "IQ3_S - 3.4375 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_M:    return "IQ3_S mix - 3.66 bpw";
         case LLAMA_FTYPE_MOSTLY_Q4_0_4_4: return "Q4_0_4_4";
         case LLAMA_FTYPE_MOSTLY_Q4_0_4_8: return "Q4_0_4_8";
         case LLAMA_FTYPE_MOSTLY_Q4_0_8_8: return "Q4_0_8_8";
@@ -18069,10 +18065,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     //    if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_S) new_type = GGML_TYPE_Q4_K;
     //}
     bool convert_incompatible_tensor = false;
-    if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
-        new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K || new_type == GGML_TYPE_IQ4_XS ||
-        new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS || new_type == GGML_TYPE_IQ2_S ||
-        new_type == GGML_TYPE_IQ3_XXS || new_type == GGML_TYPE_IQ1_S || new_type == GGML_TYPE_IQ3_S ||
+    if (new_type == GGML_TYPE_Q2_K    || new_type == GGML_TYPE_Q3_K    || new_type == GGML_TYPE_Q4_K   ||
+        new_type == GGML_TYPE_Q5_K    || new_type == GGML_TYPE_Q6_K    || new_type == GGML_TYPE_IQ4_XS ||
+        new_type == GGML_TYPE_IQ2_XS  || new_type == GGML_TYPE_IQ2_XXS || new_type == GGML_TYPE_IQ2_S  ||
+        new_type == GGML_TYPE_IQ3_XXS || new_type == GGML_TYPE_IQ1_S   || new_type == GGML_TYPE_IQ3_S  ||
         new_type == GGML_TYPE_IQ1_M) {
         int nx = tensor->ne[0];
         int ny = tensor->ne[1];
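For context on the second hunk: convert_incompatible_tensor guards the fallback path for tensors that cannot be stored in the selected block-quantized type. The sketch below is not the upstream code; it only illustrates the kind of divisibility check this flag feeds into, assuming QK_K, the 256-value super-block size shared by the K- and IQ-quant formats in ggml, and a hypothetical helper row_is_block_compatible. A row whose length is not a multiple of the block size cannot be packed into whole blocks, so a fallback type must be chosen for that tensor.

#include <cstdio>

// Sketch only: QK_K mirrors the 256-value super-block size of the
// K-quant/IQ-quant formats; row_is_block_compatible is a hypothetical
// helper, not an upstream function.
static const int QK_K = 256;

static bool row_is_block_compatible(int nx) {
    // Block-quantized types store whole blocks, so the row length
    // (tensor->ne[0] in the diff above) must divide evenly into blocks.
    return nx % QK_K == 0;
}

int main() {
    printf("nx = 4096: %s\n", row_is_block_compatible(4096) ? "compatible" : "needs fallback");
    printf("nx = 1000: %s\n", row_is_block_compatible(1000) ? "compatible" : "needs fallback");
    return 0;
}

A typical hidden size such as 4096 passes the check, while a row length of 1000 would trigger the fallback path.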