	llama : fix quantization when tensors are missing (#5423)
llama.cpp | 32 ++++++++++++++++++++++++--------
1 changed file with 24 additions and 8 deletions
@@ -772,22 +772,37 @@ struct LLM_TN {
     llm_arch arch;
 
     std::string operator()(llm_tensor tensor) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return LLM_TENSOR_NAMES[arch].at(tensor);
     }
 
     std::string operator()(llm_tensor tensor, const std::string & suffix) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return LLM_TENSOR_NAMES[arch].at(tensor) + "." + suffix;
     }
 
     std::string operator()(llm_tensor tensor, int bid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid);
     }
 
     std::string operator()(llm_tensor tensor, const std::string & suffix, int bid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid) + "." + suffix;
     }
 
     std::string operator()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const {
+        if (LLM_TENSOR_NAMES[arch].find(tensor) == LLM_TENSOR_NAMES[arch].end()) {
+            return "__missing__";
+        }
         return ::format(LLM_TENSOR_NAMES[arch].at(tensor).c_str(), bid, xid) + "." + suffix;
     }
 };
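
With this change, every LLM_TN lookup returns a "__missing__" sentinel when the architecture's name table has no entry for the requested tensor, instead of letting std::map::at() throw std::out_of_range. A minimal, self-contained sketch of the pattern (the toy enum, map, and name_of helper are illustrative stand-ins, not the actual llama.cpp code):

    // Stand-alone illustration of the "__missing__" sentinel pattern.
    #include <initializer_list>
    #include <iostream>
    #include <map>
    #include <string>

    enum llm_tensor { LLM_TENSOR_OUTPUT, LLM_TENSOR_ROPE_FREQS };

    int main() {
        // Toy stand-in for one architecture's entry in LLM_TENSOR_NAMES;
        // LLM_TENSOR_ROPE_FREQS is deliberately absent for this "arch".
        std::map<llm_tensor, std::string> names = {
            { LLM_TENSOR_OUTPUT, "output" },
        };

        auto name_of = [&](llm_tensor t) -> std::string {
            if (names.find(t) == names.end()) {
                return "__missing__"; // sentinel, as in the patch
            }
            return names.at(t);
        };

        for (llm_tensor t : { LLM_TENSOR_OUTPUT, LLM_TENSOR_ROPE_FREQS }) {
            const std::string name = name_of(t);
            if (name == "__missing__") {
                continue; // skip tensors this architecture does not define
            }
            std::cout << "would quantize: " << name << "\n";
        }
        return 0;
    }

Returning a sentinel keeps the call sites simple: the quantizer can compare a name against "__missing__" and skip the tensor, rather than wrapping every lookup in a try/catch.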
@@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
         }
         ++qs.i_ffn_up;
     }
+
     //    if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
     //}
     // IK: let's remove this, else Q2_K is almost the same as Q3_K_S
@@ -10286,19 +10302,19 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
         // K-quants
         case LLAMA_FTYPE_MOSTLY_Q2_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q2_K:   quantized_type = GGML_TYPE_Q2_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q2_K:    quantized_type = GGML_TYPE_Q2_K;    break;
         case LLAMA_FTYPE_MOSTLY_Q3_K_XS:
         case LLAMA_FTYPE_MOSTLY_Q3_K_S:
         case LLAMA_FTYPE_MOSTLY_Q3_K_M:
-        case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  quantized_type = GGML_TYPE_Q3_K;    break;
         case LLAMA_FTYPE_MOSTLY_Q4_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  quantized_type = GGML_TYPE_Q4_K;    break;
         case LLAMA_FTYPE_MOSTLY_Q5_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
-        case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
-        case LLAMA_FTYPE_MOSTLY_IQ2_XXS:quantized_type = GGML_TYPE_IQ2_XXS; break;
-        case LLAMA_FTYPE_MOSTLY_IQ2_XS :quantized_type = GGML_TYPE_IQ2_XS;  break;
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXS:quantized_type = GGML_TYPE_IQ3_XXS; break;
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  quantized_type = GGML_TYPE_Q5_K;    break;
+        case LLAMA_FTYPE_MOSTLY_Q6_K:    quantized_type = GGML_TYPE_Q6_K;    break;
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break;
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  quantized_type = GGML_TYPE_IQ2_XS;  break;
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break;
 
         default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
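
The realignment above is cosmetic; each LLAMA_FTYPE_MOSTLY_* file type still maps to the same GGML_TYPE_*. A minimal sketch of exercising this switch through the public llama.h quantization API (the file paths and thread count are placeholders):

    // Quantize an F16 GGUF to Q4_K_M, which the switch above resolves
    // to GGML_TYPE_Q4_K for most tensors.
    #include "llama.h"
    #include <cstdio>

    int main() {
        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype   = LLAMA_FTYPE_MOSTLY_Q4_K_M;
        params.nthread = 4; // placeholder thread count

        const uint32_t rc = llama_model_quantize(
            "model-f16.gguf",    // placeholder input path
            "model-q4_k_m.gguf", // placeholder output path
            &params);

        if (rc != 0) {
            fprintf(stderr, "quantization failed\n");
            return 1;
        }
        return 0;
    }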