	llama : fix tensor name grepping during quantization
ggml-ci
@@ -3432,6 +3432,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         const std::string name = ggml_get_name(meta);
 
+        // TODO: avoid hardcoded tensor names - use the TN_* constants
         if (name.find("attn_v.weight") != std::string::npos) {
             ++n_attention_wv;
         }
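For context, the quantizer identifies tensors by substring search ("grepping") on their names, which is what the commit title refers to. Below is a minimal, self-contained sketch of the counting pattern in this hunk; the tensor names are illustrative GGUF-style names, not taken from the commit:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        // Hypothetical tensor names in the GGUF naming scheme (blk.N.<suffix>).
        const std::vector<std::string> names = {
            "blk.0.attn_v.weight", "blk.0.ffn_down.weight", "blk.1.attn_v.weight",
        };
        int n_attention_wv = 0;
        for (const std::string & name : names) {
            // Same pattern as the hunk above: match the substring anywhere in the name.
            if (name.find("attn_v.weight") != std::string::npos) {
                ++n_attention_wv;
            }
        }
        printf("n_attention_wv = %d\n", n_attention_wv); // prints 2
        return 0;
    }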
@@ -3510,6 +3511,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else {
             new_type = quantized_type;
 #ifdef GGML_USE_K_QUANTS
+            // TODO: avoid hardcoded tensor names - use the TN_* constants
             if (name == TN_OUTPUT) {
                 int nx = tensor->ne[0];
                 int ny = tensor->ne[1];
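The nx/ny values read here come from the tensor's ne[] dimension array in ggml: for a 2-D weight, ne[0] is the row length and ne[1] the number of rows. A minimal sketch of inspecting them (assumes ggml.h is available; the shape and name below are made up for illustration):

    #include "ggml.h"
    #include <cstdio>

    int main() {
        // Small scratch context; no_alloc = true means only tensor metadata is
        // allocated, so 1 MiB is plenty and no 512 MB data buffer is needed.
        struct ggml_init_params params = { /*mem_size*/ 1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ true };
        struct ggml_context * ctx = ggml_init(params);

        // Hypothetical output weight: 4096 columns x 32000 rows.
        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32000);
        ggml_set_name(t, "output.weight");

        const int nx = (int) t->ne[0];
        const int ny = (int) t->ne[1];
        printf("%s: nx = %d, ny = %d\n", ggml_get_name(t), nx, ny);

        ggml_free(ctx);
        return 0;
    }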
@@ -3524,7 +3526,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 else if (QK_K == 64 && (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S) &&
                         (i_attention_wv < n_attention_wv/8 || i_attention_wv >= 7*n_attention_wv/8)) new_type = GGML_TYPE_Q6_K;
                 ++i_attention_wv;
-            } else if (name.find("feed_forward.w2.weight") != std::string::npos) {
+            } else if (name.find("ffn_down.weight") != std::string::npos) {
                 if      (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q4_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
                 else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
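The only functional change in the commit is in this last hunk: after the GGUF conversion the feed-forward down-projection tensor is named ffn_down.weight rather than feed_forward.w2.weight, so the substring being grepped for is updated to match. For the heuristic visible in the context lines, here is a minimal sketch (not the commit's code) of how the first and last eighth of the attn_v.weight tensors get bumped to a higher-precision type:

    #include <cstdio>

    int main() {
        const int n_attention_wv = 32; // e.g. a 32-layer model
        for (int i_attention_wv = 0; i_attention_wv < n_attention_wv; ++i_attention_wv) {
            // Same condition as the context lines above: first eighth or last eighth.
            const bool bump = i_attention_wv <  n_attention_wv/8 ||
                              i_attention_wv >= 7*n_attention_wv/8;
            printf("attn_v %2d -> %s\n", i_attention_wv, bump ? "Q6_K" : "base type");
        }
        return 0;
    }

With 32 layers this selects layers 0-3 and 28-31, i.e. 8 of the 32 attn_v tensors.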