mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	llama : add gguf_remove_key + remove split meta during quantize (#6591)
* Remove split metadata when quantizing model shards
* Find metadata key by enum
* Correct loop range for gguf_remove_key and fix code format
* Free kv memory

---------

Co-authored-by: z5269887 <z5269887@unsw.edu.au>
This commit is contained in:
		| @@ -13535,6 +13535,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s | ||||
|     gguf_set_kv     (ctx_out, ml.meta); | ||||
|     gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION); | ||||
|     gguf_set_val_u32(ctx_out, "general.file_type", ftype); | ||||
|     // Remove split metadata | ||||
|     gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str()); | ||||
|     gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str()); | ||||
|     gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str()); | ||||
|  | ||||
|     if (params->kv_overrides) { | ||||
|         const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 jiez
					jiez