	convert : for FP8, use scale type to decide auto type
@@ -376,11 +376,13 @@ class ModelBase:
                         weight_name = name.removesuffix("_scale_inv")
                         w = self.model_tensors[weight_name]
                         s = self.model_tensors[name]
+                        # TODO: change to FP8 once natively supported
+                        auto_qtype = s.auto_qtype if s.auto_qtype is not gguf.GGMLQuantizationType.F32 else gguf.GGMLQuantizationType.BF16
                         self.model_tensors[weight_name] = ModelTensorInfo(
                             load=lambda w=w, s=s: dequant_simple(w.load(), s.load()),
                             size=w.size,
                             src_type=w.src_type,
-                            auto_qtype=gguf.GGMLQuantizationType.BF16, # TODO: change to FP8 once natively supported
+                            auto_qtype=auto_qtype,
                         )
                         tensors_to_remove.append(name)
             elif quant_method == "gptq":
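The rationale: a dequantized FP8 tensor carries no more useful precision than its scale factors, so the converter now derives the output ("auto") type from the scale tensor's type instead of hard-coding BF16; an F32 scale still falls back to BF16 as a stand-in until FP8 is natively supported. Below is a minimal sketch of what a simple scale-based dequantization of this shape can look like; it is illustrative only, and the per-block layout, block_size, and the function body are assumptions rather than the converter's actual dequant_simple.

import torch

def dequant_simple(weight: torch.Tensor, scale_inv: torch.Tensor,
                   block_size: int = 128) -> torch.Tensor:
    # Hypothetical sketch: scale each block_size x block_size tile of the
    # FP8 weight by the matching entry of its per-block scale tensor
    # (named after the checkpoint's "_scale_inv" suffix).
    w = weight.float()
    s = scale_inv.float()
    out = torch.empty_like(w)
    rows, cols = w.shape
    for i in range(s.shape[0]):
        for j in range(s.shape[1]):
            r0, r1 = i * block_size, min((i + 1) * block_size, rows)
            c0, c1 = j * block_size, min((j + 1) * block_size, cols)
            out[r0:r1, c0:c1] = w[r0:r1, c0:c1] * s[i, j]
    # The result's precision is bounded by the scale's dtype, which is why
    # taking auto_qtype from the scale tensor is a sensible default.
    return out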
Author: Francis Couture-Harpin