	convert : use f32 outtype for bf16 tensors (#6106)
The old behaviour was to default to f16, but converting bf16 to f16 is not lossless. Change the default outtype to f32 so that bf16 tensors are converted losslessly.
@@ -1167,9 +1167,9 @@ class OutputFile:
 def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
     wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight"].data_type
 
-    if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
+    if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)):
         return GGMLFileType.AllF32
-    if output_type_str == "f16" or (output_type_str is None and wq_type in (DT_F16, DT_BF16)):
+    if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16):
         return GGMLFileType.MostlyF16
     if output_type_str == "q8_0":
         return GGMLFileType.MostlyQ8_0
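The change is justified by the floating-point formats involved: bf16 keeps f32's 8-bit exponent but only 7 mantissa bits, so every bf16 value is exactly representable in f32, while f16's 5-bit exponent cannot cover bf16's dynamic range. A minimal sketch of that difference (assuming PyTorch is available; this is an illustration, not part of the convert script):

# Illustration only (PyTorch assumed): bf16 -> f16 can overflow/underflow,
# bf16 -> f32 preserves every value exactly, since bf16 shares f32's exponent.
import torch

x = torch.tensor([1.0e30, 1.0e-30, 1.0], dtype=torch.bfloat16)

as_f16 = x.to(torch.float16)   # 1e30 overflows f16's max (~65504) to inf,
                               # 1e-30 underflows f16's smallest subnormal to 0
as_f32 = x.to(torch.float32)   # the bf16 values, preserved exactly

print(as_f16)   # tensor([inf, 0., 1.], dtype=torch.float16)
print(as_f32)   # the original bf16 values, unchanged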
Author: Romain D