mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	gguf-py : add support for endian conversion of BF16 data (#16594)
BF16 requires special handling in this script because it is 2-byte data, but the view is 1-byte by default. Switch to the correct view before attempting byteswapping. With this change, correctly byteswapping models like Meta-Llama-3-8B-Instruct-bf16-GGUF should be possible.
This commit is contained in:
		 Aleksei Nikiforov
					Aleksei Nikiforov
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							466c1911ab
						
					
				
				
					commit
					7adc79c032
				
			| @@ -91,6 +91,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None | |||||||
|            tensor.tensor_type not in ( |            tensor.tensor_type not in ( | ||||||
|                 gguf.GGMLQuantizationType.F32, |                 gguf.GGMLQuantizationType.F32, | ||||||
|                 gguf.GGMLQuantizationType.F16, |                 gguf.GGMLQuantizationType.F16, | ||||||
|  |                 gguf.GGMLQuantizationType.BF16, | ||||||
|            ): |            ): | ||||||
|             raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}") |             raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}") | ||||||
|     logger.info(f"* Preparing to convert from {file_endian} to {order}") |     logger.info(f"* Preparing to convert from {file_endian} to {order}") | ||||||
| @@ -148,6 +149,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None | |||||||
|  |  | ||||||
|             # restore old shape in case it's ever used |             # restore old shape in case it's ever used | ||||||
|             tensor.data.resize(oldshape) |             tensor.data.resize(oldshape) | ||||||
|  |         elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16: | ||||||
|  |             # Special case for BF16 | ||||||
|  |             # It is 2-bytes data, but by default view loads it as 1-byte data. | ||||||
|  |             # Change to correct view before byteswapping. | ||||||
|  |             tensor.data.view(dtype=np.uint16).byteswap(inplace=True) | ||||||
|         else: |         else: | ||||||
|             # Handle other tensor types |             # Handle other tensor types | ||||||
|             tensor.data.byteswap(inplace=True) |             tensor.data.byteswap(inplace=True) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user