mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	gguf-py : add support for endian conversion of BF16 data (#16594)
BF16 requires special handling in this script because it is 2-byte data, but the default view treats it as 1-byte data. Switch to the correct view before attempting byteswapping. With this change, correctly byteswapping models like Meta-Llama-3-8B-Instruct-bf16-GGUF should be possible.
This commit is contained in:
		
				
					committed by
					
						
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							466c1911ab
						
					
				
				
					commit
					7adc79c032
				
			@@ -91,6 +91,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
 | 
			
		||||
           tensor.tensor_type not in (
 | 
			
		||||
                gguf.GGMLQuantizationType.F32,
 | 
			
		||||
                gguf.GGMLQuantizationType.F16,
 | 
			
		||||
                gguf.GGMLQuantizationType.BF16,
 | 
			
		||||
           ):
 | 
			
		||||
            raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
 | 
			
		||||
    logger.info(f"* Preparing to convert from {file_endian} to {order}")
 | 
			
		||||
@@ -148,6 +149,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
 | 
			
		||||
 | 
			
		||||
            # restore old shape in case it's ever used
 | 
			
		||||
            tensor.data.resize(oldshape)
 | 
			
		||||
        elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16:
 | 
			
		||||
            # Special case for BF16
 | 
			
		||||
            # It is 2-bytes data, but by default view loads it as 1-byte data.
 | 
			
		||||
            # Change to correct view before byteswapping.
 | 
			
		||||
            tensor.data.view(dtype=np.uint16).byteswap(inplace=True)
 | 
			
		||||
        else:
 | 
			
		||||
            # Handle other tensor types
 | 
			
		||||
            tensor.data.byteswap(inplace=True)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user