From 7adc79c03234de9a20661fd6dbf2d02c32ca7acb Mon Sep 17 00:00:00 2001 From: Aleksei Nikiforov <103434461+AlekseiNikiforovIBM@users.noreply.github.com> Date: Wed, 15 Oct 2025 22:43:08 +0200 Subject: [PATCH] gguf-py : add support for endian conversion of BF16 data (#16594) BF16 requires special handling in this script because it is 2-byte data, but the view is 1-byte by default. Switch to the correct view before attempting byteswapping. With this change, correctly byteswapping models like Meta-Llama-3-8B-Instruct-bf16-GGUF should be possible. --- gguf-py/gguf/scripts/gguf_convert_endian.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gguf-py/gguf/scripts/gguf_convert_endian.py b/gguf-py/gguf/scripts/gguf_convert_endian.py index 211a3f536a..0bda490a20 100755 --- a/gguf-py/gguf/scripts/gguf_convert_endian.py +++ b/gguf-py/gguf/scripts/gguf_convert_endian.py @@ -91,6 +91,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None tensor.tensor_type not in ( gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16, + gguf.GGMLQuantizationType.BF16, ): raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}") logger.info(f"* Preparing to convert from {file_endian} to {order}") @@ -148,6 +149,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None # restore old shape in case it's ever used tensor.data.resize(oldshape) + elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16: + # Special case for BF16 + # It is 2-bytes data, but by default view loads it as 1-byte data. + # Change to correct view before byteswapping. + tensor.data.view(dtype=np.uint16).byteswap(inplace=True) else: # Handle other tensor types tensor.data.byteswap(inplace=True)