From d7f794eadb4d9bf7073a3712302c2ab41b313107 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Fri, 24 Oct 2025 07:46:34 -0400
Subject: [PATCH] convert : avoid dequantizing mxfp4 for GPT-OSS

---
Notes (reviewer commentary; this region is not part of the commit message
or of the diff):

  * Adds an override of dequant_model() to GptOssModel.
  * The override reads hparams["quantization_config"] (may be absent) and,
    when its "quant_method" entry equals "mxfp4", returns immediately
    without calling the parent implementation.
  * In every other case (no quantization_config, or a different
    quant_method) it defers to super().dequant_model(), so behaviour for
    those checkpoints is unchanged.
  * The in-code TODO marks the override as temporary: it is meant to be
    removed once MXFP4 is supported more generally.
  * NOTE(review): what the parent dequant_model() does is not visible in
    this patch; per the subject line it presumably dequantizes the
    checkpoint tensors - confirm against the base class.

 convert_hf_to_gguf.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3e3db999c9..859c1443f5 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16