From d7f794eadb4d9bf7073a3712302c2ab41b313107 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin <git@compilade.net>
Date: Fri, 24 Oct 2025 07:46:34 -0400
Subject: [PATCH] convert : avoid dequantizing mxfp4 for GPT-OSS

---
 convert_hf_to_gguf.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3e3db999c9..859c1443f5 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16