diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3e3db999c9..859c1443f5 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):  # override: bypasses the inherited dequant_model() for mxfp4 checkpoints
+        quant_config = self.hparams.get("quantization_config")  # may be None; guarded on the next line
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return  # quant_method is "mxfp4": do not call super().dequant_model() at all
+        return super().dequant_model()  # no config or any other quant_method: inherited behavior
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16